#!/usr/bin/env python2
"""
grammar_gen.py - Use pgen2 to generate tables from Oil's grammar.
"""
from __future__ import print_function

import os
import sys

from _devbuild.gen.id_kind_asdl import Id, Kind
from _devbuild.gen.syntax_asdl import source

from core import alloc
from core import optview
from mycpp.mylib import log
from frontend import lexer
from frontend import lexer_def
from frontend import reader
from pgen2 import parse, pgen, token


class OilTokenDef(object):
    """Maps operator and keyword strings from the grammar to token Ids."""

    def __init__(self, ops, more_ops, keyword_ops):
        self.ops = ops
        self.more_ops = more_ops
        self.keyword_ops = keyword_ops

    def GetTerminalNum(self, label):
        """e.g. translate Expr_Name in the grammar to 178."""
        id_ = getattr(Id, label)
        #log('Id %s = %d', id_, id_)
        # Terminals must be numbered below pgen2's nonterminal offset.
        assert id_ < token.NT_OFFSET, id_
        return id_

    def GetKeywordNum(self, s):
        """e.g. 'xor' -> Id.Expr_Xor.

        Python doesn't have token Ids for keywords, but Oil does.
        Returns None if the keyword isn't found.
        """
        id_ = self.keyword_ops.get(s)
        if id_ is None:
            return None
        assert id_ < token.NT_OFFSET, id_
        return id_

    def GetOpNum(self, op_str):
        """
        Args:
          op_str: an operator string like '>='

        Returns:
          The integer Id for the operator, e.g. Id.Arith_GreatEqual for '>='
        """
        # Check both tables; fails with KeyError if the operator is in
        # neither one.
        id_ = self.ops.get(op_str) or self.more_ops[op_str]
        assert id_ < token.NT_OFFSET, id_
        return id_
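

# A rough sketch of how pgen2 consults OilTokenDef while compiling the
# grammar (the actual calls live inside pgen.MakeGrammar):
#
#   tok_def.GetTerminalNum('Expr_Name')  # bare terminal name in the grammar
#   tok_def.GetOpNum('>=')               # quoted operator string
#   tok_def.GetKeywordNum('xor')         # quoted keyword; None if unknown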


def MakeOilLexer(code_str, arena):
    arena.PushSource(source.MainFile('pgen2_main'))
    line_reader = reader.StringLineReader(code_str, arena)
    line_lexer = lexer.LineLexer(arena)
    lex = lexer.Lexer(line_lexer, line_reader)
    return lex
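

# Illustrative use (this is what the 'parse' action below does):
#
#   arena = alloc.Arena()
#   lex = MakeOilLexer('1 + 2', arena)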


def main(argv):
    action = argv[1]
    argv = argv[2:]

    # Used at grammar BUILD time.
    OPS = {
        '!': Id.Expr_Bang,
        '.': Id.Expr_Dot,
        '..': Id.Expr_DDot,
        '->': Id.Expr_RArrow,
        '=>': Id.Expr_RDArrow,
        '//': Id.Expr_DSlash,
        '++': Id.Arith_DPlus,
        '!~': Id.Expr_NotTilde,
        '~~': Id.Expr_DTilde,
        '!~~': Id.Expr_NotDTilde,
        '~==': Id.Expr_TildeDEqual,
        '===': Id.Expr_TEqual,
        '!==': Id.Expr_NotDEqual,
        '@': Id.Expr_At,
        '...': Id.Expr_Ellipsis,
        '$': Id.Expr_Dollar,  # Only for legacy eggex /d+$/
        '**=': Id.Expr_DStarEqual,
        '//=': Id.Expr_DSlashEqual,
    }

    # Note: We have two dicts of ops because Id.Op_Semi is used, not
    # Id.Arith_Semi.
    for _, token_str, id_ in lexer_def.EXPR_OPS:
        assert token_str not in OPS, token_str
        OPS[token_str] = id_

    # Tokens that look like / or ${ or @{
    triples = (lexer_def.ID_SPEC.LexerPairs(Kind.Arith) +
               lexer_def.YSH_LEFT_SUBS + lexer_def.YSH_LEFT_UNQUOTED +
               lexer_def.EXPR_WORDS)
    more_ops = {}
    for _, token_str, id_ in triples:
        if token_str in more_ops:
            import pprint
            raise AssertionError(
                '%r %s' % (token_str, pprint.pformat(more_ops, indent=2)))
        more_ops[token_str] = id_

    # Tokens that look like 'for'
    keyword_ops = {}
    for _, token_str, id_ in lexer_def.EXPR_WORDS:  # for, in, etc.
        assert token_str not in keyword_ops, token_str
        keyword_ops[token_str] = id_

    if 0:  # flip to 1 to inspect the token tables
        from pprint import pprint
        pprint(OPS)
        print('---')
        pprint(more_ops)
        print('---')
        pprint(keyword_ops)
        print('---')

    tok_def = OilTokenDef(OPS, more_ops, keyword_ops)

    if action == 'py':  # generate Python tables from the grammar
        grammar_path = argv[0]
        out_dir = argv[1]

        basename, _ = os.path.splitext(os.path.basename(grammar_path))

        # HACK for the 'find' tool, which has its own token definitions:
        if basename == 'find':
            from tools.find import tokenizer as find_tokenizer
            tok_def = find_tokenizer.TokenDef()

        with open(grammar_path) as f:
            gr = pgen.MakeGrammar(f, tok_def=tok_def)

        marshal_path = os.path.join(out_dir, basename + '.marshal')
        with open(marshal_path, 'wb') as out_f:
            gr.dump(out_f)

        nonterm_py = os.path.join(out_dir, basename + '_nt.py')
        with open(nonterm_py, 'w') as out_f:
            gr.dump_nonterminals_py(out_f)

        log('%s -> (ysh/grammar_gen) -> %s/%s{.marshal,_nt.py}', grammar_path,
            out_dir, basename)

        #gr.report()
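
        # Illustrative invocation (the paths are assumptions, based on the
        # log line above):
        #
        #   grammar_gen.py py ysh/grammar.pgen2 _devbuild/gen
        #
        # writes _devbuild/gen/grammar.marshal and _devbuild/gen/grammar_nt.py.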

    elif action == 'cpp':  # generate C++ tables from the grammar
        grammar_path = argv[0]
        out_dir = argv[1]

        basename, _ = os.path.splitext(os.path.basename(grammar_path))

        with open(grammar_path) as f:
            gr = pgen.MakeGrammar(f, tok_def=tok_def)

        nonterm_h = os.path.join(out_dir, basename + '_nt.h')
        with open(nonterm_h, 'w') as out_f:
            gr.dump_nonterminals_cpp(out_f)

        grammar_cpp_path = os.path.join(out_dir, basename + '_tables.cc')
        with open(grammar_cpp_path, 'w') as src_f:
            gr.dump_cpp(src_f)

        if 0:
            log('%s -> (ysh/grammar_gen) -> %s/%s_nt.h', grammar_path,
                out_dir, basename)
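
        # Illustrative invocation (the output directory is an assumption):
        #
        #   grammar_gen.py cpp ysh/grammar.pgen2 _gen/ysh
        #
        # writes _gen/ysh/grammar_nt.h and _gen/ysh/grammar_tables.cc.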

    elif action == 'parse':  # generate the grammar, then parse a string with it
        # Imported here so the build-time actions above don't depend on
        # these modules.
        from frontend import parse_lib
        from ysh import expr_parse
        from ysh import expr_to_ast

        grammar_path = argv[0]
        start_symbol = argv[1]
        code_str = argv[2]

        # For choosing the lexer and semantic actions
        grammar_name, _ = os.path.splitext(os.path.basename(grammar_path))

        with open(grammar_path) as f:
            gr = pgen.MakeGrammar(f, tok_def=tok_def)

        arena = alloc.Arena()
        lex_ = MakeOilLexer(code_str, arena)

        is_expr = grammar_name in ('calc', 'grammar')

        parse_opts = optview.Parse([], [])
        parse_ctx = parse_lib.ParseContext(arena, parse_opts, {}, gr)
        p = expr_parse.ExprParser(parse_ctx, gr, False)
        try:
            with expr_parse.ctx_PNodeAllocator(p):
                pnode, _ = p.Parse(lex_, gr.symbol2number[start_symbol])
        except parse.ParseError as e:
            log('Parse Error: %s', e)
            return 1

        names = expr_to_ast.MakeGrammarNames(gr)
        p_printer = expr_parse.ParseTreePrinter(names)  # print raw nodes
        p_printer.Print(pnode)

        if is_expr:
            tr = expr_to_ast.Transformer(gr)
            if start_symbol == 'eval_input':
                ast_node = tr.Expr(pnode)
            elif start_symbol == 'ysh_case_pat':
                ast_node = tr.YshCasePattern(pnode)
            else:
                ast_node = tr.VarDecl(pnode)
            ast_node.PrettyPrint()
            print()
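
        # Illustrative invocation (the grammar path and input string are
        # assumptions):
        #
        #   grammar_gen.py parse ysh/grammar.pgen2 eval_input '1 + 2'
        #
        # prints the raw parse tree, then the transformed AST.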

    elif action == 'stdlib-test':
        # This shows how deep Python's parse tree is.  It doesn't use
        # semantic actions to prune the tree on the fly!

        import parser  # builtin module
        t = parser.expr('1+2')
        print(t)
        t2 = parser.st2tuple(t)
        print(t2)
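
        # Note: parser.expr() returns an opaque ST object; st2tuple()
        # converts it to nested tuples of grammar symbol numbers, with
        # token strings at the leaves.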

    else:
        raise RuntimeError('Invalid action %r' % action)


if __name__ == '__main__':
    try:
        sys.exit(main(sys.argv))
    except RuntimeError as e:
        print('FATAL: %s' % e, file=sys.stderr)
        sys.exit(1)
|