#!/usr/bin/env python2
"""
grammar_gen.py - Use pgen2 to generate tables from Oil's grammar.
"""
from __future__ import print_function

import os
import sys

from _devbuild.gen.id_kind_asdl import Id, Kind
from _devbuild.gen.syntax_asdl import source

from core import alloc
from core import optview
from mycpp.mylib import log
from frontend import lexer
from frontend import lexer_def
from frontend import reader
from pgen2 import parse, pgen, token


class OilTokenDef(object):

    def __init__(self, ops, more_ops, keyword_ops):
        self.ops = ops
        self.more_ops = more_ops
        self.keyword_ops = keyword_ops

    def GetTerminalNum(self, label):
        """e.g. translate Expr_Name in the grammar to 178."""
        id_ = getattr(Id, label)
        #log('Id %s = %d', id_, id_)
        assert id_ < token.NT_OFFSET, id_
        return id_

    def GetKeywordNum(self, s):
        """e.g. 'xor' -> Id.Expr_Xor.

        Python doesn't have this, but Oil does.  Returns None if not
        found.
        """
        id_ = self.keyword_ops.get(s)
        if id_ is None:
            return None
        assert id_ < token.NT_OFFSET, id_
        return id_

    def GetOpNum(self, op_str):
        """
        Args:
          op_str: an operator string like '>='

        Returns:
          The integer token ID, e.g. Id.Arith_GreatEqual for '>='.
        """
        # self.more_ops[op_str] raises KeyError if the op is in neither dict.
        id_ = self.ops.get(op_str) or self.more_ops[op_str]
        assert id_ < token.NT_OFFSET, id_
        return id_


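# An OilTokenDef instance is passed as tok_def to pgen.MakeGrammar() below, so
# grammar terminals can resolve to Oil's token IDs: GetTerminalNum() for names
# like Expr_Name, GetOpNum() for operator strings like '>=', and
# GetKeywordNum() for keywords like 'xor'.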
def MakeOilLexer(code_str, arena):
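    """Make a Lexer that reads code_str, with locations tracked in the arena."""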
    arena.PushSource(source.MainFile('pgen2_main'))
    line_reader = reader.StringLineReader(code_str, arena)
    line_lexer = lexer.LineLexer(arena)
    lex = lexer.Lexer(line_lexer, line_reader)
    return lex


def main(argv):
    action = argv[1]
    argv = argv[2:]

    # Used at grammar BUILD time.
    OPS = {
        '!': Id.Expr_Bang,
        '.': Id.Expr_Dot,
        '..': Id.Expr_DDot,
        '->': Id.Expr_RArrow,
        '=>': Id.Expr_RDArrow,
        '//': Id.Expr_DSlash,
        '++': Id.Arith_DPlus,
        '!~': Id.Expr_NotTilde,
        '~~': Id.Expr_DTilde,
        '!~~': Id.Expr_NotDTilde,
        '~==': Id.Expr_TildeDEqual,
        '===': Id.Expr_TEqual,
        '!==': Id.Expr_NotDEqual,
        '@': Id.Expr_At,
        '...': Id.Expr_Ellipsis,
        '$': Id.Expr_Dollar,  # Only for legacy eggex /d+$/
        '**=': Id.Expr_DStarEqual,
        '//=': Id.Expr_DSlashEqual,
    }

    # Note: We have two lists of ops because Id.Op_Semi is used, not
    # Id.Arith_Semi.
    for _, token_str, id_ in lexer_def.EXPR_OPS:
        assert token_str not in OPS, token_str
        OPS[token_str] = id_

    # Tokens that look like / or ${ or @{
    triples = (lexer_def.ID_SPEC.LexerPairs(Kind.Arith) +
               lexer_def.YSH_LEFT_SUBS + lexer_def.YSH_LEFT_UNQUOTED +
               lexer_def.EXPR_WORDS)
    more_ops = {}
    for _, token_str, id_ in triples:
        if token_str in more_ops:
            import pprint
            raise AssertionError(
                '%r %s' % (token_str, pprint.pformat(more_ops, indent=2)))
        more_ops[token_str] = id_

    # Tokens that look like 'for'
    keyword_ops = {}
    for _, token_str, id_ in lexer_def.EXPR_WORDS:  # for, in, etc.
        assert token_str not in keyword_ops, token_str
        keyword_ops[token_str] = id_

    if 0:
        from pprint import pprint
        pprint(OPS)
        print('---')
        pprint(more_ops)
        print('---')
        pprint(keyword_ops)
        print('---')

    tok_def = OilTokenDef(OPS, more_ops, keyword_ops)

    if action == 'py':  # generate Python parsing tables from the grammar
        grammar_path = argv[0]
        out_dir = argv[1]

        basename, _ = os.path.splitext(os.path.basename(grammar_path))

        # HACK for find:
        if basename == 'find':
            from tools.find import tokenizer as find_tokenizer
            tok_def = find_tokenizer.TokenDef()

        with open(grammar_path) as f:
            gr = pgen.MakeGrammar(f, tok_def=tok_def)

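        # Two outputs: a marshal file holding the serialized parsing tables,
        # and a Python module with an integer constant per nonterminal.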
        marshal_path = os.path.join(out_dir, basename + '.marshal')
        with open(marshal_path, 'wb') as out_f:
            gr.dump(out_f)

        nonterm_py = os.path.join(out_dir, basename + '_nt.py')
        with open(nonterm_py, 'w') as out_f:
            gr.dump_nonterminals_py(out_f)

        log('%s -> (ysh/grammar_gen) -> %s/%s{.marshal,_nt.py}', grammar_path,
            out_dir, basename)

        #gr.report()

    elif action == 'cpp':  # generate C++ parsing tables from the grammar
        grammar_path = argv[0]
        out_dir = argv[1]

        basename, _ = os.path.splitext(os.path.basename(grammar_path))

        with open(grammar_path) as f:
            gr = pgen.MakeGrammar(f, tok_def=tok_def)

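        # C++ analogue of the 'py' action: a header of nonterminal constants
        # and a .cc file with the parsing tables.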
        nonterm_h = os.path.join(out_dir, basename + '_nt.h')
        with open(nonterm_h, 'w') as out_f:
            gr.dump_nonterminals_cpp(out_f)

        grammar_cpp_path = os.path.join(out_dir, basename + '_tables.cc')
        with open(grammar_cpp_path, 'w') as src_f:
            gr.dump_cpp(src_f)

        if 0:
            log('%s -> (ysh/grammar_gen) -> %s/%s_nt.h', grammar_path,
                out_dir, basename)

    elif action == 'parse':  # generate the grammar, then parse code with it
        # Imported here to avoid a build dependency
        from frontend import parse_lib
        from ysh import expr_parse
        from ysh import expr_to_ast

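        # Example invocation (grammar path is illustrative):
        #   grammar_gen.py parse ysh/grammar.pgen2 eval_input '1 + 2'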
        grammar_path = argv[0]
        start_symbol = argv[1]
        code_str = argv[2]

        # For choosing lexer and semantic actions
        grammar_name, _ = os.path.splitext(os.path.basename(grammar_path))

        with open(grammar_path) as f:
            gr = pgen.MakeGrammar(f, tok_def=tok_def)

        arena = alloc.Arena()
        lex_ = MakeOilLexer(code_str, arena)

        is_expr = grammar_name in ('calc', 'grammar')

        parse_opts = optview.Parse([], [])
        parse_ctx = parse_lib.ParseContext(arena, parse_opts, {}, gr)
        p = expr_parse.ExprParser(parse_ctx, gr, False)
        try:
            with expr_parse.ctx_PNodeAllocator(p):
                pnode, _ = p.Parse(lex_, gr.symbol2number[start_symbol])
        except parse.ParseError as e:
            log('Parse Error: %s', e)
            return 1

        names = expr_to_ast.MakeGrammarNames(gr)
        p_printer = expr_parse.ParseTreePrinter(names)  # print raw nodes
        p_printer.Print(pnode)

        if is_expr:
            tr = expr_to_ast.Transformer(gr)
            if start_symbol == 'eval_input':
                ast_node = tr.Expr(pnode)
            elif start_symbol == 'ysh_case_pat':
                ast_node = tr.YshCasePattern(pnode)
            else:
                ast_node = tr.VarDecl(pnode)
            ast_node.PrettyPrint()
            print()

    elif action == 'stdlib-test':
        # This shows how deep Python's parse tree is.  It doesn't use semantic
        # actions to prune on the fly!

        import parser  # builtin module (Python 2)
        t = parser.expr('1+2')
        print(t)
        t2 = parser.st2tuple(t)
        print(t2)

    else:
        raise RuntimeError('Invalid action %r' % action)


if __name__ == '__main__':
    try:
        sys.exit(main(sys.argv))
    except RuntimeError as e:
        print('FATAL: %s' % e, file=sys.stderr)
        sys.exit(1)