#!/usr/bin/env python2
"""
grammar_gen.py - Use pgen2 to generate tables from Oil's grammar.
"""
from __future__ import print_function

import os
import sys

from _devbuild.gen.id_kind_asdl import Id, Kind
from _devbuild.gen.syntax_asdl import source

from core import alloc
from core import optview
from mycpp.mylib import log
from frontend import lexer
from frontend import lexer_def
from frontend import reader
from pgen2 import parse, pgen, token


class OilTokenDef(object):
  """Maps token strings in the grammar to integer token Ids."""

  def __init__(self, ops, more_ops, keyword_ops):
    self.ops = ops
    self.more_ops = more_ops
    self.keyword_ops = keyword_ops

  def GetTerminalNum(self, label):
    """e.g. translate Expr_Name in the grammar to 178."""
    id_ = getattr(Id, label)
    #log('Id %s = %d', id_, id_)
    assert id_ < token.NT_OFFSET, id_
    return id_

  def GetKeywordNum(self, s):
    """e.g. translate 'xor' to Id.Expr_Xor.

    Python doesn't have keyword lookup, but Oil does.  Returns None if the
    word isn't a keyword.
    """
    id_ = self.keyword_ops.get(s)
    if id_ is None:
      return None
    assert id_ < token.NT_OFFSET, id_
    return id_

  def GetOpNum(self, op_str):
    """
    Args:
      op_str: operator text like '>='

    Returns:
      The integer Id, e.g. Id.Arith_GreatEqual for '>='
    """
    # Unlike GetKeywordNum(), this raises KeyError if op_str is in neither
    # table.
    id_ = self.ops.get(op_str) or self.more_ops[op_str]
    assert id_ < token.NT_OFFSET, id_
    return id_
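
# A rough sketch of how these lookups behave (the exact integers come from
# the generated Id enum and vary by build):
#
#   tok_def = OilTokenDef(OPS, more_ops, keyword_ops)  # tables built in main()
#   tok_def.GetTerminalNum('Expr_Name')  # -> the integer for Id.Expr_Name
#   tok_def.GetOpNum('!~')               # -> Id.Expr_NotTilde, per OPS below
#   tok_def.GetKeywordNum('for')         # -> the Id for 'for', or None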


def MakeOilLexer(code_str, arena):
  arena.PushSource(source.MainFile('pgen2_main'))
  line_reader = reader.StringLineReader(code_str, arena)
  line_lexer = lexer.LineLexer(arena)
  lex = lexer.Lexer(line_lexer, line_reader)
  return lex
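
# Sketch: the 'parse' action below uses this to lex a code string held
# entirely in memory, e.g.
#
#   arena = alloc.Arena()
#   lex = MakeOilLexer('1 + 2', arena)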


def main(argv):
  action = argv[1]
  argv = argv[2:]

  # Used at grammar BUILD time.
  OPS = {
      '!': Id.Expr_Bang,
      '.': Id.Expr_Dot,
      '..': Id.Expr_DDot,
      '->': Id.Expr_RArrow,
      '=>': Id.Expr_RDArrow,
      '//': Id.Expr_DSlash,
      '++': Id.Arith_DPlus,
      '!~': Id.Expr_NotTilde,
      '~~': Id.Expr_DTilde,
      '!~~': Id.Expr_NotDTilde,
      '~==': Id.Expr_TildeDEqual,
      '===': Id.Expr_TEqual,
      '!==': Id.Expr_NotDEqual,
      '@': Id.Expr_At,
      '...': Id.Expr_Ellipsis,
      '$': Id.Expr_Dollar,  # Only for legacy eggex /d+$/
      '**=': Id.Expr_DStarEqual,
      '//=': Id.Expr_DSlashEqual,
  }

  # Note: We have two lists of ops because Id.Op_Semi is used, not
  # Id.Arith_Semi.
  for _, token_str, id_ in lexer_def.EXPR_OPS:
    assert token_str not in OPS, token_str
    OPS[token_str] = id_

  # Tokens that look like / or ${ or @{
  triples = (lexer_def.ID_SPEC.LexerPairs(Kind.Arith) +
             lexer_def.YSH_LEFT_SUBS + lexer_def.YSH_LEFT_UNQUOTED +
             lexer_def.EXPR_WORDS)
  more_ops = {}
  for _, token_str, id_ in triples:
    if token_str in more_ops:
      import pprint
      raise AssertionError(
          '%r %s' % (token_str, pprint.pformat(more_ops, indent=2)))
    more_ops[token_str] = id_

  # Tokens that look like 'for'
  keyword_ops = {}
  for _, token_str, id_ in lexer_def.EXPR_WORDS:  # for, in, etc.
    assert token_str not in keyword_ops, token_str
    keyword_ops[token_str] = id_

  if 0:  # flip to 1 to dump the three tables
    from pprint import pprint
    pprint(OPS)
    print('---')
    pprint(more_ops)
    print('---')
    pprint(keyword_ops)
    print('---')

  tok_def = OilTokenDef(OPS, more_ops, keyword_ops)
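
  # At this point the three tables are:
  #   OPS         - operators declared above, plus lexer_def.EXPR_OPS
  #   more_ops    - arithmetic ops, left delimiters like ${, and EXPR_WORDS
  #   keyword_ops - just the words from lexer_def.EXPR_WORDS ('for', 'in', ...)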

  if action == 'py':  # generate Python parse tables
    grammar_path = argv[0]
    out_dir = argv[1]

    basename, _ = os.path.splitext(os.path.basename(grammar_path))

    # HACK for find:
    if basename == 'find':
      from tools.find import tokenizer as find_tokenizer
      tok_def = find_tokenizer.TokenDef()

    with open(grammar_path) as f:
      gr = pgen.MakeGrammar(f, tok_def=tok_def)

    marshal_path = os.path.join(out_dir, basename + '.marshal')
    with open(marshal_path, 'wb') as out_f:
      gr.dump(out_f)

    nonterm_py = os.path.join(out_dir, basename + '_nt.py')
    with open(nonterm_py, 'w') as out_f:
      gr.dump_nonterminals_py(out_f)

    log('%s -> (ysh/grammar_gen) -> %s/%s{.marshal,_nt.py}', grammar_path,
        out_dir, basename)

    #gr.report()
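
    # Hypothetical invocation (paths are illustrative):
    #   grammar_gen.py py ysh/grammar.pgen2 _devbuild/gen
    # would write _devbuild/gen/grammar.marshal and _devbuild/gen/grammar_nt.py.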

  elif action == 'cpp':  # generate C++ parse tables
    grammar_path = argv[0]
    out_dir = argv[1]

    basename, _ = os.path.splitext(os.path.basename(grammar_path))

    with open(grammar_path) as f:
      gr = pgen.MakeGrammar(f, tok_def=tok_def)

    nonterm_h = os.path.join(out_dir, basename + '_nt.h')
    with open(nonterm_h, 'w') as out_f:
      gr.dump_nonterminals_cpp(out_f)

    grammar_cpp_path = os.path.join(out_dir, basename + '_tables.cc')
    with open(grammar_cpp_path, 'w') as src_f:
      gr.dump_cpp(src_f)

    if 0:
      log('%s -> (ysh/grammar_gen) -> %s/%s{_nt.h,_tables.cc}', grammar_path,
          out_dir, basename)
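
    # Hypothetical invocation (paths are illustrative):
    #   grammar_gen.py cpp ysh/grammar.pgen2 _gen/ysh
    # would write _gen/ysh/grammar_nt.h and _gen/ysh/grammar_tables.cc.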

  elif action == 'parse':  # generate the grammar, then parse code with it
    # Imported here rather than at the top to avoid a build-time dependency.
    from frontend import parse_lib
    from ysh import expr_parse
    from ysh import expr_to_ast

    grammar_path = argv[0]
    start_symbol = argv[1]
    code_str = argv[2]

    # For choosing lexer and semantic actions
    grammar_name, _ = os.path.splitext(os.path.basename(grammar_path))

    with open(grammar_path) as f:
      gr = pgen.MakeGrammar(f, tok_def=tok_def)

    arena = alloc.Arena()
    lex_ = MakeOilLexer(code_str, arena)

    is_expr = grammar_name in ('calc', 'grammar')

    parse_opts = optview.Parse([], [])
    parse_ctx = parse_lib.ParseContext(arena, parse_opts, {}, gr)
    p = expr_parse.ExprParser(parse_ctx, gr, False)
    try:
      with expr_parse.ctx_PNodeAllocator(p):
        pnode, _ = p.Parse(lex_, gr.symbol2number[start_symbol])
    except parse.ParseError as e:
      log('Parse Error: %s', e)
      return 1

    names = expr_to_ast.MakeGrammarNames(gr)
    p_printer = expr_parse.ParseTreePrinter(names)  # print raw nodes
    p_printer.Print(pnode)

    if is_expr:
      tr = expr_to_ast.Transformer(gr)
      if start_symbol == 'eval_input':
        ast_node = tr.Expr(pnode)
      elif start_symbol == 'ysh_case_pat':
        ast_node = tr.YshCasePattern(pnode)
      else:
        ast_node = tr.VarDecl(pnode)
      ast_node.PrettyPrint()
      print()
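
    # Hypothetical invocation:
    #   grammar_gen.py parse ysh/grammar.pgen2 eval_input '1 + 2'
    # prints the raw pgen2 parse tree, then the transformed expression AST.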

  elif action == 'stdlib-test':
    # This shows how deep Python's parse tree is.  It doesn't use semantic
    # actions to prune on the fly!

    import parser  # builtin module
    t = parser.expr('1+2')
    print(t)
    t2 = parser.st2tuple(t)
    print(t2)
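
    # st2tuple() returns deeply nested tuples of grammar symbol numbers, one
    # level per rule in Python's grammar, which is the depth being shown.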

  else:
    raise RuntimeError('Invalid action %r' % action)


if __name__ == '__main__':
  try:
    sys.exit(main(sys.argv))
  except RuntimeError as e:
    print('FATAL: %s' % e, file=sys.stderr)
    sys.exit(1)
|