OILS / opy / tools / astgen.py View on Github | oilshell.org

341 lines, 256 significant
1#!/usr/bin/env python2
2"""Generate ast module from specification
3
4This script generates the ast module from a simple specification,
5which makes it easy to accommodate changes in the grammar. This
6approach would be quite reasonable if the grammar changed often.
7Instead, it is rather complex to generate the appropriate code. And
8the Node interface has changed more often than the grammar.
9"""
10from __future__ import print_function
11
12import fileinput
13import re
14import sys
15import cStringIO
16
# Separator used by NodeInfo when joining several "self.<name>" expressions
# into the return statement of a generated method.
COMMA = ", "
18
def load_boilerplate(file):
    """Extract the prologue and epilogue template text embedded in *file*.

    The file must contain a PROLOGUE marker comment followed, later on, by an
    EPILOGUE marker comment (each is three hashes, a space, then the word).
    Everything between the two markers is the prologue; everything after the
    second marker is the epilogue.

    Args:
        file: path of the file to scan.  main() passes this script's own path.

    Returns:
        A (prologue, epilogue) tuple of strings, each stripped of leading and
        trailing whitespace.

    Raises:
        ValueError: if either marker is missing, or the EPILOGUE marker comes
            before the PROLOGUE marker.
    """
    # Each marker is written as two adjacent literals so that this function's
    # own source never contains the contiguous marker text; otherwise find()
    # would stop here instead of at the real markers when scanning this file.
    pro_marker = '### ''PROLOGUE'
    epi_marker = '### ''EPILOGUE'

    with open(file) as f:
        buf = f.read()

    i = buf.find(pro_marker)
    j = buf.find(epi_marker)
    if i == -1 or j == -1 or j < i:
        raise ValueError(
            "%s: missing or misordered PROLOGUE/EPILOGUE markers" % file)

    pro = buf[i + len(pro_marker):j].strip()
    epi = buf[j + len(epi_marker):].strip()
    return pro, epi
28
def strip_default(arg):
    """Return the argument name from an 'arg = default' string.

    A string containing no '=' is handed back untouched (it is not stripped).
    Otherwise the text before the first '=' is returned with surrounding
    whitespace removed.
    """
    if '=' not in arg:
        return arg
    name, _, _default = arg.partition('=')
    return name.strip()
36
# Argument property codes.  A one-character suffix on an argument name in the
# spec selects the code.  NodeInfo.get_argprops() keeps the largest code seen
# for a node as `hardest_arg` and the generators compare it with `<`, so the
# numeric ordering of these values is significant.
P_NODE = 1    # It's another node.
P_OTHER = 2   # * means it's a Python value like string, int, tuple, etc.
P_NESTED = 3  # ! means this is a list of more nodes.
P_NONE = 4    # & means it's a node or None.
41
class NodeInfo(object):
    """Each instance describes a specific AST node.

    Attributes:
        name: class name of the node.
        args: the argument text from the spec, stripped; property markers
            ('*', '!', '&') are removed from it by get_argprops().
        argnames: list of argument names without defaults or markers.
        argprops: dict mapping each argument name to one of the P_* codes.
        hardest_arg: the largest P_* code among the arguments.
        nargs: number of arguments.
        init: extra source lines to append to the generated __init__.
    """

    def __init__(self, name, args):
        self.name = name
        self.args = args.strip()
        self.argnames = self.get_argnames()
        # NOTE: get_argprops() rewrites self.argnames and self.args in place,
        # so it must run after get_argnames() and before nargs is computed.
        self.argprops = self.get_argprops()
        self.nargs = len(self.argnames)
        self.init = []

    def get_argnames(self):
        """Return the argument names from self.args, with defaults removed.

        When self.args contains parentheses, only the text between the first
        '(' and the last ')' is considered.  Pieces that are empty or only
        whitespace (e.g. after a trailing comma) are skipped so that they
        cannot turn into an empty argument name.
        """
        if '(' in self.args:
            i = self.args.find('(')
            j = self.args.rfind(')')
            args = self.args[i+1:j]
        else:
            args = self.args
        return [strip_default(arg.strip())
                for arg in args.split(',') if arg.strip()]

    def get_argprops(self):
        """Each argument can have a property like '*' or '!'

        Returns a dict mapping argument name -> P_* code and records the
        largest code in self.hardest_arg.

        XXX This method modifies the argnames in place!  The marker suffix is
        removed from each entry of self.argnames, and, if any marker was
        found, every marker character is removed from self.args too.
        """
        # Built inside the method (not at class level) so the P_* constants
        # are only looked up when the method runs.
        markers = (('*', P_OTHER), ('!', P_NESTED), ('&', P_NONE))

        d = {}
        hardest_arg = P_NODE
        for i, arg in enumerate(self.argnames):
            prop = P_NODE
            for suffix, code in markers:
                if arg.endswith(suffix):
                    arg = self.argnames[i] = arg[:-1]
                    prop = code
                    break
            d[arg] = prop
            hardest_arg = max(hardest_arg, prop)
        self.hardest_arg = hardest_arg

        if hardest_arg > P_NODE:
            for suffix, _ in markers:
                self.args = self.args.replace(suffix, '')

        return d

    def gen_source(self):
        """Print the complete class definition for this node to stdout."""
        print("class %s(Node):" % self.name)
        print("    ARGNAMES = %r" % self.argnames)
        self._gen_init(sys.stdout)
        print()
        self._gen_getChildren(sys.stdout)
        print()
        self._gen_getChildNodes(sys.stdout)
        print()

    def _gen_init(self, buf):
        """Write the generated __init__ method to buf.

        If the spec wrapped the arguments in parentheses, the generated
        method takes a single parameter (named by concatenating the argument
        names) and assigns each attribute by indexing into it.
        """
        # Bound up front so the check below is safe even when self.args is
        # empty.
        argtuple = False
        if self.args:
            argtuple = '(' in self.args
            args = ''.join(self.argnames) if argtuple else self.args
            print("    def __init__(self, %s, lineno=None):" % args, file=buf)
        else:
            print("    def __init__(self, lineno=None):", file=buf)
        if self.argnames:
            if argtuple:
                for idx, name in enumerate(self.argnames):
                    print("        self.%s = %s[%s]" % (name, args, idx),
                          file=buf)
            else:
                for name in self.argnames:
                    print("        self.%s = %s" % (name, name), file=buf)
        print("        self.lineno = lineno", file=buf)
        # Copy the lines in self.init, indented four spaces.  The rstrip()
        # business is to get rid of the four spaces if line happens to be
        # empty, so that reindent.py is happy with the output.
        for line in self.init:
            print(("    " + line).rstrip(), file=buf)

    def _gen_getChildren(self, buf):
        """Write the generated getChildren method to buf.

        The generated method returns every argument value as a tuple; when
        the node has a nested-list (or node-or-None) argument the values are
        collected in a list, with nested lists passed through flatten().
        """
        print("    def getChildren(self):", file=buf)
        if not self.argnames:
            print("        return ()", file=buf)
            return

        if self.hardest_arg < P_NESTED:
            clist = COMMA.join(["self.%s" % c for c in self.argnames])
            if self.nargs == 1:
                # Trailing comma makes the single value a 1-tuple.
                print("        return %s," % clist, file=buf)
            else:
                print("        return %s" % clist, file=buf)
            return

        if len(self.argnames) == 1:
            print("        return tuple(flatten(self.%s))" % self.argnames[0],
                  file=buf)
            return

        print("        children = []", file=buf)
        template = "        children.%s(%sself.%s%s)"
        for name in self.argnames:
            if self.argprops[name] == P_NESTED:
                print(template % ("extend", "flatten(", name, ")"), file=buf)
            else:
                print(template % ("append", "", name, ""), file=buf)
        print("        return tuple(children)", file=buf)

    def _gen_getChildNodes(self, buf):
        """Write the generated getChildNodes method to buf.

        Unlike getChildren, the generated method leaves out plain Python
        values (P_OTHER) and guards node-or-None arguments with a None check.
        """
        print("    def getChildNodes(self):", file=buf)
        if not self.argnames:
            print("        return ()", file=buf)
            return

        if self.hardest_arg < P_NESTED:
            clist = ["self.%s" % c
                     for c in self.argnames
                     if self.argprops[c] == P_NODE]
            if len(clist) == 0:
                print("        return ()", file=buf)
            elif len(clist) == 1:
                print("        return %s," % clist[0], file=buf)
            else:
                print("        return %s" % COMMA.join(clist), file=buf)
            return

        print("        nodelist = []", file=buf)
        template = "        nodelist.%s(%sself.%s%s)"
        for name in self.argnames:
            prop = self.argprops[name]
            if prop == P_NONE:
                tmp = ("        if self.%s is not None:\n"
                       "            nodelist.append(self.%s)")
                print(tmp % (name, name), file=buf)
            elif prop == P_NESTED:
                print(template % ("extend", "flatten_nodes(", name, ")"),
                      file=buf)
            elif prop == P_NODE:
                print(template % ("append", "", name, ""), file=buf)
        print("        return tuple(nodelist)", file=buf)
180
181
# Matches an "init(ClassName):" header line in the spec, which introduces
# extra code for that node's __init__; group 1 is the class name.  Written as
# a raw string so that the backslashes reach the regex engine unchanged rather
# than being treated as (invalid) string escapes.
rx_init = re.compile(r'init\((.*)\):')
183
def parse_spec(file):
    """Parse the node specification in *file* into NodeInfo objects.

    The spec is read line by line:

    * Lines whose first non-blank character is '#' are skipped.
    * A line matching rx_init ("init(Name):") selects the already-defined
      node Name as the current target for extra __init__ code.
    * Once a target is selected, every following line is appended verbatim
      to that node's `init` list until another init header is seen.
    * Before any target is selected, a line of the form "Name: args" defines
      a node.  Lines that do not split into exactly two parts on ':' (blank
      lines, for example) are silently skipped.

    Args:
        file: path (or paths) handed to fileinput.input().

    Returns:
        The list of NodeInfo objects sorted by node name.

    Raises:
        KeyError: if an init header names a node that has not been defined.
    """
    classes = {}
    cur = None
    try:
        for line in fileinput.input(file):
            if line.strip().startswith('#'):
                continue

            mo = rx_init.search(line)
            if mo is not None:
                # some extra code for a Node's __init__ method
                cur = classes[mo.group(1)]
                continue

            if cur is not None:
                # some code for the __init__ method
                cur.init.append(line)
                continue

            # a normal entry
            try:
                name, args = line.split(':')
            except ValueError:
                continue
            classes[name] = NodeInfo(name, args)
    finally:
        # Release the module-level fileinput state even if parsing fails, so
        # the file is closed and fileinput.input() can be used again.
        fileinput.close()
    return sorted(classes.values(), key=lambda n: n.name)
208
def main():
    """Write the generated ast module to stdout.

    The boilerplate is read from this script itself (sys.argv[0]) and the
    node specification from the path given in sys.argv[1].  Output order is:
    two fixed import lines, the prologue, a blank line, one class per node in
    the spec, then the epilogue.
    """
    header, footer = load_boilerplate(sys.argv[0])
    for chunk in ('from __future__ import print_function',
                  'import cStringIO',
                  header,
                  ''):
        print(chunk)
    for node_info in parse_spec(sys.argv[1]):
        node_info.gen_source()
    print(footer)
219
if __name__ == "__main__":
    main()
    # Exit explicitly: everything after this point in the file is template
    # text that load_boilerplate() copies into the generated module, so it
    # must not be executed as part of this script.
    sys.exit(0)
223
224### PROLOGUE
225"""Python abstract syntax node definitions
226
227This file is automatically generated by Tools/compiler/astgen.py
228"""
229from .consts import CO_VARARGS, CO_VARKEYWORDS
230
231# NOTE: Similar to pyassem.flatten().
def flatten(seq):
    """Return a flat list of the items in seq.

    Any element that is a tuple or a list is expanded recursively, in order;
    every other element is kept as is.
    """
    flat = []
    for item in seq:
        if not isinstance(item, (tuple, list)):
            flat.append(item)
            continue
        flat.extend(flatten(item))
    return flat
240
def flatten_nodes(seq):
    """Flatten seq and keep only the items that are Node instances."""
    found = []
    for item in flatten(seq):
        if isinstance(item, Node):
            found.append(item)
    return found
243
# Registry keyed by lowercased node name.  The Expression class adds an entry
# from its class body, and the loop in the epilogue adds one entry for every
# Node subclass it finds in globals().
nodes = {}
245
246
247# NOTE: after_equals is a hack to make the output prettier. You could copy
248# _TrySingleLine in asdl/format.py. That took a long time to get right!
def _PrettyPrint(val, f, indent=0, after_equals=False):
    """Write a multi-line rendering of val to the file-like object f.

    Node instances render themselves through their PrettyPrint method.  Lists
    and tuples are written as an opening bracket, one item per line indented
    two further columns, and a closing bracket.  Anything else is written
    with %r.

    When after_equals is true the caller has already written text on the
    current line, so the leading indent is left out.
    """
    if isinstance(val, Node):
        val.PrettyPrint(f, indent=indent, after_equals=after_equals)
        return

    pad = ' ' * indent
    # Every remaining case starts the same way: indent unless we are
    # continuing a line the caller already started.
    if not after_equals:
        print('%s' % pad, end='', file=f)

    if isinstance(val, list):
        opener, closer = '[', ']'
    elif isinstance(val, tuple):
        opener, closer = '(', ')'
    else:
        # String or int?
        print('%r' % val, file=f)
        return

    print(opener, file=f)  # No indent here
    for item in val:
        _PrettyPrint(item, f, indent=indent+2)
    # Not indented as much
    print('%s%s' % (pad, closer), file=f)
277
278
class Node(object):
    """Abstract base class for ast nodes."""

    # Names of the fields PrettyPrint shows, in order.  The generated
    # subclasses each define their own list.
    ARGNAMES = []

    def getChildren(self):
        """Return the node's children; the base version returns None."""
        return None  # implemented by subclasses

    def __iter__(self):
        """Iterate over whatever getChildren() returns."""
        for child in self.getChildren():
            yield child

    def asList(self):  # for backwards compatibility
        """Alias for getChildren()."""
        return self.getChildren()

    def getChildNodes(self):
        """Return the children that are nodes; the base version returns None."""
        return None  # implemented by subclasses

    def __repr__(self):
        """Return the PrettyPrint output as a string."""
        out = cStringIO.StringIO()
        self.PrettyPrint(out)
        return out.getvalue()

    def PrettyPrint(self, f, indent=0, after_equals=False):
        """Write a multi-line rendering of this node to the file-like f.

        The output is the class name and an opening parenthesis, one
        'name = value' entry per ARGNAMES field (values rendered by
        _PrettyPrint), and a closing parenthesis followed by the class name
        in a comment.  When after_equals is true the leading indent on the
        first line is left out.
        """
        pad = ' ' * indent
        cls_name = self.__class__.__name__

        if not after_equals:
            print('%s' % pad, end='', file=f)
        print('%s(' % cls_name, file=f)

        for field in self.ARGNAMES:
            # Field name first, value on the same line.
            print('%s %s = ' % (pad, field), end='', file=f)
            _PrettyPrint(getattr(self, field), f, indent=indent+2,
                         after_equals=True)

        print('%s) # %s' % (pad, cls_name), file=f)
318
319
class EmptyNode(Node):
    """Node subclass that adds nothing of its own to the Node base class."""
    pass
322
class Expression(Node):
    """Artificial node class to support "eval"; wraps a single node."""

    # Runs once, when the class body is executed: records this class's name
    # (as a string) in the module-level registry.
    nodes["expression"] = "Expression"

    def __init__(self, node):
        self.node = node

    def getChildren(self):
        return (self.node,)

    def getChildNodes(self):
        return (self.node,)

    def __repr__(self):
        return "Expression(%r)" % (self.node,)
337
338### EPILOGUE
# Register every Node subclass defined in this module under its lowercased
# class name.  Iterate over a snapshot of globals(): the loop variables are
# themselves module globals, so binding them while iterating the live dict
# would change its size mid-iteration (a RuntimeError on Python 3).
for name, obj in list(globals().items()):
    if isinstance(obj, type) and issubclass(obj, Node):
        nodes[name.lower()] = obj