| 1 | #!/usr/bin/env python2
 | 
| 2 | """Generate ast module from specification
 | 
| 3 | 
 | 
| 4 | This script generates the ast module from a simple specification,
 | 
| 5 | which makes it easy to accommodate changes in the grammar.  This
 | 
| 6 | approach would be quite reasonable if the grammar changed often.
 | 
| 7 | Instead, it is rather complex to generate the appropriate code.  And
 | 
| 8 | the Node interface has changed more often than the grammar.
 | 
| 9 | """
 | 
| 10 | from __future__ import print_function
 | 
| 11 | 
 | 
| 12 | import fileinput
 | 
| 13 | import re
 | 
| 14 | import sys
 | 
| 15 | import cStringIO
 | 
| 16 | 
 | 
# Separator used when joining attribute references in generated return statements.
COMMA = ", "
 | 
| 18 | 
 | 
def load_boilerplate(file):
    """Extract the prologue and epilogue template text from a file.

    The file is searched for the PROLOGUE and EPILOGUE marker comments.
    Returns a (prologue, epilogue) tuple of strings: the text between the two
    markers and the text after the second one, each stripped of surrounding
    whitespace.

    If a marker is absent, str.find() yields -1 and the slices are computed
    from that index; no error is raised.
    """
    # Each marker is spelled as two adjacent string literals so that this
    # function's own source never contains the marker text.  main() passes
    # the path of this very script, and find() must hit the real marker
    # lines rather than these assignments.
    pro_marker = '### ''PROLOGUE'
    epi_marker = '### ''EPILOGUE'

    # 'with' guarantees the handle is closed even if read() raises.
    with open(file) as f:
        buf = f.read()

    i = buf.find(pro_marker)
    j = buf.find(epi_marker)
    pro = buf[i + len(pro_marker):j].strip()
    epi = buf[j + len(epi_marker):].strip()
    return pro, epi
 | 
| 28 | 
 | 
def strip_default(arg):
    """Drop the '= default' part of an argument and return the bare name.

    A string without '=' is returned unchanged (it is not stripped);
    otherwise the text before the first '=' is returned with surrounding
    whitespace removed.
    """
    name, equals, _default = arg.partition('=')
    if not equals:
        return arg
    return name.strip()
 | 
| 36 | 
 | 
# Argument kinds.  A kind is selected by an optional one-character suffix on
# the argument name in the spec.  The numeric order is significant: NodeInfo
# records the largest kind among a node's arguments (hardest_arg) and compares
# it against these values to decide how much code to generate.
P_NODE = 1    #   It's another node.
P_OTHER = 2   # * means it's a Python value like string, int, tuple, etc.
P_NESTED = 3  # ! means this is a list of more nodes.
P_NONE = 4    # & means it's a node or None.
 | 
| 41 | 
 | 
class NodeInfo(object):
    """Describe one AST node class and generate its source code.

    Attributes:
        name: name of the generated class.
        args: the argument text from the spec with kind markers removed;
            used verbatim as the parameter list of the generated __init__
            unless it contains a parenthesised tuple.
        argnames: list of argument names, without defaults or kind markers.
        argprops: dict mapping each argument name to its P_* kind.
        hardest_arg: the largest P_* kind found among the arguments.
        nargs: number of arguments.
        init: extra source lines appended to the generated __init__
            (filled in by parse_spec).
    """

    def __init__(self, name, args):
        self.name = name
        self.args = args.strip()
        self.argnames = self.get_argnames()
        # get_argprops() strips the kind markers from self.argnames and
        # self.args and sets self.hardest_arg, so it has to run after
        # get_argnames().
        self.argprops = self.get_argprops()
        self.nargs = len(self.argnames)
        self.init = []

    def get_argnames(self):
        """Return the argument names with any '= default' part removed.

        When the argument text contains a '(' only the text between the
        first '(' and the last ')' is considered.  Kind markers are still
        attached to the returned names.
        """
        args = self.args
        if '(' in args:
            args = args[args.find('(') + 1:args.rfind(')')]
        # Filter on the stripped text so that a whitespace-only field (for
        # example after a trailing comma inside parentheses) does not turn
        # into an empty argument name.
        return [strip_default(arg.strip())
                for arg in args.split(',') if arg.strip()]

    def get_argprops(self):
        """Return a dict mapping each argument name to its P_* kind.

        A name ending in '*', '!' or '&' gets P_OTHER, P_NESTED or P_NONE
        respectively; any other name is P_NODE.

        XXX This method modifies the argnames in place: the marker is
        removed from each entry of self.argnames.  It also sets
        self.hardest_arg and, if any marker was found, removes every
        '*', '!' and '&' character from self.args.
        """
        marker_kinds = {'*': P_OTHER, '!': P_NESTED, '&': P_NONE}
        props = {}
        hardest_arg = P_NODE
        for i, arg in enumerate(self.argnames):
            # arg[-1:] is '' for an empty name, which maps to P_NODE.
            kind = marker_kinds.get(arg[-1:], P_NODE)
            if kind != P_NODE:
                arg = self.argnames[i] = arg[:-1]
            props[arg] = kind
            hardest_arg = max(hardest_arg, kind)
        self.hardest_arg = hardest_arg

        if hardest_arg > P_NODE:
            self.args = self.args.replace('*', '')
            self.args = self.args.replace('!', '')
            self.args = self.args.replace('&', '')

        return props

    def gen_source(self):
        """Print the complete generated class definition to sys.stdout."""
        print("class %s(Node):" % self.name)
        print("    ARGNAMES = %r" % self.argnames)
        self._gen_init(sys.stdout)
        print()
        self._gen_getChildren(sys.stdout)
        print()
        self._gen_getChildNodes(sys.stdout)
        print()

    def _gen_init(self, buf):
        """Write the generated __init__ method to buf.

        If the argument text contains a parenthesised tuple, the generated
        method takes a single parameter named by concatenating the argument
        names, and each attribute is read from it by index.  Otherwise the
        argument text is used as the parameter list as-is.
        """
        # Bound unconditionally so the attribute loop below can never hit an
        # unbound name, whatever combination of args/argnames it is given.
        argtuple = '(' in self.args
        if self.args:
            args = ''.join(self.argnames) if argtuple else self.args
            print("    def __init__(self, %s, lineno=None):" % args, file=buf)
        else:
            print("    def __init__(self, lineno=None):", file=buf)
        if argtuple:
            for idx, name in enumerate(self.argnames):
                print("        self.%s = %s[%s]" % (name, args, idx), file=buf)
        else:
            for name in self.argnames:
                print("        self.%s = %s" % (name, name), file=buf)
        print("        self.lineno = lineno", file=buf)
        # Copy the lines in self.init, indented four spaces.  The rstrip()
        # business is to get rid of the four spaces if line happens to be
        # empty, so that reindent.py is happy with the output.
        for line in self.init:
            print(("    " + line).rstrip(), file=buf)

    def _gen_getChildren(self, buf):
        """Write the generated getChildren method to buf.

        The generated method returns a tuple of all argument attributes,
        with the contents of '!' (nested list) arguments passed through
        flatten().
        """
        print("    def getChildren(self):", file=buf)
        if not self.argnames:
            print("        return ()", file=buf)
            return

        if self.hardest_arg < P_NESTED:
            clist = COMMA.join(["self.%s" % c for c in self.argnames])
            if self.nargs == 1:
                print("        return %s," % clist, file=buf)
            else:
                print("        return %s" % clist, file=buf)
            return

        only = self.argnames[0]
        # The flatten() shortcut is only valid for a nested-list argument.
        # A lone '&' argument also lands here (P_NONE > P_NESTED); calling
        # flatten() on it would fail on None and would return the child's
        # own children for a node, so it takes the general path below.
        if len(self.argnames) == 1 and self.argprops[only] == P_NESTED:
            print("        return tuple(flatten(self.%s))" % only, file=buf)
            return

        print("        children = []", file=buf)
        template = "        children.%s(%sself.%s%s)"
        for name in self.argnames:
            if self.argprops[name] == P_NESTED:
                print(template % ("extend", "flatten(", name, ")"), file=buf)
            else:
                print(template % ("append", "", name, ""), file=buf)
        print("        return tuple(children)", file=buf)

    def _gen_getChildNodes(self, buf):
        """Write the generated getChildNodes method to buf.

        The generated method returns a tuple containing only node-valued
        arguments: P_NODE attributes, P_NONE attributes that are not None,
        and the flatten_nodes() contents of P_NESTED attributes.  P_OTHER
        attributes are left out.
        """
        print("    def getChildNodes(self):", file=buf)
        if not self.argnames:
            print("        return ()", file=buf)
            return

        if self.hardest_arg < P_NESTED:
            clist = ["self.%s" % c
                     for c in self.argnames
                     if self.argprops[c] == P_NODE]
            if not clist:
                print("        return ()", file=buf)
            elif len(clist) == 1:
                print("        return %s," % clist[0], file=buf)
            else:
                print("        return %s" % COMMA.join(clist), file=buf)
            return

        print("        nodelist = []", file=buf)
        template = "        nodelist.%s(%sself.%s%s)"
        for name in self.argnames:
            kind = self.argprops[name]
            if kind == P_NONE:
                tmp = ("        if self.%s is not None:\n"
                       "            nodelist.append(self.%s)")
                print(tmp % (name, name), file=buf)
            elif kind == P_NESTED:
                print(template % ("extend", "flatten_nodes(", name, ")"),
                      file=buf)
            elif kind == P_NODE:
                print(template % ("append", "", name, ""), file=buf)
        print("        return tuple(nodelist)", file=buf)
 | 
| 180 | 
 | 
| 181 | 
 | 
# Matches the header line that starts extra __init__ code for a node, such
# as "init(Name):"; group 1 is the node name.  Written as a raw string so
# that the backslashes reach the regex engine instead of being treated as
# (invalid) string escapes.
rx_init = re.compile(r'init\((.*)\):')

def parse_spec(file):
    """Parse a node specification and return its NodeInfo objects.

    Lines whose first non-blank character is '#' are skipped everywhere.
    Before the first "init(Name):" header, every line that splits into
    exactly two fields on ':' defines a node ("Name: args"); other lines
    are ignored.  From the first header onwards, each header selects the
    named node and every following non-header line is appended to that
    node's extra __init__ code, so ordinary entries are no longer
    recognised after that point.

    Returns the NodeInfo objects sorted by name.  Raises KeyError if a
    header names a node that has not been defined.
    """
    classes = {}
    cur = None
    try:
        for line in fileinput.input(file):
            if line.strip().startswith('#'):
                continue
            mo = rx_init.search(line)
            if mo is not None:
                # some extra code for a Node's __init__ method
                cur = classes[mo.group(1)]
            elif cur is not None:
                # some code for the __init__ method
                cur.init.append(line)
            else:
                # a normal entry
                try:
                    name, args = line.split(':')
                except ValueError:
                    continue
                classes[name] = NodeInfo(name, args)
    finally:
        # fileinput keeps module-global state; release it even when the
        # loop is abandoned by an exception so a later call can start over.
        fileinput.close()
    return sorted(classes.values(), key=lambda n: n.name)
 | 
| 208 | 
 | 
def main():
    """Generate the ast module source on standard output.

    The boilerplate is read from this script itself (sys.argv[0]) and the
    node specification from the file named by the first command-line
    argument.  Exits with a usage message on stderr if that argument is
    missing.
    """
    if len(sys.argv) < 2:
        # Fail before anything is written, so a redirected output file is
        # not left holding a truncated module.
        sys.exit("usage: %s SPEC_FILE" % sys.argv[0])

    prologue, epilogue = load_boilerplate(sys.argv[0])
    # Parse before printing for the same reason: a bad spec should not
    # leave partial output behind.
    classes = parse_spec(sys.argv[1])

    print('from __future__ import print_function')
    print('import cStringIO')
    print(prologue)
    print()
    for info in classes:
        info.gen_source()
    print(epilogue)
 | 
| 219 | 
 | 
if __name__ == "__main__":
    main()
    # Everything after this point is the boilerplate that load_boilerplate()
    # extracts from this file as text for the generated module.  Exiting
    # here keeps it from being executed when this file runs as a script.
    sys.exit(0)
 | 
| 223 | 
 | 
| 224 | ### PROLOGUE
 | 
| 225 | """Python abstract syntax node definitions
 | 
| 226 | 
 | 
| 227 | This file is automatically generated by Tools/compiler/astgen.py
 | 
| 228 | """
 | 
| 229 | from .consts import CO_VARARGS, CO_VARKEYWORDS
 | 
| 230 | 
 | 
# NOTE: Similar to pyassem.flatten().
def flatten(seq):
    """Return a flat list of the items in seq.

    Tuples and lists found inside seq are expanded recursively; every other
    item is kept as-is, in order.
    """
    flat = []
    for item in seq:
        if not isinstance(item, (tuple, list)):
            flat.append(item)
            continue
        flat += flatten(item)
    return flat
 | 
| 240 | 
 | 
def flatten_nodes(seq):
    """Flatten seq and return a list of only the items that are Node instances."""
    found = []
    for item in flatten(seq):
        if isinstance(item, Node):
            found.append(item)
    return found
 | 
| 243 | 
 | 
# Registry keyed by lowercased node name.  The Expression class body stores
# a class-name string under "expression"; the loop at the end of the module
# stores each Node subclass under its lowercased class name.
nodes = {}
 | 
| 245 | 
 | 
| 246 | 
 | 
# NOTE: after_equals is a hack to make the output prettier.  You could copy
# _TrySingleLine in asdl/format.py.  That took a long time to get right!
def _PrettyPrint(val, f, indent=0, after_equals=False):
    """Write an indented, multi-line rendering of val to the file f.

    Node instances are delegated to their own PrettyPrint method.  Lists
    and tuples are written as a bracketed block with each item on its own
    line(s), indented two more columns.  Anything else is written as its
    repr.  When after_equals is true the leading indentation is omitted,
    because the value continues a "name = " line already written.
    """
    if isinstance(val, Node):
        val.PrettyPrint(f, indent=indent, after_equals=after_equals)
        return

    indent_str = ' ' * indent
    if not after_equals:
        print('%s' % indent_str, end='', file=f)

    if isinstance(val, list):
        opener, closer = '[', ']'
    elif isinstance(val, tuple):
        opener, closer = '(', ')'
    else:
        # String or int?
        print('%r' % val, file=f)
        return

    print(opener, file=f)   # No indent here
    for item in val:
        _PrettyPrint(item, f, indent=indent+2)
    # Not indented as much
    print('%s%s' % (indent_str, closer), file=f)
 | 
| 277 | 
 | 
| 278 | 
 | 
class Node(object):
    """Common base class of all ast nodes."""

    # Names of the attributes that PrettyPrint displays, in order.
    ARGNAMES = []

    def getChildren(self):
        """Placeholder that returns None; subclasses provide the real one."""
        return None

    def __iter__(self):
        """Yield each item returned by getChildren()."""
        for child in self.getChildren():
            yield child

    def asList(self):
        """Return getChildren(); kept for backwards compatibility."""
        return self.getChildren()

    def getChildNodes(self):
        """Placeholder that returns None; subclasses provide the real one."""
        return None

    def __repr__(self):
        """Return the PrettyPrint rendering of this node as a string."""
        out = cStringIO.StringIO()
        self.PrettyPrint(out)
        return out.getvalue()

    def PrettyPrint(self, f, indent=0, after_equals=False):
        """Write a multi-line rendering of this node to the file f.

        The output is the class name and '(', one "name = value" entry per
        ARGNAMES attribute indented two more columns, then ')' followed by
        a comment repeating the class name.  When after_equals is true the
        indentation before the class name is omitted.
        """
        pad = ' ' * indent
        class_name = self.__class__.__name__

        if not after_equals:
            print('%s' % pad, end='', file=f)
        print('%s(' % class_name, file=f)

        for name in self.ARGNAMES:
            # Field name first, then its value on the same line.
            print('%s  %s = ' % (pad, name), end='', file=f)
            _PrettyPrint(getattr(self, name), f, indent=indent+2,
                         after_equals=True)

        print('%s)  # %s' % (pad, class_name), file=f)
 | 
| 318 | 
 | 
| 319 | 
 | 
# A Node subclass that adds nothing of its own; all behaviour is inherited
# from Node.
class EmptyNode(Node):
    pass
 | 
| 322 | 
 | 
class Expression(Node):
    """Artificial node class to support "eval"; wraps a single node."""

    # Executed while the class body is evaluated: records the class name
    # (as a string) in the module-level registry.
    nodes["expression"] = "Expression"

    def __init__(self, node):
        self.node = node

    def getChildren(self):
        """Return a one-element tuple holding the wrapped node."""
        return (self.node,)

    def getChildNodes(self):
        """Return a one-element tuple holding the wrapped node."""
        return (self.node,)

    def __repr__(self):
        return "Expression(%s)" % (repr(self.node),)
 | 
| 337 | 
 | 
| 338 | ### EPILOGUE
 | 
# Register every class derived from Node (Node itself included) under its
# lowercased name.  The items are copied into a list first because the loop
# variables are module globals themselves: binding them adds keys to the
# very dict being iterated, which Python 3 rejects with RuntimeError.
for name, obj in list(globals().items()):
    if isinstance(obj, type) and issubclass(obj, Node):
        nodes[name.lower()] = obj
 |