| 1 | """Statically classify symbols by scope, for code generation.
 | 
| 2 | 
 | 
| 3 | Great article about CPython's symbol tables, which are slightly different:
 | 
| 4 | 
 | 
| 5 | https://eli.thegreenplace.net/2010/09/20/python-internals-symbol-tables-part-2
 | 
| 6 | 
 | 
| 7 | In particular, see footnote 6: Why does CPython's algorithm have 2 passes,
 | 
| 8 | while there is only one pass here?
 | 
| 9 | """
 | 
| 10 | from __future__ import print_function
 | 
| 11 | 
 | 
| 12 | from . import ast
 | 
| 13 | from . import misc
 | 
| 14 | from .consts import SC_LOCAL, SC_GLOBAL_IMPLICIT, SC_GLOBAL_EXPLICIT, \
 | 
| 15 |     SC_FREE, SC_CELL, SC_UNKNOWN
 | 
| 16 | from .visitor import ASTVisitor
 | 
| 17 | 
 | 
| 18 | import itertools
 | 
| 19 | import types
 | 
| 20 | 
 | 
| 21 | 
 | 
| 22 | class Scope(object):
 | 
| 23 |     # XXX how much information do I need about each name?
 | 
| 24 |     def __init__(self, name, module, klass=None):
 | 
| 25 |         self.name = name
 | 
| 26 |         self.module = module
 | 
| 27 | 
 | 
| 28 |         self.defs = set()
 | 
| 29 |         self.uses = set()
 | 
| 30 |         self.globals = set()
 | 
| 31 |         self.params = set()
 | 
| 32 |         self.frees = set()
 | 
| 33 |         self.cells = set()
 | 
| 34 | 
 | 
| 35 |         self.children = []
 | 
| 36 |         # nested is true if the class could contain free variables,
 | 
| 37 |         # i.e. if it is nested within another function.
 | 
| 38 |         self.nested = None
 | 
| 39 |         self.generator = None
 | 
| 40 |         self.klass = None
 | 
| 41 |         if klass is not None:
 | 
| 42 |             # Strip leading _.  I guess this is for name mangling; you don't
 | 
| 43 |             # want more than one _.
 | 
| 44 |             self.klass = klass.lstrip('_')
 | 
| 45 | 
 | 
| 46 |     def __repr__(self):
 | 
| 47 |         n = '(nested)' if self.nested else ''
 | 
| 48 |         return "<%s: %s> %s" % (self.__class__.__name__, self.name, n)
 | 
| 49 | 
 | 
| 50 |     def PrettyPrint(self):
 | 
| 51 |         def p(s):
 | 
| 52 |           return ' '.join(sorted(s))
 | 
| 53 |         print(repr(self))
 | 
| 54 |         print("\tglobals: ", p(self.globals))
 | 
| 55 |         print("\tcells: ", p(self.cells))
 | 
| 56 |         print("\tdefs: ", p(self.defs))
 | 
| 57 |         print("\tuses: ", p(self.uses))
 | 
| 58 |         print("\tfrees:", p(self.frees))
 | 
| 59 | 
 | 
| 60 |     def mangle(self, name):
 | 
| 61 |         return misc.mangle(name, self.klass)
 | 
| 62 | 
 | 
| 63 |     def add_def(self, name):
 | 
| 64 |         self.defs.add(self.mangle(name))
 | 
| 65 | 
 | 
| 66 |     def add_use(self, name):
 | 
| 67 |         self.uses.add(self.mangle(name))
 | 
| 68 | 
 | 
| 69 |     def add_global(self, name):
 | 
| 70 |         name = self.mangle(name)
 | 
| 71 |         if name in self.uses or name in self.defs:
 | 
| 72 |             pass # XXX warn about global following def/use
 | 
| 73 |         if name in self.params:
 | 
| 74 |             raise SyntaxError, "%s in %s is global and parameter" % \
 | 
| 75 |                   (name, self.name)
 | 
| 76 |         self.globals.add(name)
 | 
| 77 |         self.module.add_def(name)
 | 
| 78 | 
 | 
| 79 |     def add_param(self, name):
 | 
| 80 |         name = self.mangle(name)
 | 
| 81 |         self.defs.add(name)
 | 
| 82 |         self.params.add(name)
 | 
| 83 | 
 | 
| 84 |     def get_names(self):
 | 
| 85 |         return list(self.defs | self.uses | self.globals)  # union
 | 
| 86 | 
 | 
| 87 |     def add_child(self, child):
 | 
| 88 |         self.children.append(child)
 | 
| 89 | 
 | 
| 90 |     def get_children(self):
 | 
| 91 |         return self.children
 | 
| 92 | 
 | 
| 93 |     def check_name(self, name):
 | 
| 94 |         """Return scope of name.
 | 
| 95 | 
 | 
| 96 |         The scope of a name could be LOCAL, GLOBAL, FREE, or CELL.
 | 
| 97 |         """
 | 
| 98 |         if name in self.globals:
 | 
| 99 |             return SC_GLOBAL_EXPLICIT
 | 
| 100 |         if name in self.cells:
 | 
| 101 |             return SC_CELL
 | 
| 102 |         if name in self.defs:
 | 
| 103 |             return SC_LOCAL
 | 
| 104 |         if self.nested and (name in self.frees or name in self.uses):
 | 
| 105 |             return SC_FREE
 | 
| 106 |         if self.nested:
 | 
| 107 |             return SC_UNKNOWN
 | 
| 108 |         else:
 | 
| 109 |             return SC_GLOBAL_IMPLICIT
 | 
| 110 | 
 | 
| 111 |     def get_free_vars(self):
 | 
| 112 |         """Variables not defined in this scope, and that are not globals either.
 | 
| 113 | 
 | 
| 114 |         It must be like this:
 | 
| 115 |         # def MakeAdder(inc):
 | 
| 116 |         #  def Adder(x):
 | 
| 117 |         #    return x + inc  # x is a local, inc is a free var?
 | 
| 118 |         #  return Adder
 | 
| 119 |         """
 | 
| 120 |         # TODO: Fix this calculation!
 | 
| 121 | 
 | 
| 122 |         if not self.nested:
 | 
| 123 |             return []
 | 
| 124 |         free = set()
 | 
| 125 |         free.update(self.frees)
 | 
| 126 |         for name in self.uses:
 | 
| 127 |             if name not in self.defs and name not in self.globals:
 | 
| 128 |                 free.add(name)
 | 
| 129 |         return list(free)
 | 
| 130 | 
 | 
| 131 |     def get_cell_vars(self):
 | 
| 132 |         return list(self.cells)
 | 
| 133 | 
 | 
| 134 |     def handle_children(self):
 | 
| 135 |         for child in self.children:
 | 
| 136 |             frees = child.get_free_vars()
 | 
| 137 |             globals_ = self.add_frees(frees)
 | 
| 138 |             for name in globals_:
 | 
| 139 |                 child.force_global(name)
 | 
| 140 | 
 | 
| 141 |     def force_global(self, name):
 | 
| 142 |         """Force name to be global in scope.
 | 
| 143 | 
 | 
| 144 |         Some child of the current node had a free reference to name.
 | 
| 145 |         When the child was processed, it was labelled a free
 | 
| 146 |         variable.  Now that all its enclosing scope have been
 | 
| 147 |         processed, the name is known to be a global or builtin.  So
 | 
| 148 |         walk back down the child chain and set the name to be global
 | 
| 149 |         rather than free.
 | 
| 150 | 
 | 
| 151 |         Be careful to stop if a child does not think the name is
 | 
| 152 |         free.
 | 
| 153 |         """
 | 
| 154 |         self.globals.add(name)
 | 
| 155 |         if name in self.frees:
 | 
| 156 |             self.frees.remove(name)
 | 
| 157 |         for child in self.children:
 | 
| 158 |             if child.check_name(name) == SC_FREE:
 | 
| 159 |                 child.force_global(name)
 | 
| 160 | 
 | 
| 161 |     def add_frees(self, names):
 | 
| 162 |         """Process list of free vars from nested scope.
 | 
| 163 | 
 | 
| 164 |         Returns a list of names that are either 1) declared global in the
 | 
| 165 |         parent or 2) undefined in a top-level parent.  In either case,
 | 
| 166 |         the nested scope should treat them as globals.
 | 
| 167 |         """
 | 
| 168 |         child_globals = []
 | 
| 169 |         for name in names:
 | 
| 170 |             sc = self.check_name(name)
 | 
| 171 |             if self.nested:
 | 
| 172 |                 if sc == SC_UNKNOWN or sc == SC_FREE \
 | 
| 173 |                    or isinstance(self, ClassScope):
 | 
| 174 |                     self.frees.add(name)
 | 
| 175 |                 elif sc == SC_GLOBAL_IMPLICIT:
 | 
| 176 |                     child_globals.append(name)
 | 
| 177 |                 elif isinstance(self, FunctionScope) and sc == SC_LOCAL:
 | 
| 178 |                     self.cells.add(name)
 | 
| 179 |                 elif sc != SC_CELL:
 | 
| 180 |                     child_globals.append(name)
 | 
| 181 |             else:
 | 
| 182 |                 if sc == SC_LOCAL:
 | 
| 183 |                     self.cells.add(name)
 | 
| 184 |                 elif sc != SC_CELL:
 | 
| 185 |                     child_globals.append(name)
 | 
| 186 |         return child_globals
 | 
| 187 | 
 | 
| 188 | 
 | 
| 189 | class ModuleScope(Scope):
 | 
| 190 | 
 | 
| 191 |     def __init__(self):
 | 
| 192 |         Scope.__init__(self, "global", self)
 | 
| 193 | 
 | 
| 194 | 
 | 
| 195 | class FunctionScope(Scope):
 | 
| 196 |     pass
 | 
| 197 | 
 | 
| 198 | 
 | 
| 199 | class GenExprScope(Scope):
 | 
| 200 | 
 | 
| 201 |     def __init__(self, name, module, klass=None):
 | 
| 202 |         Scope.__init__(self, name, module, klass)
 | 
| 203 |         self.add_param('.0')
 | 
| 204 | 
 | 
| 205 |     def get_names(self):
 | 
| 206 |         keys = Scope.get_names(self)
 | 
| 207 |         return keys
 | 
| 208 | 
 | 
| 209 | 
 | 
| 210 | class LambdaScope(FunctionScope):
 | 
| 211 |     pass
 | 
| 212 | 
 | 
| 213 | 
 | 
| 214 | class ClassScope(Scope):
 | 
| 215 |     pass
 | 
| 216 | 
 | 
| 217 | 
 | 
| 218 | 
 | 
| 219 | gLambdaCounter = itertools.count()
 | 
| 220 | gGenExprCounter = itertools.count()
 | 
| 221 | 
 | 
| 222 | 
 | 
| 223 | class SymbolVisitor(ASTVisitor):
 | 
| 224 |     def __init__(self):
 | 
| 225 |         ASTVisitor.__init__(self)
 | 
| 226 |         self.scopes = {}  # node -> Scope instance, the "return value".
 | 
| 227 |         self.klass = None
 | 
| 228 | 
 | 
| 229 |     # Nodes that define new scopes
 | 
| 230 | 
 | 
| 231 |     def visitModule(self, node):
 | 
| 232 |         scope = self.module = self.scopes[node] = ModuleScope()
 | 
| 233 |         self.visit(node.node, scope)
 | 
| 234 | 
 | 
| 235 |     visitExpression = visitModule
 | 
| 236 | 
 | 
| 237 |     def visitFunction(self, node, parent):
 | 
| 238 |         if node.decorators:
 | 
| 239 |             self.visit(node.decorators, parent)
 | 
| 240 |         parent.add_def(node.name)
 | 
| 241 |         for n in node.defaults:
 | 
| 242 |             self.visit(n, parent)
 | 
| 243 |         scope = FunctionScope(node.name, self.module, self.klass)
 | 
| 244 |         if parent.nested or isinstance(parent, FunctionScope):
 | 
| 245 |             scope.nested = 1
 | 
| 246 |         self.scopes[node] = scope
 | 
| 247 |         self._do_args(scope, node.argnames)
 | 
| 248 |         self.visit(node.code, scope)
 | 
| 249 |         self.handle_free_vars(scope, parent)
 | 
| 250 | 
 | 
| 251 |     def visitGenExpr(self, node, parent):
 | 
| 252 |         obj_name = "generator expression<%d>" % gGenExprCounter.next()
 | 
| 253 | 
 | 
| 254 |         # BUG FIX: These name references happen OUTSIDE the GenExprScope!
 | 
| 255 |         # NOTE: I can see the difference with 'opyc symbols', but somehow I
 | 
| 256 |         # can't construct a difference in the executed code (opyc dis).
 | 
| 257 | 
 | 
| 258 |         # NOTE: This should probably just be quals[0] like visitGenExpr() in
 | 
| 259 |         # pycodegen.py: "precomputation of outmost iterable".
 | 
| 260 |         # TODO: Test it out with gold/genexpr_nested.py.
 | 
| 261 |         for genfor in node.code.quals:
 | 
| 262 |             self.visit(genfor.iter, parent)
 | 
| 263 | 
 | 
| 264 |         scope = GenExprScope(obj_name, self.module, self.klass)
 | 
| 265 | 
 | 
| 266 |         if parent.nested or isinstance(parent, (FunctionScope, GenExprScope)):
 | 
| 267 |             scope.nested = 1
 | 
| 268 | 
 | 
| 269 |         self.scopes[node] = scope
 | 
| 270 | 
 | 
| 271 |         self.visit(node.code, scope)
 | 
| 272 | 
 | 
| 273 |         self.handle_free_vars(scope, parent)
 | 
| 274 | 
 | 
| 275 |     def visitGenExprInner(self, node, scope):
 | 
| 276 |         for genfor in node.quals:
 | 
| 277 |             self.visit(genfor, scope)
 | 
| 278 | 
 | 
| 279 |         self.visit(node.expr, scope)
 | 
| 280 | 
 | 
| 281 |     def visitGenExprFor(self, node, scope):
 | 
| 282 |         self.visit(node.assign, scope, 1)
 | 
| 283 | 
 | 
| 284 |         # BUG FIX: node.iter contains a Name() node for the iterable variable.
 | 
| 285 |         # But it is not really used -- an ITERABLE to it is passed to the
 | 
| 286 |         # generator CodeObject instead!
 | 
| 287 | 
 | 
| 288 |         # That is, in each of these cases, we are using the GET_ITER bytecode, then
 | 
| 289 |         # CALL_FUNCTION to pass it in to the generator.
 | 
| 290 |         # print(x for x in nums) 
 | 
| 291 |         # print(x for x in range(3))
 | 
| 292 |         # print(x for x in [1,2,3])
 | 
| 293 | 
 | 
| 294 |         #self.visit(node.iter, scope)
 | 
| 295 | 
 | 
| 296 |         for if_ in node.ifs:
 | 
| 297 |             self.visit(if_, scope)
 | 
| 298 | 
 | 
| 299 |     def visitGenExprIf(self, node, scope):
 | 
| 300 |         self.visit(node.test, scope)
 | 
| 301 | 
 | 
| 302 |     def visitLambda(self, node, parent, assign=0):
 | 
| 303 |         # Lambda is an expression, so it could appear in an expression
 | 
| 304 |         # context where assign is passed.  The transformer should catch
 | 
| 305 |         # any code that has a lambda on the left-hand side.
 | 
| 306 |         assert not assign
 | 
| 307 | 
 | 
| 308 |         for n in node.defaults:
 | 
| 309 |             self.visit(n, parent)
 | 
| 310 | 
 | 
| 311 |         obj_name = "lambda.%d" % gLambdaCounter.next()
 | 
| 312 | 
 | 
| 313 |         scope = LambdaScope(obj_name, self.module, klass=self.klass)
 | 
| 314 | 
 | 
| 315 |         if parent.nested or isinstance(parent, FunctionScope):
 | 
| 316 |             scope.nested = 1
 | 
| 317 |         self.scopes[node] = scope
 | 
| 318 |         self._do_args(scope, node.argnames)
 | 
| 319 |         self.visit(node.code, scope)
 | 
| 320 |         self.handle_free_vars(scope, parent)
 | 
| 321 | 
 | 
| 322 |     def _do_args(self, scope, args):
 | 
| 323 |         for name in args:
 | 
| 324 |             if type(name) == types.TupleType:
 | 
| 325 |                 self._do_args(scope, name)
 | 
| 326 |             else:
 | 
| 327 |                 scope.add_param(name)
 | 
| 328 | 
 | 
| 329 |     def handle_free_vars(self, scope, parent):
 | 
| 330 |         parent.add_child(scope)
 | 
| 331 |         scope.handle_children()
 | 
| 332 | 
 | 
| 333 |     def visitClass(self, node, parent):
 | 
| 334 |         parent.add_def(node.name)
 | 
| 335 |         for n in node.bases:
 | 
| 336 |             self.visit(n, parent)
 | 
| 337 |         scope = ClassScope(node.name, self.module)
 | 
| 338 |         if parent.nested or isinstance(parent, FunctionScope):
 | 
| 339 |             scope.nested = 1
 | 
| 340 |         if node.doc is not None:
 | 
| 341 |             scope.add_def('__doc__')
 | 
| 342 |         scope.add_def('__module__')
 | 
| 343 |         self.scopes[node] = scope
 | 
| 344 |         prev = self.klass
 | 
| 345 |         self.klass = node.name
 | 
| 346 |         self.visit(node.code, scope)
 | 
| 347 |         self.klass = prev
 | 
| 348 |         self.handle_free_vars(scope, parent)
 | 
| 349 | 
 | 
| 350 |     # name can be a def or a use
 | 
| 351 | 
 | 
| 352 |     # XXX a few calls and nodes expect a third "assign" arg that is
 | 
| 353 |     # true if the name is being used as an assignment.  only
 | 
| 354 |     # expressions contained within statements may have the assign arg.
 | 
| 355 | 
 | 
| 356 |     def visitName(self, node, scope, assign=0):
 | 
| 357 |         if assign:
 | 
| 358 |             scope.add_def(node.name)
 | 
| 359 |         else:
 | 
| 360 |             scope.add_use(node.name)
 | 
| 361 | 
 | 
| 362 |     # operations that bind new names
 | 
| 363 | 
 | 
| 364 |     def visitFor(self, node, scope):
 | 
| 365 |         self.visit(node.assign, scope, 1)
 | 
| 366 |         self.visit(node.list, scope)
 | 
| 367 |         self.visit(node.body, scope)
 | 
| 368 |         if node.else_:
 | 
| 369 |             self.visit(node.else_, scope)
 | 
| 370 | 
 | 
| 371 |     def visitFrom(self, node, scope):
 | 
| 372 |         for name, asname in node.names:
 | 
| 373 |             if name == "*":
 | 
| 374 |                 continue
 | 
| 375 |             scope.add_def(asname or name)
 | 
| 376 | 
 | 
| 377 |     def visitImport(self, node, scope):
 | 
| 378 |         for name, asname in node.names:
 | 
| 379 |             i = name.find(".")
 | 
| 380 |             if i > -1:
 | 
| 381 |                 name = name[:i]
 | 
| 382 |             scope.add_def(asname or name)
 | 
| 383 | 
 | 
| 384 |     def visitGlobal(self, node, scope):
 | 
| 385 |         for name in node.names:
 | 
| 386 |             scope.add_global(name)
 | 
| 387 | 
 | 
| 388 |     def visitAssign(self, node, scope):
 | 
| 389 |         """Propagate assignment flag down to child nodes.
 | 
| 390 | 
 | 
| 391 |         The Assign node doesn't itself contains the variables being
 | 
| 392 |         assigned to.  Instead, the children in node.nodes are visited
 | 
| 393 |         with the assign flag set to true.  When the names occur in
 | 
| 394 |         those nodes, they are marked as defs.
 | 
| 395 | 
 | 
| 396 |         Some names that occur in an assignment target are not bound by
 | 
| 397 |         the assignment, e.g. a name occurring inside a slice.  The
 | 
| 398 |         visitor handles these nodes specially; they do not propagate
 | 
| 399 |         the assign flag to their children.
 | 
| 400 |         """
 | 
| 401 |         for n in node.nodes:
 | 
| 402 |             self.visit(n, scope, 1)
 | 
| 403 |         self.visit(node.expr, scope)
 | 
| 404 | 
 | 
| 405 |     def visitAssName(self, node, scope, assign=1):
 | 
| 406 |         scope.add_def(node.name)
 | 
| 407 | 
 | 
| 408 |     def visitAssAttr(self, node, scope, assign=0):
 | 
| 409 |         self.visit(node.expr, scope, 0)
 | 
| 410 | 
 | 
| 411 |     def visitSubscript(self, node, scope, assign=0):
 | 
| 412 |         self.visit(node.expr, scope, 0)
 | 
| 413 |         for n in node.subs:
 | 
| 414 |             self.visit(n, scope, 0)
 | 
| 415 | 
 | 
| 416 |     def visitSlice(self, node, scope, assign=0):
 | 
| 417 |         self.visit(node.expr, scope, 0)
 | 
| 418 |         if node.lower:
 | 
| 419 |             self.visit(node.lower, scope, 0)
 | 
| 420 |         if node.upper:
 | 
| 421 |             self.visit(node.upper, scope, 0)
 | 
| 422 | 
 | 
| 423 |     def visitAugAssign(self, node, scope):
 | 
| 424 |         # If the LHS is a name, then this counts as assignment.
 | 
| 425 |         # Otherwise, it's just use.
 | 
| 426 |         self.visit(node.node, scope)
 | 
| 427 |         if isinstance(node.node, ast.Name):
 | 
| 428 |             self.visit(node.node, scope, 1) # XXX worry about this
 | 
| 429 |         self.visit(node.expr, scope)
 | 
| 430 | 
 | 
| 431 |     # prune if statements if tests are false
 | 
| 432 | 
 | 
| 433 |     _CONST_TYPES = (types.StringType, types.IntType, types.FloatType)
 | 
| 434 | 
 | 
| 435 |     def visitIf(self, node, scope):
 | 
| 436 |         for test, body in node.tests:
 | 
| 437 |             if isinstance(test, ast.Const):
 | 
| 438 |                 if isinstance(test.value, self._CONST_TYPES):
 | 
| 439 |                     if not test.value:
 | 
| 440 |                         continue
 | 
| 441 |             self.visit(test, scope)
 | 
| 442 |             self.visit(body, scope)
 | 
| 443 |         if node.else_:
 | 
| 444 |             self.visit(node.else_, scope)
 | 
| 445 | 
 | 
| 446 |     # a yield statement signals a generator
 | 
| 447 | 
 | 
| 448 |     def visitYield(self, node, scope):
 | 
| 449 |         scope.generator = 1
 | 
| 450 |         self.visit(node.value, scope)
 |