| 1 | """A flow graph representation for Python bytecode"""
 | 
| 2 | from __future__ import print_function
 | 
| 3 | 
 | 
| 4 | import itertools
 | 
| 5 | import types
 | 
| 6 | 
 | 
| 7 | from .consts import CO_OPTIMIZED, CO_NEWLOCALS, CO_VARARGS, CO_VARKEYWORDS
 | 
| 8 | from opy.lib import dis
 | 
| 9 | 
 | 
| 10 | 
 | 
| 11 | HAS_JREL = set(dis.opname[op] for op in dis.hasjrel)
 | 
| 12 | HAS_JABS = set(dis.opname[op] for op in dis.hasjabs)
 | 
| 13 | OPNUM = dict((name, i) for i, name in enumerate(dis.opname))
 | 
| 14 | 
 | 
| 15 | 
 | 
| 16 | def OrderBlocks(start_block, exit_block):
 | 
| 17 |     """Order blocks so that they are emitted in the right order"""
 | 
| 18 |     # Rules:
 | 
| 19 |     # - when a block has a next block, the next block must be emitted just after
 | 
| 20 |     # - when a block has followers (relative jumps), it must be emitted before
 | 
| 21 |     #   them
 | 
| 22 |     # - all reachable blocks must be emitted
 | 
| 23 |     order = []
 | 
| 24 | 
 | 
| 25 |     # Find all the blocks to be emitted.
 | 
| 26 |     remaining = set()
 | 
| 27 |     todo = [start_block]
 | 
| 28 |     while todo:
 | 
| 29 |         b = todo.pop()
 | 
| 30 |         if b in remaining:
 | 
| 31 |             continue
 | 
| 32 |         remaining.add(b)
 | 
| 33 |         for c in b.get_children():
 | 
| 34 |             if c not in remaining:
 | 
| 35 |                 todo.append(c)
 | 
| 36 | 
 | 
| 37 |     # A block is dominated by another block if that block must be emitted
 | 
| 38 |     # before it.
 | 
| 39 |     dominators = {}
 | 
| 40 |     for b in remaining:
 | 
| 41 |         if __debug__ and b.next:
 | 
| 42 |             assert b is b.next[0].prev[0], (b, b.next)
 | 
| 43 |         # Make sure every block appears in dominators, even if no
 | 
| 44 |         # other block must precede it.
 | 
| 45 |         dominators.setdefault(b, set())
 | 
| 46 |         # preceding blocks dominate following blocks
 | 
| 47 |         for c in b.get_followers():
 | 
| 48 |             while 1:
 | 
| 49 |                 dominators.setdefault(c, set()).add(b)
 | 
| 50 |                 # Any block that has a next pointer leading to c is also
 | 
| 51 |                 # dominated because the whole chain will be emitted at once.
 | 
| 52 |                 # Walk backwards and add them all.
 | 
| 53 |                 if c.prev and c.prev[0] is not b:
 | 
| 54 |                     c = c.prev[0]
 | 
| 55 |                 else:
 | 
| 56 |                     break
 | 
| 57 | 
 | 
| 58 |     def find_next():
 | 
| 59 |         # Find a block that can be emitted next.
 | 
| 60 |         for b in remaining:
 | 
| 61 |             for c in dominators[b]:
 | 
| 62 |                 if c in remaining:
 | 
| 63 |                     break # can't emit yet, dominated by a remaining block
 | 
| 64 |             else:
 | 
| 65 |                 return b
 | 
| 66 |         assert 0, 'circular dependency, cannot find next block'
 | 
| 67 | 
 | 
| 68 |     b = start_block
 | 
| 69 |     while 1:
 | 
| 70 |         order.append(b)
 | 
| 71 |         remaining.discard(b)
 | 
| 72 |         if b.next:
 | 
| 73 |             b = b.next[0]
 | 
| 74 |             continue
 | 
| 75 |         elif b is not exit_block and not b.has_unconditional_transfer():
 | 
| 76 |             order.append(exit_block)
 | 
| 77 |         if not remaining:
 | 
| 78 |             break
 | 
| 79 |         b = find_next()
 | 
| 80 |     return order
 | 
| 81 | 
 | 
| 82 | 
 | 
| 83 | def FlattenBlocks(blocks):
 | 
| 84 |     insts = []
 | 
| 85 | 
 | 
| 86 |     pc = 0
 | 
| 87 |     offsets = {}  # block -> bytecode offset
 | 
| 88 |     for b in blocks:
 | 
| 89 |         offsets[b] = pc
 | 
| 90 |         for inst in b.Instructions():
 | 
| 91 |             insts.append(inst)
 | 
| 92 |             if len(inst) == 1:
 | 
| 93 |                 pc += 1
 | 
| 94 |             elif inst[0] != "SET_LINENO":  # arg takes 2 bytes
 | 
| 95 |                 pc += 3
 | 
| 96 |     return insts, offsets
 | 
| 97 | 
 | 
| 98 | 
 | 
| 99 | def PatchJumps(insts, offsets):
 | 
| 100 |     pc = 0
 | 
| 101 |     for i, inst in enumerate(insts):
 | 
| 102 |         if len(inst) == 1:
 | 
| 103 |             pc += 1
 | 
| 104 |         elif inst[0] != "SET_LINENO":
 | 
| 105 |             pc += 3
 | 
| 106 | 
 | 
| 107 |         opname = inst[0]
 | 
| 108 | 
 | 
| 109 |         # Compute jump locations
 | 
| 110 |         if opname in HAS_JREL:
 | 
| 111 |             block_arg = inst[1]
 | 
| 112 |             insts[i] = (opname, offsets[block_arg] - pc)
 | 
| 113 | 
 | 
| 114 |         elif opname in HAS_JABS:
 | 
| 115 |             block_arg = inst[1]
 | 
| 116 |             insts[i] = (opname, offsets[block_arg])
 | 
| 117 | 
 | 
| 118 | 
 | 
| 119 | gBlockCounter = itertools.count()
 | 
| 120 | 
 | 
| 121 | 
 | 
| 122 | class Block(object):
 | 
| 123 | 
 | 
| 124 |     def __init__(self, label=''):
 | 
| 125 |         self.label = label
 | 
| 126 |         self.bid = gBlockCounter.next()
 | 
| 127 |         self.insts = []
 | 
| 128 |         self.outEdges = set()
 | 
| 129 |         self.next = []
 | 
| 130 |         self.prev = []
 | 
| 131 | 
 | 
| 132 |     # BUG FIX: This is needed for deterministic order in sets (and dicts?).
 | 
| 133 |     # See OrderBlocks() below.  remaining is set() of blocks.  If we rely on
 | 
| 134 |     # the default id(), then the output bytecode is NONDETERMINISTIC.
 | 
| 135 |     def __hash__(self):
 | 
| 136 |         return self.bid
 | 
| 137 | 
 | 
| 138 |     def __repr__(self):
 | 
| 139 |         if self.label:
 | 
| 140 |             return "<block %s id=%d>" % (self.label, self.bid)
 | 
| 141 |         else:
 | 
| 142 |             return "<block id=%d>" % (self.bid)
 | 
| 143 | 
 | 
| 144 |     def __str__(self):
 | 
| 145 |         return "<block %s %d:\n%s>" % (
 | 
| 146 |             self.label, self.bid,
 | 
| 147 |             '\n'.join(str(inst) for inst in self.insts))
 | 
| 148 | 
 | 
| 149 |     def emit(self, inst):
 | 
| 150 |         op = inst[0]
 | 
| 151 |         self.insts.append(inst)
 | 
| 152 | 
 | 
| 153 |     def Instructions(self):
 | 
| 154 |         return self.insts
 | 
| 155 | 
 | 
| 156 |     def addOutEdge(self, block):
 | 
| 157 |         self.outEdges.add(block)
 | 
| 158 | 
 | 
| 159 |     def addNext(self, block):
 | 
| 160 |         self.next.append(block)
 | 
| 161 |         assert len(self.next) == 1, [str(b) for b in self.next]
 | 
| 162 |         block.prev.append(self)
 | 
| 163 |         assert len(block.prev) == 1, [str(b) for b in block.prev]
 | 
| 164 | 
 | 
| 165 |     _uncond_transfer = ('RETURN_VALUE', 'RAISE_VARARGS',
 | 
| 166 |                         'JUMP_ABSOLUTE', 'JUMP_FORWARD', 'CONTINUE_LOOP',
 | 
| 167 |                         )
 | 
| 168 | 
 | 
| 169 |     def has_unconditional_transfer(self):
 | 
| 170 |         """Returns True if there is an unconditional transfer to an other block
 | 
| 171 |         at the end of this block. This means there is no risk for the bytecode
 | 
| 172 |         executer to go past this block's bytecode."""
 | 
| 173 |         try:
 | 
| 174 |             op, arg = self.insts[-1]
 | 
| 175 |         except (IndexError, ValueError):
 | 
| 176 |             return
 | 
| 177 |         return op in self._uncond_transfer
 | 
| 178 | 
 | 
| 179 |     def get_children(self):
 | 
| 180 |         return list(self.outEdges) + self.next
 | 
| 181 | 
 | 
| 182 |     def get_followers(self):
 | 
| 183 |         """Get the whole list of followers, including the next block."""
 | 
| 184 |         followers = set(self.next)
 | 
| 185 |         # Blocks that must be emitted *after* this one, because of
 | 
| 186 |         # bytecode offsets (e.g. relative jumps) pointing to them.
 | 
| 187 |         for inst in self.insts:
 | 
| 188 |             if inst[0] in HAS_JREL:
 | 
| 189 |                 followers.add(inst[1])
 | 
| 190 |         return followers
 | 
| 191 | 
 | 
| 192 |     def getContainedGraphs(self):
 | 
| 193 |         """Return all graphs contained within this block.
 | 
| 194 | 
 | 
| 195 |         For example, a MAKE_FUNCTION block will contain a reference to
 | 
| 196 |         the graph for the function body.
 | 
| 197 |         """
 | 
| 198 |         raise AssertionError('unused')
 | 
| 199 |         contained = []
 | 
| 200 |         for inst in self.insts:
 | 
| 201 |             if len(inst) == 1:
 | 
| 202 |                 continue
 | 
| 203 |             op = inst[1]
 | 
| 204 |             if hasattr(op, 'graph'):
 | 
| 205 |                 contained.append(op.graph)
 | 
| 206 |         return contained
 | 
| 207 | 
 | 
| 208 | 
 | 
| 209 | class FlowGraph(object):
 | 
| 210 | 
 | 
| 211 |     def __init__(self):
 | 
| 212 |         self.current = self.entry = Block()
 | 
| 213 |         self.exit = Block("exit")
 | 
| 214 |         self.blocks = set()
 | 
| 215 |         self.blocks.add(self.entry)
 | 
| 216 |         self.blocks.add(self.exit)
 | 
| 217 | 
 | 
| 218 |     DEBUG = False
 | 
| 219 | 
 | 
| 220 |     def startBlock(self, block):
 | 
| 221 |         if self.DEBUG:
 | 
| 222 |             if self.current:
 | 
| 223 |                 print("end", repr(self.current))
 | 
| 224 |                 print("    next", self.current.next)
 | 
| 225 |                 print("    prev", self.current.prev)
 | 
| 226 |                 print("   ", self.current.get_children())
 | 
| 227 |             print(repr(block))
 | 
| 228 |         self.current = block
 | 
| 229 | 
 | 
| 230 |     def nextBlock(self, block=None):
 | 
| 231 |         # XXX think we need to specify when there is implicit transfer
 | 
| 232 |         # from one block to the next.  might be better to represent this
 | 
| 233 |         # with explicit JUMP_ABSOLUTE instructions that are optimized
 | 
| 234 |         # out when they are unnecessary.
 | 
| 235 |         #
 | 
| 236 |         # I think this strategy works: each block has a child
 | 
| 237 |         # designated as "next" which is returned as the last of the
 | 
| 238 |         # children.  because the nodes in a graph are emitted in
 | 
| 239 |         # reverse post order, the "next" block will always be emitted
 | 
| 240 |         # immediately after its parent.
 | 
| 241 |         # Worry: maintaining this invariant could be tricky
 | 
| 242 |         if block is None:
 | 
| 243 |             block = self.newBlock()
 | 
| 244 | 
 | 
| 245 |         # Note: If the current block ends with an unconditional control
 | 
| 246 |         # transfer, then it is technically incorrect to add an implicit
 | 
| 247 |         # transfer to the block graph. Doing so results in code generation
 | 
| 248 |         # for unreachable blocks.  That doesn't appear to be very common
 | 
| 249 |         # with Python code and since the built-in compiler doesn't optimize
 | 
| 250 |         # it out we don't either.
 | 
| 251 |         self.current.addNext(block)
 | 
| 252 |         self.startBlock(block)
 | 
| 253 | 
 | 
| 254 |     def newBlock(self):
 | 
| 255 |         b = Block()
 | 
| 256 |         self.blocks.add(b)
 | 
| 257 |         return b
 | 
| 258 | 
 | 
| 259 |     def startExitBlock(self):
 | 
| 260 |         self.startBlock(self.exit)
 | 
| 261 | 
 | 
| 262 |     def emit(self, *inst):
 | 
| 263 |         if self.DEBUG:
 | 
| 264 |             print("\t", inst)
 | 
| 265 |         if len(inst) == 2 and isinstance(inst[1], Block):
 | 
| 266 |             self.current.addOutEdge(inst[1])
 | 
| 267 |         self.current.emit(inst)
 | 
| 268 | 
 | 
| 269 |     def getContainedGraphs(self):
 | 
| 270 |         raise AssertionError('unused')
 | 
| 271 |         l = []
 | 
| 272 |         for b in self.getBlocks():
 | 
| 273 |             l.extend(b.getContainedGraphs())
 | 
| 274 |         return l
 | 
| 275 | 
 | 
| 276 | 
 | 
| 277 | class Frame(object):
 | 
| 278 |     """Something that gets turned into a single code object.
 | 
| 279 | 
 | 
| 280 |     Code objects and consts are mutually recursive.
 | 
| 281 |     """
 | 
| 282 | 
 | 
| 283 |     def __init__(self, name, filename, optimized=0, klass=None):
 | 
| 284 |         """
 | 
| 285 |         Args:
 | 
| 286 |           klass: Whether we're compiling a class block.
 | 
| 287 |         """
 | 
| 288 |         self.name = name  # name that is put in the code object
 | 
| 289 |         self.filename = filename
 | 
| 290 |         self.flags = (CO_OPTIMIZED | CO_NEWLOCALS) if optimized else 0
 | 
| 291 |         self.klass = klass
 | 
| 292 | 
 | 
| 293 |         # Mutated by setArgs()
 | 
| 294 |         self.varnames = []
 | 
| 295 |         self.argcount = 0
 | 
| 296 | 
 | 
| 297 |         # Mutated by setVars().  Free variables found by the symbol table scan,
 | 
| 298 |         # including variables used only in nested scopes, are included here.
 | 
| 299 |         self.freevars = []
 | 
| 300 |         self.cellvars = []
 | 
| 301 | 
 | 
| 302 |         self.docstring = None
 | 
| 303 | 
 | 
| 304 |     def setArgs(self, args):
 | 
| 305 |         """Only called by functions, not modules or classes."""
 | 
| 306 |         assert not self.varnames   # Nothing should have been added
 | 
| 307 |         if args:
 | 
| 308 |             self.varnames = list(args)
 | 
| 309 |             self.argcount = len(args)
 | 
| 310 | 
 | 
| 311 |     def setVars(self, freevars, cellvars):
 | 
| 312 |         self.freevars = freevars
 | 
| 313 |         self.cellvars = cellvars
 | 
| 314 | 
 | 
| 315 |     def setDocstring(self, doc):
 | 
| 316 |         self.docstring = doc
 | 
| 317 | 
 | 
| 318 |     def setFlag(self, flag):
 | 
| 319 |         self.flags |= flag
 | 
| 320 | 
 | 
| 321 |     def checkFlag(self, flag):
 | 
| 322 |         return bool(self.flags & flag)
 | 
| 323 | 
 | 
| 324 |     def NumLocals(self):
 | 
| 325 |         return len(self.varnames) if self.flags & CO_NEWLOCALS else 0
 | 
| 326 | 
 | 
| 327 |     def ArgCount(self):
 | 
| 328 |         argcount = self.argcount
 | 
| 329 |         if self.flags & CO_VARKEYWORDS:
 | 
| 330 |             argcount -= 1
 | 
| 331 |         if self.flags & CO_VARARGS:
 | 
| 332 |             argcount -= 1
 | 
| 333 |         return argcount
 | 
| 334 | 
 | 
| 335 | 
 | 
| 336 | def ReorderCellVars(frame):
 | 
| 337 |     """Reorder cellvars so the ones in self.varnames are first.
 | 
| 338 | 
 | 
| 339 |     And prune from freevars (?)
 | 
| 340 |     """
 | 
| 341 |     lookup = set(frame.cellvars)
 | 
| 342 |     remaining = lookup - set(frame.varnames)
 | 
| 343 | 
 | 
| 344 |     cellvars = [n for n in frame.varnames if n in lookup]
 | 
| 345 |     cellvars.extend(remaining)
 | 
| 346 |     return cellvars
 | 
| 347 | 
 | 
| 348 | 
 | 
| 349 | def _NameToIndex(name, L):
 | 
| 350 |     """Return index of name in list, appending if necessary
 | 
| 351 | 
 | 
| 352 |     This routine uses a list instead of a dictionary, because a
 | 
| 353 |     dictionary can't store two different keys if the keys have the
 | 
| 354 |     same value but different types, e.g. 2 and 2L.  The compiler
 | 
| 355 |     must treat these two separately, so it does an explicit type
 | 
| 356 |     comparison before comparing the values.
 | 
| 357 |     """
 | 
| 358 |     t = type(name)
 | 
| 359 |     for i, item in enumerate(L):
 | 
| 360 |         if t == type(item) and item == name:
 | 
| 361 |             return i
 | 
| 362 |     end = len(L)
 | 
| 363 |     L.append(name)
 | 
| 364 |     return end
 | 
| 365 | 
 | 
| 366 | 
 | 
| 367 | class ArgEncoder(object):
 | 
| 368 |     """Replaces arg objects with indices."""
 | 
| 369 | 
 | 
| 370 |     def __init__(self, klass, consts, names, varnames, closure):
 | 
| 371 |         """
 | 
| 372 |         Args:
 | 
| 373 |           consts ... closure are all potentially mutated!
 | 
| 374 |         """
 | 
| 375 |         self.klass = klass
 | 
| 376 |         self.consts = consts
 | 
| 377 |         self.names = names
 | 
| 378 |         self.varnames = varnames
 | 
| 379 |         self.closure = closure
 | 
| 380 | 
 | 
| 381 |     def Run(self, insts):
 | 
| 382 |         """Mutates insts."""
 | 
| 383 |         for i, t in enumerate(insts):
 | 
| 384 |             if len(t) == 2:
 | 
| 385 |                 opname, oparg = t
 | 
| 386 |                 method = self._converters.get(opname, None)
 | 
| 387 |                 if method:
 | 
| 388 |                     arg_index = method(self, oparg)
 | 
| 389 |                     insts[i] = opname, arg_index
 | 
| 390 | 
 | 
| 391 |     # TODO: This should just be a simple switch
 | 
| 392 | 
 | 
| 393 |     def _convert_LOAD_CONST(self, arg):
 | 
| 394 |         return _NameToIndex(arg, self.consts)
 | 
| 395 | 
 | 
| 396 |     def _convert_LOAD_FAST(self, arg):
 | 
| 397 |         _NameToIndex(arg, self.names)
 | 
| 398 |         return _NameToIndex(arg, self.varnames)
 | 
| 399 |     _convert_STORE_FAST = _convert_LOAD_FAST
 | 
| 400 |     _convert_DELETE_FAST = _convert_LOAD_FAST
 | 
| 401 | 
 | 
| 402 |     def _convert_LOAD_NAME(self, arg):
 | 
| 403 |         # TODO: This is wrong.  It leads to too many files.
 | 
| 404 |         # https://github.com/oilshell/oil/issues/180
 | 
| 405 |         if self.klass is None:
 | 
| 406 |             _NameToIndex(arg, self.varnames)
 | 
| 407 |         return _NameToIndex(arg, self.names)
 | 
| 408 | 
 | 
| 409 |     def _convert_NAME(self, arg):
 | 
| 410 |         if self.klass is None:
 | 
| 411 |             _NameToIndex(arg, self.varnames)
 | 
| 412 |         return _NameToIndex(arg, self.names)
 | 
| 413 |     _convert_STORE_NAME = _convert_NAME
 | 
| 414 |     _convert_DELETE_NAME = _convert_NAME
 | 
| 415 |     _convert_IMPORT_NAME = _convert_NAME
 | 
| 416 |     _convert_IMPORT_FROM = _convert_NAME
 | 
| 417 |     _convert_STORE_ATTR = _convert_NAME
 | 
| 418 |     _convert_LOAD_ATTR = _convert_NAME
 | 
| 419 |     _convert_DELETE_ATTR = _convert_NAME
 | 
| 420 |     _convert_LOAD_GLOBAL = _convert_NAME
 | 
| 421 |     _convert_STORE_GLOBAL = _convert_NAME
 | 
| 422 |     _convert_DELETE_GLOBAL = _convert_NAME
 | 
| 423 | 
 | 
| 424 |     def _convert_DEREF(self, arg):
 | 
| 425 |         _NameToIndex(arg, self.names)
 | 
| 426 |         _NameToIndex(arg, self.varnames)
 | 
| 427 |         return _NameToIndex(arg, self.closure)
 | 
| 428 |     _convert_LOAD_DEREF = _convert_DEREF
 | 
| 429 |     _convert_STORE_DEREF = _convert_DEREF
 | 
| 430 | 
 | 
| 431 |     def _convert_LOAD_CLOSURE(self, arg):
 | 
| 432 |         _NameToIndex(arg, self.varnames)
 | 
| 433 |         return _NameToIndex(arg, self.closure)
 | 
| 434 | 
 | 
| 435 |     _cmp = list(dis.cmp_op)
 | 
| 436 |     def _convert_COMPARE_OP(self, arg):
 | 
| 437 |         return self._cmp.index(arg)
 | 
| 438 | 
 | 
| 439 |     _converters = {}
 | 
| 440 | 
 | 
| 441 |     # similarly for other opcodes...
 | 
| 442 | 
 | 
| 443 |     for name, obj in locals().items():
 | 
| 444 |         if name[:9] == "_convert_":
 | 
| 445 |             opname = name[9:]
 | 
| 446 |             _converters[opname] = obj
 | 
| 447 |     del name, obj, opname
 | 
| 448 | 
 | 
| 449 | 
 | 
| 450 | class Assembler(object):
 | 
| 451 |     """Builds co_code and lnotab.
 | 
| 452 | 
 | 
| 453 |     This class builds the lnotab, which is documented in compile.c.  Here's a
 | 
| 454 |     brief recap:
 | 
| 455 | 
 | 
| 456 |     For each SET_LINENO instruction after the first one, two bytes are added to
 | 
| 457 |     lnotab.  (In some cases, multiple two-byte entries are added.)  The first
 | 
| 458 |     byte is the distance in bytes between the instruction for the last
 | 
| 459 |     SET_LINENO and the current SET_LINENO.  The second byte is offset in line
 | 
| 460 |     numbers.  If either offset is greater than 255, multiple two-byte entries
 | 
| 461 |     are added -- see compile.c for the delicate details.
 | 
| 462 |     """
 | 
| 463 |     def __init__(self):
 | 
| 464 |         self.code = []
 | 
| 465 |         self.codeOffset = 0
 | 
| 466 |         self.firstline = 0
 | 
| 467 |         self.lastline = 0
 | 
| 468 |         self.lastoff = 0
 | 
| 469 |         self.lnotab = []
 | 
| 470 | 
 | 
| 471 |     def addCode(self, *args):
 | 
| 472 |         for arg in args:
 | 
| 473 |             self.code.append(chr(arg))
 | 
| 474 |         self.codeOffset += len(args)
 | 
| 475 | 
 | 
| 476 |     def nextLine(self, lineno):
 | 
| 477 |         if self.firstline == 0:
 | 
| 478 |             self.firstline = lineno
 | 
| 479 |             self.lastline = lineno
 | 
| 480 |         else:
 | 
| 481 |             # compute deltas
 | 
| 482 |             addr = self.codeOffset - self.lastoff
 | 
| 483 |             line = lineno - self.lastline
 | 
| 484 |             # Python assumes that lineno always increases with
 | 
| 485 |             # increasing bytecode address (lnotab is unsigned char).
 | 
| 486 |             # Depending on when SET_LINENO instructions are emitted
 | 
| 487 |             # this is not always true.  Consider the code:
 | 
| 488 |             #     a = (1,
 | 
| 489 |             #          b)
 | 
| 490 |             # In the bytecode stream, the assignment to "a" occurs
 | 
| 491 |             # after the loading of "b".  This works with the C Python
 | 
| 492 |             # compiler because it only generates a SET_LINENO instruction
 | 
| 493 |             # for the assignment.
 | 
| 494 |             if line >= 0:
 | 
| 495 |                 push = self.lnotab.append
 | 
| 496 |                 while addr > 255:
 | 
| 497 |                     push(255); push(0)
 | 
| 498 |                     addr -= 255
 | 
| 499 |                 while line > 255:
 | 
| 500 |                     push(addr); push(255)
 | 
| 501 |                     line -= 255
 | 
| 502 |                     addr = 0
 | 
| 503 |                 if addr > 0 or line > 0:
 | 
| 504 |                     push(addr); push(line)
 | 
| 505 |                 self.lastline = lineno
 | 
| 506 |                 self.lastoff = self.codeOffset
 | 
| 507 | 
 | 
| 508 |     def Run(self, insts):
 | 
| 509 |         for t in insts:
 | 
| 510 |             opname = t[0]
 | 
| 511 |             if len(t) == 1:
 | 
| 512 |                 self.addCode(OPNUM[opname])
 | 
| 513 |             else:
 | 
| 514 |                 oparg = t[1]
 | 
| 515 |                 if opname == "SET_LINENO":
 | 
| 516 |                     self.nextLine(oparg)
 | 
| 517 |                     continue
 | 
| 518 |                 hi, lo = divmod(oparg, 256)
 | 
| 519 |                 try:
 | 
| 520 |                     self.addCode(OPNUM[opname], lo, hi)
 | 
| 521 |                 except ValueError:
 | 
| 522 |                     print(opname, oparg)
 | 
| 523 |                     print(OPNUM[opname], lo, hi)
 | 
| 524 |                     raise
 | 
| 525 | 
 | 
| 526 |         bytecode = ''.join(self.code)
 | 
| 527 |         lnotab = ''.join(chr(c) for c in self.lnotab)
 | 
| 528 |         return bytecode, self.firstline, lnotab
 | 
| 529 | 
 | 
| 530 | 
 | 
| 531 | class BlockStackDepth(object):
 | 
| 532 |     # XXX 1. need to keep track of stack depth on jumps
 | 
| 533 |     # XXX 2. at least partly as a result, this code is broken
 | 
| 534 | 
 | 
| 535 |     def Sum(self, insts, debug=0):
 | 
| 536 |         depth = 0
 | 
| 537 |         maxDepth = 0
 | 
| 538 | 
 | 
| 539 |         for inst in insts:
 | 
| 540 |             opname = inst[0]
 | 
| 541 |             if debug:
 | 
| 542 |                 print(inst, end=' ')
 | 
| 543 | 
 | 
| 544 |             delta = self.effect.get(opname, None)
 | 
| 545 |             if delta is None:
 | 
| 546 |                 # now check patterns
 | 
| 547 |                 for pat, pat_delta in self.patterns:
 | 
| 548 |                     if opname[:len(pat)] == pat:
 | 
| 549 |                         delta = pat_delta
 | 
| 550 |                         break
 | 
| 551 | 
 | 
| 552 |             if delta is None:
 | 
| 553 |                 meth = getattr(self, opname, None)
 | 
| 554 |                 if meth is not None:
 | 
| 555 |                     delta = meth(inst[1])
 | 
| 556 | 
 | 
| 557 |             if delta is None:
 | 
| 558 |                 # Jumps missing here.
 | 
| 559 |                 #assert opname in ('POP_JUMP_IF_FALSE', 'SET_LINENO'), opname
 | 
| 560 |                 delta = 0
 | 
| 561 | 
 | 
| 562 |             depth += delta
 | 
| 563 |             maxDepth = max(depth, maxDepth)
 | 
| 564 | 
 | 
| 565 |             if debug:
 | 
| 566 |                 print('%s %s' % (depth, maxDepth))
 | 
| 567 |         return maxDepth
 | 
| 568 | 
 | 
| 569 |     effect = {
 | 
| 570 |         'POP_TOP': -1,
 | 
| 571 |         'DUP_TOP': 1,
 | 
| 572 |         'LIST_APPEND': -1,
 | 
| 573 |         'SET_ADD': -1,
 | 
| 574 |         'MAP_ADD': -2,
 | 
| 575 |         'SLICE+1': -1,
 | 
| 576 |         'SLICE+2': -1,
 | 
| 577 |         'SLICE+3': -2,
 | 
| 578 |         'STORE_SLICE+0': -1,
 | 
| 579 |         'STORE_SLICE+1': -2,
 | 
| 580 |         'STORE_SLICE+2': -2,
 | 
| 581 |         'STORE_SLICE+3': -3,
 | 
| 582 |         'DELETE_SLICE+0': -1,
 | 
| 583 |         'DELETE_SLICE+1': -2,
 | 
| 584 |         'DELETE_SLICE+2': -2,
 | 
| 585 |         'DELETE_SLICE+3': -3,
 | 
| 586 |         'STORE_SUBSCR': -3,
 | 
| 587 |         'DELETE_SUBSCR': -2,
 | 
| 588 |         # PRINT_EXPR?
 | 
| 589 |         'PRINT_ITEM': -1,
 | 
| 590 |         'RETURN_VALUE': -1,
 | 
| 591 |         'YIELD_VALUE': -1,
 | 
| 592 |         'EXEC_STMT': -3,
 | 
| 593 |         'BUILD_CLASS': -2,
 | 
| 594 |         'STORE_NAME': -1,
 | 
| 595 |         'STORE_ATTR': -2,
 | 
| 596 |         'DELETE_ATTR': -1,
 | 
| 597 |         'STORE_GLOBAL': -1,
 | 
| 598 |         'BUILD_MAP': 1,
 | 
| 599 |         'COMPARE_OP': -1,
 | 
| 600 |         'STORE_FAST': -1,
 | 
| 601 |         'IMPORT_STAR': -1,
 | 
| 602 |         'IMPORT_NAME': -1,
 | 
| 603 |         'IMPORT_FROM': 1,
 | 
| 604 |         'LOAD_ATTR': 0, # unlike other loads
 | 
| 605 |         # close enough...
 | 
| 606 |         'SETUP_EXCEPT': 3,
 | 
| 607 |         'SETUP_FINALLY': 3,
 | 
| 608 |         'FOR_ITER': 1,
 | 
| 609 |         'WITH_CLEANUP': -1,
 | 
| 610 |         }
 | 
| 611 |     # use pattern match
 | 
| 612 |     patterns = [
 | 
| 613 |         ('BINARY_', -1),
 | 
| 614 |         ('LOAD_', 1),
 | 
| 615 |         ]
 | 
| 616 | 
 | 
| 617 |     def UNPACK_SEQUENCE(self, count):
 | 
| 618 |         return count-1
 | 
| 619 |     def BUILD_TUPLE(self, count):
 | 
| 620 |         return -count+1
 | 
| 621 |     def BUILD_LIST(self, count):
 | 
| 622 |         return -count+1
 | 
| 623 |     def BUILD_SET(self, count):
 | 
| 624 |         return -count+1
 | 
| 625 |     def CALL_FUNCTION(self, argc):
 | 
| 626 |         hi, lo = divmod(argc, 256)
 | 
| 627 |         return -(lo + hi * 2)
 | 
| 628 |     def CALL_FUNCTION_VAR(self, argc):
 | 
| 629 |         return self.CALL_FUNCTION(argc)-1
 | 
| 630 |     def CALL_FUNCTION_KW(self, argc):
 | 
| 631 |         return self.CALL_FUNCTION(argc)-1
 | 
| 632 |     def CALL_FUNCTION_VAR_KW(self, argc):
 | 
| 633 |         return self.CALL_FUNCTION(argc)-2
 | 
| 634 |     def MAKE_FUNCTION(self, argc):
 | 
| 635 |         return -argc
 | 
| 636 |     def MAKE_CLOSURE(self, argc):
 | 
| 637 |         # XXX need to account for free variables too!
 | 
| 638 |         return -argc
 | 
| 639 |     def BUILD_SLICE(self, argc):
 | 
| 640 |         if argc == 2:
 | 
| 641 |             return -1
 | 
| 642 |         elif argc == 3:
 | 
| 643 |             return -2
 | 
| 644 |     def DUP_TOPX(self, argc):
 | 
| 645 |         return argc
 | 
| 646 | 
 | 
| 647 | 
 | 
| 648 | class _GraphStackDepth(object):
 | 
| 649 |     """Walk the CFG, computing the maximum stack depth."""
 | 
| 650 | 
 | 
| 651 |     def __init__(self, depths, exit_block):
 | 
| 652 |         """
 | 
| 653 |         Args:
 | 
| 654 |            depths: is the stack effect of each basic block.  Then find the path
 | 
| 655 |            through the code with the largest total effect.
 | 
| 656 |         """
 | 
| 657 |         self.depths = depths
 | 
| 658 |         self.exit_block = exit_block
 | 
| 659 |         self.seen = set()
 | 
| 660 | 
 | 
| 661 |     def Max(self, block, d):
 | 
| 662 |         if block in self.seen:
 | 
| 663 |             return d
 | 
| 664 |         self.seen.add(block)
 | 
| 665 | 
 | 
| 666 |         d += self.depths[block]
 | 
| 667 | 
 | 
| 668 |         children = block.get_children()
 | 
| 669 |         if children:
 | 
| 670 |             return max(self.Max(c, d) for c in children)
 | 
| 671 | 
 | 
| 672 |         if block.label == "exit":
 | 
| 673 |             return d
 | 
| 674 | 
 | 
| 675 |         return self.Max(self.exit_block, d)
 | 
| 676 | 
 | 
| 677 | 
 | 
| 678 | def MaxStackDepth(block_depths, entry_block, exit_block):
 | 
| 679 |     """Compute maximum stack depth for any path through the CFG."""
 | 
| 680 |     g = _GraphStackDepth(block_depths, exit_block)
 | 
| 681 |     return g.Max(entry_block, 0)
 | 
| 682 | 
 | 
| 683 | 
 | 
| 684 | def MakeCodeObject(frame, graph, comp_opt):
 | 
| 685 |     """Order blocks, encode instructions, and create types.CodeType().
 | 
| 686 | 
 | 
| 687 |     Called by Compile below, and also recursively by ArgEncoder.
 | 
| 688 |     """
 | 
| 689 |     # Compute stack depth per basic block.
 | 
| 690 |     b = BlockStackDepth()
 | 
| 691 |     block_depths = {
 | 
| 692 |         block: b.Sum(block.Instructions()) for block in graph.blocks
 | 
| 693 |     }
 | 
| 694 | 
 | 
| 695 |     stacksize = MaxStackDepth(block_depths, graph.entry, graph.exit)
 | 
| 696 | 
 | 
| 697 |     # Order blocks so jump offsets can be encoded.
 | 
| 698 |     blocks = OrderBlocks(graph.entry, graph.exit)
 | 
| 699 | 
 | 
| 700 |     # Produce a stream of initial instructions.
 | 
| 701 |     insts, block_offsets = FlattenBlocks(blocks)
 | 
| 702 | 
 | 
| 703 |     # Now that we know the offsets, make another pass.
 | 
| 704 |     PatchJumps(insts, block_offsets)
 | 
| 705 | 
 | 
| 706 |     # What variables should be available at runtime?
 | 
| 707 | 
 | 
| 708 |     cellvars = ReorderCellVars(frame)
 | 
| 709 | 
 | 
| 710 |     # NOTE: Modules docstrings are assigned to __doc__ in pycodegen.py.visitModule.
 | 
| 711 |     consts = []
 | 
| 712 |     if comp_opt.emit_docstring:
 | 
| 713 |       consts.append(frame.docstring)
 | 
| 714 | 
 | 
| 715 |     names = []
 | 
| 716 |     # The closure list is used to track the order of cell variables and
 | 
| 717 |     # free variables in the resulting code object.  The offsets used by
 | 
| 718 |     # LOAD_CLOSURE/LOAD_DEREF refer to both kinds of variables.
 | 
| 719 |     closure = cellvars + frame.freevars
 | 
| 720 | 
 | 
| 721 |     # Convert arguments from symbolic to concrete form.
 | 
| 722 |     enc = ArgEncoder(frame.klass, consts, names, frame.varnames,
 | 
| 723 |                      closure)
 | 
| 724 |     # Mutates not only insts, but also appends to consts, names, etc.
 | 
| 725 |     enc.Run(insts)
 | 
| 726 | 
 | 
| 727 |     # Binary encoding.
 | 
| 728 |     a = Assembler()
 | 
| 729 |     bytecode, firstline, lnotab = a.Run(insts)
 | 
| 730 | 
 | 
| 731 |     return types.CodeType(
 | 
| 732 |         frame.ArgCount(), frame.NumLocals(), stacksize, frame.flags,
 | 
| 733 |         bytecode,
 | 
| 734 |         tuple(consts),
 | 
| 735 |         tuple(names),
 | 
| 736 |         tuple(frame.varnames),
 | 
| 737 |         frame.filename, frame.name, firstline,
 | 
| 738 |         lnotab,
 | 
| 739 |         tuple(frame.freevars),
 | 
| 740 |         tuple(cellvars))
 |