| 1 | """A flow graph representation for Python bytecode"""
|
| 2 | from __future__ import print_function
|
| 3 |
|
| 4 | import itertools
|
| 5 | import types
|
| 6 |
|
| 7 | from .consts import CO_OPTIMIZED, CO_NEWLOCALS, CO_VARARGS, CO_VARKEYWORDS
|
| 8 | from opy.lib import dis
|
| 9 |
|
| 10 |
|
| 11 | HAS_JREL = set(dis.opname[op] for op in dis.hasjrel)
|
| 12 | HAS_JABS = set(dis.opname[op] for op in dis.hasjabs)
|
| 13 | OPNUM = dict((name, i) for i, name in enumerate(dis.opname))
|
| 14 |
|
| 15 |
|
| 16 | def OrderBlocks(start_block, exit_block):
|
| 17 | """Order blocks so that they are emitted in the right order"""
|
| 18 | # Rules:
|
| 19 | # - when a block has a next block, the next block must be emitted just after
|
| 20 | # - when a block has followers (relative jumps), it must be emitted before
|
| 21 | # them
|
| 22 | # - all reachable blocks must be emitted
|
| 23 | order = []
|
| 24 |
|
| 25 | # Find all the blocks to be emitted.
|
| 26 | remaining = set()
|
| 27 | todo = [start_block]
|
| 28 | while todo:
|
| 29 | b = todo.pop()
|
| 30 | if b in remaining:
|
| 31 | continue
|
| 32 | remaining.add(b)
|
| 33 | for c in b.get_children():
|
| 34 | if c not in remaining:
|
| 35 | todo.append(c)
|
| 36 |
|
| 37 | # A block is dominated by another block if that block must be emitted
|
| 38 | # before it.
|
| 39 | dominators = {}
|
| 40 | for b in remaining:
|
| 41 | if __debug__ and b.next:
|
| 42 | assert b is b.next[0].prev[0], (b, b.next)
|
| 43 | # Make sure every block appears in dominators, even if no
|
| 44 | # other block must precede it.
|
| 45 | dominators.setdefault(b, set())
|
| 46 | # preceding blocks dominate following blocks
|
| 47 | for c in b.get_followers():
|
| 48 | while 1:
|
| 49 | dominators.setdefault(c, set()).add(b)
|
| 50 | # Any block that has a next pointer leading to c is also
|
| 51 | # dominated because the whole chain will be emitted at once.
|
| 52 | # Walk backwards and add them all.
|
| 53 | if c.prev and c.prev[0] is not b:
|
| 54 | c = c.prev[0]
|
| 55 | else:
|
| 56 | break
|
| 57 |
|
| 58 | def find_next():
|
| 59 | # Find a block that can be emitted next.
|
| 60 | for b in remaining:
|
| 61 | for c in dominators[b]:
|
| 62 | if c in remaining:
|
| 63 | break # can't emit yet, dominated by a remaining block
|
| 64 | else:
|
| 65 | return b
|
| 66 | assert 0, 'circular dependency, cannot find next block'
|
| 67 |
|
| 68 | b = start_block
|
| 69 | while 1:
|
| 70 | order.append(b)
|
| 71 | remaining.discard(b)
|
| 72 | if b.next:
|
| 73 | b = b.next[0]
|
| 74 | continue
|
| 75 | elif b is not exit_block and not b.has_unconditional_transfer():
|
| 76 | order.append(exit_block)
|
| 77 | if not remaining:
|
| 78 | break
|
| 79 | b = find_next()
|
| 80 | return order
|
| 81 |
|
| 82 |
|
| 83 | def FlattenBlocks(blocks):
|
| 84 | insts = []
|
| 85 |
|
| 86 | pc = 0
|
| 87 | offsets = {} # block -> bytecode offset
|
| 88 | for b in blocks:
|
| 89 | offsets[b] = pc
|
| 90 | for inst in b.Instructions():
|
| 91 | insts.append(inst)
|
| 92 | if len(inst) == 1:
|
| 93 | pc += 1
|
| 94 | elif inst[0] != "SET_LINENO": # arg takes 2 bytes
|
| 95 | pc += 3
|
| 96 | return insts, offsets
|
| 97 |
|
| 98 |
|
| 99 | def PatchJumps(insts, offsets):
|
| 100 | pc = 0
|
| 101 | for i, inst in enumerate(insts):
|
| 102 | if len(inst) == 1:
|
| 103 | pc += 1
|
| 104 | elif inst[0] != "SET_LINENO":
|
| 105 | pc += 3
|
| 106 |
|
| 107 | opname = inst[0]
|
| 108 |
|
| 109 | # Compute jump locations
|
| 110 | if opname in HAS_JREL:
|
| 111 | block_arg = inst[1]
|
| 112 | insts[i] = (opname, offsets[block_arg] - pc)
|
| 113 |
|
| 114 | elif opname in HAS_JABS:
|
| 115 | block_arg = inst[1]
|
| 116 | insts[i] = (opname, offsets[block_arg])
|
| 117 |
|
| 118 |
|
| 119 | gBlockCounter = itertools.count()
|
| 120 |
|
| 121 |
|
| 122 | class Block(object):
|
| 123 |
|
| 124 | def __init__(self, label=''):
|
| 125 | self.label = label
|
| 126 | self.bid = gBlockCounter.next()
|
| 127 | self.insts = []
|
| 128 | self.outEdges = set()
|
| 129 | self.next = []
|
| 130 | self.prev = []
|
| 131 |
|
| 132 | # BUG FIX: This is needed for deterministic order in sets (and dicts?).
|
| 133 | # See OrderBlocks() below. remaining is set() of blocks. If we rely on
|
| 134 | # the default id(), then the output bytecode is NONDETERMINISTIC.
|
| 135 | def __hash__(self):
|
| 136 | return self.bid
|
| 137 |
|
| 138 | def __repr__(self):
|
| 139 | if self.label:
|
| 140 | return "<block %s id=%d>" % (self.label, self.bid)
|
| 141 | else:
|
| 142 | return "<block id=%d>" % (self.bid)
|
| 143 |
|
| 144 | def __str__(self):
|
| 145 | return "<block %s %d:\n%s>" % (
|
| 146 | self.label, self.bid,
|
| 147 | '\n'.join(str(inst) for inst in self.insts))
|
| 148 |
|
| 149 | def emit(self, inst):
|
| 150 | op = inst[0]
|
| 151 | self.insts.append(inst)
|
| 152 |
|
| 153 | def Instructions(self):
|
| 154 | return self.insts
|
| 155 |
|
| 156 | def addOutEdge(self, block):
|
| 157 | self.outEdges.add(block)
|
| 158 |
|
| 159 | def addNext(self, block):
|
| 160 | self.next.append(block)
|
| 161 | assert len(self.next) == 1, [str(b) for b in self.next]
|
| 162 | block.prev.append(self)
|
| 163 | assert len(block.prev) == 1, [str(b) for b in block.prev]
|
| 164 |
|
| 165 | _uncond_transfer = ('RETURN_VALUE', 'RAISE_VARARGS',
|
| 166 | 'JUMP_ABSOLUTE', 'JUMP_FORWARD', 'CONTINUE_LOOP',
|
| 167 | )
|
| 168 |
|
| 169 | def has_unconditional_transfer(self):
|
| 170 | """Returns True if there is an unconditional transfer to an other block
|
| 171 | at the end of this block. This means there is no risk for the bytecode
|
| 172 | executer to go past this block's bytecode."""
|
| 173 | try:
|
| 174 | op, arg = self.insts[-1]
|
| 175 | except (IndexError, ValueError):
|
| 176 | return
|
| 177 | return op in self._uncond_transfer
|
| 178 |
|
| 179 | def get_children(self):
|
| 180 | return list(self.outEdges) + self.next
|
| 181 |
|
| 182 | def get_followers(self):
|
| 183 | """Get the whole list of followers, including the next block."""
|
| 184 | followers = set(self.next)
|
| 185 | # Blocks that must be emitted *after* this one, because of
|
| 186 | # bytecode offsets (e.g. relative jumps) pointing to them.
|
| 187 | for inst in self.insts:
|
| 188 | if inst[0] in HAS_JREL:
|
| 189 | followers.add(inst[1])
|
| 190 | return followers
|
| 191 |
|
| 192 | def getContainedGraphs(self):
|
| 193 | """Return all graphs contained within this block.
|
| 194 |
|
| 195 | For example, a MAKE_FUNCTION block will contain a reference to
|
| 196 | the graph for the function body.
|
| 197 | """
|
| 198 | raise AssertionError('unused')
|
| 199 | contained = []
|
| 200 | for inst in self.insts:
|
| 201 | if len(inst) == 1:
|
| 202 | continue
|
| 203 | op = inst[1]
|
| 204 | if hasattr(op, 'graph'):
|
| 205 | contained.append(op.graph)
|
| 206 | return contained
|
| 207 |
|
| 208 |
|
| 209 | class FlowGraph(object):
|
| 210 |
|
| 211 | def __init__(self):
|
| 212 | self.current = self.entry = Block()
|
| 213 | self.exit = Block("exit")
|
| 214 | self.blocks = set()
|
| 215 | self.blocks.add(self.entry)
|
| 216 | self.blocks.add(self.exit)
|
| 217 |
|
| 218 | DEBUG = False
|
| 219 |
|
| 220 | def startBlock(self, block):
|
| 221 | if self.DEBUG:
|
| 222 | if self.current:
|
| 223 | print("end", repr(self.current))
|
| 224 | print(" next", self.current.next)
|
| 225 | print(" prev", self.current.prev)
|
| 226 | print(" ", self.current.get_children())
|
| 227 | print(repr(block))
|
| 228 | self.current = block
|
| 229 |
|
| 230 | def nextBlock(self, block=None):
|
| 231 | # XXX think we need to specify when there is implicit transfer
|
| 232 | # from one block to the next. might be better to represent this
|
| 233 | # with explicit JUMP_ABSOLUTE instructions that are optimized
|
| 234 | # out when they are unnecessary.
|
| 235 | #
|
| 236 | # I think this strategy works: each block has a child
|
| 237 | # designated as "next" which is returned as the last of the
|
| 238 | # children. because the nodes in a graph are emitted in
|
| 239 | # reverse post order, the "next" block will always be emitted
|
| 240 | # immediately after its parent.
|
| 241 | # Worry: maintaining this invariant could be tricky
|
| 242 | if block is None:
|
| 243 | block = self.newBlock()
|
| 244 |
|
| 245 | # Note: If the current block ends with an unconditional control
|
| 246 | # transfer, then it is technically incorrect to add an implicit
|
| 247 | # transfer to the block graph. Doing so results in code generation
|
| 248 | # for unreachable blocks. That doesn't appear to be very common
|
| 249 | # with Python code and since the built-in compiler doesn't optimize
|
| 250 | # it out we don't either.
|
| 251 | self.current.addNext(block)
|
| 252 | self.startBlock(block)
|
| 253 |
|
| 254 | def newBlock(self):
|
| 255 | b = Block()
|
| 256 | self.blocks.add(b)
|
| 257 | return b
|
| 258 |
|
| 259 | def startExitBlock(self):
|
| 260 | self.startBlock(self.exit)
|
| 261 |
|
| 262 | def emit(self, *inst):
|
| 263 | if self.DEBUG:
|
| 264 | print("\t", inst)
|
| 265 | if len(inst) == 2 and isinstance(inst[1], Block):
|
| 266 | self.current.addOutEdge(inst[1])
|
| 267 | self.current.emit(inst)
|
| 268 |
|
| 269 | def getContainedGraphs(self):
|
| 270 | raise AssertionError('unused')
|
| 271 | l = []
|
| 272 | for b in self.getBlocks():
|
| 273 | l.extend(b.getContainedGraphs())
|
| 274 | return l
|
| 275 |
|
| 276 |
|
| 277 | class Frame(object):
|
| 278 | """Something that gets turned into a single code object.
|
| 279 |
|
| 280 | Code objects and consts are mutually recursive.
|
| 281 | """
|
| 282 |
|
| 283 | def __init__(self, name, filename, optimized=0, klass=None):
|
| 284 | """
|
| 285 | Args:
|
| 286 | klass: Whether we're compiling a class block.
|
| 287 | """
|
| 288 | self.name = name # name that is put in the code object
|
| 289 | self.filename = filename
|
| 290 | self.flags = (CO_OPTIMIZED | CO_NEWLOCALS) if optimized else 0
|
| 291 | self.klass = klass
|
| 292 |
|
| 293 | # Mutated by setArgs()
|
| 294 | self.varnames = []
|
| 295 | self.argcount = 0
|
| 296 |
|
| 297 | # Mutated by setVars(). Free variables found by the symbol table scan,
|
| 298 | # including variables used only in nested scopes, are included here.
|
| 299 | self.freevars = []
|
| 300 | self.cellvars = []
|
| 301 |
|
| 302 | self.docstring = None
|
| 303 |
|
| 304 | def setArgs(self, args):
|
| 305 | """Only called by functions, not modules or classes."""
|
| 306 | assert not self.varnames # Nothing should have been added
|
| 307 | if args:
|
| 308 | self.varnames = list(args)
|
| 309 | self.argcount = len(args)
|
| 310 |
|
| 311 | def setVars(self, freevars, cellvars):
|
| 312 | self.freevars = freevars
|
| 313 | self.cellvars = cellvars
|
| 314 |
|
| 315 | def setDocstring(self, doc):
|
| 316 | self.docstring = doc
|
| 317 |
|
| 318 | def setFlag(self, flag):
|
| 319 | self.flags |= flag
|
| 320 |
|
| 321 | def checkFlag(self, flag):
|
| 322 | return bool(self.flags & flag)
|
| 323 |
|
| 324 | def NumLocals(self):
|
| 325 | return len(self.varnames) if self.flags & CO_NEWLOCALS else 0
|
| 326 |
|
| 327 | def ArgCount(self):
|
| 328 | argcount = self.argcount
|
| 329 | if self.flags & CO_VARKEYWORDS:
|
| 330 | argcount -= 1
|
| 331 | if self.flags & CO_VARARGS:
|
| 332 | argcount -= 1
|
| 333 | return argcount
|
| 334 |
|
| 335 |
|
| 336 | def ReorderCellVars(frame):
|
| 337 | """Reorder cellvars so the ones in self.varnames are first.
|
| 338 |
|
| 339 | And prune from freevars (?)
|
| 340 | """
|
| 341 | lookup = set(frame.cellvars)
|
| 342 | remaining = lookup - set(frame.varnames)
|
| 343 |
|
| 344 | cellvars = [n for n in frame.varnames if n in lookup]
|
| 345 | cellvars.extend(remaining)
|
| 346 | return cellvars
|
| 347 |
|
| 348 |
|
| 349 | def _NameToIndex(name, L):
|
| 350 | """Return index of name in list, appending if necessary
|
| 351 |
|
| 352 | This routine uses a list instead of a dictionary, because a
|
| 353 | dictionary can't store two different keys if the keys have the
|
| 354 | same value but different types, e.g. 2 and 2L. The compiler
|
| 355 | must treat these two separately, so it does an explicit type
|
| 356 | comparison before comparing the values.
|
| 357 | """
|
| 358 | t = type(name)
|
| 359 | for i, item in enumerate(L):
|
| 360 | if t == type(item) and item == name:
|
| 361 | return i
|
| 362 | end = len(L)
|
| 363 | L.append(name)
|
| 364 | return end
|
| 365 |
|
| 366 |
|
| 367 | class ArgEncoder(object):
|
| 368 | """Replaces arg objects with indices."""
|
| 369 |
|
| 370 | def __init__(self, klass, consts, names, varnames, closure):
|
| 371 | """
|
| 372 | Args:
|
| 373 | consts ... closure are all potentially mutated!
|
| 374 | """
|
| 375 | self.klass = klass
|
| 376 | self.consts = consts
|
| 377 | self.names = names
|
| 378 | self.varnames = varnames
|
| 379 | self.closure = closure
|
| 380 |
|
| 381 | def Run(self, insts):
|
| 382 | """Mutates insts."""
|
| 383 | for i, t in enumerate(insts):
|
| 384 | if len(t) == 2:
|
| 385 | opname, oparg = t
|
| 386 | method = self._converters.get(opname, None)
|
| 387 | if method:
|
| 388 | arg_index = method(self, oparg)
|
| 389 | insts[i] = opname, arg_index
|
| 390 |
|
| 391 | # TODO: This should just be a simple switch
|
| 392 |
|
| 393 | def _convert_LOAD_CONST(self, arg):
|
| 394 | return _NameToIndex(arg, self.consts)
|
| 395 |
|
| 396 | def _convert_LOAD_FAST(self, arg):
|
| 397 | _NameToIndex(arg, self.names)
|
| 398 | return _NameToIndex(arg, self.varnames)
|
| 399 | _convert_STORE_FAST = _convert_LOAD_FAST
|
| 400 | _convert_DELETE_FAST = _convert_LOAD_FAST
|
| 401 |
|
| 402 | def _convert_LOAD_NAME(self, arg):
|
| 403 | # TODO: This is wrong. It leads to too many files.
|
| 404 | # https://github.com/oilshell/oil/issues/180
|
| 405 | if self.klass is None:
|
| 406 | _NameToIndex(arg, self.varnames)
|
| 407 | return _NameToIndex(arg, self.names)
|
| 408 |
|
| 409 | def _convert_NAME(self, arg):
|
| 410 | if self.klass is None:
|
| 411 | _NameToIndex(arg, self.varnames)
|
| 412 | return _NameToIndex(arg, self.names)
|
| 413 | _convert_STORE_NAME = _convert_NAME
|
| 414 | _convert_DELETE_NAME = _convert_NAME
|
| 415 | _convert_IMPORT_NAME = _convert_NAME
|
| 416 | _convert_IMPORT_FROM = _convert_NAME
|
| 417 | _convert_STORE_ATTR = _convert_NAME
|
| 418 | _convert_LOAD_ATTR = _convert_NAME
|
| 419 | _convert_DELETE_ATTR = _convert_NAME
|
| 420 | _convert_LOAD_GLOBAL = _convert_NAME
|
| 421 | _convert_STORE_GLOBAL = _convert_NAME
|
| 422 | _convert_DELETE_GLOBAL = _convert_NAME
|
| 423 |
|
| 424 | def _convert_DEREF(self, arg):
|
| 425 | _NameToIndex(arg, self.names)
|
| 426 | _NameToIndex(arg, self.varnames)
|
| 427 | return _NameToIndex(arg, self.closure)
|
| 428 | _convert_LOAD_DEREF = _convert_DEREF
|
| 429 | _convert_STORE_DEREF = _convert_DEREF
|
| 430 |
|
| 431 | def _convert_LOAD_CLOSURE(self, arg):
|
| 432 | _NameToIndex(arg, self.varnames)
|
| 433 | return _NameToIndex(arg, self.closure)
|
| 434 |
|
| 435 | _cmp = list(dis.cmp_op)
|
| 436 | def _convert_COMPARE_OP(self, arg):
|
| 437 | return self._cmp.index(arg)
|
| 438 |
|
| 439 | _converters = {}
|
| 440 |
|
| 441 | # similarly for other opcodes...
|
| 442 |
|
| 443 | for name, obj in locals().items():
|
| 444 | if name[:9] == "_convert_":
|
| 445 | opname = name[9:]
|
| 446 | _converters[opname] = obj
|
| 447 | del name, obj, opname
|
| 448 |
|
| 449 |
|
| 450 | class Assembler(object):
|
| 451 | """Builds co_code and lnotab.
|
| 452 |
|
| 453 | This class builds the lnotab, which is documented in compile.c. Here's a
|
| 454 | brief recap:
|
| 455 |
|
| 456 | For each SET_LINENO instruction after the first one, two bytes are added to
|
| 457 | lnotab. (In some cases, multiple two-byte entries are added.) The first
|
| 458 | byte is the distance in bytes between the instruction for the last
|
| 459 | SET_LINENO and the current SET_LINENO. The second byte is offset in line
|
| 460 | numbers. If either offset is greater than 255, multiple two-byte entries
|
| 461 | are added -- see compile.c for the delicate details.
|
| 462 | """
|
| 463 | def __init__(self):
|
| 464 | self.code = []
|
| 465 | self.codeOffset = 0
|
| 466 | self.firstline = 0
|
| 467 | self.lastline = 0
|
| 468 | self.lastoff = 0
|
| 469 | self.lnotab = []
|
| 470 |
|
| 471 | def addCode(self, *args):
|
| 472 | for arg in args:
|
| 473 | self.code.append(chr(arg))
|
| 474 | self.codeOffset += len(args)
|
| 475 |
|
| 476 | def nextLine(self, lineno):
|
| 477 | if self.firstline == 0:
|
| 478 | self.firstline = lineno
|
| 479 | self.lastline = lineno
|
| 480 | else:
|
| 481 | # compute deltas
|
| 482 | addr = self.codeOffset - self.lastoff
|
| 483 | line = lineno - self.lastline
|
| 484 | # Python assumes that lineno always increases with
|
| 485 | # increasing bytecode address (lnotab is unsigned char).
|
| 486 | # Depending on when SET_LINENO instructions are emitted
|
| 487 | # this is not always true. Consider the code:
|
| 488 | # a = (1,
|
| 489 | # b)
|
| 490 | # In the bytecode stream, the assignment to "a" occurs
|
| 491 | # after the loading of "b". This works with the C Python
|
| 492 | # compiler because it only generates a SET_LINENO instruction
|
| 493 | # for the assignment.
|
| 494 | if line >= 0:
|
| 495 | push = self.lnotab.append
|
| 496 | while addr > 255:
|
| 497 | push(255); push(0)
|
| 498 | addr -= 255
|
| 499 | while line > 255:
|
| 500 | push(addr); push(255)
|
| 501 | line -= 255
|
| 502 | addr = 0
|
| 503 | if addr > 0 or line > 0:
|
| 504 | push(addr); push(line)
|
| 505 | self.lastline = lineno
|
| 506 | self.lastoff = self.codeOffset
|
| 507 |
|
| 508 | def Run(self, insts):
|
| 509 | for t in insts:
|
| 510 | opname = t[0]
|
| 511 | if len(t) == 1:
|
| 512 | self.addCode(OPNUM[opname])
|
| 513 | else:
|
| 514 | oparg = t[1]
|
| 515 | if opname == "SET_LINENO":
|
| 516 | self.nextLine(oparg)
|
| 517 | continue
|
| 518 | hi, lo = divmod(oparg, 256)
|
| 519 | try:
|
| 520 | self.addCode(OPNUM[opname], lo, hi)
|
| 521 | except ValueError:
|
| 522 | print(opname, oparg)
|
| 523 | print(OPNUM[opname], lo, hi)
|
| 524 | raise
|
| 525 |
|
| 526 | bytecode = ''.join(self.code)
|
| 527 | lnotab = ''.join(chr(c) for c in self.lnotab)
|
| 528 | return bytecode, self.firstline, lnotab
|
| 529 |
|
| 530 |
|
| 531 | class BlockStackDepth(object):
|
| 532 | # XXX 1. need to keep track of stack depth on jumps
|
| 533 | # XXX 2. at least partly as a result, this code is broken
|
| 534 |
|
| 535 | def Sum(self, insts, debug=0):
|
| 536 | depth = 0
|
| 537 | maxDepth = 0
|
| 538 |
|
| 539 | for inst in insts:
|
| 540 | opname = inst[0]
|
| 541 | if debug:
|
| 542 | print(inst, end=' ')
|
| 543 |
|
| 544 | delta = self.effect.get(opname, None)
|
| 545 | if delta is None:
|
| 546 | # now check patterns
|
| 547 | for pat, pat_delta in self.patterns:
|
| 548 | if opname[:len(pat)] == pat:
|
| 549 | delta = pat_delta
|
| 550 | break
|
| 551 |
|
| 552 | if delta is None:
|
| 553 | meth = getattr(self, opname, None)
|
| 554 | if meth is not None:
|
| 555 | delta = meth(inst[1])
|
| 556 |
|
| 557 | if delta is None:
|
| 558 | # Jumps missing here.
|
| 559 | #assert opname in ('POP_JUMP_IF_FALSE', 'SET_LINENO'), opname
|
| 560 | delta = 0
|
| 561 |
|
| 562 | depth += delta
|
| 563 | maxDepth = max(depth, maxDepth)
|
| 564 |
|
| 565 | if debug:
|
| 566 | print('%s %s' % (depth, maxDepth))
|
| 567 | return maxDepth
|
| 568 |
|
| 569 | effect = {
|
| 570 | 'POP_TOP': -1,
|
| 571 | 'DUP_TOP': 1,
|
| 572 | 'LIST_APPEND': -1,
|
| 573 | 'SET_ADD': -1,
|
| 574 | 'MAP_ADD': -2,
|
| 575 | 'SLICE+1': -1,
|
| 576 | 'SLICE+2': -1,
|
| 577 | 'SLICE+3': -2,
|
| 578 | 'STORE_SLICE+0': -1,
|
| 579 | 'STORE_SLICE+1': -2,
|
| 580 | 'STORE_SLICE+2': -2,
|
| 581 | 'STORE_SLICE+3': -3,
|
| 582 | 'DELETE_SLICE+0': -1,
|
| 583 | 'DELETE_SLICE+1': -2,
|
| 584 | 'DELETE_SLICE+2': -2,
|
| 585 | 'DELETE_SLICE+3': -3,
|
| 586 | 'STORE_SUBSCR': -3,
|
| 587 | 'DELETE_SUBSCR': -2,
|
| 588 | # PRINT_EXPR?
|
| 589 | 'PRINT_ITEM': -1,
|
| 590 | 'RETURN_VALUE': -1,
|
| 591 | 'YIELD_VALUE': -1,
|
| 592 | 'EXEC_STMT': -3,
|
| 593 | 'BUILD_CLASS': -2,
|
| 594 | 'STORE_NAME': -1,
|
| 595 | 'STORE_ATTR': -2,
|
| 596 | 'DELETE_ATTR': -1,
|
| 597 | 'STORE_GLOBAL': -1,
|
| 598 | 'BUILD_MAP': 1,
|
| 599 | 'COMPARE_OP': -1,
|
| 600 | 'STORE_FAST': -1,
|
| 601 | 'IMPORT_STAR': -1,
|
| 602 | 'IMPORT_NAME': -1,
|
| 603 | 'IMPORT_FROM': 1,
|
| 604 | 'LOAD_ATTR': 0, # unlike other loads
|
| 605 | # close enough...
|
| 606 | 'SETUP_EXCEPT': 3,
|
| 607 | 'SETUP_FINALLY': 3,
|
| 608 | 'FOR_ITER': 1,
|
| 609 | 'WITH_CLEANUP': -1,
|
| 610 | }
|
| 611 | # use pattern match
|
| 612 | patterns = [
|
| 613 | ('BINARY_', -1),
|
| 614 | ('LOAD_', 1),
|
| 615 | ]
|
| 616 |
|
| 617 | def UNPACK_SEQUENCE(self, count):
|
| 618 | return count-1
|
| 619 | def BUILD_TUPLE(self, count):
|
| 620 | return -count+1
|
| 621 | def BUILD_LIST(self, count):
|
| 622 | return -count+1
|
| 623 | def BUILD_SET(self, count):
|
| 624 | return -count+1
|
| 625 | def CALL_FUNCTION(self, argc):
|
| 626 | hi, lo = divmod(argc, 256)
|
| 627 | return -(lo + hi * 2)
|
| 628 | def CALL_FUNCTION_VAR(self, argc):
|
| 629 | return self.CALL_FUNCTION(argc)-1
|
| 630 | def CALL_FUNCTION_KW(self, argc):
|
| 631 | return self.CALL_FUNCTION(argc)-1
|
| 632 | def CALL_FUNCTION_VAR_KW(self, argc):
|
| 633 | return self.CALL_FUNCTION(argc)-2
|
| 634 | def MAKE_FUNCTION(self, argc):
|
| 635 | return -argc
|
| 636 | def MAKE_CLOSURE(self, argc):
|
| 637 | # XXX need to account for free variables too!
|
| 638 | return -argc
|
| 639 | def BUILD_SLICE(self, argc):
|
| 640 | if argc == 2:
|
| 641 | return -1
|
| 642 | elif argc == 3:
|
| 643 | return -2
|
| 644 | def DUP_TOPX(self, argc):
|
| 645 | return argc
|
| 646 |
|
| 647 |
|
| 648 | class _GraphStackDepth(object):
|
| 649 | """Walk the CFG, computing the maximum stack depth."""
|
| 650 |
|
| 651 | def __init__(self, depths, exit_block):
|
| 652 | """
|
| 653 | Args:
|
| 654 | depths: is the stack effect of each basic block. Then find the path
|
| 655 | through the code with the largest total effect.
|
| 656 | """
|
| 657 | self.depths = depths
|
| 658 | self.exit_block = exit_block
|
| 659 | self.seen = set()
|
| 660 |
|
| 661 | def Max(self, block, d):
|
| 662 | if block in self.seen:
|
| 663 | return d
|
| 664 | self.seen.add(block)
|
| 665 |
|
| 666 | d += self.depths[block]
|
| 667 |
|
| 668 | children = block.get_children()
|
| 669 | if children:
|
| 670 | return max(self.Max(c, d) for c in children)
|
| 671 |
|
| 672 | if block.label == "exit":
|
| 673 | return d
|
| 674 |
|
| 675 | return self.Max(self.exit_block, d)
|
| 676 |
|
| 677 |
|
| 678 | def MaxStackDepth(block_depths, entry_block, exit_block):
|
| 679 | """Compute maximum stack depth for any path through the CFG."""
|
| 680 | g = _GraphStackDepth(block_depths, exit_block)
|
| 681 | return g.Max(entry_block, 0)
|
| 682 |
|
| 683 |
|
| 684 | def MakeCodeObject(frame, graph, comp_opt):
|
| 685 | """Order blocks, encode instructions, and create types.CodeType().
|
| 686 |
|
| 687 | Called by Compile below, and also recursively by ArgEncoder.
|
| 688 | """
|
| 689 | # Compute stack depth per basic block.
|
| 690 | b = BlockStackDepth()
|
| 691 | block_depths = {
|
| 692 | block: b.Sum(block.Instructions()) for block in graph.blocks
|
| 693 | }
|
| 694 |
|
| 695 | stacksize = MaxStackDepth(block_depths, graph.entry, graph.exit)
|
| 696 |
|
| 697 | # Order blocks so jump offsets can be encoded.
|
| 698 | blocks = OrderBlocks(graph.entry, graph.exit)
|
| 699 |
|
| 700 | # Produce a stream of initial instructions.
|
| 701 | insts, block_offsets = FlattenBlocks(blocks)
|
| 702 |
|
| 703 | # Now that we know the offsets, make another pass.
|
| 704 | PatchJumps(insts, block_offsets)
|
| 705 |
|
| 706 | # What variables should be available at runtime?
|
| 707 |
|
| 708 | cellvars = ReorderCellVars(frame)
|
| 709 |
|
| 710 | # NOTE: Modules docstrings are assigned to __doc__ in pycodegen.py.visitModule.
|
| 711 | consts = []
|
| 712 | if comp_opt.emit_docstring:
|
| 713 | consts.append(frame.docstring)
|
| 714 |
|
| 715 | names = []
|
| 716 | # The closure list is used to track the order of cell variables and
|
| 717 | # free variables in the resulting code object. The offsets used by
|
| 718 | # LOAD_CLOSURE/LOAD_DEREF refer to both kinds of variables.
|
| 719 | closure = cellvars + frame.freevars
|
| 720 |
|
| 721 | # Convert arguments from symbolic to concrete form.
|
| 722 | enc = ArgEncoder(frame.klass, consts, names, frame.varnames,
|
| 723 | closure)
|
| 724 | # Mutates not only insts, but also appends to consts, names, etc.
|
| 725 | enc.Run(insts)
|
| 726 |
|
| 727 | # Binary encoding.
|
| 728 | a = Assembler()
|
| 729 | bytecode, firstline, lnotab = a.Run(insts)
|
| 730 |
|
| 731 | return types.CodeType(
|
| 732 | frame.ArgCount(), frame.NumLocals(), stacksize, frame.flags,
|
| 733 | bytecode,
|
| 734 | tuple(consts),
|
| 735 | tuple(names),
|
| 736 | tuple(frame.varnames),
|
| 737 | frame.filename, frame.name, firstline,
|
| 738 | lnotab,
|
| 739 | tuple(frame.freevars),
|
| 740 | tuple(cellvars))
|