1 | """A flow graph representation for Python bytecode"""
|
2 | from __future__ import print_function
|
3 |
|
4 | import itertools
|
5 | import types
|
6 |
|
7 | from .consts import CO_OPTIMIZED, CO_NEWLOCALS, CO_VARARGS, CO_VARKEYWORDS
|
8 | from opy.lib import dis
|
9 |
|
10 |
|
11 | HAS_JREL = set(dis.opname[op] for op in dis.hasjrel)
|
12 | HAS_JABS = set(dis.opname[op] for op in dis.hasjabs)
|
13 | OPNUM = dict((name, i) for i, name in enumerate(dis.opname))
|
14 |
|
15 |
|
16 | def OrderBlocks(start_block, exit_block):
|
17 | """Order blocks so that they are emitted in the right order"""
|
18 | # Rules:
|
19 | # - when a block has a next block, the next block must be emitted just after
|
20 | # - when a block has followers (relative jumps), it must be emitted before
|
21 | # them
|
22 | # - all reachable blocks must be emitted
|
23 | order = []
|
24 |
|
25 | # Find all the blocks to be emitted.
|
26 | remaining = set()
|
27 | todo = [start_block]
|
28 | while todo:
|
29 | b = todo.pop()
|
30 | if b in remaining:
|
31 | continue
|
32 | remaining.add(b)
|
33 | for c in b.get_children():
|
34 | if c not in remaining:
|
35 | todo.append(c)
|
36 |
|
37 | # A block is dominated by another block if that block must be emitted
|
38 | # before it.
|
39 | dominators = {}
|
40 | for b in remaining:
|
41 | if __debug__ and b.next:
|
42 | assert b is b.next[0].prev[0], (b, b.next)
|
43 | # Make sure every block appears in dominators, even if no
|
44 | # other block must precede it.
|
45 | dominators.setdefault(b, set())
|
46 | # preceding blocks dominate following blocks
|
47 | for c in b.get_followers():
|
48 | while 1:
|
49 | dominators.setdefault(c, set()).add(b)
|
50 | # Any block that has a next pointer leading to c is also
|
51 | # dominated because the whole chain will be emitted at once.
|
52 | # Walk backwards and add them all.
|
53 | if c.prev and c.prev[0] is not b:
|
54 | c = c.prev[0]
|
55 | else:
|
56 | break
|
57 |
|
58 | def find_next():
|
59 | # Find a block that can be emitted next.
|
60 | for b in remaining:
|
61 | for c in dominators[b]:
|
62 | if c in remaining:
|
63 | break # can't emit yet, dominated by a remaining block
|
64 | else:
|
65 | return b
|
66 | assert 0, 'circular dependency, cannot find next block'
|
67 |
|
68 | b = start_block
|
69 | while 1:
|
70 | order.append(b)
|
71 | remaining.discard(b)
|
72 | if b.next:
|
73 | b = b.next[0]
|
74 | continue
|
75 | elif b is not exit_block and not b.has_unconditional_transfer():
|
76 | order.append(exit_block)
|
77 | if not remaining:
|
78 | break
|
79 | b = find_next()
|
80 | return order
|
81 |
|
82 |
|
83 | def FlattenBlocks(blocks):
|
84 | insts = []
|
85 |
|
86 | pc = 0
|
87 | offsets = {} # block -> bytecode offset
|
88 | for b in blocks:
|
89 | offsets[b] = pc
|
90 | for inst in b.Instructions():
|
91 | insts.append(inst)
|
92 | if len(inst) == 1:
|
93 | pc += 1
|
94 | elif inst[0] != "SET_LINENO": # arg takes 2 bytes
|
95 | pc += 3
|
96 | return insts, offsets
|
97 |
|
98 |
|
99 | def PatchJumps(insts, offsets):
|
100 | pc = 0
|
101 | for i, inst in enumerate(insts):
|
102 | if len(inst) == 1:
|
103 | pc += 1
|
104 | elif inst[0] != "SET_LINENO":
|
105 | pc += 3
|
106 |
|
107 | opname = inst[0]
|
108 |
|
109 | # Compute jump locations
|
110 | if opname in HAS_JREL:
|
111 | block_arg = inst[1]
|
112 | insts[i] = (opname, offsets[block_arg] - pc)
|
113 |
|
114 | elif opname in HAS_JABS:
|
115 | block_arg = inst[1]
|
116 | insts[i] = (opname, offsets[block_arg])
|
117 |
|
118 |
|
119 | gBlockCounter = itertools.count()
|
120 |
|
121 |
|
122 | class Block(object):
|
123 |
|
124 | def __init__(self, label=''):
|
125 | self.label = label
|
126 | self.bid = gBlockCounter.next()
|
127 | self.insts = []
|
128 | self.outEdges = set()
|
129 | self.next = []
|
130 | self.prev = []
|
131 |
|
132 | # BUG FIX: This is needed for deterministic order in sets (and dicts?).
|
133 | # See OrderBlocks() below. remaining is set() of blocks. If we rely on
|
134 | # the default id(), then the output bytecode is NONDETERMINISTIC.
|
135 | def __hash__(self):
|
136 | return self.bid
|
137 |
|
138 | def __repr__(self):
|
139 | if self.label:
|
140 | return "<block %s id=%d>" % (self.label, self.bid)
|
141 | else:
|
142 | return "<block id=%d>" % (self.bid)
|
143 |
|
144 | def __str__(self):
|
145 | return "<block %s %d:\n%s>" % (
|
146 | self.label, self.bid,
|
147 | '\n'.join(str(inst) for inst in self.insts))
|
148 |
|
149 | def emit(self, inst):
|
150 | op = inst[0]
|
151 | self.insts.append(inst)
|
152 |
|
153 | def Instructions(self):
|
154 | return self.insts
|
155 |
|
156 | def addOutEdge(self, block):
|
157 | self.outEdges.add(block)
|
158 |
|
159 | def addNext(self, block):
|
160 | self.next.append(block)
|
161 | assert len(self.next) == 1, [str(b) for b in self.next]
|
162 | block.prev.append(self)
|
163 | assert len(block.prev) == 1, [str(b) for b in block.prev]
|
164 |
|
165 | _uncond_transfer = ('RETURN_VALUE', 'RAISE_VARARGS',
|
166 | 'JUMP_ABSOLUTE', 'JUMP_FORWARD', 'CONTINUE_LOOP',
|
167 | )
|
168 |
|
169 | def has_unconditional_transfer(self):
|
170 | """Returns True if there is an unconditional transfer to an other block
|
171 | at the end of this block. This means there is no risk for the bytecode
|
172 | executer to go past this block's bytecode."""
|
173 | try:
|
174 | op, arg = self.insts[-1]
|
175 | except (IndexError, ValueError):
|
176 | return
|
177 | return op in self._uncond_transfer
|
178 |
|
179 | def get_children(self):
|
180 | return list(self.outEdges) + self.next
|
181 |
|
182 | def get_followers(self):
|
183 | """Get the whole list of followers, including the next block."""
|
184 | followers = set(self.next)
|
185 | # Blocks that must be emitted *after* this one, because of
|
186 | # bytecode offsets (e.g. relative jumps) pointing to them.
|
187 | for inst in self.insts:
|
188 | if inst[0] in HAS_JREL:
|
189 | followers.add(inst[1])
|
190 | return followers
|
191 |
|
192 | def getContainedGraphs(self):
|
193 | """Return all graphs contained within this block.
|
194 |
|
195 | For example, a MAKE_FUNCTION block will contain a reference to
|
196 | the graph for the function body.
|
197 | """
|
198 | raise AssertionError('unused')
|
199 | contained = []
|
200 | for inst in self.insts:
|
201 | if len(inst) == 1:
|
202 | continue
|
203 | op = inst[1]
|
204 | if hasattr(op, 'graph'):
|
205 | contained.append(op.graph)
|
206 | return contained
|
207 |
|
208 |
|
209 | class FlowGraph(object):
|
210 |
|
211 | def __init__(self):
|
212 | self.current = self.entry = Block()
|
213 | self.exit = Block("exit")
|
214 | self.blocks = set()
|
215 | self.blocks.add(self.entry)
|
216 | self.blocks.add(self.exit)
|
217 |
|
218 | DEBUG = False
|
219 |
|
220 | def startBlock(self, block):
|
221 | if self.DEBUG:
|
222 | if self.current:
|
223 | print("end", repr(self.current))
|
224 | print(" next", self.current.next)
|
225 | print(" prev", self.current.prev)
|
226 | print(" ", self.current.get_children())
|
227 | print(repr(block))
|
228 | self.current = block
|
229 |
|
230 | def nextBlock(self, block=None):
|
231 | # XXX think we need to specify when there is implicit transfer
|
232 | # from one block to the next. might be better to represent this
|
233 | # with explicit JUMP_ABSOLUTE instructions that are optimized
|
234 | # out when they are unnecessary.
|
235 | #
|
236 | # I think this strategy works: each block has a child
|
237 | # designated as "next" which is returned as the last of the
|
238 | # children. because the nodes in a graph are emitted in
|
239 | # reverse post order, the "next" block will always be emitted
|
240 | # immediately after its parent.
|
241 | # Worry: maintaining this invariant could be tricky
|
242 | if block is None:
|
243 | block = self.newBlock()
|
244 |
|
245 | # Note: If the current block ends with an unconditional control
|
246 | # transfer, then it is technically incorrect to add an implicit
|
247 | # transfer to the block graph. Doing so results in code generation
|
248 | # for unreachable blocks. That doesn't appear to be very common
|
249 | # with Python code and since the built-in compiler doesn't optimize
|
250 | # it out we don't either.
|
251 | self.current.addNext(block)
|
252 | self.startBlock(block)
|
253 |
|
254 | def newBlock(self):
|
255 | b = Block()
|
256 | self.blocks.add(b)
|
257 | return b
|
258 |
|
259 | def startExitBlock(self):
|
260 | self.startBlock(self.exit)
|
261 |
|
262 | def emit(self, *inst):
|
263 | if self.DEBUG:
|
264 | print("\t", inst)
|
265 | if len(inst) == 2 and isinstance(inst[1], Block):
|
266 | self.current.addOutEdge(inst[1])
|
267 | self.current.emit(inst)
|
268 |
|
269 | def getContainedGraphs(self):
|
270 | raise AssertionError('unused')
|
271 | l = []
|
272 | for b in self.getBlocks():
|
273 | l.extend(b.getContainedGraphs())
|
274 | return l
|
275 |
|
276 |
|
277 | class Frame(object):
|
278 | """Something that gets turned into a single code object.
|
279 |
|
280 | Code objects and consts are mutually recursive.
|
281 | """
|
282 |
|
283 | def __init__(self, name, filename, optimized=0, klass=None):
|
284 | """
|
285 | Args:
|
286 | klass: Whether we're compiling a class block.
|
287 | """
|
288 | self.name = name # name that is put in the code object
|
289 | self.filename = filename
|
290 | self.flags = (CO_OPTIMIZED | CO_NEWLOCALS) if optimized else 0
|
291 | self.klass = klass
|
292 |
|
293 | # Mutated by setArgs()
|
294 | self.varnames = []
|
295 | self.argcount = 0
|
296 |
|
297 | # Mutated by setVars(). Free variables found by the symbol table scan,
|
298 | # including variables used only in nested scopes, are included here.
|
299 | self.freevars = []
|
300 | self.cellvars = []
|
301 |
|
302 | self.docstring = None
|
303 |
|
304 | def setArgs(self, args):
|
305 | """Only called by functions, not modules or classes."""
|
306 | assert not self.varnames # Nothing should have been added
|
307 | if args:
|
308 | self.varnames = list(args)
|
309 | self.argcount = len(args)
|
310 |
|
311 | def setVars(self, freevars, cellvars):
|
312 | self.freevars = freevars
|
313 | self.cellvars = cellvars
|
314 |
|
315 | def setDocstring(self, doc):
|
316 | self.docstring = doc
|
317 |
|
318 | def setFlag(self, flag):
|
319 | self.flags |= flag
|
320 |
|
321 | def checkFlag(self, flag):
|
322 | return bool(self.flags & flag)
|
323 |
|
324 | def NumLocals(self):
|
325 | return len(self.varnames) if self.flags & CO_NEWLOCALS else 0
|
326 |
|
327 | def ArgCount(self):
|
328 | argcount = self.argcount
|
329 | if self.flags & CO_VARKEYWORDS:
|
330 | argcount -= 1
|
331 | if self.flags & CO_VARARGS:
|
332 | argcount -= 1
|
333 | return argcount
|
334 |
|
335 |
|
336 | def ReorderCellVars(frame):
|
337 | """Reorder cellvars so the ones in self.varnames are first.
|
338 |
|
339 | And prune from freevars (?)
|
340 | """
|
341 | lookup = set(frame.cellvars)
|
342 | remaining = lookup - set(frame.varnames)
|
343 |
|
344 | cellvars = [n for n in frame.varnames if n in lookup]
|
345 | cellvars.extend(remaining)
|
346 | return cellvars
|
347 |
|
348 |
|
349 | def _NameToIndex(name, L):
|
350 | """Return index of name in list, appending if necessary
|
351 |
|
352 | This routine uses a list instead of a dictionary, because a
|
353 | dictionary can't store two different keys if the keys have the
|
354 | same value but different types, e.g. 2 and 2L. The compiler
|
355 | must treat these two separately, so it does an explicit type
|
356 | comparison before comparing the values.
|
357 | """
|
358 | t = type(name)
|
359 | for i, item in enumerate(L):
|
360 | if t == type(item) and item == name:
|
361 | return i
|
362 | end = len(L)
|
363 | L.append(name)
|
364 | return end
|
365 |
|
366 |
|
367 | class ArgEncoder(object):
|
368 | """Replaces arg objects with indices."""
|
369 |
|
370 | def __init__(self, klass, consts, names, varnames, closure):
|
371 | """
|
372 | Args:
|
373 | consts ... closure are all potentially mutated!
|
374 | """
|
375 | self.klass = klass
|
376 | self.consts = consts
|
377 | self.names = names
|
378 | self.varnames = varnames
|
379 | self.closure = closure
|
380 |
|
381 | def Run(self, insts):
|
382 | """Mutates insts."""
|
383 | for i, t in enumerate(insts):
|
384 | if len(t) == 2:
|
385 | opname, oparg = t
|
386 | method = self._converters.get(opname, None)
|
387 | if method:
|
388 | arg_index = method(self, oparg)
|
389 | insts[i] = opname, arg_index
|
390 |
|
391 | # TODO: This should just be a simple switch
|
392 |
|
393 | def _convert_LOAD_CONST(self, arg):
|
394 | return _NameToIndex(arg, self.consts)
|
395 |
|
396 | def _convert_LOAD_FAST(self, arg):
|
397 | _NameToIndex(arg, self.names)
|
398 | return _NameToIndex(arg, self.varnames)
|
399 | _convert_STORE_FAST = _convert_LOAD_FAST
|
400 | _convert_DELETE_FAST = _convert_LOAD_FAST
|
401 |
|
402 | def _convert_LOAD_NAME(self, arg):
|
403 | # TODO: This is wrong. It leads to too many files.
|
404 | # https://github.com/oilshell/oil/issues/180
|
405 | if self.klass is None:
|
406 | _NameToIndex(arg, self.varnames)
|
407 | return _NameToIndex(arg, self.names)
|
408 |
|
409 | def _convert_NAME(self, arg):
|
410 | if self.klass is None:
|
411 | _NameToIndex(arg, self.varnames)
|
412 | return _NameToIndex(arg, self.names)
|
413 | _convert_STORE_NAME = _convert_NAME
|
414 | _convert_DELETE_NAME = _convert_NAME
|
415 | _convert_IMPORT_NAME = _convert_NAME
|
416 | _convert_IMPORT_FROM = _convert_NAME
|
417 | _convert_STORE_ATTR = _convert_NAME
|
418 | _convert_LOAD_ATTR = _convert_NAME
|
419 | _convert_DELETE_ATTR = _convert_NAME
|
420 | _convert_LOAD_GLOBAL = _convert_NAME
|
421 | _convert_STORE_GLOBAL = _convert_NAME
|
422 | _convert_DELETE_GLOBAL = _convert_NAME
|
423 |
|
424 | def _convert_DEREF(self, arg):
|
425 | _NameToIndex(arg, self.names)
|
426 | _NameToIndex(arg, self.varnames)
|
427 | return _NameToIndex(arg, self.closure)
|
428 | _convert_LOAD_DEREF = _convert_DEREF
|
429 | _convert_STORE_DEREF = _convert_DEREF
|
430 |
|
431 | def _convert_LOAD_CLOSURE(self, arg):
|
432 | _NameToIndex(arg, self.varnames)
|
433 | return _NameToIndex(arg, self.closure)
|
434 |
|
435 | _cmp = list(dis.cmp_op)
|
436 | def _convert_COMPARE_OP(self, arg):
|
437 | return self._cmp.index(arg)
|
438 |
|
439 | _converters = {}
|
440 |
|
441 | # similarly for other opcodes...
|
442 |
|
443 | for name, obj in locals().items():
|
444 | if name[:9] == "_convert_":
|
445 | opname = name[9:]
|
446 | _converters[opname] = obj
|
447 | del name, obj, opname
|
448 |
|
449 |
|
450 | class Assembler(object):
|
451 | """Builds co_code and lnotab.
|
452 |
|
453 | This class builds the lnotab, which is documented in compile.c. Here's a
|
454 | brief recap:
|
455 |
|
456 | For each SET_LINENO instruction after the first one, two bytes are added to
|
457 | lnotab. (In some cases, multiple two-byte entries are added.) The first
|
458 | byte is the distance in bytes between the instruction for the last
|
459 | SET_LINENO and the current SET_LINENO. The second byte is offset in line
|
460 | numbers. If either offset is greater than 255, multiple two-byte entries
|
461 | are added -- see compile.c for the delicate details.
|
462 | """
|
463 | def __init__(self):
|
464 | self.code = []
|
465 | self.codeOffset = 0
|
466 | self.firstline = 0
|
467 | self.lastline = 0
|
468 | self.lastoff = 0
|
469 | self.lnotab = []
|
470 |
|
471 | def addCode(self, *args):
|
472 | for arg in args:
|
473 | self.code.append(chr(arg))
|
474 | self.codeOffset += len(args)
|
475 |
|
476 | def nextLine(self, lineno):
|
477 | if self.firstline == 0:
|
478 | self.firstline = lineno
|
479 | self.lastline = lineno
|
480 | else:
|
481 | # compute deltas
|
482 | addr = self.codeOffset - self.lastoff
|
483 | line = lineno - self.lastline
|
484 | # Python assumes that lineno always increases with
|
485 | # increasing bytecode address (lnotab is unsigned char).
|
486 | # Depending on when SET_LINENO instructions are emitted
|
487 | # this is not always true. Consider the code:
|
488 | # a = (1,
|
489 | # b)
|
490 | # In the bytecode stream, the assignment to "a" occurs
|
491 | # after the loading of "b". This works with the C Python
|
492 | # compiler because it only generates a SET_LINENO instruction
|
493 | # for the assignment.
|
494 | if line >= 0:
|
495 | push = self.lnotab.append
|
496 | while addr > 255:
|
497 | push(255); push(0)
|
498 | addr -= 255
|
499 | while line > 255:
|
500 | push(addr); push(255)
|
501 | line -= 255
|
502 | addr = 0
|
503 | if addr > 0 or line > 0:
|
504 | push(addr); push(line)
|
505 | self.lastline = lineno
|
506 | self.lastoff = self.codeOffset
|
507 |
|
508 | def Run(self, insts):
|
509 | for t in insts:
|
510 | opname = t[0]
|
511 | if len(t) == 1:
|
512 | self.addCode(OPNUM[opname])
|
513 | else:
|
514 | oparg = t[1]
|
515 | if opname == "SET_LINENO":
|
516 | self.nextLine(oparg)
|
517 | continue
|
518 | hi, lo = divmod(oparg, 256)
|
519 | try:
|
520 | self.addCode(OPNUM[opname], lo, hi)
|
521 | except ValueError:
|
522 | print(opname, oparg)
|
523 | print(OPNUM[opname], lo, hi)
|
524 | raise
|
525 |
|
526 | bytecode = ''.join(self.code)
|
527 | lnotab = ''.join(chr(c) for c in self.lnotab)
|
528 | return bytecode, self.firstline, lnotab
|
529 |
|
530 |
|
531 | class BlockStackDepth(object):
|
532 | # XXX 1. need to keep track of stack depth on jumps
|
533 | # XXX 2. at least partly as a result, this code is broken
|
534 |
|
535 | def Sum(self, insts, debug=0):
|
536 | depth = 0
|
537 | maxDepth = 0
|
538 |
|
539 | for inst in insts:
|
540 | opname = inst[0]
|
541 | if debug:
|
542 | print(inst, end=' ')
|
543 |
|
544 | delta = self.effect.get(opname, None)
|
545 | if delta is None:
|
546 | # now check patterns
|
547 | for pat, pat_delta in self.patterns:
|
548 | if opname[:len(pat)] == pat:
|
549 | delta = pat_delta
|
550 | break
|
551 |
|
552 | if delta is None:
|
553 | meth = getattr(self, opname, None)
|
554 | if meth is not None:
|
555 | delta = meth(inst[1])
|
556 |
|
557 | if delta is None:
|
558 | # Jumps missing here.
|
559 | #assert opname in ('POP_JUMP_IF_FALSE', 'SET_LINENO'), opname
|
560 | delta = 0
|
561 |
|
562 | depth += delta
|
563 | maxDepth = max(depth, maxDepth)
|
564 |
|
565 | if debug:
|
566 | print('%s %s' % (depth, maxDepth))
|
567 | return maxDepth
|
568 |
|
569 | effect = {
|
570 | 'POP_TOP': -1,
|
571 | 'DUP_TOP': 1,
|
572 | 'LIST_APPEND': -1,
|
573 | 'SET_ADD': -1,
|
574 | 'MAP_ADD': -2,
|
575 | 'SLICE+1': -1,
|
576 | 'SLICE+2': -1,
|
577 | 'SLICE+3': -2,
|
578 | 'STORE_SLICE+0': -1,
|
579 | 'STORE_SLICE+1': -2,
|
580 | 'STORE_SLICE+2': -2,
|
581 | 'STORE_SLICE+3': -3,
|
582 | 'DELETE_SLICE+0': -1,
|
583 | 'DELETE_SLICE+1': -2,
|
584 | 'DELETE_SLICE+2': -2,
|
585 | 'DELETE_SLICE+3': -3,
|
586 | 'STORE_SUBSCR': -3,
|
587 | 'DELETE_SUBSCR': -2,
|
588 | # PRINT_EXPR?
|
589 | 'PRINT_ITEM': -1,
|
590 | 'RETURN_VALUE': -1,
|
591 | 'YIELD_VALUE': -1,
|
592 | 'EXEC_STMT': -3,
|
593 | 'BUILD_CLASS': -2,
|
594 | 'STORE_NAME': -1,
|
595 | 'STORE_ATTR': -2,
|
596 | 'DELETE_ATTR': -1,
|
597 | 'STORE_GLOBAL': -1,
|
598 | 'BUILD_MAP': 1,
|
599 | 'COMPARE_OP': -1,
|
600 | 'STORE_FAST': -1,
|
601 | 'IMPORT_STAR': -1,
|
602 | 'IMPORT_NAME': -1,
|
603 | 'IMPORT_FROM': 1,
|
604 | 'LOAD_ATTR': 0, # unlike other loads
|
605 | # close enough...
|
606 | 'SETUP_EXCEPT': 3,
|
607 | 'SETUP_FINALLY': 3,
|
608 | 'FOR_ITER': 1,
|
609 | 'WITH_CLEANUP': -1,
|
610 | }
|
611 | # use pattern match
|
612 | patterns = [
|
613 | ('BINARY_', -1),
|
614 | ('LOAD_', 1),
|
615 | ]
|
616 |
|
617 | def UNPACK_SEQUENCE(self, count):
|
618 | return count-1
|
619 | def BUILD_TUPLE(self, count):
|
620 | return -count+1
|
621 | def BUILD_LIST(self, count):
|
622 | return -count+1
|
623 | def BUILD_SET(self, count):
|
624 | return -count+1
|
625 | def CALL_FUNCTION(self, argc):
|
626 | hi, lo = divmod(argc, 256)
|
627 | return -(lo + hi * 2)
|
628 | def CALL_FUNCTION_VAR(self, argc):
|
629 | return self.CALL_FUNCTION(argc)-1
|
630 | def CALL_FUNCTION_KW(self, argc):
|
631 | return self.CALL_FUNCTION(argc)-1
|
632 | def CALL_FUNCTION_VAR_KW(self, argc):
|
633 | return self.CALL_FUNCTION(argc)-2
|
634 | def MAKE_FUNCTION(self, argc):
|
635 | return -argc
|
636 | def MAKE_CLOSURE(self, argc):
|
637 | # XXX need to account for free variables too!
|
638 | return -argc
|
639 | def BUILD_SLICE(self, argc):
|
640 | if argc == 2:
|
641 | return -1
|
642 | elif argc == 3:
|
643 | return -2
|
644 | def DUP_TOPX(self, argc):
|
645 | return argc
|
646 |
|
647 |
|
648 | class _GraphStackDepth(object):
|
649 | """Walk the CFG, computing the maximum stack depth."""
|
650 |
|
651 | def __init__(self, depths, exit_block):
|
652 | """
|
653 | Args:
|
654 | depths: is the stack effect of each basic block. Then find the path
|
655 | through the code with the largest total effect.
|
656 | """
|
657 | self.depths = depths
|
658 | self.exit_block = exit_block
|
659 | self.seen = set()
|
660 |
|
661 | def Max(self, block, d):
|
662 | if block in self.seen:
|
663 | return d
|
664 | self.seen.add(block)
|
665 |
|
666 | d += self.depths[block]
|
667 |
|
668 | children = block.get_children()
|
669 | if children:
|
670 | return max(self.Max(c, d) for c in children)
|
671 |
|
672 | if block.label == "exit":
|
673 | return d
|
674 |
|
675 | return self.Max(self.exit_block, d)
|
676 |
|
677 |
|
678 | def MaxStackDepth(block_depths, entry_block, exit_block):
|
679 | """Compute maximum stack depth for any path through the CFG."""
|
680 | g = _GraphStackDepth(block_depths, exit_block)
|
681 | return g.Max(entry_block, 0)
|
682 |
|
683 |
|
684 | def MakeCodeObject(frame, graph, comp_opt):
|
685 | """Order blocks, encode instructions, and create types.CodeType().
|
686 |
|
687 | Called by Compile below, and also recursively by ArgEncoder.
|
688 | """
|
689 | # Compute stack depth per basic block.
|
690 | b = BlockStackDepth()
|
691 | block_depths = {
|
692 | block: b.Sum(block.Instructions()) for block in graph.blocks
|
693 | }
|
694 |
|
695 | stacksize = MaxStackDepth(block_depths, graph.entry, graph.exit)
|
696 |
|
697 | # Order blocks so jump offsets can be encoded.
|
698 | blocks = OrderBlocks(graph.entry, graph.exit)
|
699 |
|
700 | # Produce a stream of initial instructions.
|
701 | insts, block_offsets = FlattenBlocks(blocks)
|
702 |
|
703 | # Now that we know the offsets, make another pass.
|
704 | PatchJumps(insts, block_offsets)
|
705 |
|
706 | # What variables should be available at runtime?
|
707 |
|
708 | cellvars = ReorderCellVars(frame)
|
709 |
|
710 | # NOTE: Modules docstrings are assigned to __doc__ in pycodegen.py.visitModule.
|
711 | consts = []
|
712 | if comp_opt.emit_docstring:
|
713 | consts.append(frame.docstring)
|
714 |
|
715 | names = []
|
716 | # The closure list is used to track the order of cell variables and
|
717 | # free variables in the resulting code object. The offsets used by
|
718 | # LOAD_CLOSURE/LOAD_DEREF refer to both kinds of variables.
|
719 | closure = cellvars + frame.freevars
|
720 |
|
721 | # Convert arguments from symbolic to concrete form.
|
722 | enc = ArgEncoder(frame.klass, consts, names, frame.varnames,
|
723 | closure)
|
724 | # Mutates not only insts, but also appends to consts, names, etc.
|
725 | enc.Run(insts)
|
726 |
|
727 | # Binary encoding.
|
728 | a = Assembler()
|
729 | bytecode, firstline, lnotab = a.Run(insts)
|
730 |
|
731 | return types.CodeType(
|
732 | frame.ArgCount(), frame.NumLocals(), stacksize, frame.flags,
|
733 | bytecode,
|
734 | tuple(consts),
|
735 | tuple(names),
|
736 | tuple(frame.varnames),
|
737 | frame.filename, frame.name, firstline,
|
738 | lnotab,
|
739 | tuple(frame.freevars),
|
740 | tuple(cellvars))
|