1 | from __future__ import print_function
|
2 | """
|
3 | ovm.py
|
4 |
|
5 | TODO:
|
6 |
|
7 | Define a VM that doesn't use exceptions for control flow!
|
8 | """
|
9 |
|
10 | import operator # for + - * / etc.
|
11 | import os
|
12 | import sys
|
13 | import repr as repr_lib
|
14 |
|
15 | from .pyvm2 import debug1
|
16 | from ..lib import dis
|
17 |
|
18 | # Create a repr that won't overflow.
|
19 | repr_obj = repr_lib.Repr()
|
20 | repr_obj.maxother = 120
|
21 | repper = repr_obj.repr
|
22 |
|
23 | # Different than log
|
24 | def debug(msg, *args):
|
25 | if not VERBOSE:
|
26 | return
|
27 |
|
28 | debug1(msg, *args)
|
29 |
|
30 |
|
31 | VERBOSE = False
|
32 | #VERBOSE = True
|
33 |
|
34 |
|
35 | def run_code(vm, code):
|
36 | """Main entry point.
|
37 |
|
38 | Used by tests and by execfile.
|
39 | """
|
40 | frame = vm.make_frame(code)
|
41 | val = vm.run_frame(frame)
|
42 | vm.check_invariants()
|
43 | if os.getenv('VM_SUMMARY'):
|
44 | debug1('*** OVM executed for %d ticks', vm.num_ticks)
|
45 | # If we return the number of ticks here, the unit tests break.
|
46 | return val
|
47 |
|
48 |
|
49 | def run_code_object(code, args, package=None):
|
50 | sys.argv = args
|
51 | vm = VirtualMachine()
|
52 | run_code(vm, code)
|
53 |
|
54 |
|
55 | # Note: handler is a jump target!
|
56 | import collections
|
57 | Block = collections.namedtuple("Block", "type, handler, level")
|
58 |
|
59 |
|
60 | class Frame(object):
|
61 | def __init__(self, f_code, callargs):
|
62 | self.f_code = f_code
|
63 | self.f_locals = dict(callargs) # Do we need to make a copy?
|
64 | self.stack = [] # expression stack
|
65 | self.f_lineno = f_code.co_firstlineno
|
66 | self.f_lasti = 0
|
67 |
|
68 | self.block_stack = []
|
69 | self.generator = None
|
70 |
|
71 | def __repr__(self): # pragma: no cover
|
72 | return '<Frame at 0x%08x: %r @ %d>' % (
|
73 | id(self), self.f_code.co_filename, self.f_lineno
|
74 | )
|
75 |
|
76 | def top(self):
|
77 | """Return the value at the top of the stack, with no changes."""
|
78 | return self.stack[-1]
|
79 |
|
80 | def pop(self, i=0):
|
81 | """Pop a value from the stack.
|
82 |
|
83 | Default to the top of the stack, but `i` can be a count from the top
|
84 | instead.
|
85 |
|
86 | """
|
87 | return self.stack.pop(-1-i)
|
88 |
|
89 | def push(self, *vals):
|
90 | """Push values onto the value stack."""
|
91 | self.stack.extend(vals)
|
92 |
|
93 | def popn(self, n):
|
94 | """Pop a number of values from the value stack.
|
95 |
|
96 | A list of `n` values is returned, the deepest value first.
|
97 | """
|
98 | if n:
|
99 | ret = self.stack[-n:]
|
100 | del self.stack[-n:]
|
101 | return ret
|
102 | else:
|
103 | return []
|
104 |
|
105 | def peek(self, n):
|
106 | """Get a value `n` entries down in the stack, without changing the stack."""
|
107 | return self.stack[-n]
|
108 |
|
109 | def jump(self, offset):
|
110 | """Move the bytecode pointer to `offset`, so it will execute next."""
|
111 | self.f_lasti = offset
|
112 |
|
113 | def push_block(self, type, handler=None, level=None):
|
114 | """Used for SETUP_{LOOP,EXCEPT,FINALLY,WITH}."""
|
115 | if level is None:
|
116 | level = len(self.stack)
|
117 | self.block_stack.append(Block(type, handler, level))
|
118 |
|
119 | def pop_block(self):
|
120 | return self.block_stack.pop()
|
121 |
|
122 | def _unwind_block(self, block, vm):
|
123 | """
|
124 | Args:
|
125 | vm: VirtualMachineError to possibly mutate
|
126 | """
|
127 | while len(self.stack) > block.level:
|
128 | self.pop()
|
129 |
|
130 | def handle_block_stack(self, why, vm):
|
131 | """
|
132 | After every bytecode that returns why != None, handle everything on the
|
133 | block stack.
|
134 |
|
135 | The block stack and data stack are shuffled for looping, exception
|
136 | handling, or returning.
|
137 | """
|
138 | assert why != 'yield'
|
139 |
|
140 | block = self.block_stack[-1]
|
141 | assert block.type == 'loop', block.type
|
142 |
|
143 | if why == 'continue':
|
144 | self.jump(vm.return_value) # why is it left in the return value?
|
145 | return None
|
146 |
|
147 | self.pop_block()
|
148 | self._unwind_block(block, vm)
|
149 |
|
150 | if why == 'break':
|
151 | self.jump(block.handler)
|
152 | return None
|
153 |
|
154 | raise AssertionError('why = %r' % why)
|
155 |
|
156 | def decode_next_raw(self):
|
157 | """
|
158 | Parse 1 - 3 bytes of bytecode into an instruction and maybe arguments.
|
159 | """
|
160 | opcode = ord(self.f_code.co_code[self.f_lasti])
|
161 | self.f_lasti += 1
|
162 |
|
163 | arguments = []
|
164 | if opcode >= dis.HAVE_ARGUMENT:
|
165 | a1 = self.f_code.co_code[self.f_lasti]
|
166 | a2 = self.f_code.co_code[self.f_lasti+1]
|
167 | arg = ord(a1) + (ord(a2) << 8)
|
168 | self.f_lasti += 2
|
169 | else:
|
170 | arg = None
|
171 | return opcode, arg
|
172 |
|
173 | def line_number(self):
|
174 | """Get the current line number the frame is executing."""
|
175 | # We don't keep f_lineno up to date, so calculate it based on the
|
176 | # instruction address and the line number table.
|
177 | lnotab = self.f_code.co_lnotab
|
178 | byte_increments = lnotab[0::2]
|
179 | line_increments = lnotab[1::2]
|
180 |
|
181 | byte_num = 0
|
182 | line_num = self.f_code.co_firstlineno
|
183 |
|
184 | for byte_incr, line_incr in zip(byte_increments, line_increments):
|
185 | byte_incr = ord(byte_incr)
|
186 | line_incr = ord(line_incr)
|
187 |
|
188 | byte_num += byte_incr
|
189 | if byte_num > self.f_lasti:
|
190 | break
|
191 | line_num += line_incr
|
192 |
|
193 | return line_num
|
194 |
|
195 |
|
196 | class VirtualMachineError(Exception):
|
197 | """For raising errors in the operation of the VM."""
|
198 | pass
|
199 |
|
200 |
|
201 | BINARY_OPERATORS = {
|
202 | 'POWER': pow,
|
203 | 'MULTIPLY': operator.mul,
|
204 | 'DIVIDE': getattr(operator, 'div', lambda x, y: None),
|
205 | 'FLOOR_DIVIDE': operator.floordiv,
|
206 | 'TRUE_DIVIDE': operator.truediv,
|
207 | 'MODULO': operator.mod,
|
208 | 'ADD': operator.add,
|
209 | 'SUBTRACT': operator.sub,
|
210 | 'SUBSCR': operator.getitem,
|
211 | 'LSHIFT': operator.lshift,
|
212 | 'RSHIFT': operator.rshift,
|
213 | 'AND': operator.and_,
|
214 | 'XOR': operator.xor,
|
215 | 'OR': operator.or_,
|
216 | }
|
217 |
|
218 | COMPARE_OPERATORS = [
|
219 | operator.lt,
|
220 | operator.le,
|
221 | operator.eq,
|
222 | operator.ne,
|
223 | operator.gt,
|
224 | operator.ge,
|
225 | lambda x, y: x in y,
|
226 | lambda x, y: x not in y,
|
227 | lambda x, y: x is y,
|
228 | lambda x, y: x is not y,
|
229 | lambda x, y: issubclass(x, Exception) and issubclass(x, y),
|
230 | ]
|
231 |
|
232 |
|
233 | class VirtualMachine(object):
|
234 |
|
235 | def __init__(self, verbose=VERBOSE):
|
236 | """
|
237 | Args:
|
238 | subset: turn off bytecodes that OPy doesn't need (e.g. print
|
239 | statement, etc.)
|
240 | verbose: turn on logging
|
241 | """
|
242 | self.verbose = verbose
|
243 | # some objects define __repr__, which means our debug() logging screws
|
244 | # things up! Even though they don't have side effects, this somehow
|
245 | # matters.
|
246 | self.repr_ok = True
|
247 |
|
248 | # The call stack of frames.
|
249 | self.frames = []
|
250 | # The current frame.
|
251 | self.frame = None
|
252 | self.return_value = None
|
253 |
|
254 | self.cur_line = None # current line number
|
255 | self.num_ticks = 0
|
256 |
|
257 | def top(self):
|
258 | return self.frame.top()
|
259 |
|
260 | def pop(self, i=0):
|
261 | return self.frame.pop(i=i)
|
262 |
|
263 | def push(self, *vals):
|
264 | self.frame.push(*vals)
|
265 |
|
266 | def popn(self, n):
|
267 | return self.frame.popn(n)
|
268 |
|
269 | def peek(self, n):
|
270 | return self.frame.peek(n)
|
271 |
|
272 | def jump(self, offset):
|
273 | self.frame.jump(offset)
|
274 |
|
275 | # TODO: The frame should only have locals?
|
276 | # All globals are constants? No "rebindable" globals. (You can have
|
277 | # mutable objects but not rebind them.)
|
278 | def make_frame(self, code, callargs={}):
|
279 | """
|
280 | Called by self.run_code and Function.__call__.
|
281 | """
|
282 | frame = Frame(code, callargs)
|
283 | return frame
|
284 |
|
285 | def log_tick(self, byteName, arguments, opoffset, linestarts):
|
286 | """ Log arguments, block stack, and data stack for each opcode."""
|
287 | indent = " " * (len(self.frames)-1)
|
288 | stack_rep = repper(self.frame.stack)
|
289 | #block_stack_rep = repper(self.frame.block_stack)
|
290 | # repr_lib is causing problems
|
291 | if self.repr_ok:
|
292 | stack_rep = repr(self.frame.stack)
|
293 | #block_stack_rep = repr(self.frame.block_stack)
|
294 |
|
295 | arg_str = ''
|
296 | if arguments and self.repr_ok:
|
297 | arg_str = ' %r' % (arguments[0],)
|
298 |
|
299 | # TODO: Should increment
|
300 |
|
301 | li = linestarts.get(opoffset, None)
|
302 | if li is not None and self.cur_line != li:
|
303 | self.cur_line = li
|
304 |
|
305 | debug('%s%d: %s%s (line %s)', indent, opoffset, byteName, arg_str,
|
306 | self.cur_line)
|
307 | if self.repr_ok:
|
308 | debug(' %sval stack: %s', indent, stack_rep)
|
309 | #debug(' %sblock stack: %s', indent, block_stack_rep)
|
310 | debug('')
|
311 |
|
312 | # Helpers for run_frame
|
313 | def _push_frame(self, frame):
|
314 | self.frames.append(frame)
|
315 | self.frame = frame
|
316 |
|
317 | def _pop_frame(self):
|
318 | # NOTE: Block(handler) is a jump address.
|
319 | #if self.frame.block_stack:
|
320 | if False:
|
321 | debug1('block stack: %s', self.frame.block_stack)
|
322 | raise VirtualMachineError(
|
323 | "Block stack still has %d entries" %
|
324 | len(self.frame.block_stack))
|
325 |
|
326 | self.frames.pop()
|
327 | if self.frames:
|
328 | self.frame = self.frames[-1]
|
329 | else:
|
330 | self.frame = None
|
331 |
|
332 | def run_frame(self, frame):
|
333 | """Run a frame until it returns or raises an exception.
|
334 |
|
335 | This function raises GuestException or returns the return value.
|
336 |
|
337 | Corresponds to PyEval_EvalFrameEx in ceval.c. That returns 'PyObject*
|
338 | retval' -- but how does it indicate an exception?
|
339 |
|
340 | I think retval is NULL, and then
|
341 |
|
342 | """
|
343 | # bytecode offset -> line number
|
344 | #print('frame %s ' % frame)
|
345 | # NOTE: Also done in Frmae.line_number()
|
346 | linestarts = dict(dis.findlinestarts(frame.f_code))
|
347 | #print('STARTS %s ' % linestarts)
|
348 |
|
349 | self._push_frame(frame)
|
350 | while True:
|
351 | self.num_ticks += 1
|
352 | if self.num_ticks == 300:
|
353 | raise VirtualMachineError('Too many ticks')
|
354 |
|
355 | opoffset = self.frame.f_lasti # For logging only
|
356 | opcode, arg = self.frame.decode_next_raw()
|
357 | inst_name = dis.opname[opcode]
|
358 | arguments = []
|
359 | if self.verbose:
|
360 | self.log_tick(inst_name, arguments, opoffset, linestarts)
|
361 | debug1('arg %s', arg)
|
362 |
|
363 | # When unwinding the block stack, we need to keep track of why we
|
364 | # are doing it.
|
365 |
|
366 | # NOTE: In addition to returning why == 'exception', this can also
|
367 | # RAISE GuestException from recursive call via call_function.
|
368 |
|
369 | why = False
|
370 |
|
371 | if inst_name == 'POP_TOP':
|
372 | self.pop()
|
373 |
|
374 | elif inst_name == 'LOAD_CONST':
|
375 | const = self.frame.f_code.co_consts[arg]
|
376 | self.push(const)
|
377 |
|
378 | elif inst_name == 'STORE_NAME':
|
379 | # This is true. NOTE: dis.hasname is a list.
|
380 | #debug1('STORE_NAME %d', opcode)
|
381 | name = self.frame.f_code.co_names[arg]
|
382 | self.frame.f_locals[name] = self.pop()
|
383 |
|
384 | elif inst_name == 'LOAD_NAME':
|
385 | #debug1('NAME arg %d', arg)
|
386 |
|
387 | name = self.frame.f_code.co_names[arg]
|
388 | #debug1('NAME %r', name)
|
389 | frame = self.frame
|
390 | if name in frame.f_locals:
|
391 | val = frame.f_locals[name]
|
392 | elif name == 'print': # Special case!
|
393 | val = print
|
394 | #elif name in frame.f_globals:
|
395 | # val = frame.f_globals[name]
|
396 | #elif name in frame.f_builtins:
|
397 | # val = frame.f_builtins[name]
|
398 | else:
|
399 | raise NameError("name '%s' is not defined" % name)
|
400 | self.push(val)
|
401 | # Hack because this is not a global in OVM.
|
402 | #if name == 'True':
|
403 | # self.push(True)
|
404 | #else:
|
405 | # self.push(self.frame.f_locals[name])
|
406 |
|
407 | #
|
408 | # Operations
|
409 | #
|
410 | elif inst_name == 'BINARY_ADD':
|
411 | # I guess popn() is slightly faster because you don't decrement.
|
412 | # twice? Even in C?
|
413 | x, y = self.popn(2)
|
414 | self.push(BINARY_OPERATORS['ADD'](x, y))
|
415 |
|
416 | elif inst_name == 'COMPARE_OP':
|
417 | x, y = self.popn(2)
|
418 | self.push(COMPARE_OPERATORS[arg](x, y))
|
419 |
|
420 | #
|
421 | # FUNCTIONS / LOOPS
|
422 | #
|
423 | elif inst_name == 'CALL_FUNCTION':
|
424 | # NOTE: There are different bytecodes for
|
425 | # CALL_FUNCTION_{VAR,KW,VAR_KW}.
|
426 | # I don't thinks we need those in OPy. I think that is for each
|
427 | # combination of foo(*args, **kwargs). I guess I do need _VAR
|
428 | # for log(msg, *args).
|
429 |
|
430 | len_kw, len_pos= divmod(arg, 256)
|
431 | assert len_kw == 0
|
432 | namedargs = {}
|
433 | for i in xrange(len_kw):
|
434 | key, val = self.popn(2)
|
435 | namedargs[key] = val
|
436 |
|
437 | posargs = self.popn(len_pos)
|
438 | #posargs.extend(args) # For extras
|
439 |
|
440 | func = self.pop() # TODO: assert that it's the print function
|
441 |
|
442 | do_wrap = False
|
443 | if do_wrap:
|
444 | # Hm there needs to be a better way of doing this?
|
445 | #callargs = inspect.getcallargs(func, *posargs, **namedargs)
|
446 | callargs = None
|
447 | frame = self.make_frame(func.func_code, callargs)
|
448 | retval = self.run_frame(frame)
|
449 | else:
|
450 | byterun_func = func
|
451 | retval = byterun_func(*posargs, **namedargs)
|
452 | self.push(retval)
|
453 |
|
454 | elif inst_name == 'RETURN_VALUE':
|
455 | why = 'return'
|
456 |
|
457 | elif inst_name == 'SETUP_LOOP':
|
458 | dest = self.frame.f_lasti + arg # dis.hasjrel
|
459 | self.frame.push_block('loop', dest)
|
460 |
|
461 | elif inst_name == 'POP_BLOCK':
|
462 | self.frame.pop_block()
|
463 |
|
464 | elif inst_name == 'BREAK_LOOP':
|
465 | # TODO: Get rid of this; it should be a jump.
|
466 | why = 'break'
|
467 |
|
468 | #
|
469 | # JUMPS
|
470 | #
|
471 | elif inst_name == 'POP_JUMP_IF_FALSE':
|
472 | # NOTE: This is a "superinstruction"; it could just be POP, then
|
473 | # JUMP_IF_FALSE.
|
474 | val = self.pop()
|
475 | if not val:
|
476 | self.jump(arg)
|
477 |
|
478 | elif inst_name == 'JUMP_ABSOLUTE':
|
479 | self.jump(arg) # dis.hasjabs
|
480 |
|
481 | elif inst_name == 'JUMP_FORWARD':
|
482 | self.jump(self.frame.f_lasti + arg) # dis.hasjrel
|
483 |
|
484 | #
|
485 | # Intentionally ignored imports
|
486 | #
|
487 | elif inst_name == 'IMPORT_NAME':
|
488 | pass
|
489 | elif inst_name == 'IMPORT_FROM':
|
490 | pass
|
491 |
|
492 | else:
|
493 | raise AssertionError('OVM not handling %r' % inst_name)
|
494 |
|
495 | while why and frame.block_stack:
|
496 | debug('WHY %s', why)
|
497 | debug('STACK %s', frame.block_stack)
|
498 | why = self.frame.handle_block_stack(why, self)
|
499 |
|
500 | # TODO: I should be popping and cleaning up blocks here.
|
501 | if why:
|
502 | break
|
503 |
|
504 | self._pop_frame()
|
505 |
|
506 | #debug1('num_ticks: %d' % num_ticks)
|
507 | return self.return_value
|
508 |
|
509 | def check_invariants(self):
|
510 | # Check some invariants
|
511 | if self.frames: # pragma: no cover
|
512 | raise VirtualMachineError("Frames left over!")
|
513 | # NOTE: self.frame is None at the end.
|
514 | if self.frame and self.frame.stack: # pragma: no cover
|
515 | raise VirtualMachineError("Data left on stack! %r" % self.frame.stack)
|