OILS / opy / byterun / ovm.py View on Github | oilshell.org

515 lines, 284 significant
1from __future__ import print_function
2"""
3ovm.py
4
5TODO:
6
7 Define a VM that doesn't use exceptions for control flow!
8"""
9
10import operator # for + - * / etc.
11import os
12import sys
13import repr as repr_lib
14
15from .pyvm2 import debug1
16from ..lib import dis
17
18# Create a repr that won't overflow.
19repr_obj = repr_lib.Repr()
20repr_obj.maxother = 120
21repper = repr_obj.repr
22
23# Different than log
24def debug(msg, *args):
25 if not VERBOSE:
26 return
27
28 debug1(msg, *args)
29
30
31VERBOSE = False
32#VERBOSE = True
33
34
35def run_code(vm, code):
36 """Main entry point.
37
38 Used by tests and by execfile.
39 """
40 frame = vm.make_frame(code)
41 val = vm.run_frame(frame)
42 vm.check_invariants()
43 if os.getenv('VM_SUMMARY'):
44 debug1('*** OVM executed for %d ticks', vm.num_ticks)
45 # If we return the number of ticks here, the unit tests break.
46 return val
47
48
49def run_code_object(code, args, package=None):
50 sys.argv = args
51 vm = VirtualMachine()
52 run_code(vm, code)
53
54
55# Note: handler is a jump target!
56import collections
57Block = collections.namedtuple("Block", "type, handler, level")
58
59
60class Frame(object):
61 def __init__(self, f_code, callargs):
62 self.f_code = f_code
63 self.f_locals = dict(callargs) # Do we need to make a copy?
64 self.stack = [] # expression stack
65 self.f_lineno = f_code.co_firstlineno
66 self.f_lasti = 0
67
68 self.block_stack = []
69 self.generator = None
70
71 def __repr__(self): # pragma: no cover
72 return '<Frame at 0x%08x: %r @ %d>' % (
73 id(self), self.f_code.co_filename, self.f_lineno
74 )
75
76 def top(self):
77 """Return the value at the top of the stack, with no changes."""
78 return self.stack[-1]
79
80 def pop(self, i=0):
81 """Pop a value from the stack.
82
83 Default to the top of the stack, but `i` can be a count from the top
84 instead.
85
86 """
87 return self.stack.pop(-1-i)
88
89 def push(self, *vals):
90 """Push values onto the value stack."""
91 self.stack.extend(vals)
92
93 def popn(self, n):
94 """Pop a number of values from the value stack.
95
96 A list of `n` values is returned, the deepest value first.
97 """
98 if n:
99 ret = self.stack[-n:]
100 del self.stack[-n:]
101 return ret
102 else:
103 return []
104
105 def peek(self, n):
106 """Get a value `n` entries down in the stack, without changing the stack."""
107 return self.stack[-n]
108
109 def jump(self, offset):
110 """Move the bytecode pointer to `offset`, so it will execute next."""
111 self.f_lasti = offset
112
113 def push_block(self, type, handler=None, level=None):
114 """Used for SETUP_{LOOP,EXCEPT,FINALLY,WITH}."""
115 if level is None:
116 level = len(self.stack)
117 self.block_stack.append(Block(type, handler, level))
118
119 def pop_block(self):
120 return self.block_stack.pop()
121
122 def _unwind_block(self, block, vm):
123 """
124 Args:
125 vm: VirtualMachineError to possibly mutate
126 """
127 while len(self.stack) > block.level:
128 self.pop()
129
130 def handle_block_stack(self, why, vm):
131 """
132 After every bytecode that returns why != None, handle everything on the
133 block stack.
134
135 The block stack and data stack are shuffled for looping, exception
136 handling, or returning.
137 """
138 assert why != 'yield'
139
140 block = self.block_stack[-1]
141 assert block.type == 'loop', block.type
142
143 if why == 'continue':
144 self.jump(vm.return_value) # why is it left in the return value?
145 return None
146
147 self.pop_block()
148 self._unwind_block(block, vm)
149
150 if why == 'break':
151 self.jump(block.handler)
152 return None
153
154 raise AssertionError('why = %r' % why)
155
156 def decode_next_raw(self):
157 """
158 Parse 1 - 3 bytes of bytecode into an instruction and maybe arguments.
159 """
160 opcode = ord(self.f_code.co_code[self.f_lasti])
161 self.f_lasti += 1
162
163 arguments = []
164 if opcode >= dis.HAVE_ARGUMENT:
165 a1 = self.f_code.co_code[self.f_lasti]
166 a2 = self.f_code.co_code[self.f_lasti+1]
167 arg = ord(a1) + (ord(a2) << 8)
168 self.f_lasti += 2
169 else:
170 arg = None
171 return opcode, arg
172
173 def line_number(self):
174 """Get the current line number the frame is executing."""
175 # We don't keep f_lineno up to date, so calculate it based on the
176 # instruction address and the line number table.
177 lnotab = self.f_code.co_lnotab
178 byte_increments = lnotab[0::2]
179 line_increments = lnotab[1::2]
180
181 byte_num = 0
182 line_num = self.f_code.co_firstlineno
183
184 for byte_incr, line_incr in zip(byte_increments, line_increments):
185 byte_incr = ord(byte_incr)
186 line_incr = ord(line_incr)
187
188 byte_num += byte_incr
189 if byte_num > self.f_lasti:
190 break
191 line_num += line_incr
192
193 return line_num
194
195
196class VirtualMachineError(Exception):
197 """For raising errors in the operation of the VM."""
198 pass
199
200
201BINARY_OPERATORS = {
202 'POWER': pow,
203 'MULTIPLY': operator.mul,
204 'DIVIDE': getattr(operator, 'div', lambda x, y: None),
205 'FLOOR_DIVIDE': operator.floordiv,
206 'TRUE_DIVIDE': operator.truediv,
207 'MODULO': operator.mod,
208 'ADD': operator.add,
209 'SUBTRACT': operator.sub,
210 'SUBSCR': operator.getitem,
211 'LSHIFT': operator.lshift,
212 'RSHIFT': operator.rshift,
213 'AND': operator.and_,
214 'XOR': operator.xor,
215 'OR': operator.or_,
216}
217
218COMPARE_OPERATORS = [
219 operator.lt,
220 operator.le,
221 operator.eq,
222 operator.ne,
223 operator.gt,
224 operator.ge,
225 lambda x, y: x in y,
226 lambda x, y: x not in y,
227 lambda x, y: x is y,
228 lambda x, y: x is not y,
229 lambda x, y: issubclass(x, Exception) and issubclass(x, y),
230]
231
232
233class VirtualMachine(object):
234
235 def __init__(self, verbose=VERBOSE):
236 """
237 Args:
238 subset: turn off bytecodes that OPy doesn't need (e.g. print
239 statement, etc.)
240 verbose: turn on logging
241 """
242 self.verbose = verbose
243 # some objects define __repr__, which means our debug() logging screws
244 # things up! Even though they don't have side effects, this somehow
245 # matters.
246 self.repr_ok = True
247
248 # The call stack of frames.
249 self.frames = []
250 # The current frame.
251 self.frame = None
252 self.return_value = None
253
254 self.cur_line = None # current line number
255 self.num_ticks = 0
256
257 def top(self):
258 return self.frame.top()
259
260 def pop(self, i=0):
261 return self.frame.pop(i=i)
262
263 def push(self, *vals):
264 self.frame.push(*vals)
265
266 def popn(self, n):
267 return self.frame.popn(n)
268
269 def peek(self, n):
270 return self.frame.peek(n)
271
272 def jump(self, offset):
273 self.frame.jump(offset)
274
275 # TODO: The frame should only have locals?
276 # All globals are constants? No "rebindable" globals. (You can have
277 # mutable objects but not rebind them.)
278 def make_frame(self, code, callargs={}):
279 """
280 Called by self.run_code and Function.__call__.
281 """
282 frame = Frame(code, callargs)
283 return frame
284
285 def log_tick(self, byteName, arguments, opoffset, linestarts):
286 """ Log arguments, block stack, and data stack for each opcode."""
287 indent = " " * (len(self.frames)-1)
288 stack_rep = repper(self.frame.stack)
289 #block_stack_rep = repper(self.frame.block_stack)
290 # repr_lib is causing problems
291 if self.repr_ok:
292 stack_rep = repr(self.frame.stack)
293 #block_stack_rep = repr(self.frame.block_stack)
294
295 arg_str = ''
296 if arguments and self.repr_ok:
297 arg_str = ' %r' % (arguments[0],)
298
299 # TODO: Should increment
300
301 li = linestarts.get(opoffset, None)
302 if li is not None and self.cur_line != li:
303 self.cur_line = li
304
305 debug('%s%d: %s%s (line %s)', indent, opoffset, byteName, arg_str,
306 self.cur_line)
307 if self.repr_ok:
308 debug(' %sval stack: %s', indent, stack_rep)
309 #debug(' %sblock stack: %s', indent, block_stack_rep)
310 debug('')
311
312 # Helpers for run_frame
313 def _push_frame(self, frame):
314 self.frames.append(frame)
315 self.frame = frame
316
317 def _pop_frame(self):
318 # NOTE: Block(handler) is a jump address.
319 #if self.frame.block_stack:
320 if False:
321 debug1('block stack: %s', self.frame.block_stack)
322 raise VirtualMachineError(
323 "Block stack still has %d entries" %
324 len(self.frame.block_stack))
325
326 self.frames.pop()
327 if self.frames:
328 self.frame = self.frames[-1]
329 else:
330 self.frame = None
331
332 def run_frame(self, frame):
333 """Run a frame until it returns or raises an exception.
334
335 This function raises GuestException or returns the return value.
336
337 Corresponds to PyEval_EvalFrameEx in ceval.c. That returns 'PyObject*
338 retval' -- but how does it indicate an exception?
339
340 I think retval is NULL, and then
341
342 """
343 # bytecode offset -> line number
344 #print('frame %s ' % frame)
345 # NOTE: Also done in Frmae.line_number()
346 linestarts = dict(dis.findlinestarts(frame.f_code))
347 #print('STARTS %s ' % linestarts)
348
349 self._push_frame(frame)
350 while True:
351 self.num_ticks += 1
352 if self.num_ticks == 300:
353 raise VirtualMachineError('Too many ticks')
354
355 opoffset = self.frame.f_lasti # For logging only
356 opcode, arg = self.frame.decode_next_raw()
357 inst_name = dis.opname[opcode]
358 arguments = []
359 if self.verbose:
360 self.log_tick(inst_name, arguments, opoffset, linestarts)
361 debug1('arg %s', arg)
362
363 # When unwinding the block stack, we need to keep track of why we
364 # are doing it.
365
366 # NOTE: In addition to returning why == 'exception', this can also
367 # RAISE GuestException from recursive call via call_function.
368
369 why = False
370
371 if inst_name == 'POP_TOP':
372 self.pop()
373
374 elif inst_name == 'LOAD_CONST':
375 const = self.frame.f_code.co_consts[arg]
376 self.push(const)
377
378 elif inst_name == 'STORE_NAME':
379 # This is true. NOTE: dis.hasname is a list.
380 #debug1('STORE_NAME %d', opcode)
381 name = self.frame.f_code.co_names[arg]
382 self.frame.f_locals[name] = self.pop()
383
384 elif inst_name == 'LOAD_NAME':
385 #debug1('NAME arg %d', arg)
386
387 name = self.frame.f_code.co_names[arg]
388 #debug1('NAME %r', name)
389 frame = self.frame
390 if name in frame.f_locals:
391 val = frame.f_locals[name]
392 elif name == 'print': # Special case!
393 val = print
394 #elif name in frame.f_globals:
395 # val = frame.f_globals[name]
396 #elif name in frame.f_builtins:
397 # val = frame.f_builtins[name]
398 else:
399 raise NameError("name '%s' is not defined" % name)
400 self.push(val)
401 # Hack because this is not a global in OVM.
402 #if name == 'True':
403 # self.push(True)
404 #else:
405 # self.push(self.frame.f_locals[name])
406
407 #
408 # Operations
409 #
410 elif inst_name == 'BINARY_ADD':
411 # I guess popn() is slightly faster because you don't decrement.
412 # twice? Even in C?
413 x, y = self.popn(2)
414 self.push(BINARY_OPERATORS['ADD'](x, y))
415
416 elif inst_name == 'COMPARE_OP':
417 x, y = self.popn(2)
418 self.push(COMPARE_OPERATORS[arg](x, y))
419
420 #
421 # FUNCTIONS / LOOPS
422 #
423 elif inst_name == 'CALL_FUNCTION':
424 # NOTE: There are different bytecodes for
425 # CALL_FUNCTION_{VAR,KW,VAR_KW}.
426 # I don't thinks we need those in OPy. I think that is for each
427 # combination of foo(*args, **kwargs). I guess I do need _VAR
428 # for log(msg, *args).
429
430 len_kw, len_pos= divmod(arg, 256)
431 assert len_kw == 0
432 namedargs = {}
433 for i in xrange(len_kw):
434 key, val = self.popn(2)
435 namedargs[key] = val
436
437 posargs = self.popn(len_pos)
438 #posargs.extend(args) # For extras
439
440 func = self.pop() # TODO: assert that it's the print function
441
442 do_wrap = False
443 if do_wrap:
444 # Hm there needs to be a better way of doing this?
445 #callargs = inspect.getcallargs(func, *posargs, **namedargs)
446 callargs = None
447 frame = self.make_frame(func.func_code, callargs)
448 retval = self.run_frame(frame)
449 else:
450 byterun_func = func
451 retval = byterun_func(*posargs, **namedargs)
452 self.push(retval)
453
454 elif inst_name == 'RETURN_VALUE':
455 why = 'return'
456
457 elif inst_name == 'SETUP_LOOP':
458 dest = self.frame.f_lasti + arg # dis.hasjrel
459 self.frame.push_block('loop', dest)
460
461 elif inst_name == 'POP_BLOCK':
462 self.frame.pop_block()
463
464 elif inst_name == 'BREAK_LOOP':
465 # TODO: Get rid of this; it should be a jump.
466 why = 'break'
467
468 #
469 # JUMPS
470 #
471 elif inst_name == 'POP_JUMP_IF_FALSE':
472 # NOTE: This is a "superinstruction"; it could just be POP, then
473 # JUMP_IF_FALSE.
474 val = self.pop()
475 if not val:
476 self.jump(arg)
477
478 elif inst_name == 'JUMP_ABSOLUTE':
479 self.jump(arg) # dis.hasjabs
480
481 elif inst_name == 'JUMP_FORWARD':
482 self.jump(self.frame.f_lasti + arg) # dis.hasjrel
483
484 #
485 # Intentionally ignored imports
486 #
487 elif inst_name == 'IMPORT_NAME':
488 pass
489 elif inst_name == 'IMPORT_FROM':
490 pass
491
492 else:
493 raise AssertionError('OVM not handling %r' % inst_name)
494
495 while why and frame.block_stack:
496 debug('WHY %s', why)
497 debug('STACK %s', frame.block_stack)
498 why = self.frame.handle_block_stack(why, self)
499
500 # TODO: I should be popping and cleaning up blocks here.
501 if why:
502 break
503
504 self._pop_frame()
505
506 #debug1('num_ticks: %d' % num_ticks)
507 return self.return_value
508
509 def check_invariants(self):
510 # Check some invariants
511 if self.frames: # pragma: no cover
512 raise VirtualMachineError("Frames left over!")
513 # NOTE: self.frame is None at the end.
514 if self.frame and self.frame.stack: # pragma: no cover
515 raise VirtualMachineError("Data left on stack! %r" % self.frame.stack)