OILS / opy / opy_main.py View on Github | oilshell.org

601 lines, 409 significant
1#!/usr/bin/env python2
2"""
3opy_main.py
4"""
5from __future__ import print_function
6
7import cStringIO
8import hashlib
9import optparse
10import os
11import sys
12import marshal
13import types
14
15from opy import pytree
16from opy import skeleton
17
18from opy.compiler2 import consts
19from opy.compiler2 import dis_tool
20from opy.compiler2 import misc
21from opy.compiler2 import transformer
22
23# Disabled for now because byterun imports 'six', and that breaks the build.
24from opy.byterun import execfile
25from opy.byterun import ovm
26from opy.util import log
27
28from pgen2 import driver, parse, pgen, grammar
29from pgen2 import token
30from pgen2 import tokenize
31
32from frontend import args
33from core import error
34from core import pyutil
35
36from typing import TYPE_CHECKING
37
38if TYPE_CHECKING:
39 from typing import Dict
40 from pgen2.parse import PNode
41
42
43# From lib2to3/pygram.py. This takes the place of the 'symbol' module.
44# compiler/transformer module needs this.
45
class Symbols(object):
    """Namespace exposing each grammar nonterminal as an integer attribute."""

    def __init__(self, gr):
        """
        Copies every (name, number) pair of gr.symbol2number onto this
        instance as an attribute, so that e.g. 'symbols.file_input' evaluates
        to that symbol's number.

        Args:
          gr: grammar object providing the 'symbol2number' and
            'number2symbol' mappings.
        """
        vars(self).update(gr.symbol2number)
        # The transformer looks symbols up by number through this attribute.
        self.number2symbol = gr.number2symbol
58
59
def HostStdlibNames():
    """Return one dict mapping the host interpreter's symbol AND token
    numbers to their names.

    The entries of symbol.sym_name are inserted first and the entries of
    token.tok_name second, so a token name wins if a number appears in both.
    """
    # Imported locally on purpose: this 'token' is the host stdlib module, not
    # the one imported at the top of this file.
    import symbol
    import token

    names = dict(symbol.sym_name)
    names.update(token.tok_name)
    return names
69
70
def WriteGrammar(grammar_path, marshal_path):
    """Compile a pgen2 grammar file and dump the resulting tables.

    Used for py27.grammar.  (ysh/grammar.pgen2 uses ysh/grammar_gen.py
    instead.)

    Args:
      grammar_path: path of the grammar text file to read.
      marshal_path: path of the binary file the tables are dumped to.
    """
    with open(grammar_path) as grammar_f:
        tables = pgen.MakeGrammar(grammar_f)

    with open(marshal_path, 'wb') as marshal_f:
        tables.dump(marshal_f)

    log('Compiled %s -> grammar tables in %s', grammar_path, marshal_path)
83
84
def CountTupleTree(tu):
    """Return the number of leaves (ints and strings) in a tuple parse tree.

    Tuples themselves are not counted; only the int and str entries nested
    inside them are.

    Raises:
      AssertionError: if an entry is neither a tuple, an int nor a str.
    """
    if isinstance(tu, tuple):
        return sum(CountTupleTree(child) for child in tu)
    if isinstance(tu, (int, str)):
        return 1
    raise AssertionError(tu)
98
99
class TupleTreePrinter(object):
    """Prints a tuple parse tree, one node per line, indented by depth.

    A node is a tuple whose first entry is a token or symbol number.  Token
    tuples look like (num, value) and may carry trailing integer position
    fields, e.g. (num, value, lineno, col).
    """

    def __init__(self, names):
        """
        Args:
          names: dict mapping token and symbol numbers to printable names.
        """
        self._names = names
        # TODO: parameterize by grammar.
        # Numbers below this value are printed as tokens (leaves).
        self.max_token_index = max(token.tok_name)

    def Print(self, tu, f=sys.stdout, indent=0):
        """Write 'tu' and everything below it to f.

        Args:
          tu: a tuple node, or a bare int / str entry.
          f: file-like object to write to.
          indent: nesting depth; one space is written per level.

        Raises:
          AssertionError: if 'tu' is neither a tuple, an int nor a str.
        """
        ind = ' ' * indent
        f.write(ind)
        if isinstance(tu, tuple):
            num = tu[0]
            if num < self.max_token_index:
                f.write(self._names[num])
                # The position fields are only present when the tree was
                # built with line/column info; don't crash when they are
                # missing.
                position = tu[2:]
                if position:
                    where = ', '.join('%d' % n for n in position)
                    f.write(' %s (%s)\n' % (tu[1], where))
                else:
                    f.write(' %s\n' % (tu[1],))
                return
            f.write(self._names[num])
            f.write('\n')
            for entry in tu[1:]:
                self.Print(entry, f, indent=indent+1)
        elif isinstance(tu, int):
            f.write(str(tu))
            f.write('\n')
        elif isinstance(tu, str):
            f.write(tu)
            f.write('\n')
        else:
            raise AssertionError(tu)
127
128
class ParseTreePrinter(object):
    """Prints a tree of PNode instances."""

    def __init__(self, names):
        # type: (Dict[int, str]) -> None
        self.names = names
        self.f = sys.stdout  # used by Print() when no file is passed

    def Print(self, pnode, f=None, indent=0, i=0):
        """Write pnode and its descendants, one node per line.

        Each line has the form '<indent><index> <type name> <value>', where
        the value is '-' unless pnode.tok is a tuple.

        Args:
          pnode: node with 'typ', 'tok' and 'children' attributes.
          f: file-like object to write to; None means self.f.  The same file
            is used for the whole subtree.
          indent: nesting depth; one space is written per level.
          i: index of this node among its siblings.
        """
        if f is None:
            f = self.f

        ind = ' ' * indent
        # NOTE:
        # - 'tok' used to be opaque context
        # - it's None for PRODUCTIONS (nonterminals)
        # - for terminals, it's (prefix, (lineno, column)), where lineno is
        #   1-based, and 'prefix' is a string of whitespace.
        #   e.g. for 'f(1, 3)', the "3" token has a prefix of ' '.
        if isinstance(pnode.tok, tuple):
            # Used for ParseWith
            v = pnode.tok[0]
        else:
            v = '-'
        f.write('%s%d %s %s\n' % (ind, i, self.names[pnode.typ], v))
        if pnode.children:  # could be None
            for child_index, child in enumerate(pnode.children):
                self.Print(child, f=f, indent=indent+1, i=child_index)
155
156
class TableOutput(object):
    """Writes five tab-separated tables (*.tsv2) into one output directory.

    The constructor opens frames/names/consts/flags/ops files and writes a
    header line to each; the Write*Row() methods append one line to the
    corresponding file; Close() closes all of them.
    """

    def __init__(self, out_dir):
        self.out_dir = out_dir

        def _Open(basename):
            return open(os.path.join(out_dir, basename), 'w')

        self.frames_f = _Open('frames.tsv2')
        self.names_f = _Open('names.tsv2')
        self.consts_f = _Open('consts.tsv2')
        self.flags_f = _Open('flags.tsv2')
        self.ops_f = _Open('ops.tsv2')

        # NOTE: The opcode encoding is variable length, so bytecode_bytes is
        # different than the number of instructions.
        self._WriteRow(self.frames_f,
                       ['path', 'code_name', 'argcount', 'nlocals',
                        'stacksize', 'bytecode_bytes'])
        self._WriteRow(self.names_f, ['path', 'code_name', 'kind', 'name'])
        self._WriteRow(self.consts_f,
                       ['path', 'code_name', 'type', 'len_or_val'])
        self._WriteRow(self.flags_f, ['path', 'code_name', 'flag'])
        self._WriteRow(self.ops_f,
                       ['path', 'code_name', 'op_name', 'op_arg'])

    def _WriteRow(self, f, cells):
        """Write the string cells to f as one tab-separated line."""
        print('\t'.join(cells), file=f)

    def WriteFrameRow(self, path, code_name, argcount, nlocals, stacksize,
                      bytecode_bytes):
        """Append a row to the frames table; the numeric fields go through str()."""
        numbers = [argcount, nlocals, stacksize, bytecode_bytes]
        self._WriteRow(self.frames_f,
                       [path, code_name] + [str(n) for n in numbers])

    def WriteNameRow(self, path, code_name, kind, name):
        """Append a row to the names table."""
        self._WriteRow(self.names_f, [path, code_name, kind, name])

    def WriteConstRow(self, path, code_name, type_, len_or_val):
        """Append a row to the consts table; len_or_val goes through str()."""
        self._WriteRow(self.consts_f,
                       [path, code_name, type_, str(len_or_val)])

    def WriteFlagRow(self, path, code_name, flag_name):
        """Append a row to the flags table."""
        self._WriteRow(self.flags_f, [path, code_name, flag_name])

    def WriteOpRow(self, path, code_name, op_name, op_arg):
        """Append a row to the ops table; op_arg goes through str()."""
        self._WriteRow(self.ops_f, [path, code_name, op_name, str(op_arg)])

    def Close(self):
        """Close all five files and log where they were written."""
        for table_f in (self.frames_f, self.names_f, self.consts_f,
                        self.flags_f, self.ops_f):
            table_f.close()
        log('Wrote 5 files in %s', self.out_dir)
205
206
def WriteDisTables(pyc_path, co, out):
    """Write table rows describing code object 'co' and, recursively, every
    code object found among its constants.

    Rows go to the frame, name, op, flag and const tables of 'out'.

    Args:
      pyc_path: path recorded in the first column of every row.
      co: code object to describe.
      out: object with WriteFrameRow / WriteNameRow / WriteOpRow /
        WriteFlagRow / WriteConstRow methods (e.g. TableOutput).
    """
    code_name = co.co_name
    #log('Disassembling %s in %s', co, pyc_path)
    out.WriteFrameRow(pyc_path, code_name, co.co_argcount, co.co_nlocals,
                      co.co_stacksize, len(co.co_code))

    # One row per name, labelled with the tuple it came from.
    name_groups = (
        ('name', co.co_names),
        ('var', co.co_varnames),
        ('cell', co.co_cellvars),
        ('free', co.co_freevars),
    )
    for kind, names in name_groups:
        for name in names:
            out.WriteNameRow(pyc_path, code_name, kind, name)

    # One row per op.
    for op_name, op_arg in dis_tool.ParseOps(co.co_code):
        out.WriteOpRow(pyc_path, code_name, op_name, op_arg)

    # One row per flag that is set, in increasing flag value.
    # TODO: Write a row for every flag.  OPy outputs these:
    # CO_VARARGS, CO_VAR_KEYWORDS, CO_GENERATOR, CO_NEWLOCALS (we only support
    # this?)  FUTURE_DIVISION, FUTURE_ABSOLUTE_IMPORT, etc.
    for flag, flag_name in sorted(consts.VALUE_TO_NAME.items()):
        if co.co_flags & flag:
            out.WriteFlagRow(pyc_path, code_name, flag_name)

    # One row per constant: ints report their value, strings and tuples their
    # length, everything else 'NA'.
    for const in co.co_consts:
        if isinstance(const, int):
            summary = const
        elif isinstance(const, (str, tuple)):
            summary = len(const)
        else:
            summary = 'NA'
        out.WriteConstRow(pyc_path, code_name, const.__class__.__name__,
                          summary)

        # Nested code objects (functions, classes, ...) get their own rows.
        if isinstance(const, types.CodeType):
            WriteDisTables(pyc_path, const, out)
247
248
def Options():
    """Build and return the optparse parser that understands '-c COMMAND'."""
    parser = optparse.OptionParser()

    # NOTE: default command is None because empty string is valid.

    # NOTE: In 'opy run oil.pyc -c', -c is an arg to opy, and not a flag.

    command_opt = optparse.make_option(
        '-c', dest='command', default=None,
        help='Python command to run')
    parser.add_option(command_opt)
    return parser
261
262
# Made by the Makefile.
# Relative path of the marshalled Python 2.7 grammar tables.  OpyCommandMain
# fetches it through the resource loader and feeds the contents to
# grammar.Grammar.loads().
GRAMMAR_REL_PATH = '_build/opy/py27.grammar.marshal'
265
def OpyCommandMain(argv):
    """Dispatch to the right action.

    Args:
      argv: command line WITHOUT the program name: argv[0] is the action
        (subcommand) and the remaining entries are that action's arguments.

    Returns:
      1 when 'parse-with' hits a parse error, 0 after 'compile-ovm', and None
      (which sys.exit() treats as success) for every other action.

    Raises:
      error.Usage: if the action is missing or unknown, or if a path argument
        is missing ('dis-md5', 'run-ovm') or has an unsupported extension
        ('run', 'run-ovm').
    """
    try:
        action = argv[0]
    except IndexError:
        raise error.Usage('Missing required subcommand.')

    argv = argv[1:]  # TODO: Should I do input.ReadRequiredArg()?
                     # That will shift the input.

    if action in (
            'parse', 'parse-with', 'compile', 'dis', 'ast', 'symbols', 'cfg',
            'compile-ovm', 'eval', 'repl', 'run', 'run-ovm'):
        loader = pyutil.GetResourceLoader()
        contents = loader.Get(GRAMMAR_REL_PATH)
        gr = grammar.Grammar()
        gr.loads(contents)

        # In Python 2 code, always use from __future__ import print_function.
        try:
            del gr.keywords["print"]
        except KeyError:
            pass

        symbols = Symbols(gr)
        pytree.Init(symbols)  # for type_repr() pretty printing
        transformer.Init(symbols)  # for _names and other dicts

        compiler = skeleton.Compiler(gr)
    else:
        # e.g. pgen2 doesn't use any of these.  Maybe we should make a
        # different tool.
        compiler = None

    # Flags shared by all the actions that invoke the compiler.
    # NOTE(review): the first two options use optparse's default 'store'
    # action, so a value given on the command line arrives as a string
    # (e.g. '0', which is truthy) -- confirm that is what the compiler
    # expects.
    c_parser = optparse.OptionParser()
    c_parser.add_option(
        '--emit-docstring', default=True,
        help='Whether to emit docstrings')
    c_parser.add_option(
        '--fast-ops', default=True,
        help='Whether to emit LOAD_FAST, STORE_FAST, etc.')
    c_parser.add_option(
        '--oil-subset', action='store_true', default=False,
        help='Only allow the constructs necessary to implement '
             'Oil. Example: using multiple inheritance will abort '
             'compilation.')

    # NOTE: positional arguments are deliberately NOT stored in a local
    # called 'args'.  That name would shadow the 'args' module imported at
    # the top of this file for the whole function.

    #
    # Actions
    #

    if action == 'pgen2':
        grammar_path = argv[0]
        marshal_path = argv[1]
        WriteGrammar(grammar_path, marshal_path)

    elif action == 'stdlib-parse':
        # This is what the compiler/ package was written against.
        import parser

        # The action name was already shifted off, so the path is argv[0],
        # like for every other action.
        py_path = argv[0]
        with open(py_path) as f:
            st = parser.suite(f.read())

        tree = st.totuple()

        printer = TupleTreePrinter(HostStdlibNames())
        printer.Print(tree)
        n = CountTupleTree(tree)
        log('COUNT %d', n)

    elif action == 'lex':
        py_path = argv[0]
        with open(py_path) as f:
            # The token generator reads lazily, so consume it while the file
            # is still open.
            tokens = tokenize.generate_tokens(f.readline)
            for typ, val, start, end, unused_line in tokens:
                print('%10s %10s %-10s %r' %
                      (start, end, token.tok_name[typ], val))

    elif action == 'lex-names':  # Print all the NAME tokens.
        for py_path in argv:
            log('Lexing %s', py_path)
            with open(py_path) as f:
                tokens = tokenize.generate_tokens(f.readline)
                for typ, val, start, end, unused_line in tokens:
                    if typ == token.NAME:
                        print(val)

    elif action == 'parse':
        py_path = argv[0]
        with open(py_path) as f:
            tokens = tokenize.generate_tokens(f.readline)
            p = parse.Parser(gr)
            pnode = driver.PushTokens(p, tokens, gr, 'file_input')

        printer = ParseTreePrinter(transformer._names)  # print raw nodes
        printer.Print(pnode)

    # Parse with an arbitrary grammar, but the Python lexer.
    elif action == 'parse-with':
        grammar_path = argv[0]
        start_symbol = argv[1]
        code_str = argv[2]

        with open(grammar_path) as f:
            gr = pgen.MakeGrammar(f)

        f = cStringIO.StringIO(code_str)
        tokens = tokenize.generate_tokens(f.readline)
        p = parse.Parser(gr)  # no convert=
        try:
            pnode = driver.PushTokens(p, tokens, gr, start_symbol)
        except parse.ParseError as e:
            # Extract location information and show it.
            _, _, (lineno, offset) = e.opaque
            # extra line needed for '\n' ?
            lines = code_str.splitlines() + ['']

            line = lines[lineno-1]
            log(' %s', line)
            log(' %s^', ' '*offset)
            log('Parse Error: %s', e)
            return 1
        printer = ParseTreePrinter(transformer._names)  # print raw nodes
        printer.Print(pnode)

    elif action in ('ast', 'symbols', 'cfg'):
        # Output the AST, the symbols, or the Control Flow Graph.  The action
        # name is passed through unchanged as the compiler's print_action.
        opt, pos_args = c_parser.parse_args(argv)
        py_path = pos_args[0]
        with open(py_path) as f:
            compiler.Compile(f, opt, 'exec', print_action=action)

    elif action == 'compile':  # 'opyc compile' is pgen2 + compiler2
        # spec.Arg('action', ['foo', 'bar'])
        # But that leads to some duplication.

        opt, pos_args = c_parser.parse_args(argv)
        py_path = pos_args[0]
        out_path = pos_args[1]

        with open(py_path) as f:
            co = compiler.Compile(f, opt, 'exec')

        log("Compiled to %d bytes of top-level bytecode", len(co.co_code))

        # Write the .pyc file
        with open(out_path, 'wb') as out_f:
            h = misc.getPycHeader(py_path)
            out_f.write(h)
            marshal.dump(co, out_f)

    elif action == 'compile-ovm':
        # NOTE: obsolete
        from ovm2 import oheap2

        opt, pos_args = c_parser.parse_args(argv)
        py_path = pos_args[0]
        out_path = pos_args[1]

        # Compile to Python bytecode (TODO: remove ovm_codegen.py)
        with open(py_path) as f:
            co = compiler.Compile(f, opt, 'exec')

        with open(out_path, 'wb') as out_f:
            oheap2.Write(co, out_f)
        return 0

    elif action == 'eval':
        # Like compile, but parses to a code object and prints it
        opt, pos_args = c_parser.parse_args(argv)
        py_expr = pos_args[0]

        f = skeleton.StringInput(py_expr, '<eval input>')
        co = compiler.Compile(f, opt, 'eval')

        v = dis_tool.Visitor()
        v.show_code(co)
        print()
        print('RESULT:')
        print(eval(co))

    elif action == 'repl':  # Like eval in a loop
        # The compiler needs the option values even though the REPL takes no
        # positional arguments.
        opt, _ = c_parser.parse_args(argv)
        while True:
            try:
                py_expr = raw_input('opy> ')
            except EOFError:  # end of input (e.g. Ctrl-D) ends the session
                break
            f = skeleton.StringInput(py_expr, '<REPL input>')

            # TODO: change this to 'single input'?  Why doesn't this work?
            co = compiler.Compile(f, opt, 'eval')

            v = dis_tool.Visitor()
            v.show_code(co)
            print(eval(co))

    elif action == 'dis-tables':
        out_dir = argv[0]
        pyc_paths = argv[1:]

        out = TableOutput(out_dir)

        for pyc_path in pyc_paths:
            # .pyc files are binary
            with open(pyc_path, 'rb') as f:
                _magic, _unixtime, _timestamp, code = dis_tool.unpack_pyc(f)
                WriteDisTables(pyc_path, code, out)

        out.Close()

    elif action == 'dis':
        opt, pos_args = c_parser.parse_args(argv)
        path = pos_args[0]

        v = dis_tool.Visitor()

        if path.endswith('.py'):
            with open(path) as f:
                co = compiler.Compile(f, opt, 'exec')

            log("Compiled to %d bytes of top-level bytecode",
                len(co.co_code))
            v.show_code(co)

        else:  # assume pyc_path
            with open(path, 'rb') as f:
                v.Visit(f)

    elif action == 'dis-md5':
        pyc_paths = argv
        if not pyc_paths:
            raise error.Usage('dis-md5: At least one .pyc path is required.')

        for path in pyc_paths:
            h = hashlib.md5()
            with open(path, 'rb') as f:
                magic = f.read(4)
                h.update(magic)
                f.read(4)  # skip the timestamp so it doesn't affect the hash
                while True:
                    b = f.read(64 * 1024)
                    if not b:
                        break
                    h.update(b)
            print('%6d %s %s' % (os.path.getsize(path), h.hexdigest(), path))

    elif action == 'run':  # Compile and run, without writing pyc file
        # TODO: Add an option like -v in __main__

        opt, pos_args = c_parser.parse_args(argv)
        opy_argv = pos_args  # including pos_args[0]
        py_path = pos_args[0]

        if py_path.endswith('.py'):
            with open(py_path) as f:
                co = compiler.Compile(f, opt, 'exec')
            execfile.run_code_object(co, opy_argv)

        elif py_path.endswith('.pyc') or py_path.endswith('.opyc'):
            with open(py_path, 'rb') as f:
                f.seek(8)  # past header.  TODO: validate it!
                co = marshal.load(f)
            execfile.run_code_object(co, opy_argv)

        else:
            raise error.Usage('Invalid path %r' % py_path)

    elif action == 'run-ovm':  # Compile and run, without writing pyc file
        # Parsed with the same option parser as the other compile actions.
        opt, pos_args = c_parser.parse_args(argv)
        if not pos_args:
            raise error.Usage('Expected path to Python input')
        py_path = pos_args[0]
        # NOTE(review): the arguments after the path are forwarded; this
        # assumes the program path itself is not part of opy_argv here
        # (unlike 'run') -- confirm against ovm.run_code_object.
        opy_argv = pos_args[1:]

        if py_path.endswith('.py'):
            mode = 'ovm'  # OVM bytecode is different!
            with open(py_path) as f:
                co = compiler.Compile(f, opt, mode)
            log('Compiled to %d bytes of OVM code', len(co.co_code))
            ovm.run_code_object(co, opy_argv)

        elif py_path.endswith('.pyc') or py_path.endswith('.opyc'):
            with open(py_path, 'rb') as f:
                f.seek(8)  # past header.  TODO: validate it!
                co = marshal.load(f)
            ovm.run_code_object(co, opy_argv)

        else:
            raise error.Usage('Invalid path %r' % py_path)

    else:
        raise error.Usage('Invalid action %r' % action)
585
586
def main(argv):
    """Run the command line tool.

    Args:
      argv: full command line, including the program name in argv[0].

    Returns:
      2 after a usage error and 1 after a RuntimeError; both are logged.
      When the command finishes without such an error this function does not
      return: it exits the process with the command's status.
    """
    try:
        status = OpyCommandMain(argv[1:])
    except error.Usage as e:
        #print(_OPY_USAGE, file=sys.stderr)
        log('opy: %s', e.msg)
        return 2
    except RuntimeError as e:
        log('FATAL: %s', e)
        return 1
    else:
        sys.exit(status)
597
598
# Script entry point: run main() on the process arguments and use its return
# value as the exit status.
if __name__ == '__main__':
    sys.exit(main(sys.argv))
601