| 1 | """
|
| 2 | skeleton.py: The compiler pipeline.
|
| 3 | """
|
| 4 | from __future__ import print_function
|
| 5 |
|
| 6 | import cStringIO
|
| 7 |
|
| 8 | from .compiler2 import future
|
| 9 | from .compiler2 import pyassem
|
| 10 | from .compiler2 import pycodegen
|
| 11 | from .compiler2 import syntax
|
| 12 | from .compiler2 import symbols
|
| 13 | from .compiler2 import transformer
|
| 14 |
|
| 15 | from pgen2 import tokenize
|
| 16 | from pgen2 import driver
|
| 17 | from pgen2 import parse
|
| 18 |
|
| 19 |
|
def _PrintScopes(scopes):
    """Pretty-print each scope in `scopes`, followed by a blank line.

    Args:
      scopes: mapping-like object with iteritems() yielding (node, scope)
        pairs.  Only the scope is used; it must provide PrettyPrint().
    """
    # The mapping is already flattened out, so scope.children is not walked.
    for _, scope_obj in scopes.iteritems():
        scope_obj.PrettyPrint()
        print()
|
| 27 |
|
| 28 |
|
class _ModuleContext(object):
    """Module-level data shared by the CodeGenerator tree.

    Attributes:
      filename: name of the file being compiled
      comp_opt: compilation options
      scopes: scope table, stored exactly as passed in
      futures: value passed as `futures`; an empty tuple when not given
    """

    def __init__(self, filename, comp_opt, scopes, futures=()):
        self.filename, self.comp_opt = filename, comp_opt
        self.scopes, self.futures = scopes, futures
|
| 37 |
|
| 38 |
|
def _ParseTreeToTuples(pnode):
    """Convert a parse tree node into the nested-tuple form used by compiler2.

    parser.st objects from parsermodule.c have a totuple() method, which
    outputs tuples like this.  The original "compiler2" module expected this
    format, but the original pgen2 produced a different format.

    Args:
      pnode: parse tree node with `typ`, `tok` and `children` attributes.

    Returns:
      For a node with children: (typ, child_tuple, child_tuple, ...).
      For a leaf: (typ, value, lineno, column), taken from the node's token,
      or (typ, None, 0, 0) when the node has no token.
    """
    if pnode.children:
        # Interior node: its own token is not part of the output, so it is not
        # unpacked at all.
        subtrees = tuple(_ParseTreeToTuples(child) for child in pnode.children)
        return (pnode.typ,) + subtrees

    if pnode.tok:
        value, _, (lineno, column) = pnode.tok  # middle field is opaque here
    else:
        value, lineno, column = None, 0, 0
    return (pnode.typ, value, lineno, column)
|
| 56 |
|
| 57 |
|
class StringInput(object):
    """A file-like wrapper around a StringIO object.

    Following Python's convention for file objects, the input's name is
    available as the `name` attribute.
    """

    def __init__(self, s, name):
        self.name = name
        self.f = cStringIO.StringIO(s)

    def read(self, *args):
        """Read from the underlying buffer; arguments are passed through."""
        data = self.f.read(*args)
        return data

    def readline(self, *args):
        """Read one line from the underlying buffer; arguments are passed through."""
        line = self.f.readline(*args)
        return line

    def close(self):
        """Close the underlying buffer and return whatever its close() returns."""
        result = self.f.close()
        return result
|
| 75 |
|
| 76 |
|
class ParseMode(object):
    """A shortcut: holds a grammar together with a start symbol."""

    def __init__(self, gr, start_symbol):
        self.gr, self.start_symbol = gr, start_symbol
|
| 82 |
|
| 83 |
|
class Compiler(object):
    """Holds a grammar and passes it to the module-level Compile()."""

    def __init__(self, gr):
        self.gr = gr

    def Compile(self, f, opt, *args, **kwargs):
        """Call the module-level Compile() with this object's grammar.

        Args:
          f: passed through as the first argument
          opt: passed through as the second argument
          *args, **kwargs: passed through after the grammar

        Returns:
          Whatever the module-level Compile() returns.
        """
        # TODO: inline this call
        grammar = self.gr
        return Compile(f, opt, grammar, *args, **kwargs)
|
| 91 |
|
| 92 |
|
def Compile(f, opt, gr, mode, print_action=None):
    """Run the full compiler pipeline.

    Args:
      f: file handle with input source code.  Its `name` attribute and
        readline() method are used.
      opt: Parsed command line flags
      gr: Grammar
      mode: 'exec', 'eval', or 'single', like Python's builtin compile()
      print_action: 'ast', 'symbols' or 'cfg'.  Print that intermediate
        representation and stop there.

    Returns:
      The result of pyassem.MakeCodeObject(), or None when print_action
      stopped the pipeline early.

    Raises:
      AssertionError: if mode is not one of the three supported values.
    """
    # Resolve the start symbol before doing any work, so that an invalid mode
    # is reported as such instead of surfacing later as an unbound
    # start_symbol.
    if mode == 'single':
        start_symbol = 'single_input'
    elif mode == 'exec':
        start_symbol = 'file_input'
    elif mode == 'eval':
        start_symbol = 'eval_input'
    else:
        raise AssertionError('Invalid mode %r' % (mode,))

    filename = f.name

    tokens = tokenize.generate_tokens(f.readline)

    p = parse.Parser(gr)
    parse_tree = driver.PushTokens(p, tokens, gr, start_symbol)

    parse_tuples = _ParseTreeToTuples(parse_tree)

    tr = transformer.Transformer()
    as_tree = tr.transform(parse_tuples)

    if print_action == 'ast':
        print(as_tree)
        return

    # NOTE: This currently does nothing!
    v = syntax.SyntaxErrorChecker()
    v.Dispatch(as_tree)

    s = symbols.SymbolVisitor()
    s.Dispatch(as_tree)

    if print_action == 'symbols':
        _PrintScopes(s.scopes)
        return

    graph = pyassem.FlowGraph()  # Mutated by code generator

    if mode == "single":  # Not used now?
        ctx = _ModuleContext(filename, opt, s.scopes)
        # NOTE: the name of the Frame is a comment, not exposed to users.
        frame = pyassem.Frame("<interactive>", filename)  # mutated
        gen = pycodegen.InteractiveCodeGenerator(ctx, frame, graph)
        gen.set_lineno(as_tree)

    elif mode == "exec":
        # TODO: Does this need to be made more efficient?
        p1 = future.FutureParser()
        # NOTE(review): nothing is read back from p2; presumably it only
        # reports misplaced __future__ imports -- confirm.
        p2 = future.BadFutureParser()
        p1.Dispatch(as_tree)
        p2.Dispatch(as_tree)

        ctx = _ModuleContext(filename, opt, s.scopes,
                             futures=p1.get_features())
        frame = pyassem.Frame("<module>", filename)  # mutated

        gen = pycodegen.TopLevelCodeGenerator(ctx, frame, graph)

    else:  # mode == "eval"; any other value was rejected at the top.
        ctx = _ModuleContext(filename, opt, s.scopes)
        frame = pyassem.Frame("<expression>", filename)  # mutated
        gen = pycodegen.TopLevelCodeGenerator(ctx, frame, graph)

    # NOTE: There is no Start() or FindLocals() at the top level.
    gen.Dispatch(as_tree)  # mutates graph
    gen.Finish()

    if print_action == 'cfg':
        print(graph)
        return

    co = pyassem.MakeCodeObject(frame, graph, opt)

    # NOTE: Could call marshal.dump here?
    return co
|