1 | """
|
2 | skeleton.py: The compiler pipeline.
|
3 | """
|
4 | from __future__ import print_function
|
5 |
|
6 | import cStringIO
|
7 |
|
8 | from .compiler2 import future
|
9 | from .compiler2 import pyassem
|
10 | from .compiler2 import pycodegen
|
11 | from .compiler2 import syntax
|
12 | from .compiler2 import symbols
|
13 | from .compiler2 import transformer
|
14 |
|
15 | from pgen2 import tokenize
|
16 | from pgen2 import driver
|
17 | from pgen2 import parse
|
18 |
|
19 |
|
def _PrintScopes(scopes):
    """Pretty-print every scope in `scopes`, one blank line after each.

    Args:
      scopes: mapping whose values provide a PrettyPrint() method.  The
        mapping is already flattened, so no recursion into nested scopes
        is done here.
    """
    for _node, each_scope in scopes.iteritems():
        each_scope.PrettyPrint()
        print()
        # Debug aid: uncomment to also dump the children of each scope.
        #for c in each_scope.children:
        #    print(c)
|
27 |
|
28 |
|
class _ModuleContext(object):
    """Module-level data for the CodeGenerator tree.

    Attributes are plain values stored exactly as passed in:
      filename: name of the file being compiled.
      comp_opt: compilation options.
      scopes: scope table.
      futures: future features; defaults to an empty tuple.
    """

    def __init__(self, filename, comp_opt, scopes, futures=()):
        self.filename, self.comp_opt = filename, comp_opt
        self.scopes, self.futures = scopes, futures
|
37 |
|
38 |
|
def _ParseTreeToTuples(pnode):
    """Convert a pgen2 parse tree into nested tuples.

    parser.st objects from parsermodule.c have a totuple() method, which
    outputs tuples like this.  The original "compiler2" module expected this
    format, but the original pgen2 produced a different format.

    Args:
      pnode: parse node with `typ`, `tok` and `children` attributes.

    Returns:
      (typ, child_tuple, ...) for a node with children, otherwise
      (typ, value, lineno, column).  A node without a token yields
      value None and position 0, 0.
    """
    token = pnode.tok
    if token:
        # The token is opaque here; only its value and position are used.
        value, _, (lineno, column) = token
    else:
        value, lineno, column = None, 0, 0

    kids = pnode.children
    if not kids:
        return (pnode.typ, value, lineno, column)

    converted = tuple(_ParseTreeToTuples(kid) for kid in kids)
    return (pnode.typ,) + converted
|
56 |
|
57 |
|
class StringInput(object):
    """A wrapper for a StringIO object.

    It follows Python's convention of having an f.name attribute, and
    forwards read(), readline() and close() to the underlying buffer.
    """

    def __init__(self, s, name):
        self.name = name
        self.f = cStringIO.StringIO(s)

    def read(self, *args):
        buf = self.f
        return buf.read(*args)

    def readline(self, *args):
        buf = self.f
        return buf.readline(*args)

    def close(self):
        buf = self.f
        return buf.close()
|
75 |
|
76 |
|
class ParseMode(object):
    """A shortcut: bundles a grammar with the start symbol to parse from."""

    def __init__(self, gr, start_symbol):
        self.gr, self.start_symbol = gr, start_symbol
|
82 |
|
83 |
|
class Compiler(object):
    """Holds a grammar and forwards compile requests to Compile()."""

    def __init__(self, gr):
        self.gr = gr

    def Compile(self, f, opt, *args, **kwargs):
        """Call the module-level Compile() with this object's grammar.

        Extra positional and keyword arguments are passed through unchanged.
        """
        # TODO: inline this call
        grammar = self.gr
        return Compile(f, opt, grammar, *args, **kwargs)
|
91 |
|
92 |
|
# Maps the `mode` argument of Compile() to the grammar start symbol.
_START_SYMBOLS = {
    'single': 'single_input',
    'exec': 'file_input',
    'eval': 'eval_input',
}


def Compile(f, opt, gr, mode, print_action=None):
    """Run the full compiler pipeline.

    Stages, in order: tokenize, parse, convert the parse tree to tuples,
    transform to an AST, syntax check, symbol analysis, code generation into
    a flow graph, and finally code object creation.

    Args:
      f: file handle with input source code.  Its `name` attribute is used
        as the filename and its `readline` method feeds the tokenizer.
      opt: Parsed command line flags (compilation options).  Stored on the
        module context and passed to pyassem.MakeCodeObject().
      gr: Grammar
      mode: 'exec', 'eval', or 'single', like Python's builtin compile()
      print_action: None, 'ast', 'symbols', or 'cfg'.  Print an intermediate
        representation and stop after that stage.

    Returns:
      The value returned by pyassem.MakeCodeObject(), or None when
      print_action stopped the pipeline early.

    Raises:
      AssertionError: if mode is not 'exec', 'eval', or 'single'.
    """
    # Validate the mode up front so that an unknown mode fails with a clear
    # error before any parsing starts, instead of an unbound start_symbol.
    if mode not in _START_SYMBOLS:
        raise AssertionError('Invalid mode %r' % mode)
    start_symbol = _START_SYMBOLS[mode]

    filename = f.name

    tokens = tokenize.generate_tokens(f.readline)

    p = parse.Parser(gr)
    parse_tree = driver.PushTokens(p, tokens, gr, start_symbol)

    parse_tuples = _ParseTreeToTuples(parse_tree)

    tr = transformer.Transformer()
    as_tree = tr.transform(parse_tuples)

    if print_action == 'ast':
        print(as_tree)
        return

    # NOTE: This currently does nothing!
    v = syntax.SyntaxErrorChecker()
    v.Dispatch(as_tree)

    s = symbols.SymbolVisitor()
    s.Dispatch(as_tree)

    if print_action == 'symbols':
        _PrintScopes(s.scopes)
        return

    graph = pyassem.FlowGraph()  # Mutated by code generator

    if mode == "single":  # Not used now?
        ctx = _ModuleContext(filename, opt, s.scopes)
        # NOTE: the name of the Frame is a comment, not exposed to users.
        frame = pyassem.Frame("<interactive>", filename)  # mutated
        gen = pycodegen.InteractiveCodeGenerator(ctx, frame, graph)
        gen.set_lineno(as_tree)

    elif mode == "exec":
        # TODO: Does this need to be made more efficient?
        p1 = future.FutureParser()
        p2 = future.BadFutureParser()
        p1.Dispatch(as_tree)
        p2.Dispatch(as_tree)

        ctx = _ModuleContext(filename, opt, s.scopes,
                             futures=p1.get_features())
        frame = pyassem.Frame("<module>", filename)  # mutated

        gen = pycodegen.TopLevelCodeGenerator(ctx, frame, graph)

    elif mode == "eval":
        ctx = _ModuleContext(filename, opt, s.scopes)
        frame = pyassem.Frame("<expression>", filename)  # mutated
        gen = pycodegen.TopLevelCodeGenerator(ctx, frame, graph)

    else:
        # Defensive: not reachable while _START_SYMBOLS and the branches
        # above list the same modes.
        raise AssertionError('Invalid mode %r' % mode)

    # NOTE: There is no Start() or FindLocals() at the top level.
    gen.Dispatch(as_tree)  # mutates graph
    gen.Finish()

    if print_action == 'cfg':
        print(graph)
        return

    co = pyassem.MakeCodeObject(frame, graph, opt)

    # NOTE: Could call marshal.dump here?
    return co
|