OILS / mycpp / pass_state.py View on Github | oilshell.org

363 lines, 188 significant
1"""
2pass_state.py
3"""
4from __future__ import print_function
5
6import os
7from collections import defaultdict
8
9from mycpp.util import join_name, log, SymbolPath
10
11from typing import Optional
12
13_ = log
14
15
16class Virtual(object):
17 """Calculate which C++ methods need the virtual keyword.
18
19 See unit test for example usage.
20 """
21
22 def __init__(self) -> None:
23 self.methods: dict[SymbolPath, list[str]] = defaultdict(list)
24 self.subclasses: dict[SymbolPath, list[tuple[str]]] = defaultdict(list)
25 self.virtuals: dict[tuple[SymbolPath, str], Optional[tuple[SymbolPath,
26 str]]] = {}
27 self.has_vtable: dict[SymbolPath, bool] = {}
28 self.can_reorder_fields: dict[SymbolPath, bool] = {}
29
30 # _Executor -> vm::_Executor
31 self.base_class_unique: dict[str, SymbolPath] = {}
32
33 # These are called on the Forward Declare pass
34 def OnMethod(self, class_name: SymbolPath, method_name: str) -> None:
35 #log('OnMethod %s %s', class_name, method_name)
36
37 # __init__ and so forth don't count
38 if method_name.startswith('__') and method_name.endswith('__'):
39 return
40
41 self.methods[class_name].append(method_name)
42
43 def OnSubclass(self, base_class: SymbolPath, subclass: SymbolPath) -> None:
44 if len(base_class) > 1:
45 # Hack for
46 #
47 # class _Executor: pass
48 # versus
49 # class MyExecutor(vm._Executor): pass
50 base_key = base_class[-1]
51
52 # Fail if we have two base classes in different namespaces with the same
53 # name.
54 if base_key in self.base_class_unique:
55 # Make sure we don't have collisions
56 assert (self.base_class_unique[base_key] == base_class or
57 base_class
58 in self.subclasses[self.base_class_unique[base_key]]
59 ), base_class
60 else:
61 self.base_class_unique[base_key] = base_class
62
63 else:
64 base_key = base_class
65
66 self.subclasses[base_class].append(subclass)
67
68 def Calculate(self) -> None:
69 """Call this after the forward declare pass."""
70 for base_class, subclasses in self.subclasses.items():
71 self.can_reorder_fields[base_class] = False
72
73 for subclass in subclasses:
74 self.can_reorder_fields[subclass] = False
75
76 b_methods = self.methods[base_class]
77 s_methods = self.methods[subclass]
78 overlapping = set(b_methods) & set(s_methods)
79 for method in overlapping:
80 self.virtuals[(base_class, method)] = None
81 self.virtuals[(subclass, method)] = (base_class, method)
82 if overlapping:
83 self.has_vtable[base_class] = True
84 self.has_vtable[subclass] = True
85
86 # These is called on the Decl pass
87 def IsVirtual(self, class_name: SymbolPath, method_name: str) -> bool:
88 return (class_name, method_name) in self.virtuals
89
90 def HasVTable(self, class_name: SymbolPath) -> bool:
91 return class_name in self.has_vtable
92
93 def CanReorderFields(self, class_name: SymbolPath) -> bool:
94 if class_name in self.can_reorder_fields:
95 return self.can_reorder_fields[class_name]
96 else:
97 return True # by default they can be reordered
98
99
100class Fact(object):
101 """
102 An abstract fact. These can be used to build up datalog programs.
103 """
104
105 def __init__(self) -> None:
106 pass
107
108 def name(self) -> str:
109 raise NotImplementedError()
110
111 def Generate(self, func: str, statement: int) -> str:
112 raise NotImplementedError()
113
114
115class ControlFlowGraph(object):
116 """
117 A simple control-flow graph.
118
119 Every statement in the program is represented as a node in a graph with
120 unique a numeric ID. Control flow is represented as directed edges through
121 the graph. Loops can introduce back-edges. Every node in the graph will
122 satisfy at least one of the following conditions:
123
124 - Its indegree is at least one.
125
126 - Its outdegree is at least one.
127
128 For simple linear graphs all you need is the AddStatement method. For more
129 complex flows there is a set of context managers below to help simplify
130 construction.
131
132 - For branches-like statements (e.g. if- and try- statements) use
133 CfgBranchContext. It will take care of the details associated with
134 stitching the different branches to statements in the next statement.
135
136 - For loops, use CfgLoopContext. It will take care of adding back-edges
137 and connecting break statements to any statements that proceed the
138 loop.
139
140 - CfgBlockContext can be used for simple cases where you just want to
141 track the beginning and end of a sequence of statements.
142
143 Statements can carry annotations called facts, which are used as inputs to
144 datalog programs to perform dataflow diffrent kinds of dataflow analyses.
145 To annotate a statement, use the AddFact method with any object that
146 implements the Fact interface.
147
148 See the unit tests in pass_state_test.py and the mycpp phase in
149 control_flow_pass.py for detailed examples of usage.
150 """
151
152 def __init__(self) -> None:
153 self.statement_counter: int = 0
154 self.edges: set[tuple[int, int]] = set({})
155 self.block_stack: list[int] = []
156 self.predecessors: set[int] = set({})
157 self.deadends: set[int] = set({})
158
159 # order doesn't actually matter here, but sets require elements to be
160 # hashable
161 self.facts: dict[int, list[Fact]] = defaultdict(list)
162
163 def AddEdge(self, pred: int, succ: int) -> None:
164 """
165 Add a directed edge from pred to succ. If pred is a deadend, its
166 non-deadends will be used instead.
167 """
168 if pred in self.deadends:
169 for w in [u for (u, v) in self.edges if v == pred]:
170 self.AddEdge(w, succ)
171 else:
172 self.edges.add((pred, succ))
173
174 def AddDeadend(self, statement: int):
175 """
176 Mark a statement as a dead-end (e.g. return or continue).
177 """
178 self.deadends.add(statement)
179
180 def AddStatement(self) -> int:
181 """
182 Add a new statement and return its ID.
183 """
184 if len(self.predecessors) == 0:
185 if len(self.block_stack):
186 self.predecessors.add(self.block_stack[-1])
187 else:
188 self.predecessors.add(self.statement_counter)
189
190 self.statement_counter += 1
191 for pred in self.predecessors:
192 self.AddEdge(pred, self.statement_counter)
193
194 self.predecessors = set({})
195
196 if len(self.block_stack):
197 self.block_stack[-1] = self.statement_counter
198
199 return self.statement_counter
200
201 def AddFact(self, statement: int, fact: Fact) -> None:
202 """
203 Annotate a statement with a fact.
204 """
205 self.facts[statement].append(fact)
206
207 def _PushBlock(self, begin: Optional[int] = None) -> int:
208 """
209 Start a block at the given statement ID. If a beginning statement isn't
210 provided one will be created and its ID will be returend.
211
212 Direct use of this function is discouraged. Consider using one of the
213 block context managers below instead.
214 """
215 if begin is None:
216 begin = self.AddStatement()
217 else:
218 self.predecessors.add(begin)
219
220 self.block_stack.append(begin)
221 return begin
222
223 def _PopBlock(self) -> int:
224 """
225 Pop a block from the top of the stack and return the ID of the block's
226 last statement.
227
228 Direct use of this function is discouraged. Consider using one of the
229 block context managers below instead.
230 """
231 assert len(self.block_stack)
232 last = self.block_stack.pop()
233 if len(self.block_stack) and last not in self.deadends:
234 self.block_stack[-1] = last
235
236 return last
237
238
239class CfgBlockContext(object):
240 """
241 Context manager to make dealing with things like with-statements easier.
242 """
243
244 def __init__(self,
245 cfg: ControlFlowGraph,
246 begin: Optional[int] = None) -> None:
247 self.cfg = cfg
248 if cfg is None:
249 return
250
251 self.entry = self.cfg._PushBlock(begin)
252 self.exit = self.entry
253
254 def __enter__(self) -> None:
255 return self if self.cfg else None
256
257 def __exit__(self, *args) -> None:
258 if not self.cfg:
259 return
260
261 self.exit = self.cfg._PopBlock()
262
263
264class CfgBranchContext(object):
265 """
266 Context manager to make dealing with if-else blocks easier.
267 """
268
269 def __init__(self, cfg: ControlFlowGraph, branch_point: int) -> None:
270 self.cfg = cfg
271 self.entry = branch_point
272 self.exit = self.entry
273 if cfg is None:
274 return
275
276 self.arms = []
277 self.pushed = False
278
279 def AddBranch(self, entry: Optional[int] = None):
280 if not self.cfg:
281 return CfgBranchContext(None, None)
282
283 self.arms.append(CfgBranchContext(self.cfg, entry or self.entry))
284 self.cfg._PushBlock(self.arms[-1].entry)
285 self.arms[-1].pushed = True
286 return self.arms[-1]
287
288 def __enter__(self) -> None:
289 return self
290
291 def __exit__(self, *args) -> None:
292 if not self.cfg:
293 return
294
295 if self.pushed:
296 self.exit = self.cfg._PopBlock()
297
298 for arm in self.arms:
299 if arm.exit not in self.cfg.deadends:
300 self.cfg.predecessors.add(arm.exit)
301
302
303class CfgLoopContext(object):
304 """
305 Context manager to make dealing with loops easier.
306 """
307
308 def __init__(self, cfg: ControlFlowGraph) -> None:
309 self.cfg = cfg
310 self.breaks = set({})
311 if cfg is None:
312 return
313
314 self.entry = self.cfg._PushBlock()
315 self.exit = self.entry
316
317 def AddBreak(self, statement: int) -> None:
318 assert self.cfg
319 self.breaks.add(statement)
320 self.cfg.AddDeadend(statement)
321
322 def AddContinue(self, statement: int) -> None:
323 self.cfg.AddEdge(statement, self.entry)
324 self.cfg.AddDeadend(statement)
325
326 def __enter__(self) -> None:
327 return self if self.cfg else None
328
329 def __exit__(self, *args) -> None:
330 if not self.cfg:
331 return
332
333 self.exit = self.cfg._PopBlock()
334 self.cfg.AddEdge(self.exit, self.entry)
335 for pred in self.cfg.predecessors:
336 self.cfg.AddEdge(pred, self.entry)
337
338 # If we had any breaks, arm the predecessor set with the current
339 # statement and the break statements.
340 if len(self.breaks):
341 if len(self.cfg.block_stack):
342 self.cfg.predecessors.add(self.cfg.block_stack[-1])
343 else:
344 self.cfg.predecessors.add(self.cfg.statement_counter)
345
346 for b in self.breaks:
347 self.cfg.deadends.remove(b)
348 self.cfg.predecessors.add(b)
349
350
351def DumpControlFlowGraphs(cfgs: dict[str, ControlFlowGraph],
352 facts_dir='_tmp/mycpp-facts') -> None:
353 """
354 Dump the given control flow graphs and associated facts into the given
355 directory as text files that can be consumed by datalog.
356 """
357 edge_facts = '{}/cf_edge.facts'.format(facts_dir)
358 os.makedirs(facts_dir, exist_ok=True)
359 with open(edge_facts, 'w') as cfg_f:
360 for func, cfg in sorted(cfgs.items()):
361 joined = join_name(func, delim='.')
362 for (u, v) in sorted(cfg.edges):
363 cfg_f.write('{}\t{}\t{}\n'.format(joined, u, v))