OILS / mycpp / pass_state.py View on Github | oilshell.org

446 lines, 230 significant
1"""
2pass_state.py
3"""
4from __future__ import print_function
5
6import os
7from collections import defaultdict
8
9from mypy.types import Type
10from mypy.nodes import Expression
11
12from mycpp.util import join_name, log, SymbolPath
13
14from typing import Optional
15
16_ = log
17
18
19class ModuleMember(object):
20 """
21 A member of a Python module.
22
23 e.g. core.state.Mem => core::state::Mem
24 """
25
26 def __init__(self, module_path: SymbolPath, member: str) -> None:
27 self.module_path = module_path
28 self.member = member
29
30
31class StaticObjectMember(object):
32 """
33 A static member of an object. Usually a a method like an alternative constructor.
34
35 e.g. runtime_asdl.Cell.CreateNull() => runtime_asdl::Cell::CreateNull()
36 """
37
38 def __init__(self, base_type_name: SymbolPath, member: str) -> None:
39 self.base_type_name = base_type_name
40 self.member = member
41
42
43class HeapObjectMember(object):
44 """
45 A member of a heap-allocated object.
46
47 e.g foo.empty() => foo->empty()
48 """
49
50 def __init__(self, object_expr: Expression, object_type: Optional[Type],
51 member: str) -> None:
52 self.ojbect_expr = object_expr
53 self.object_type = object_type
54 self.member = member
55
56
57class StackObjectMember(object):
58 """
59 A member of a stack-allocated object.
60
61 e.g foo.empty() => foo.empty()
62 """
63
64 def __init__(self, object_expr: Expression, object_type: Type,
65 member: str) -> None:
66 self.ojbect_expr = object_expr
67 self.object_type = object_type
68 self.member = member
69
70
71class Virtual(object):
72 """Calculate which C++ methods need the virtual keyword.
73
74 See unit test for example usage.
75 """
76
77 def __init__(self) -> None:
78 self.methods: dict[SymbolPath, list[str]] = defaultdict(list)
79 self.subclasses: dict[SymbolPath, list[tuple[str]]] = defaultdict(list)
80 self.virtuals: dict[tuple[SymbolPath, str], Optional[tuple[SymbolPath,
81 str]]] = {}
82 self.has_vtable: dict[SymbolPath, bool] = {}
83 self.can_reorder_fields: dict[SymbolPath, bool] = {}
84
85 # _Executor -> vm::_Executor
86 self.base_class_unique: dict[str, SymbolPath] = {}
87
88 # These are called on the Forward Declare pass
89 def OnMethod(self, class_name: SymbolPath, method_name: str) -> None:
90 #log('OnMethod %s %s', class_name, method_name)
91
92 # __init__ and so forth don't count
93 if method_name.startswith('__') and method_name.endswith('__'):
94 return
95
96 self.methods[class_name].append(method_name)
97
98 def OnSubclass(self, base_class: SymbolPath, subclass: SymbolPath) -> None:
99 if len(base_class) > 1:
100 # Hack for
101 #
102 # class _Executor: pass
103 # versus
104 # class MyExecutor(vm._Executor): pass
105 base_key = base_class[-1]
106
107 # Fail if we have two base classes in different namespaces with the same
108 # name.
109 if base_key in self.base_class_unique:
110 # Make sure we don't have collisions
111 assert (self.base_class_unique[base_key] == base_class or
112 base_class
113 in self.subclasses[self.base_class_unique[base_key]]
114 ), base_class
115 else:
116 self.base_class_unique[base_key] = base_class
117
118 else:
119 base_key = base_class
120
121 self.subclasses[base_class].append(subclass)
122
123 def Calculate(self) -> None:
124 """Call this after the forward declare pass."""
125 for base_class, subclasses in self.subclasses.items():
126 self.can_reorder_fields[base_class] = False
127
128 for subclass in subclasses:
129 self.can_reorder_fields[subclass] = False
130
131 b_methods = self.methods[base_class]
132 s_methods = self.methods[subclass]
133 overlapping = set(b_methods) & set(s_methods)
134 for method in overlapping:
135 self.virtuals[(base_class, method)] = None
136 self.virtuals[(subclass, method)] = (base_class, method)
137 if overlapping:
138 self.has_vtable[base_class] = True
139 self.has_vtable[subclass] = True
140
141 # These is called on the Decl pass
142 def IsVirtual(self, class_name: SymbolPath, method_name: str) -> bool:
143 return (class_name, method_name) in self.virtuals
144
145 def HasVTable(self, class_name: SymbolPath) -> bool:
146 return class_name in self.has_vtable
147
148 def CanReorderFields(self, class_name: SymbolPath) -> bool:
149 if class_name in self.can_reorder_fields:
150 return self.can_reorder_fields[class_name]
151 else:
152 return True # by default they can be reordered
153
154
155class Fact(object):
156 """
157 An abstract fact. These can be used to build up datalog programs.
158 """
159
160 def __init__(self) -> None:
161 pass
162
163 def name(self) -> str:
164 raise NotImplementedError()
165
166 def Generate(self, func: str, statement: int) -> str:
167 raise NotImplementedError()
168
169
170class FunctionCall(Fact):
171
172 def __init__(self, callee: str) -> None:
173 self.callee = callee
174
175 def name(self) -> str:
176 return 'call'
177
178 def Generate(self, func: str, statement: int) -> str:
179 return '{}\t{}\t{}\n'.format(func, statement, self.callee)
180
181
182class ControlFlowGraph(object):
183 """
184 A simple control-flow graph.
185
186 Every statement in the program is represented as a node in a graph with
187 unique a numeric ID. Control flow is represented as directed edges through
188 the graph. Loops can introduce back-edges. Every node in the graph will
189 satisfy at least one of the following conditions:
190
191 - Its indegree is at least one.
192
193 - Its outdegree is at least one.
194
195 For simple linear graphs all you need is the AddStatement method. For more
196 complex flows there is a set of context managers below to help simplify
197 construction.
198
199 - For branches-like statements (e.g. if- and try- statements) use
200 CfgBranchContext. It will take care of the details associated with
201 stitching the different branches to statements in the next statement.
202
203 - For loops, use CfgLoopContext. It will take care of adding back-edges
204 and connecting break statements to any statements that proceed the
205 loop.
206
207 - CfgBlockContext can be used for simple cases where you just want to
208 track the beginning and end of a sequence of statements.
209
210 Statements can carry annotations called facts, which are used as inputs to
211 datalog programs to perform dataflow diffrent kinds of dataflow analyses.
212 To annotate a statement, use the AddFact method with any object that
213 implements the Fact interface.
214
215 See the unit tests in pass_state_test.py and the mycpp phase in
216 control_flow_pass.py for detailed examples of usage.
217 """
218
219 def __init__(self) -> None:
220 self.statement_counter: int = 0
221 self.edges: set[tuple[int, int]] = set({})
222 self.block_stack: list[int] = []
223 self.predecessors: set[int] = set({})
224 self.deadends: set[int] = set({})
225
226 # order doesn't actually matter here, but sets require elements to be
227 # hashable
228 self.facts: dict[int, list[Fact]] = defaultdict(list)
229
230 def AddEdge(self, pred: int, succ: int) -> None:
231 """
232 Add a directed edge from pred to succ. If pred is a deadend, its
233 non-deadends will be used instead.
234 """
235 if pred in self.deadends:
236 for w in [u for (u, v) in self.edges if v == pred]:
237 self.AddEdge(w, succ)
238 else:
239 self.edges.add((pred, succ))
240
241 def AddDeadend(self, statement: int):
242 """
243 Mark a statement as a dead-end (e.g. return or continue).
244 """
245 self.deadends.add(statement)
246
247 def AddStatement(self) -> int:
248 """
249 Add a new statement and return its ID.
250 """
251 if len(self.predecessors) == 0:
252 if len(self.block_stack):
253 self.predecessors.add(self.block_stack[-1])
254 else:
255 self.predecessors.add(self.statement_counter)
256
257 self.statement_counter += 1
258 for pred in self.predecessors:
259 self.AddEdge(pred, self.statement_counter)
260
261 self.predecessors = set({})
262
263 if len(self.block_stack):
264 self.block_stack[-1] = self.statement_counter
265
266 return self.statement_counter
267
268 def AddFact(self, statement: int, fact: Fact) -> None:
269 """
270 Annotate a statement with a fact.
271 """
272 self.facts[statement].append(fact)
273
274 def _PushBlock(self, begin: Optional[int] = None) -> int:
275 """
276 Start a block at the given statement ID. If a beginning statement isn't
277 provided one will be created and its ID will be returend.
278
279 Direct use of this function is discouraged. Consider using one of the
280 block context managers below instead.
281 """
282 if begin is None:
283 begin = self.AddStatement()
284 else:
285 self.predecessors.add(begin)
286
287 self.block_stack.append(begin)
288 return begin
289
290 def _PopBlock(self) -> int:
291 """
292 Pop a block from the top of the stack and return the ID of the block's
293 last statement.
294
295 Direct use of this function is discouraged. Consider using one of the
296 block context managers below instead.
297 """
298 assert len(self.block_stack)
299 last = self.block_stack.pop()
300 if len(self.block_stack) and last not in self.deadends:
301 self.block_stack[-1] = last
302
303 return last
304
305
306class CfgBlockContext(object):
307 """
308 Context manager to make dealing with things like with-statements easier.
309 """
310
311 def __init__(self,
312 cfg: ControlFlowGraph,
313 begin: Optional[int] = None) -> None:
314 self.cfg = cfg
315 if cfg is None:
316 return
317
318 self.entry = self.cfg._PushBlock(begin)
319 self.exit = self.entry
320
321 def __enter__(self) -> None:
322 return self if self.cfg else None
323
324 def __exit__(self, *args) -> None:
325 if not self.cfg:
326 return
327
328 self.exit = self.cfg._PopBlock()
329
330
331class CfgBranchContext(object):
332 """
333 Context manager to make dealing with if-else blocks easier.
334 """
335
336 def __init__(self, cfg: ControlFlowGraph, branch_point: int) -> None:
337 self.cfg = cfg
338 self.entry = branch_point
339 self.exit = self.entry
340 if cfg is None:
341 return
342
343 self.arms = []
344 self.pushed = False
345
346 def AddBranch(self, entry: Optional[int] = None):
347 if not self.cfg:
348 return CfgBranchContext(None, None)
349
350 self.arms.append(CfgBranchContext(self.cfg, entry or self.entry))
351 self.cfg._PushBlock(self.arms[-1].entry)
352 self.arms[-1].pushed = True
353 return self.arms[-1]
354
355 def __enter__(self) -> None:
356 return self
357
358 def __exit__(self, *args) -> None:
359 if not self.cfg:
360 return
361
362 if self.pushed:
363 self.exit = self.cfg._PopBlock()
364
365 for arm in self.arms:
366 if arm.exit not in self.cfg.deadends:
367 self.cfg.predecessors.add(arm.exit)
368
369
370class CfgLoopContext(object):
371 """
372 Context manager to make dealing with loops easier.
373 """
374
375 def __init__(self,
376 cfg: ControlFlowGraph,
377 entry: Optional[int] = None) -> None:
378 self.cfg = cfg
379 self.breaks = set({})
380 if cfg is None:
381 return
382
383 self.entry = self.cfg._PushBlock(entry)
384 self.exit = self.entry
385
386 def AddBreak(self, statement: int) -> None:
387 assert self.cfg
388 self.breaks.add(statement)
389 self.cfg.AddDeadend(statement)
390
391 def AddContinue(self, statement: int) -> None:
392 self.cfg.AddEdge(statement, self.entry)
393 self.cfg.AddDeadend(statement)
394
395 def __enter__(self) -> None:
396 return self if self.cfg else None
397
398 def __exit__(self, *args) -> None:
399 if not self.cfg:
400 return
401
402 self.exit = self.cfg._PopBlock()
403 self.cfg.AddEdge(self.exit, self.entry)
404 for pred in self.cfg.predecessors:
405 self.cfg.AddEdge(pred, self.entry)
406
407 # If we had any breaks, arm the predecessor set with the current
408 # statement and the break statements.
409 if len(self.breaks):
410 if len(self.cfg.block_stack):
411 self.cfg.predecessors.add(self.cfg.block_stack[-1])
412 else:
413 self.cfg.predecessors.add(self.cfg.statement_counter)
414
415 for b in self.breaks:
416 self.cfg.deadends.remove(b)
417 self.cfg.predecessors.add(b)
418
419
420def DumpControlFlowGraphs(cfgs: dict[str, ControlFlowGraph],
421 facts_dir='_tmp/mycpp-facts') -> None:
422 """
423 Dump the given control flow graphs and associated facts into the given
424 directory as text files that can be consumed by datalog.
425 """
426 edge_facts = '{}/cf_edge.facts'.format(facts_dir)
427 fact_files = {}
428 os.makedirs(facts_dir, exist_ok=True)
429 with open(edge_facts, 'w') as cfg_f:
430 for func, cfg in sorted(cfgs.items()):
431 joined = join_name(func, delim='.')
432 for (u, v) in sorted(cfg.edges):
433 cfg_f.write('{}\t{}\t{}\n'.format(joined, u, v))
434
435 for statement, facts in sorted(cfg.facts.items()):
436 for fact in facts: # already sorted temporally
437 fact_f = fact_files.get(fact.name())
438 if not fact_f:
439 fact_f = open(
440 '{}/{}.facts'.format(facts_dir, fact.name()), 'w')
441 fact_files[fact.name()] = fact_f
442
443 fact_f.write(fact.Generate(joined, statement))
444
445 for f in fact_files.values():
446 f.close()