OILS / mycpp / pass_state.py View on Github | oilshell.org

487 lines, 252 significant
1"""
2pass_state.py
3"""
4from __future__ import print_function
5
6import os
7from collections import defaultdict
8
9from mypy.types import Type
10from mypy.nodes import Expression
11
12from mycpp.util import join_name, log, SymbolPath
13
14from typing import Optional
15
16_ = log
17
18
19class ModuleMember(object):
20 """
21 A member of a Python module.
22
23 e.g. core.state.Mem => core::state::Mem
24 """
25
26 def __init__(self, module_path: SymbolPath, member: str) -> None:
27 self.module_path = module_path
28 self.member = member
29
30
31class StaticObjectMember(object):
32 """
33 A static member of an object. Usually a a method like an alternative constructor.
34
35 e.g. runtime_asdl.Cell.CreateNull() => runtime_asdl::Cell::CreateNull()
36 """
37
38 def __init__(self, base_type_name: SymbolPath, member: str) -> None:
39 self.base_type_name = base_type_name
40 self.member = member
41
42
43class HeapObjectMember(object):
44 """
45 A member of a heap-allocated object.
46
47 e.g foo.empty() => foo->empty()
48 """
49
50 def __init__(self, object_expr: Expression, object_type: Type,
51 member: str) -> None:
52 self.ojbect_expr = object_expr
53 self.object_type = object_type
54 self.member = member
55
56
57class StackObjectMember(object):
58 """
59 A member of a stack-allocated object.
60
61 e.g foo.empty() => foo.empty()
62 """
63
64 def __init__(self, object_expr: Expression, object_type: Type,
65 member: str) -> None:
66 self.ojbect_expr = object_expr
67 self.object_type = object_type
68 self.member = member
69
70
71class Virtual(object):
72 """Calculate which C++ methods need the virtual keyword.
73
74 See unit test for example usage.
75 """
76
77 def __init__(self) -> None:
78 self.methods: dict[SymbolPath, list[str]] = defaultdict(list)
79 self.subclasses: dict[SymbolPath, list[tuple[str]]] = defaultdict(list)
80 self.virtuals: dict[tuple[SymbolPath, str], Optional[tuple[SymbolPath,
81 str]]] = {}
82 self.has_vtable: dict[SymbolPath, bool] = {}
83 self.can_reorder_fields: dict[SymbolPath, bool] = {}
84
85 # _Executor -> vm::_Executor
86 self.base_class_unique: dict[str, SymbolPath] = {}
87
88 # These are called on the Forward Declare pass
89 def OnMethod(self, class_name: SymbolPath, method_name: str) -> None:
90 #log('OnMethod %s %s', class_name, method_name)
91
92 # __init__ and so forth don't count
93 if method_name.startswith('__') and method_name.endswith('__'):
94 return
95
96 self.methods[class_name].append(method_name)
97
98 def OnSubclass(self, base_class: SymbolPath, subclass: SymbolPath) -> None:
99 if len(base_class) > 1:
100 # Hack for
101 #
102 # class _Executor: pass
103 # versus
104 # class MyExecutor(vm._Executor): pass
105 base_key = base_class[-1]
106
107 # Fail if we have two base classes in different namespaces with the same
108 # name.
109 if base_key in self.base_class_unique:
110 # Make sure we don't have collisions
111 assert (self.base_class_unique[base_key] == base_class or
112 base_class
113 in self.subclasses[self.base_class_unique[base_key]]
114 ), base_class
115 else:
116 self.base_class_unique[base_key] = base_class
117
118 else:
119 base_key = base_class
120
121 self.subclasses[base_class].append(subclass)
122
123 def Calculate(self) -> None:
124 """Call this after the forward declare pass."""
125 for base_class, subclasses in self.subclasses.items():
126 self.can_reorder_fields[base_class] = False
127
128 for subclass in subclasses:
129 self.can_reorder_fields[subclass] = False
130
131 b_methods = self.methods[base_class]
132 s_methods = self.methods[subclass]
133 overlapping = set(b_methods) & set(s_methods)
134 for method in overlapping:
135 self.virtuals[(base_class, method)] = None
136 self.virtuals[(subclass, method)] = (base_class, method)
137 if overlapping:
138 self.has_vtable[base_class] = True
139 self.has_vtable[subclass] = True
140
141 # These is called on the Decl pass
142 def IsVirtual(self, class_name: SymbolPath, method_name: str) -> bool:
143 return (class_name, method_name) in self.virtuals
144
145 def HasVTable(self, class_name: SymbolPath) -> bool:
146 return class_name in self.has_vtable
147
148 def CanReorderFields(self, class_name: SymbolPath) -> bool:
149 if class_name in self.can_reorder_fields:
150 return self.can_reorder_fields[class_name]
151 else:
152 return True # by default they can be reordered
153
154
155def SymbolPathToSouffle(p: SymbolPath) -> str:
156 if len(p) > 1:
157 return '$Member({}, {})'.format(join_name(p[:-1], delim='.'), p[-1])
158
159 return '$Variable({})'.format(p[0])
160
161
162class Fact(object):
163 """
164 An abstract fact. These can be used to build up datalog programs.
165 """
166
167 def __init__(self) -> None:
168 pass
169
170 def name(self) -> str:
171 raise NotImplementedError()
172
173 def Generate(self, func: str, statement: int) -> str:
174 raise NotImplementedError()
175
176
177class FunctionCall(Fact):
178
179 def __init__(self, callee: str) -> None:
180 self.callee = callee
181
182 def name(self) -> str:
183 return 'call'
184
185 def Generate(self, func: str, statement: int) -> str:
186 return '{}\t{}\t{}\n'.format(func, statement, self.callee)
187
188
189class Definition(Fact):
190 """
191 The definition of a variable. This corresponds to an allocation.
192 """
193
194 def __init__(self, variable: SymbolPath) -> None:
195 self.variable = variable
196
197 def name(self) -> str:
198 return 'define'
199
200 def Generate(self, func: str, statement: int) -> str:
201 return '{}\t{}\t{}\n'.format(func, statement,
202 SymbolPathToSouffle(self.variable))
203
204
205class Assignment(Fact):
206 """
207 The assignment of one variable or object member to another.
208 """
209
210 def __init__(self, lhs: SymbolPath, rhs: SymbolPath) -> None:
211 self.lhs = lhs
212 self.rhs = rhs
213
214 def name(self) -> str:
215 return 'assign'
216
217 def Generate(self, func: str, statement: int) -> str:
218 return '{}\t{}\t{}\t{}\n'.format(func, statement,
219 SymbolPathToSouffle(self.lhs),
220 SymbolPathToSouffle(self.rhs))
221
222
223class ControlFlowGraph(object):
224 """
225 A simple control-flow graph.
226
227 Every statement in the program is represented as a node in a graph with
228 unique a numeric ID. Control flow is represented as directed edges through
229 the graph. Loops can introduce back-edges. Every node in the graph will
230 satisfy at least one of the following conditions:
231
232 - Its indegree is at least one.
233
234 - Its outdegree is at least one.
235
236 For simple linear graphs all you need is the AddStatement method. For more
237 complex flows there is a set of context managers below to help simplify
238 construction.
239
240 - For branches-like statements (e.g. if- and try- statements) use
241 CfgBranchContext. It will take care of the details associated with
242 stitching the different branches to statements in the next statement.
243
244 - For loops, use CfgLoopContext. It will take care of adding back-edges
245 and connecting break statements to any statements that proceed the
246 loop.
247
248 - CfgBlockContext can be used for simple cases where you just want to
249 track the beginning and end of a sequence of statements.
250
251 Statements can carry annotations called facts, which are used as inputs to
252 datalog programs to perform dataflow diffrent kinds of dataflow analyses.
253 To annotate a statement, use the AddFact method with any object that
254 implements the Fact interface.
255
256 See the unit tests in pass_state_test.py and the mycpp phase in
257 control_flow_pass.py for detailed examples of usage.
258 """
259
260 def __init__(self) -> None:
261 self.statement_counter: int = 0
262 self.edges: set[tuple[int, int]] = set({})
263 self.block_stack: list[int] = []
264 self.predecessors: set[int] = set({})
265 self.deadends: set[int] = set({})
266
267 # order doesn't actually matter here, but sets require elements to be
268 # hashable
269 self.facts: dict[int, list[Fact]] = defaultdict(list)
270
271 def AddEdge(self, pred: int, succ: int) -> None:
272 """
273 Add a directed edge from pred to succ. If pred is a deadend, its
274 non-deadends will be used instead.
275 """
276 if pred in self.deadends:
277 for w in [u for (u, v) in self.edges if v == pred]:
278 self.AddEdge(w, succ)
279 else:
280 self.edges.add((pred, succ))
281
282 def AddDeadend(self, statement: int):
283 """
284 Mark a statement as a dead-end (e.g. return or continue).
285 """
286 self.deadends.add(statement)
287
288 def AddStatement(self) -> int:
289 """
290 Add a new statement and return its ID.
291 """
292 if len(self.predecessors) == 0:
293 if len(self.block_stack):
294 self.predecessors.add(self.block_stack[-1])
295 else:
296 self.predecessors.add(self.statement_counter)
297
298 self.statement_counter += 1
299 for pred in self.predecessors:
300 self.AddEdge(pred, self.statement_counter)
301
302 self.predecessors = set({})
303
304 if len(self.block_stack):
305 self.block_stack[-1] = self.statement_counter
306
307 return self.statement_counter
308
309 def AddFact(self, statement: int, fact: Fact) -> None:
310 """
311 Annotate a statement with a fact.
312 """
313 self.facts[statement].append(fact)
314
315 def _PushBlock(self, begin: Optional[int] = None) -> int:
316 """
317 Start a block at the given statement ID. If a beginning statement isn't
318 provided one will be created and its ID will be returend.
319
320 Direct use of this function is discouraged. Consider using one of the
321 block context managers below instead.
322 """
323 if begin is None:
324 begin = self.AddStatement()
325 else:
326 self.predecessors.add(begin)
327
328 self.block_stack.append(begin)
329 return begin
330
331 def _PopBlock(self) -> int:
332 """
333 Pop a block from the top of the stack and return the ID of the block's
334 last statement.
335
336 Direct use of this function is discouraged. Consider using one of the
337 block context managers below instead.
338 """
339 assert len(self.block_stack)
340 last = self.block_stack.pop()
341 if len(self.block_stack) and last not in self.deadends:
342 self.block_stack[-1] = last
343
344 return last
345
346
347class CfgBlockContext(object):
348 """
349 Context manager to make dealing with things like with-statements easier.
350 """
351
352 def __init__(self,
353 cfg: ControlFlowGraph,
354 begin: Optional[int] = None) -> None:
355 self.cfg = cfg
356 if cfg is None:
357 return
358
359 self.entry = self.cfg._PushBlock(begin)
360 self.exit = self.entry
361
362 def __enter__(self) -> None:
363 return self if self.cfg else None
364
365 def __exit__(self, *args) -> None:
366 if not self.cfg:
367 return
368
369 self.exit = self.cfg._PopBlock()
370
371
372class CfgBranchContext(object):
373 """
374 Context manager to make dealing with if-else blocks easier.
375 """
376
377 def __init__(self, cfg: ControlFlowGraph, branch_point: int) -> None:
378 self.cfg = cfg
379 self.entry = branch_point
380 self.exit = self.entry
381 if cfg is None:
382 return
383
384 self.arms = []
385 self.pushed = False
386
387 def AddBranch(self, entry: Optional[int] = None):
388 if not self.cfg:
389 return CfgBranchContext(None, None)
390
391 self.arms.append(CfgBranchContext(self.cfg, entry or self.entry))
392 self.cfg._PushBlock(self.arms[-1].entry)
393 self.arms[-1].pushed = True
394 return self.arms[-1]
395
396 def __enter__(self) -> None:
397 return self
398
399 def __exit__(self, *args) -> None:
400 if not self.cfg:
401 return
402
403 if self.pushed:
404 self.exit = self.cfg._PopBlock()
405
406 for arm in self.arms:
407 if arm.exit not in self.cfg.deadends:
408 self.cfg.predecessors.add(arm.exit)
409
410
411class CfgLoopContext(object):
412 """
413 Context manager to make dealing with loops easier.
414 """
415
416 def __init__(self,
417 cfg: ControlFlowGraph,
418 entry: Optional[int] = None) -> None:
419 self.cfg = cfg
420 self.breaks = set({})
421 if cfg is None:
422 return
423
424 self.entry = self.cfg._PushBlock(entry)
425 self.exit = self.entry
426
427 def AddBreak(self, statement: int) -> None:
428 assert self.cfg
429 self.breaks.add(statement)
430 self.cfg.AddDeadend(statement)
431
432 def AddContinue(self, statement: int) -> None:
433 self.cfg.AddEdge(statement, self.entry)
434 self.cfg.AddDeadend(statement)
435
436 def __enter__(self) -> None:
437 return self if self.cfg else None
438
439 def __exit__(self, *args) -> None:
440 if not self.cfg:
441 return
442
443 self.exit = self.cfg._PopBlock()
444 self.cfg.AddEdge(self.exit, self.entry)
445 for pred in self.cfg.predecessors:
446 self.cfg.AddEdge(pred, self.entry)
447
448 # If we had any breaks, arm the predecessor set with the current
449 # statement and the break statements.
450 if len(self.breaks):
451 if len(self.cfg.block_stack):
452 self.cfg.predecessors.add(self.cfg.block_stack[-1])
453 else:
454 self.cfg.predecessors.add(self.cfg.statement_counter)
455
456 for b in self.breaks:
457 self.cfg.deadends.remove(b)
458 self.cfg.predecessors.add(b)
459
460
461def DumpControlFlowGraphs(cfgs: dict[str, ControlFlowGraph],
462 facts_dir='_tmp/mycpp-facts') -> None:
463 """
464 Dump the given control flow graphs and associated facts into the given
465 directory as text files that can be consumed by datalog.
466 """
467 edge_facts = '{}/cf_edge.facts'.format(facts_dir)
468 fact_files = {}
469 os.makedirs(facts_dir, exist_ok=True)
470 with open(edge_facts, 'w') as cfg_f:
471 for func, cfg in sorted(cfgs.items()):
472 joined = join_name(func, delim='.')
473 for (u, v) in sorted(cfg.edges):
474 cfg_f.write('{}\t{}\t{}\n'.format(joined, u, v))
475
476 for statement, facts in sorted(cfg.facts.items()):
477 for fact in facts: # already sorted temporally
478 fact_f = fact_files.get(fact.name())
479 if not fact_f:
480 fact_f = open(
481 '{}/{}.facts'.format(facts_dir, fact.name()), 'w')
482 fact_files[fact.name()] = fact_f
483
484 fact_f.write(fact.Generate(joined, statement))
485
486 for f in fact_files.values():
487 f.close()