OILS / mycpp / pass_state.py View on Github | oilshell.org

494 lines, 256 significant
1"""
2pass_state.py
3"""
4from __future__ import print_function
5
6import os
7from collections import defaultdict
8
9from mypy.types import Type
10from mypy.nodes import Expression
11
12from mycpp.util import join_name, log, SymbolPath
13
14from typing import Optional
15
16_ = log
17
18
19class ModuleMember(object):
20 """
21 A member of a Python module.
22
23 e.g. core.state.Mem => core::state::Mem
24 """
25
26 def __init__(self, module_path: SymbolPath, member: str) -> None:
27 self.module_path = module_path
28 self.member = member
29
30
31class StaticObjectMember(object):
32 """
33 A static member of an object. Usually a a method like an alternative constructor.
34
35 e.g. runtime_asdl.Cell.CreateNull() => runtime_asdl::Cell::CreateNull()
36 """
37
38 def __init__(self, base_type_name: SymbolPath, member: str) -> None:
39 self.base_type_name = base_type_name
40 self.member = member
41
42
43class HeapObjectMember(object):
44 """
45 A member of a heap-allocated object.
46
47 e.g foo.empty() => foo->empty()
48 """
49
50 def __init__(self, object_expr: Expression, object_type: Type,
51 member: str) -> None:
52 self.ojbect_expr = object_expr
53 self.object_type = object_type
54 self.member = member
55
56
57class StackObjectMember(object):
58 """
59 A member of a stack-allocated object.
60
61 e.g foo.empty() => foo.empty()
62 """
63
64 def __init__(self, object_expr: Expression, object_type: Type,
65 member: str) -> None:
66 self.ojbect_expr = object_expr
67 self.object_type = object_type
68 self.member = member
69
70
71class Virtual(object):
72 """Calculate which C++ methods need the virtual keyword.
73
74 See unit test for example usage.
75 """
76
77 def __init__(self) -> None:
78 self.methods: dict[SymbolPath, list[str]] = defaultdict(list)
79 self.subclasses: dict[SymbolPath, list[tuple[str]]] = defaultdict(list)
80 self.virtuals: dict[tuple[SymbolPath, str], Optional[tuple[SymbolPath,
81 str]]] = {}
82 self.has_vtable: dict[SymbolPath, bool] = {}
83 self.can_reorder_fields: dict[SymbolPath, bool] = {}
84
85 # _Executor -> vm::_Executor
86 self.base_class_unique: dict[str, SymbolPath] = {}
87
88 # These are called on the Forward Declare pass
89 def OnMethod(self, class_name: SymbolPath, method_name: str) -> None:
90 #log('OnMethod %s %s', class_name, method_name)
91
92 # __init__ and so forth don't count
93 if method_name.startswith('__') and method_name.endswith('__'):
94 return
95
96 self.methods[class_name].append(method_name)
97
98 def OnSubclass(self, base_class: SymbolPath, subclass: SymbolPath) -> None:
99 if len(base_class) > 1:
100 # Hack for
101 #
102 # class _Executor: pass
103 # versus
104 # class MyExecutor(vm._Executor): pass
105 base_key = base_class[-1]
106
107 # Fail if we have two base classes in different namespaces with the same
108 # name.
109 if base_key in self.base_class_unique:
110 # Make sure we don't have collisions
111 assert (self.base_class_unique[base_key] == base_class or
112 base_class
113 in self.subclasses[self.base_class_unique[base_key]]
114 ), base_class
115 else:
116 self.base_class_unique[base_key] = base_class
117
118 else:
119 base_key = base_class
120
121 self.subclasses[base_class].append(subclass)
122
123 def Calculate(self) -> None:
124 """Call this after the forward declare pass."""
125 for base_class, subclasses in self.subclasses.items():
126 self.can_reorder_fields[base_class] = False
127
128 for subclass in subclasses:
129 self.can_reorder_fields[subclass] = False
130
131 b_methods = self.methods[base_class]
132 s_methods = self.methods[subclass]
133 overlapping = set(b_methods) & set(s_methods)
134 for method in overlapping:
135 self.virtuals[(base_class, method)] = None
136 self.virtuals[(subclass, method)] = (base_class, method)
137 if overlapping:
138 self.has_vtable[base_class] = True
139 self.has_vtable[subclass] = True
140
141 # These is called on the Decl pass
142 def IsVirtual(self, class_name: SymbolPath, method_name: str) -> bool:
143 return (class_name, method_name) in self.virtuals
144
145 def HasVTable(self, class_name: SymbolPath) -> bool:
146 return class_name in self.has_vtable
147
148 def CanReorderFields(self, class_name: SymbolPath) -> bool:
149 if class_name in self.can_reorder_fields:
150 return self.can_reorder_fields[class_name]
151 else:
152 return True # by default they can be reordered
153
154
155def SymbolPathToPlace(func: str, p: SymbolPath) -> str:
156 if len(p) > 1:
157 return '$ObjectMember({}, {})'.format(join_name(p[:-1], delim='.'), p[-1])
158
159 return '$LocalVariable({}, {})'.format(func, p[0])
160
161
162def SymbolPathToReference(p: SymbolPath) -> str:
163 if len(p) > 1:
164 return '$MemberRef({}, {})'.format(join_name(p[:-1], delim='.'), p[-1])
165
166 return '$VariableRef({})'.format(p[0])
167
168
169class Fact(object):
170 """
171 An abstract fact. These can be used to build up datalog programs.
172 """
173
174 def __init__(self) -> None:
175 pass
176
177 def name(self) -> str:
178 raise NotImplementedError()
179
180 def Generate(self, func: str, statement: int) -> str:
181 raise NotImplementedError()
182
183
184class FunctionCall(Fact):
185
186 def __init__(self, callee: str) -> None:
187 self.callee = callee
188
189 def name(self) -> str:
190 return 'call'
191
192 def Generate(self, func: str, statement: int) -> str:
193 return '{}\t{}\t{}\n'.format(func, statement, self.callee)
194
195
196class Definition(Fact):
197 """
198 The definition of a variable. This corresponds to an allocation.
199 """
200
201 def __init__(self, variable: SymbolPath) -> None:
202 self.variable = variable
203
204 def name(self) -> str:
205 return 'define'
206
207 def Generate(self, func: str, statement: int) -> str:
208 return '{}\t{}\t{}\n'.format(func, statement,
209 SymbolPathToPlace(func, self.variable))
210
211
212class Assignment(Fact):
213 """
214 The assignment of one variable or object member to another.
215 """
216
217 def __init__(self, lhs: SymbolPath, rhs: SymbolPath) -> None:
218 self.lhs = lhs
219 self.rhs = rhs
220
221 def name(self) -> str:
222 return 'assign'
223
224 def Generate(self, func: str, statement: int) -> str:
225 return '{}\t{}\t{}\t{}\n'.format(func, statement,
226 SymbolPathToPlace(func, self.lhs),
227 SymbolPathToReference(self.rhs))
228
229
230class ControlFlowGraph(object):
231 """
232 A simple control-flow graph.
233
234 Every statement in the program is represented as a node in a graph with
235 unique a numeric ID. Control flow is represented as directed edges through
236 the graph. Loops can introduce back-edges. Every node in the graph will
237 satisfy at least one of the following conditions:
238
239 - Its indegree is at least one.
240
241 - Its outdegree is at least one.
242
243 For simple linear graphs all you need is the AddStatement method. For more
244 complex flows there is a set of context managers below to help simplify
245 construction.
246
247 - For branches-like statements (e.g. if- and try- statements) use
248 CfgBranchContext. It will take care of the details associated with
249 stitching the different branches to statements in the next statement.
250
251 - For loops, use CfgLoopContext. It will take care of adding back-edges
252 and connecting break statements to any statements that proceed the
253 loop.
254
255 - CfgBlockContext can be used for simple cases where you just want to
256 track the beginning and end of a sequence of statements.
257
258 Statements can carry annotations called facts, which are used as inputs to
259 datalog programs to perform dataflow diffrent kinds of dataflow analyses.
260 To annotate a statement, use the AddFact method with any object that
261 implements the Fact interface.
262
263 See the unit tests in pass_state_test.py and the mycpp phase in
264 control_flow_pass.py for detailed examples of usage.
265 """
266
267 def __init__(self) -> None:
268 self.statement_counter: int = 0
269 self.edges: set[tuple[int, int]] = set({})
270 self.block_stack: list[int] = []
271 self.predecessors: set[int] = set({})
272 self.deadends: set[int] = set({})
273
274 # order doesn't actually matter here, but sets require elements to be
275 # hashable
276 self.facts: dict[int, list[Fact]] = defaultdict(list)
277
278 def AddEdge(self, pred: int, succ: int) -> None:
279 """
280 Add a directed edge from pred to succ. If pred is a deadend, its
281 non-deadends will be used instead.
282 """
283 if pred in self.deadends:
284 for w in [u for (u, v) in self.edges if v == pred]:
285 self.AddEdge(w, succ)
286 else:
287 self.edges.add((pred, succ))
288
289 def AddDeadend(self, statement: int):
290 """
291 Mark a statement as a dead-end (e.g. return or continue).
292 """
293 self.deadends.add(statement)
294
295 def AddStatement(self) -> int:
296 """
297 Add a new statement and return its ID.
298 """
299 if len(self.predecessors) == 0:
300 if len(self.block_stack):
301 self.predecessors.add(self.block_stack[-1])
302 else:
303 self.predecessors.add(self.statement_counter)
304
305 self.statement_counter += 1
306 for pred in self.predecessors:
307 self.AddEdge(pred, self.statement_counter)
308
309 self.predecessors = set({})
310
311 if len(self.block_stack):
312 self.block_stack[-1] = self.statement_counter
313
314 return self.statement_counter
315
316 def AddFact(self, statement: int, fact: Fact) -> None:
317 """
318 Annotate a statement with a fact.
319 """
320 self.facts[statement].append(fact)
321
322 def _PushBlock(self, begin: Optional[int] = None) -> int:
323 """
324 Start a block at the given statement ID. If a beginning statement isn't
325 provided one will be created and its ID will be returend.
326
327 Direct use of this function is discouraged. Consider using one of the
328 block context managers below instead.
329 """
330 if begin is None:
331 begin = self.AddStatement()
332 else:
333 self.predecessors.add(begin)
334
335 self.block_stack.append(begin)
336 return begin
337
338 def _PopBlock(self) -> int:
339 """
340 Pop a block from the top of the stack and return the ID of the block's
341 last statement.
342
343 Direct use of this function is discouraged. Consider using one of the
344 block context managers below instead.
345 """
346 assert len(self.block_stack)
347 last = self.block_stack.pop()
348 if len(self.block_stack) and last not in self.deadends:
349 self.block_stack[-1] = last
350
351 return last
352
353
354class CfgBlockContext(object):
355 """
356 Context manager to make dealing with things like with-statements easier.
357 """
358
359 def __init__(self,
360 cfg: ControlFlowGraph,
361 begin: Optional[int] = None) -> None:
362 self.cfg = cfg
363 if cfg is None:
364 return
365
366 self.entry = self.cfg._PushBlock(begin)
367 self.exit = self.entry
368
369 def __enter__(self) -> None:
370 return self if self.cfg else None
371
372 def __exit__(self, *args) -> None:
373 if not self.cfg:
374 return
375
376 self.exit = self.cfg._PopBlock()
377
378
379class CfgBranchContext(object):
380 """
381 Context manager to make dealing with if-else blocks easier.
382 """
383
384 def __init__(self, cfg: ControlFlowGraph, branch_point: int) -> None:
385 self.cfg = cfg
386 self.entry = branch_point
387 self.exit = self.entry
388 if cfg is None:
389 return
390
391 self.arms = []
392 self.pushed = False
393
394 def AddBranch(self, entry: Optional[int] = None):
395 if not self.cfg:
396 return CfgBranchContext(None, None)
397
398 self.arms.append(CfgBranchContext(self.cfg, entry or self.entry))
399 self.cfg._PushBlock(self.arms[-1].entry)
400 self.arms[-1].pushed = True
401 return self.arms[-1]
402
403 def __enter__(self) -> None:
404 return self
405
406 def __exit__(self, *args) -> None:
407 if not self.cfg:
408 return
409
410 if self.pushed:
411 self.exit = self.cfg._PopBlock()
412
413 for arm in self.arms:
414 if arm.exit not in self.cfg.deadends:
415 self.cfg.predecessors.add(arm.exit)
416
417
418class CfgLoopContext(object):
419 """
420 Context manager to make dealing with loops easier.
421 """
422
423 def __init__(self,
424 cfg: ControlFlowGraph,
425 entry: Optional[int] = None) -> None:
426 self.cfg = cfg
427 self.breaks = set({})
428 if cfg is None:
429 return
430
431 self.entry = self.cfg._PushBlock(entry)
432 self.exit = self.entry
433
434 def AddBreak(self, statement: int) -> None:
435 assert self.cfg
436 self.breaks.add(statement)
437 self.cfg.AddDeadend(statement)
438
439 def AddContinue(self, statement: int) -> None:
440 self.cfg.AddEdge(statement, self.entry)
441 self.cfg.AddDeadend(statement)
442
443 def __enter__(self) -> None:
444 return self if self.cfg else None
445
446 def __exit__(self, *args) -> None:
447 if not self.cfg:
448 return
449
450 self.exit = self.cfg._PopBlock()
451 self.cfg.AddEdge(self.exit, self.entry)
452 for pred in self.cfg.predecessors:
453 self.cfg.AddEdge(pred, self.entry)
454
455 # If we had any breaks, arm the predecessor set with the current
456 # statement and the break statements.
457 if len(self.breaks):
458 if len(self.cfg.block_stack):
459 self.cfg.predecessors.add(self.cfg.block_stack[-1])
460 else:
461 self.cfg.predecessors.add(self.cfg.statement_counter)
462
463 for b in self.breaks:
464 self.cfg.deadends.remove(b)
465 self.cfg.predecessors.add(b)
466
467
468def DumpControlFlowGraphs(cfgs: dict[str, ControlFlowGraph],
469 facts_dir='_tmp/mycpp-facts') -> None:
470 """
471 Dump the given control flow graphs and associated facts into the given
472 directory as text files that can be consumed by datalog.
473 """
474 edge_facts = '{}/cf_edge.facts'.format(facts_dir)
475 fact_files = {}
476 os.makedirs(facts_dir, exist_ok=True)
477 with open(edge_facts, 'w') as cfg_f:
478 for func, cfg in sorted(cfgs.items()):
479 joined = join_name(func, delim='.')
480 for (u, v) in sorted(cfg.edges):
481 cfg_f.write('{}\t{}\t{}\n'.format(joined, u, v))
482
483 for statement, facts in sorted(cfg.facts.items()):
484 for fact in facts: # already sorted temporally
485 fact_f = fact_files.get(fact.name())
486 if not fact_f:
487 fact_f = open(
488 '{}/{}.facts'.format(facts_dir, fact.name()), 'w')
489 fact_files[fact.name()] = fact_f
490
491 fact_f.write(fact.Generate(joined, statement))
492
493 for f in fact_files.values():
494 f.close()