| 1 | """
 | 
| 2 | pass_state.py
 | 
| 3 | """
 | 
| 4 | from __future__ import print_function
 | 
| 5 | 
 | 
| 6 | import os
 | 
| 7 | from collections import defaultdict
 | 
| 8 | 
 | 
| 9 | from mypy.types import Type
 | 
| 10 | from mypy.nodes import Expression
 | 
| 11 | 
 | 
| 12 | from mycpp.util import join_name, log, SymbolPath
 | 
| 13 | 
 | 
| 14 | from typing import Optional
 | 
| 15 | 
 | 
| 16 | _ = log
 | 
| 17 | 
 | 
| 18 | 
 | 
| 19 | class ModuleMember(object):
 | 
| 20 |     """
 | 
| 21 |     A member of a Python module.
 | 
| 22 | 
 | 
| 23 |     e.g. core.state.Mem => core::state::Mem
 | 
| 24 |     """
 | 
| 25 | 
 | 
| 26 |     def __init__(self, module_path: SymbolPath, member: str) -> None:
 | 
| 27 |         self.module_path = module_path
 | 
| 28 |         self.member = member
 | 
| 29 | 
 | 
| 30 | 
 | 
| 31 | class StaticObjectMember(object):
 | 
| 32 |     """
 | 
| 33 |     A static member of an object. Usually a a method like an alternative constructor.
 | 
| 34 | 
 | 
| 35 |     e.g. runtime_asdl.Cell.CreateNull() => runtime_asdl::Cell::CreateNull()
 | 
| 36 |     """
 | 
| 37 | 
 | 
| 38 |     def __init__(self, base_type_name: SymbolPath, member: str) -> None:
 | 
| 39 |         self.base_type_name = base_type_name
 | 
| 40 |         self.member = member
 | 
| 41 | 
 | 
| 42 | 
 | 
| 43 | class HeapObjectMember(object):
 | 
| 44 |     """
 | 
| 45 |     A member of a heap-allocated object.
 | 
| 46 | 
 | 
| 47 |     e.g foo.empty() => foo->empty()
 | 
| 48 |     """
 | 
| 49 | 
 | 
| 50 |     def __init__(self, object_expr: Expression, object_type: Type,
 | 
| 51 |                  member: str) -> None:
 | 
| 52 |         self.ojbect_expr = object_expr
 | 
| 53 |         self.object_type = object_type
 | 
| 54 |         self.member = member
 | 
| 55 | 
 | 
| 56 | 
 | 
| 57 | class StackObjectMember(object):
 | 
| 58 |     """
 | 
| 59 |     A member of a stack-allocated object.
 | 
| 60 | 
 | 
| 61 |     e.g foo.empty() => foo.empty()
 | 
| 62 |     """
 | 
| 63 | 
 | 
| 64 |     def __init__(self, object_expr: Expression, object_type: Type,
 | 
| 65 |                  member: str) -> None:
 | 
| 66 |         self.ojbect_expr = object_expr
 | 
| 67 |         self.object_type = object_type
 | 
| 68 |         self.member = member
 | 
| 69 | 
 | 
| 70 | 
 | 
| 71 | class Virtual(object):
 | 
| 72 |     """Calculate which C++ methods need the virtual keyword.
 | 
| 73 | 
 | 
| 74 |     See unit test for example usage.
 | 
| 75 |     """
 | 
| 76 | 
 | 
| 77 |     def __init__(self) -> None:
 | 
| 78 |         self.methods: dict[SymbolPath, list[str]] = defaultdict(list)
 | 
| 79 |         self.subclasses: dict[SymbolPath, list[tuple[str]]] = defaultdict(list)
 | 
| 80 |         self.virtuals: dict[tuple[SymbolPath, str], Optional[tuple[SymbolPath,
 | 
| 81 |                                                                    str]]] = {}
 | 
| 82 |         self.has_vtable: dict[SymbolPath, bool] = {}
 | 
| 83 |         self.can_reorder_fields: dict[SymbolPath, bool] = {}
 | 
| 84 | 
 | 
| 85 |         # _Executor -> vm::_Executor
 | 
| 86 |         self.base_class_unique: dict[str, SymbolPath] = {}
 | 
| 87 | 
 | 
| 88 |     # These are called on the Forward Declare pass
 | 
| 89 |     def OnMethod(self, class_name: SymbolPath, method_name: str) -> None:
 | 
| 90 |         #log('OnMethod %s %s', class_name, method_name)
 | 
| 91 | 
 | 
| 92 |         # __init__ and so forth don't count
 | 
| 93 |         if method_name.startswith('__') and method_name.endswith('__'):
 | 
| 94 |             return
 | 
| 95 | 
 | 
| 96 |         self.methods[class_name].append(method_name)
 | 
| 97 | 
 | 
| 98 |     def OnSubclass(self, base_class: SymbolPath, subclass: SymbolPath) -> None:
 | 
| 99 |         if len(base_class) > 1:
 | 
| 100 |             # Hack for
 | 
| 101 |             #
 | 
| 102 |             # class _Executor: pass
 | 
| 103 |             #   versus
 | 
| 104 |             # class MyExecutor(vm._Executor): pass
 | 
| 105 |             base_key = base_class[-1]
 | 
| 106 | 
 | 
| 107 |             # Fail if we have two base classes in different namespaces with the same
 | 
| 108 |             # name.
 | 
| 109 |             if base_key in self.base_class_unique:
 | 
| 110 |                 # Make sure we don't have collisions
 | 
| 111 |                 assert (self.base_class_unique[base_key] == base_class or
 | 
| 112 |                         base_class
 | 
| 113 |                         in self.subclasses[self.base_class_unique[base_key]]
 | 
| 114 |                         ), base_class
 | 
| 115 |             else:
 | 
| 116 |                 self.base_class_unique[base_key] = base_class
 | 
| 117 | 
 | 
| 118 |         else:
 | 
| 119 |             base_key = base_class
 | 
| 120 | 
 | 
| 121 |         self.subclasses[base_class].append(subclass)
 | 
| 122 | 
 | 
| 123 |     def Calculate(self) -> None:
 | 
| 124 |         """Call this after the forward declare pass."""
 | 
| 125 |         for base_class, subclasses in self.subclasses.items():
 | 
| 126 |             self.can_reorder_fields[base_class] = False
 | 
| 127 | 
 | 
| 128 |             for subclass in subclasses:
 | 
| 129 |                 self.can_reorder_fields[subclass] = False
 | 
| 130 | 
 | 
| 131 |                 b_methods = self.methods[base_class]
 | 
| 132 |                 s_methods = self.methods[subclass]
 | 
| 133 |                 overlapping = set(b_methods) & set(s_methods)
 | 
| 134 |                 for method in overlapping:
 | 
| 135 |                     self.virtuals[(base_class, method)] = None
 | 
| 136 |                     self.virtuals[(subclass, method)] = (base_class, method)
 | 
| 137 |                 if overlapping:
 | 
| 138 |                     self.has_vtable[base_class] = True
 | 
| 139 |                     self.has_vtable[subclass] = True
 | 
| 140 | 
 | 
| 141 |     # These is called on the Decl pass
 | 
| 142 |     def IsVirtual(self, class_name: SymbolPath, method_name: str) -> bool:
 | 
| 143 |         return (class_name, method_name) in self.virtuals
 | 
| 144 | 
 | 
| 145 |     def HasVTable(self, class_name: SymbolPath) -> bool:
 | 
| 146 |         return class_name in self.has_vtable
 | 
| 147 | 
 | 
| 148 |     def CanReorderFields(self, class_name: SymbolPath) -> bool:
 | 
| 149 |         if class_name in self.can_reorder_fields:
 | 
| 150 |             return self.can_reorder_fields[class_name]
 | 
| 151 |         else:
 | 
| 152 |             return True  # by default they can be reordered
 | 
| 153 | 
 | 
| 154 | 
 | 
| 155 | def SymbolPathToSouffle(p: SymbolPath) -> str:
 | 
| 156 |     if len(p) > 1:
 | 
| 157 |         return '$Member({}, {})'.format(join_name(p[:-1], delim='.'), p[-1])
 | 
| 158 | 
 | 
| 159 |     return '$Variable({})'.format(p[0])
 | 
| 160 | 
 | 
| 161 | 
 | 
| 162 | class Fact(object):
 | 
| 163 |     """
 | 
| 164 |     An abstract fact. These can be used to build up datalog programs.
 | 
| 165 |     """
 | 
| 166 | 
 | 
| 167 |     def __init__(self) -> None:
 | 
| 168 |         pass
 | 
| 169 | 
 | 
| 170 |     def name(self) -> str:
 | 
| 171 |         raise NotImplementedError()
 | 
| 172 | 
 | 
| 173 |     def Generate(self, func: str, statement: int) -> str:
 | 
| 174 |         raise NotImplementedError()
 | 
| 175 | 
 | 
| 176 | 
 | 
| 177 | class FunctionCall(Fact):
 | 
| 178 | 
 | 
| 179 |     def __init__(self, callee: str) -> None:
 | 
| 180 |         self.callee = callee
 | 
| 181 | 
 | 
| 182 |     def name(self) -> str:
 | 
| 183 |         return 'call'
 | 
| 184 | 
 | 
| 185 |     def Generate(self, func: str, statement: int) -> str:
 | 
| 186 |         return '{}\t{}\t{}\n'.format(func, statement, self.callee)
 | 
| 187 | 
 | 
| 188 | 
 | 
| 189 | class Definition(Fact):
 | 
| 190 |     """
 | 
| 191 |     The definition of a variable. This corresponds to an allocation.
 | 
| 192 |     """
 | 
| 193 | 
 | 
| 194 |     def __init__(self, variable: SymbolPath) -> None:
 | 
| 195 |         self.variable = variable
 | 
| 196 | 
 | 
| 197 |     def name(self) -> str:
 | 
| 198 |         return 'define'
 | 
| 199 | 
 | 
| 200 |     def Generate(self, func: str, statement: int) -> str:
 | 
| 201 |         return '{}\t{}\t{}\n'.format(func, statement,
 | 
| 202 |                                      SymbolPathToSouffle(self.variable))
 | 
| 203 | 
 | 
| 204 | 
 | 
| 205 | class Assignment(Fact):
 | 
| 206 |     """
 | 
| 207 |     The assignment of one variable or object member to another.
 | 
| 208 |     """
 | 
| 209 | 
 | 
| 210 |     def __init__(self, lhs: SymbolPath, rhs: SymbolPath) -> None:
 | 
| 211 |         self.lhs = lhs
 | 
| 212 |         self.rhs = rhs
 | 
| 213 | 
 | 
| 214 |     def name(self) -> str:
 | 
| 215 |         return 'assign'
 | 
| 216 | 
 | 
| 217 |     def Generate(self, func: str, statement: int) -> str:
 | 
| 218 |         return '{}\t{}\t{}\t{}\n'.format(func, statement,
 | 
| 219 |                                          SymbolPathToSouffle(self.lhs),
 | 
| 220 |                                          SymbolPathToSouffle(self.rhs))
 | 
| 221 | 
 | 
| 222 | 
 | 
| 223 | class ControlFlowGraph(object):
 | 
| 224 |     """
 | 
| 225 |     A simple control-flow graph.
 | 
| 226 | 
 | 
| 227 |     Every statement in the program is represented as a node in a graph with
 | 
| 228 |     unique a numeric ID. Control flow is represented as directed edges through
 | 
| 229 |     the graph. Loops can introduce back-edges. Every node in the graph will
 | 
| 230 |     satisfy at least one of the following conditions:
 | 
| 231 | 
 | 
| 232 |         - Its indegree is at least one.
 | 
| 233 | 
 | 
| 234 |         - Its outdegree is at least one.
 | 
| 235 | 
 | 
| 236 |     For simple linear graphs all you need is the AddStatement method. For more
 | 
| 237 |     complex flows there is a set of context managers below to help simplify
 | 
| 238 |     construction.
 | 
| 239 | 
 | 
| 240 |         - For branches-like statements (e.g. if- and try- statements) use
 | 
| 241 |           CfgBranchContext. It will take care of the details associated with
 | 
| 242 |           stitching the different branches to statements in the next statement.
 | 
| 243 | 
 | 
| 244 |         - For loops, use CfgLoopContext. It will take care of adding back-edges
 | 
| 245 |           and connecting break statements to any statements that proceed the
 | 
| 246 |           loop.
 | 
| 247 | 
 | 
| 248 |         - CfgBlockContext can be used for simple cases where you just want to
 | 
| 249 |           track the beginning and end of a sequence of statements.
 | 
| 250 | 
 | 
| 251 |     Statements can carry annotations called facts, which are used as inputs to
 | 
| 252 |     datalog programs to perform dataflow diffrent kinds of dataflow analyses.
 | 
| 253 |     To annotate a statement, use the AddFact method with any object that
 | 
| 254 |     implements the Fact interface.
 | 
| 255 | 
 | 
| 256 |     See the unit tests in pass_state_test.py and the mycpp phase in
 | 
| 257 |     control_flow_pass.py for detailed examples of usage.
 | 
| 258 |     """
 | 
| 259 | 
 | 
| 260 |     def __init__(self) -> None:
 | 
| 261 |         self.statement_counter: int = 0
 | 
| 262 |         self.edges: set[tuple[int, int]] = set({})
 | 
| 263 |         self.block_stack: list[int] = []
 | 
| 264 |         self.predecessors: set[int] = set({})
 | 
| 265 |         self.deadends: set[int] = set({})
 | 
| 266 | 
 | 
| 267 |         # order doesn't actually matter here, but sets require elements to be
 | 
| 268 |         # hashable
 | 
| 269 |         self.facts: dict[int, list[Fact]] = defaultdict(list)
 | 
| 270 | 
 | 
| 271 |     def AddEdge(self, pred: int, succ: int) -> None:
 | 
| 272 |         """
 | 
| 273 |         Add a directed edge from pred to succ. If pred is a deadend, its
 | 
| 274 |         non-deadends will be used instead.
 | 
| 275 |         """
 | 
| 276 |         if pred in self.deadends:
 | 
| 277 |             for w in [u for (u, v) in self.edges if v == pred]:
 | 
| 278 |                 self.AddEdge(w, succ)
 | 
| 279 |         else:
 | 
| 280 |             self.edges.add((pred, succ))
 | 
| 281 | 
 | 
| 282 |     def AddDeadend(self, statement: int):
 | 
| 283 |         """
 | 
| 284 |         Mark a statement as a dead-end (e.g. return or continue).
 | 
| 285 |         """
 | 
| 286 |         self.deadends.add(statement)
 | 
| 287 | 
 | 
| 288 |     def AddStatement(self) -> int:
 | 
| 289 |         """
 | 
| 290 |         Add a new statement and return its ID.
 | 
| 291 |         """
 | 
| 292 |         if len(self.predecessors) == 0:
 | 
| 293 |             if len(self.block_stack):
 | 
| 294 |                 self.predecessors.add(self.block_stack[-1])
 | 
| 295 |             else:
 | 
| 296 |                 self.predecessors.add(self.statement_counter)
 | 
| 297 | 
 | 
| 298 |         self.statement_counter += 1
 | 
| 299 |         for pred in self.predecessors:
 | 
| 300 |             self.AddEdge(pred, self.statement_counter)
 | 
| 301 | 
 | 
| 302 |         self.predecessors = set({})
 | 
| 303 | 
 | 
| 304 |         if len(self.block_stack):
 | 
| 305 |             self.block_stack[-1] = self.statement_counter
 | 
| 306 | 
 | 
| 307 |         return self.statement_counter
 | 
| 308 | 
 | 
| 309 |     def AddFact(self, statement: int, fact: Fact) -> None:
 | 
| 310 |         """
 | 
| 311 |         Annotate a statement with a fact.
 | 
| 312 |         """
 | 
| 313 |         self.facts[statement].append(fact)
 | 
| 314 | 
 | 
| 315 |     def _PushBlock(self, begin: Optional[int] = None) -> int:
 | 
| 316 |         """
 | 
| 317 |         Start a block at the given statement ID. If a beginning statement isn't
 | 
| 318 |         provided one will be created and its ID will be returend.
 | 
| 319 | 
 | 
| 320 |         Direct use of this function is discouraged. Consider using one of the
 | 
| 321 |         block context managers below instead.
 | 
| 322 |         """
 | 
| 323 |         if begin is None:
 | 
| 324 |             begin = self.AddStatement()
 | 
| 325 |         else:
 | 
| 326 |             self.predecessors.add(begin)
 | 
| 327 | 
 | 
| 328 |         self.block_stack.append(begin)
 | 
| 329 |         return begin
 | 
| 330 | 
 | 
| 331 |     def _PopBlock(self) -> int:
 | 
| 332 |         """
 | 
| 333 |         Pop a block from the top of the stack and return the ID of the block's
 | 
| 334 |         last statement.
 | 
| 335 | 
 | 
| 336 |         Direct use of this function is discouraged. Consider using one of the
 | 
| 337 |         block context managers below instead.
 | 
| 338 |         """
 | 
| 339 |         assert len(self.block_stack)
 | 
| 340 |         last = self.block_stack.pop()
 | 
| 341 |         if len(self.block_stack) and last not in self.deadends:
 | 
| 342 |             self.block_stack[-1] = last
 | 
| 343 | 
 | 
| 344 |         return last
 | 
| 345 | 
 | 
| 346 | 
 | 
| 347 | class CfgBlockContext(object):
 | 
| 348 |     """
 | 
| 349 |     Context manager to make dealing with things like with-statements easier.
 | 
| 350 |     """
 | 
| 351 | 
 | 
| 352 |     def __init__(self,
 | 
| 353 |                  cfg: ControlFlowGraph,
 | 
| 354 |                  begin: Optional[int] = None) -> None:
 | 
| 355 |         self.cfg = cfg
 | 
| 356 |         if cfg is None:
 | 
| 357 |             return
 | 
| 358 | 
 | 
| 359 |         self.entry = self.cfg._PushBlock(begin)
 | 
| 360 |         self.exit = self.entry
 | 
| 361 | 
 | 
| 362 |     def __enter__(self) -> None:
 | 
| 363 |         return self if self.cfg else None
 | 
| 364 | 
 | 
| 365 |     def __exit__(self, *args) -> None:
 | 
| 366 |         if not self.cfg:
 | 
| 367 |             return
 | 
| 368 | 
 | 
| 369 |         self.exit = self.cfg._PopBlock()
 | 
| 370 | 
 | 
| 371 | 
 | 
| 372 | class CfgBranchContext(object):
 | 
| 373 |     """
 | 
| 374 |     Context manager to make dealing with if-else blocks easier.
 | 
| 375 |     """
 | 
| 376 | 
 | 
| 377 |     def __init__(self, cfg: ControlFlowGraph, branch_point: int) -> None:
 | 
| 378 |         self.cfg = cfg
 | 
| 379 |         self.entry = branch_point
 | 
| 380 |         self.exit = self.entry
 | 
| 381 |         if cfg is None:
 | 
| 382 |             return
 | 
| 383 | 
 | 
| 384 |         self.arms = []
 | 
| 385 |         self.pushed = False
 | 
| 386 | 
 | 
| 387 |     def AddBranch(self, entry: Optional[int] = None):
 | 
| 388 |         if not self.cfg:
 | 
| 389 |             return CfgBranchContext(None, None)
 | 
| 390 | 
 | 
| 391 |         self.arms.append(CfgBranchContext(self.cfg, entry or self.entry))
 | 
| 392 |         self.cfg._PushBlock(self.arms[-1].entry)
 | 
| 393 |         self.arms[-1].pushed = True
 | 
| 394 |         return self.arms[-1]
 | 
| 395 | 
 | 
| 396 |     def __enter__(self) -> None:
 | 
| 397 |         return self
 | 
| 398 | 
 | 
| 399 |     def __exit__(self, *args) -> None:
 | 
| 400 |         if not self.cfg:
 | 
| 401 |             return
 | 
| 402 | 
 | 
| 403 |         if self.pushed:
 | 
| 404 |             self.exit = self.cfg._PopBlock()
 | 
| 405 | 
 | 
| 406 |         for arm in self.arms:
 | 
| 407 |             if arm.exit not in self.cfg.deadends:
 | 
| 408 |                 self.cfg.predecessors.add(arm.exit)
 | 
| 409 | 
 | 
| 410 | 
 | 
| 411 | class CfgLoopContext(object):
 | 
| 412 |     """
 | 
| 413 |     Context manager to make dealing with loops easier.
 | 
| 414 |     """
 | 
| 415 | 
 | 
| 416 |     def __init__(self,
 | 
| 417 |                  cfg: ControlFlowGraph,
 | 
| 418 |                  entry: Optional[int] = None) -> None:
 | 
| 419 |         self.cfg = cfg
 | 
| 420 |         self.breaks = set({})
 | 
| 421 |         if cfg is None:
 | 
| 422 |             return
 | 
| 423 | 
 | 
| 424 |         self.entry = self.cfg._PushBlock(entry)
 | 
| 425 |         self.exit = self.entry
 | 
| 426 | 
 | 
| 427 |     def AddBreak(self, statement: int) -> None:
 | 
| 428 |         assert self.cfg
 | 
| 429 |         self.breaks.add(statement)
 | 
| 430 |         self.cfg.AddDeadend(statement)
 | 
| 431 | 
 | 
| 432 |     def AddContinue(self, statement: int) -> None:
 | 
| 433 |         self.cfg.AddEdge(statement, self.entry)
 | 
| 434 |         self.cfg.AddDeadend(statement)
 | 
| 435 | 
 | 
| 436 |     def __enter__(self) -> None:
 | 
| 437 |         return self if self.cfg else None
 | 
| 438 | 
 | 
| 439 |     def __exit__(self, *args) -> None:
 | 
| 440 |         if not self.cfg:
 | 
| 441 |             return
 | 
| 442 | 
 | 
| 443 |         self.exit = self.cfg._PopBlock()
 | 
| 444 |         self.cfg.AddEdge(self.exit, self.entry)
 | 
| 445 |         for pred in self.cfg.predecessors:
 | 
| 446 |             self.cfg.AddEdge(pred, self.entry)
 | 
| 447 | 
 | 
| 448 |         # If we had any breaks, arm the predecessor set with the current
 | 
| 449 |         # statement and the break statements.
 | 
| 450 |         if len(self.breaks):
 | 
| 451 |             if len(self.cfg.block_stack):
 | 
| 452 |                 self.cfg.predecessors.add(self.cfg.block_stack[-1])
 | 
| 453 |             else:
 | 
| 454 |                 self.cfg.predecessors.add(self.cfg.statement_counter)
 | 
| 455 | 
 | 
| 456 |         for b in self.breaks:
 | 
| 457 |             self.cfg.deadends.remove(b)
 | 
| 458 |             self.cfg.predecessors.add(b)
 | 
| 459 | 
 | 
| 460 | 
 | 
| 461 | def DumpControlFlowGraphs(cfgs: dict[str, ControlFlowGraph],
 | 
| 462 |                           facts_dir='_tmp/mycpp-facts') -> None:
 | 
| 463 |     """
 | 
| 464 |     Dump the given control flow graphs and associated facts into the given
 | 
| 465 |     directory as text files that can be consumed by datalog.
 | 
| 466 |     """
 | 
| 467 |     edge_facts = '{}/cf_edge.facts'.format(facts_dir)
 | 
| 468 |     fact_files = {}
 | 
| 469 |     os.makedirs(facts_dir, exist_ok=True)
 | 
| 470 |     with open(edge_facts, 'w') as cfg_f:
 | 
| 471 |         for func, cfg in sorted(cfgs.items()):
 | 
| 472 |             joined = join_name(func, delim='.')
 | 
| 473 |             for (u, v) in sorted(cfg.edges):
 | 
| 474 |                 cfg_f.write('{}\t{}\t{}\n'.format(joined, u, v))
 | 
| 475 | 
 | 
| 476 |             for statement, facts in sorted(cfg.facts.items()):
 | 
| 477 |                 for fact in facts:  # already sorted temporally
 | 
| 478 |                     fact_f = fact_files.get(fact.name())
 | 
| 479 |                     if not fact_f:
 | 
| 480 |                         fact_f = open(
 | 
| 481 |                             '{}/{}.facts'.format(facts_dir, fact.name()), 'w')
 | 
| 482 |                         fact_files[fact.name()] = fact_f
 | 
| 483 | 
 | 
| 484 |                     fact_f.write(fact.Generate(joined, statement))
 | 
| 485 | 
 | 
| 486 |     for f in fact_files.values():
 | 
| 487 |         f.close()
 |