| 1 | #!/usr/bin/python
|
| 2 | from __future__ import print_function
|
| 3 | """
|
| 4 | callgraph.py
|
| 5 | """
|
| 6 |
|
| 7 | import collections
|
| 8 | import dis
|
| 9 | import inspect
|
| 10 | import os
|
| 11 | import sys
|
| 12 |
|
| 13 | import __builtin__ # For looking up names
|
| 14 | import types
|
| 15 |
|
| 16 | from core import util
|
| 17 | log = util.log
|
| 18 |
|
| 19 |
|
| 20 | def Disassemble(co):
|
| 21 | """Given a code object, yield instructions of interest.
|
| 22 |
|
| 23 | Args:
|
| 24 | co: __code__ attribute
|
| 25 |
|
| 26 | Structure copied from misc/inspect_pyc.py, which was copied from
|
| 27 | dis.disassemble().
|
| 28 | """
|
| 29 | code = co.co_code
|
| 30 | extended_arg = 0 # Not used
|
| 31 |
|
| 32 | i = 0
|
| 33 | n = len(code)
|
| 34 | #log('\tLENGTH OF CODE %s: %d', co.co_name, n)
|
| 35 |
|
| 36 | while i < n:
|
| 37 | #log('\ti = %d, n = %d', i, n)
|
| 38 | op = ord(code[i])
|
| 39 | i += 1
|
| 40 |
|
| 41 | op_name = dis.opname[op]
|
| 42 |
|
| 43 | # operation is 1 byte, argument is 2 bytes.
|
| 44 | if op >= dis.HAVE_ARGUMENT:
|
| 45 | oparg = ord(code[i]) + ord(code[i+1])*256 + extended_arg
|
| 46 | extended_arg = 0
|
| 47 |
|
| 48 | if op == dis.EXTENDED_ARG:
|
| 49 | # Hm not used?
|
| 50 | raise AssertionError
|
| 51 | extended_arg = oparg*65536L
|
| 52 |
|
| 53 | i += 2
|
| 54 |
|
| 55 | const_name = None
|
| 56 | var_name = None
|
| 57 |
|
| 58 | if op in dis.hasconst:
|
| 59 | const_name = co.co_consts[oparg]
|
| 60 |
|
| 61 | elif op in dis.hasname:
|
| 62 | try:
|
| 63 | var_name = co.co_names[oparg]
|
| 64 | except IndexError:
|
| 65 | log('Error: %r cannot index %s with %d', op_name, co.co_names,
|
| 66 | oparg)
|
| 67 | raise
|
| 68 |
|
| 69 | elif op in dis.hasjrel:
|
| 70 | #raise AssertionError(op_name)
|
| 71 | pass
|
| 72 |
|
| 73 | elif op in dis.haslocal:
|
| 74 | #raise AssertionError(op_name)
|
| 75 | pass
|
| 76 |
|
| 77 | elif op in dis.hascompare:
|
| 78 | #raise AssertionError(op_name)
|
| 79 | pass
|
| 80 |
|
| 81 | elif op in dis.hasfree:
|
| 82 | #raise AssertionError(op_name)
|
| 83 | pass
|
| 84 |
|
| 85 | yield op_name, const_name, var_name
|
| 86 | #log('\t==> i = %d, n = %d', i, n)
|
| 87 |
|
| 88 |
|
| 89 | import sre_compile
|
| 90 |
|
| 91 | def _GetAttr(module, name):
|
| 92 | # Hack for bug in _fixup_range() ! (No longer in Python 3.6 head.)
|
| 93 | if module is sre_compile and name == 'l':
|
| 94 | return None
|
| 95 | # traceback.py has a hasattr() test
|
| 96 | if module is sys and name == 'tracebacklimit':
|
| 97 | return None
|
| 98 |
|
| 99 | try:
|
| 100 | val = getattr(module, name)
|
| 101 | except AttributeError:
|
| 102 | #log('%r not on %r', name, module)
|
| 103 | # This could raise too
|
| 104 | val = getattr(__builtin__, name)
|
| 105 | return val
|
| 106 |
|
| 107 |
|
| 108 | def _Walk(obj, cls, ref, syms):
|
| 109 | """
|
| 110 | Discover statically what (globally-accessible) functions and classes are
|
| 111 | used.
|
| 112 |
|
| 113 | Args:
|
| 114 | obj: a callable object
|
| 115 | cls: an optional class that it was attached to, could be None
|
| 116 | ref: the way the object was referenced from another place in the code
|
| 117 | syms: output Symbols()
|
| 118 |
|
| 119 | Something like this is OK:
|
| 120 | # def Adder(x):
|
| 121 | # def f(y):
|
| 122 | # return x + y
|
| 123 | # return f
|
| 124 |
|
| 125 | Because we'll still have access to the inner code object. We probably won't
|
| 126 | compile it though.
|
| 127 | """
|
| 128 | if syms.Seen(obj):
|
| 129 | return
|
| 130 |
|
| 131 | module_name = getattr(obj, '__module__', None)
|
| 132 | # NOTE: We get duplicate objects like this, due to bound methods on different
|
| 133 | # objects. Not sure how to fix it.
|
| 134 | #
|
| 135 | # OBJ <function Parse at 0x7fcd270b4de8>
|
| 136 | # OBJ <function Parse at 0x7fcd2709e500>
|
| 137 | # OBJ <built-in method get of dict object at 0x7fcd28c53280>
|
| 138 | # OBJ <built-in method get of dict object at 0x7fcd28c53398>
|
| 139 |
|
| 140 | #if obj.__name__ in ('write', 'get', 'Parse'):
|
| 141 | # log('OBJ %s %d', obj, id(obj))
|
| 142 | # pass
|
| 143 |
|
| 144 | if module_name is None:
|
| 145 | syms.Add(obj, None, ref, None, None, None)
|
| 146 | return # Can't walk anything
|
| 147 |
|
| 148 | module = sys.modules[obj.__module__]
|
| 149 |
|
| 150 | #print(obj)
|
| 151 | if hasattr(obj, '__code__'): # Builtins don't have bytecode.
|
| 152 | co = obj.__code__
|
| 153 | #path = co.co_filename
|
| 154 |
|
| 155 | mod_name = None
|
| 156 | mod_path = co.co_filename
|
| 157 |
|
| 158 | syms.Add(obj, cls, ref, mod_name, mod_path, co.co_firstlineno)
|
| 159 | else:
|
| 160 | mod_name = module.__name__
|
| 161 | try:
|
| 162 | mod_path = module.__file__
|
| 163 | if mod_path.endswith('.pyc'):
|
| 164 | mod_path = mod_path[:-1]
|
| 165 | except AttributeError:
|
| 166 | mod_path = None
|
| 167 |
|
| 168 | #if obj.__name__ == 'lex_mode_e':
|
| 169 | # log('!!! %s name %s path %s', obj, mod.__name__, mod.__file__)
|
| 170 |
|
| 171 | #mod_name = None
|
| 172 | #mod_path = None
|
| 173 |
|
| 174 | syms.Add(obj, cls, ref, mod_name, mod_path, None)
|
| 175 | return
|
| 176 |
|
| 177 | #log('\tNAME %s', val.__code__.co_name)
|
| 178 | #log('\tNAMES %s', val.__code__.co_names)
|
| 179 |
|
| 180 | # Most functions and classes we call are globals!
|
| 181 | #log('\t_Walk %s %s', obj, module)
|
| 182 | #log('\t%s', sorted(dir(module)))
|
| 183 |
|
| 184 | # Have to account for foo.Bar(), which gives this sequence:
|
| 185 | # 2 0 LOAD_GLOBAL 0 (foo)
|
| 186 | # 3 LOAD_ATTR 1 (Bar)
|
| 187 | # 6 CALL_FUNCTION 0
|
| 188 | #
|
| 189 | # Also: os.path.join().
|
| 190 |
|
| 191 | try:
|
| 192 | last_val = None # value from previous LOAD_GLOBAL or LOAD_ATTR
|
| 193 | ref = []
|
| 194 | g = Disassemble(obj.__code__)
|
| 195 |
|
| 196 | while True:
|
| 197 | op, const, var = g.next()
|
| 198 |
|
| 199 | if op == 'LOAD_GLOBAL':
|
| 200 | val = _GetAttr(module, var)
|
| 201 | ref = [var] # reset it
|
| 202 |
|
| 203 | elif op == 'LOAD_ATTR':
|
| 204 | #if last_val is not None and isinstance(last_val, types.ModuleType):
|
| 205 | if last_val is not None:
|
| 206 | #log('%s %s', op, var)
|
| 207 | val = _GetAttr(last_val, var)
|
| 208 | ref.append(var)
|
| 209 |
|
| 210 | # Crawl the methods below. Otherwise we get duplicate bound/unbound
|
| 211 | # methods, which have unique addresses.
|
| 212 | # Examples: WAIT_SPEC.Parse, sys.stdout.write
|
| 213 |
|
| 214 | # BUG: os.fork and sys.stdout.write are the same?
|
| 215 | # I thought os.fork is types.BuiltinFunctionType, and
|
| 216 | # sys.stdout.write is types.BuiltinMethodType, but why not?
|
| 217 |
|
| 218 | if isinstance(val, (types.MethodType, types.BuiltinMethodType)):
|
| 219 | val = None
|
| 220 | ref = []
|
| 221 |
|
| 222 | else:
|
| 223 | val = None
|
| 224 | ref = []
|
| 225 |
|
| 226 | else: # Some other opcode
|
| 227 | val = None
|
| 228 | ref = []
|
| 229 |
|
| 230 | if callable(val):
|
| 231 | #log('VAL %s module %s', val, val.__module__)
|
| 232 | # Recursive call.
|
| 233 | _Walk(val, None, ref, syms)
|
| 234 |
|
| 235 | # If the value is a class, walk its methods. Note that we assume ALL
|
| 236 | # methods are used. It's possible to narrow this down a bit and detect
|
| 237 | # unused methods.
|
| 238 | if isinstance(val, type):
|
| 239 | cls = val
|
| 240 | #log('type %s', val)
|
| 241 | for name in dir(val):
|
| 242 | # prevent error with __abstractmethods__ attribute
|
| 243 | #if name.startswith('__'):
|
| 244 | if name == '__abstractmethods__':
|
| 245 | continue
|
| 246 | field_val = getattr(val, name)
|
| 247 | #log('field_val %s', field_val)
|
| 248 | if isinstance(field_val, types.MethodType):
|
| 249 | func_obj = field_val.im_func
|
| 250 | _Walk(func_obj, cls, ref, syms)
|
| 251 |
|
| 252 | last_val = val # Used on next iteration
|
| 253 |
|
| 254 | except StopIteration:
|
| 255 | pass
|
| 256 |
|
| 257 | #log('\tDone _Walk %s %s', obj, module)
|
| 258 |
|
| 259 |
|
| 260 | def PrintSig(fmt, func):
|
| 261 | if os.getenv('CALLGRAPH_SIG') != '1':
|
| 262 | return
|
| 263 | try:
|
| 264 | argspec = inspect.getargspec(func)
|
| 265 | except TypeError:
|
| 266 | return
|
| 267 | parts = [':%s' % ' '.join(argspec.args)]
|
| 268 | # These are keyword-only args?
|
| 269 | if argspec.varargs:
|
| 270 | parts.append('varargs=%s' % (argspec.varargs,))
|
| 271 | if argspec.keywords:
|
| 272 | parts.append('kw=%s' % (argspec.keywords,))
|
| 273 | # Hm the default of 'None' is bad -- you can't distinguish a default of None,
|
| 274 | # which is very common!
|
| 275 | # It's better to get this by parsing the AST.
|
| 276 | if argspec.defaults and any(d is not None for d in argspec.defaults):
|
| 277 | parts.append('defaults=%s' % (argspec.defaults,))
|
| 278 |
|
| 279 | print(fmt % ' '.join(parts))
|
| 280 |
|
| 281 |
|
| 282 | class Class(object):
|
| 283 |
|
| 284 | def __init__(self, cls, mod_name, mod_path):
|
| 285 | self.cls = cls
|
| 286 | self.mod_name = mod_name
|
| 287 | self.mod_path = mod_path
|
| 288 | self.methods = []
|
| 289 |
|
| 290 | def Name(self):
|
| 291 | return self.cls.__name__
|
| 292 |
|
| 293 | def AddMethod(self, m, mod_name, mod_path, line_num):
|
| 294 | # Just assume the method is in the same file as the class itself.
|
| 295 | self.methods.append((m, mod_name, mod_path, line_num))
|
| 296 |
|
| 297 | def Print(self):
|
| 298 | base_names = ' '.join(c.__name__ for c in self.cls.__bases__)
|
| 299 | print(' %s(%s)' % (self.cls.__name__, base_names))
|
| 300 |
|
| 301 | methods = [(m.__name__, m) for (m, _, _, _) in self.methods]
|
| 302 | methods.sort()
|
| 303 | for name, m in methods:
|
| 304 | print(' %s' % name)
|
| 305 | PrintSig(' %s', m)
|
| 306 |
|
| 307 | def __repr__(self):
|
| 308 | return '<Class %s %s %s %s>' % (
|
| 309 | self.cls, self.mod_name, self.mod_path, self.methods)
|
| 310 |
|
| 311 |
|
| 312 | class Symbols(object):
|
| 313 | """A sink for discovered symbols."""
|
| 314 |
|
| 315 | def __init__(self):
|
| 316 | self.seen = set()
|
| 317 |
|
| 318 | self.classes = {} # integer id() -> Class
|
| 319 | self.functions = [] # list of callables
|
| 320 |
|
| 321 | self.paths = {} # path -> list of functions
|
| 322 |
|
| 323 | def Seen(self, c):
|
| 324 | """c: a callable."""
|
| 325 | id_ = id(c)
|
| 326 | if id_ in self.seen:
|
| 327 | return True
|
| 328 | self.seen.add(id_)
|
| 329 | return False
|
| 330 |
|
| 331 | def Add(self, obj, cls, ref, mod_name, mod_path, line_num):
|
| 332 | """Could be a function, class Constructor, or method.
|
| 333 | Can also be native (C) or interpreted (Python with __code__ attribute.)
|
| 334 |
|
| 335 | Returns:
|
| 336 | True if we haven't yet seen it.
|
| 337 | """
|
| 338 | if mod_path is not None:
|
| 339 | mod_path = os.path.normpath(mod_path)
|
| 340 |
|
| 341 | if isinstance(obj, type):
|
| 342 | id_ = id(obj)
|
| 343 |
|
| 344 | # NOTE: Python's classes don't have a __code__ object, which appears to
|
| 345 | # be an irregularity. So we have to get location information from the
|
| 346 | # METHODS.
|
| 347 | assert not hasattr(obj, '__code__'), obj
|
| 348 | #assert path is None
|
| 349 | assert line_num is None
|
| 350 |
|
| 351 | self.classes[id_] = Class(obj, mod_name, mod_path)
|
| 352 |
|
| 353 | elif cls is not None:
|
| 354 | id_ = id(cls)
|
| 355 | descriptor = self.classes[id_]
|
| 356 | descriptor.AddMethod(obj, mod_name, mod_path, line_num)
|
| 357 |
|
| 358 | else:
|
| 359 | self.functions.append((obj, ref, mod_name, mod_path, line_num))
|
| 360 |
|
| 361 | return True
|
| 362 |
|
| 363 | def Report(self, f=sys.stdout):
|
| 364 | # Now categorize into files. We couldn't do that earlier because classes
|
| 365 | # don't know where they are located!
|
| 366 |
|
| 367 | py_srcs = collections.defaultdict(SourceFile)
|
| 368 | c_srcs = collections.defaultdict(SourceFile)
|
| 369 |
|
| 370 | for func, ref, mod_name, mod_path, line_num in self.functions:
|
| 371 | if mod_path:
|
| 372 | py_srcs[mod_path].functions.append((func, ref, line_num))
|
| 373 | else:
|
| 374 | c_srcs[mod_name].functions.append((func, ref, line_num))
|
| 375 |
|
| 376 | for cls in self.classes.values():
|
| 377 | #if cls.cls.__name__ == 'lex_mode_e':
|
| 378 | # log('!!! %s', cls)
|
| 379 |
|
| 380 | if cls.mod_path:
|
| 381 | py_srcs[cls.mod_path].classes.append(cls)
|
| 382 | else:
|
| 383 | c_srcs[cls.mod_name].classes.append(cls)
|
| 384 |
|
| 385 | prefix = os.getcwd()
|
| 386 | n = len(prefix) + 1
|
| 387 |
|
| 388 | #for name in sorted(py_srcs):
|
| 389 | # print(name)
|
| 390 |
|
| 391 | #return
|
| 392 |
|
| 393 | # Still missing: non-enum ASDL types? Why? CompoundObj?
|
| 394 | # command_e is there, but command and SimpleCommand aren't.
|
| 395 | # it's because we do
|
| 396 | # ast.command_e vs. ast.SimpleCommand
|
| 397 | # in both cases ast is a osh/meta _AsdlModule?
|
| 398 |
|
| 399 | print('PYTHON CODE')
|
| 400 | print()
|
| 401 |
|
| 402 | for path in sorted(py_srcs):
|
| 403 | src = py_srcs[path]
|
| 404 |
|
| 405 | if path is not None and path.startswith(prefix):
|
| 406 | path = path[n:]
|
| 407 |
|
| 408 | print('%s' % path)
|
| 409 |
|
| 410 | for func, ref, _ in src.functions:
|
| 411 | #third = func
|
| 412 | third = ''
|
| 413 | #print(' %s [%s] %s' % (func.__name__, '.'.join(ref), third))
|
| 414 | print(' %s' % func.__name__)
|
| 415 | PrintSig(' %s', func)
|
| 416 |
|
| 417 | classes = [(c.Name(), c) for c in src.classes]
|
| 418 | classes.sort()
|
| 419 | for c in src.classes:
|
| 420 | c.Print()
|
| 421 |
|
| 422 | print()
|
| 423 |
|
| 424 | print('NATIVE CODE')
|
| 425 | print()
|
| 426 |
|
| 427 | for mod_name in sorted(c_srcs):
|
| 428 | src = c_srcs[mod_name]
|
| 429 |
|
| 430 | print('%s' % mod_name)
|
| 431 |
|
| 432 | for func, ref, _ in src.functions:
|
| 433 | #third = func
|
| 434 | third = ''
|
| 435 | #print(' %s [%s] %s' % (func.__name__, '.'.join(ref), third))
|
| 436 | print(' %s' % func.__name__)
|
| 437 |
|
| 438 | classes = [(c.Name(), c) for c in src.classes]
|
| 439 | classes.sort()
|
| 440 | for c in src.classes:
|
| 441 | c.Print()
|
| 442 |
|
| 443 | print()
|
| 444 |
|
| 445 |
|
| 446 | class SourceFile(object):
|
| 447 |
|
| 448 | def __init__(self):
|
| 449 | self.classes = []
|
| 450 | self.functions = []
|
| 451 |
|
| 452 |
|
| 453 | def Walk(main, modules):
|
| 454 | """Given a function main, finds all functions it transitively calls.
|
| 455 |
|
| 456 | Uses heuristic bytecode analysis. Limitations:
|
| 457 |
|
| 458 | - functions that are local variables might not work? But this should work:
|
| 459 |
|
| 460 | if x > 0:
|
| 461 | f = GlobalFunc
|
| 462 | else:
|
| 463 | f = OtherGlobalFunc
|
| 464 | f() # The LOAD_GLOBAL will be found.
|
| 465 |
|
| 466 | Args:
|
| 467 | main: function
|
| 468 | modules: Dict[str, module]
|
| 469 | """
|
| 470 | syms = Symbols()
|
| 471 | _Walk(main, None, ['main'], syms)
|
| 472 |
|
| 473 | # TODO:
|
| 474 | # - co_consts should be unified? So we know how big the const pool is.
|
| 475 |
|
| 476 | syms.Report()
|
| 477 |
|