1 | from __future__ import print_function
|
2 | """
|
3 | callgraph.py
|
4 | """
|
5 |
|
6 | import collections
|
7 | import os
|
8 | import sys
|
9 |
|
10 | import __builtin__ # For looking up names
|
11 | import types
|
12 |
|
13 | from .lib import dis
|
14 | from .lib import inspect
|
15 |
|
16 | from .util import log
|
17 |
|
18 |
|
19 | def Disassemble(co):
|
20 | """Given a code object, yield instructions of interest.
|
21 |
|
22 | Args:
|
23 | co: __code__ attribute
|
24 |
|
25 | Structure copied from opy/compiler2/dis_tool.py, which was copied from
|
26 | dis.disassemble().
|
27 | """
|
28 | code = co.co_code
|
29 | extended_arg = 0 # Not used
|
30 |
|
31 | i = 0
|
32 | n = len(code)
|
33 | #log('\tLENGTH OF CODE %s: %d', co.co_name, n)
|
34 |
|
35 | while i < n:
|
36 | #log('\ti = %d, n = %d', i, n)
|
37 | op = ord(code[i])
|
38 | i += 1
|
39 |
|
40 | op_name = dis.opname[op]
|
41 |
|
42 | # operation is 1 byte, argument is 2 bytes.
|
43 | if op >= dis.HAVE_ARGUMENT:
|
44 | oparg = ord(code[i]) + ord(code[i+1])*256 + extended_arg
|
45 | extended_arg = 0
|
46 |
|
47 | if op == dis.EXTENDED_ARG:
|
48 | # Hm not used?
|
49 | raise AssertionError
|
50 | extended_arg = oparg*65536L
|
51 |
|
52 | i += 2
|
53 |
|
54 | const_name = None
|
55 | var_name = None
|
56 |
|
57 | if op in dis.hasconst:
|
58 | const_name = co.co_consts[oparg]
|
59 |
|
60 | elif op in dis.hasname:
|
61 | try:
|
62 | var_name = co.co_names[oparg]
|
63 | except IndexError:
|
64 | log('Error: %r cannot index %s with %d', op_name, co.co_names,
|
65 | oparg)
|
66 | raise
|
67 |
|
68 | elif op in dis.hasjrel:
|
69 | #raise AssertionError(op_name)
|
70 | pass
|
71 |
|
72 | elif op in dis.haslocal:
|
73 | #raise AssertionError(op_name)
|
74 | pass
|
75 |
|
76 | elif op in dis.hascompare:
|
77 | #raise AssertionError(op_name)
|
78 | pass
|
79 |
|
80 | elif op in dis.hasfree:
|
81 | #raise AssertionError(op_name)
|
82 | pass
|
83 |
|
84 | yield op_name, const_name, var_name
|
85 | #log('\t==> i = %d, n = %d', i, n)
|
86 |
|
87 |
|
88 | import sre_compile
|
89 |
|
90 | def _GetAttr(module, name):
|
91 | # Hack for bug in _fixup_range() ! (No longer in Python 3.6 head.)
|
92 | if module is sre_compile and name == 'l':
|
93 | return None
|
94 | # traceback.py has a hasattr() test
|
95 | if module is sys and name == 'tracebacklimit':
|
96 | return None
|
97 |
|
98 | try:
|
99 | val = getattr(module, name)
|
100 | except AttributeError:
|
101 | #log('%r not on %r', name, module)
|
102 | # This could raise too
|
103 | val = getattr(__builtin__, name)
|
104 | return val
|
105 |
|
106 |
|
107 | def _Walk(obj, cls, ref, syms):
|
108 | """
|
109 | Discover statically what (globally-accessible) functions and classes are
|
110 | used.
|
111 |
|
112 | Args:
|
113 | obj: a callable object
|
114 | cls: an optional class that it was attached to, could be None
|
115 | ref: the way the object was referenced from another place in the code
|
116 | syms: output Symbols()
|
117 |
|
118 | Something like this is OK:
|
119 | # def Adder(x):
|
120 | # def f(y):
|
121 | # return x + y
|
122 | # return f
|
123 |
|
124 | Because we'll still have access to the inner code object. We probably won't
|
125 | compile it though.
|
126 | """
|
127 | if syms.Seen(obj):
|
128 | return
|
129 |
|
130 | module_name = getattr(obj, '__module__', None)
|
131 | # NOTE: We get duplicate objects like this, due to bound methods on different
|
132 | # objects. Not sure how to fix it.
|
133 | #
|
134 | # OBJ <function Parse at 0x7fcd270b4de8>
|
135 | # OBJ <function Parse at 0x7fcd2709e500>
|
136 | # OBJ <built-in method get of dict object at 0x7fcd28c53280>
|
137 | # OBJ <built-in method get of dict object at 0x7fcd28c53398>
|
138 |
|
139 | #if obj.__name__ in ('write', 'get', 'Parse'):
|
140 | # log('OBJ %s %d', obj, id(obj))
|
141 | # pass
|
142 |
|
143 | # Oh is the namedtuple_ crap because of the Block class byterun/pyobj?
|
144 |
|
145 | if module_name is None or module_name in (
|
146 | 'namedtuple_Arguments', 'namedtuple_ArgSpec',
|
147 | 'namedtuple_Block'):
|
148 | syms.Add(obj, None, ref, None, None, None)
|
149 | return # Can't walk anything
|
150 |
|
151 | #log('OBJ %s %d', obj, id(obj))
|
152 | module = sys.modules[obj.__module__]
|
153 |
|
154 | co = getattr(obj, '__code__', None)
|
155 | # For example, Builtins don't have bytecode.
|
156 | if isinstance(co, types.CodeType):
|
157 | co = obj.__code__
|
158 | #log('CO %s', co)
|
159 | #path = co.co_filename
|
160 |
|
161 | mod_name = None
|
162 | mod_path = co.co_filename
|
163 |
|
164 | syms.Add(obj, cls, ref, mod_name, mod_path, co.co_firstlineno)
|
165 | else:
|
166 | mod_name = module.__name__
|
167 | try:
|
168 | mod_path = module.__file__
|
169 | if mod_path.endswith('.pyc'):
|
170 | mod_path = mod_path[:-1]
|
171 | except AttributeError:
|
172 | mod_path = None
|
173 |
|
174 | #if obj.__name__ == 'lex_mode_e':
|
175 | # log('!!! %s name %s path %s', obj, mod.__name__, mod.__file__)
|
176 |
|
177 | #mod_name = None
|
178 | #mod_path = None
|
179 |
|
180 | syms.Add(obj, cls, ref, mod_name, mod_path, None)
|
181 | return
|
182 |
|
183 | #log('\tNAME %s', val.__code__.co_name)
|
184 | #log('\tNAMES %s', val.__code__.co_names)
|
185 |
|
186 | # Most functions and classes we call are globals!
|
187 | #log('\t_Walk %s %s', obj, module)
|
188 | #log('\t%s', sorted(dir(module)))
|
189 |
|
190 | # Have to account for foo.Bar(), which gives this sequence:
|
191 | # 2 0 LOAD_GLOBAL 0 (foo)
|
192 | # 3 LOAD_ATTR 1 (Bar)
|
193 | # 6 CALL_FUNCTION 0
|
194 | #
|
195 | # Also: os.path.join().
|
196 |
|
197 | try:
|
198 | last_val = None # value from previous LOAD_GLOBAL or LOAD_ATTR
|
199 | ref = []
|
200 | g = Disassemble(obj.__code__)
|
201 |
|
202 | while True:
|
203 | op, const, var = g.next()
|
204 |
|
205 | if op == 'LOAD_GLOBAL':
|
206 | val = _GetAttr(module, var)
|
207 | ref = [var] # reset it
|
208 |
|
209 | elif op == 'LOAD_ATTR':
|
210 | #if last_val is not None and isinstance(last_val, types.ModuleType):
|
211 | if last_val is not None:
|
212 | #log('%s %s', op, var)
|
213 | val = _GetAttr(last_val, var)
|
214 | ref.append(var)
|
215 |
|
216 | # Crawl the methods below. Otherwise we get duplicate bound/unbound
|
217 | # methods, which have unique addresses.
|
218 | # Examples: WAIT_SPEC.Parse, sys.stdout.write
|
219 |
|
220 | # BUG: os.fork and sys.stdout.write are the same?
|
221 | # I thought os.fork is types.BuiltinFunctionType, and
|
222 | # sys.stdout.write is types.BuiltinMethodType, but why not?
|
223 |
|
224 | if isinstance(val, (types.MethodType, types.BuiltinMethodType)):
|
225 | val = None
|
226 | ref = []
|
227 |
|
228 | else:
|
229 | val = None
|
230 | ref = []
|
231 |
|
232 | else: # Some other opcode
|
233 | val = None
|
234 | ref = []
|
235 |
|
236 | if callable(val):
|
237 | #log('VAL %s module %s', val, val.__module__)
|
238 | # Recursive call.
|
239 |
|
240 | # Check for old style:
|
241 | #if isinstance(val, types.ClassType):
|
242 | # print('OLD %s' % val)
|
243 | _Walk(val, None, ref, syms)
|
244 |
|
245 | # If the value is a class, walk its methods. Note that we assume ALL
|
246 | # methods are used. It's possible to narrow this down a bit and detect
|
247 | # unused methods.
|
248 | if isinstance(val, type):
|
249 | cls = val
|
250 | #log('type %s', val)
|
251 | for name in dir(val):
|
252 | # prevent error with __abstractmethods__ attribute
|
253 | #if name.startswith('__'):
|
254 | if name == '__abstractmethods__':
|
255 | continue
|
256 | field_val = getattr(val, name)
|
257 | #log('field_val %s', field_val)
|
258 | if isinstance(field_val, types.MethodType):
|
259 | func_obj = field_val.im_func
|
260 | _Walk(func_obj, cls, ref, syms)
|
261 |
|
262 | last_val = val # Used on next iteration
|
263 |
|
264 | except StopIteration:
|
265 | pass
|
266 |
|
267 | #log('\tDone _Walk %s %s', obj, module)
|
268 |
|
269 |
|
270 | def PrintSig(fmt, func):
|
271 | if os.getenv('CALLGRAPH_SIG') != '1':
|
272 | return
|
273 | try:
|
274 | argspec = inspect.getargspec(func)
|
275 | except TypeError:
|
276 | return
|
277 | parts = [':%s' % ' '.join(argspec.args)]
|
278 | # These are keyword-only args?
|
279 | if argspec.varargs:
|
280 | parts.append('varargs=%s' % (argspec.varargs,))
|
281 | if argspec.keywords:
|
282 | parts.append('kw=%s' % (argspec.keywords,))
|
283 | # Hm the default of 'None' is bad -- you can't distinguish a default of None,
|
284 | # which is very common!
|
285 | # It's better to get this by parsing the AST.
|
286 | if argspec.defaults and any(d is not None for d in argspec.defaults):
|
287 | parts.append('defaults=%s' % (argspec.defaults,))
|
288 |
|
289 | print(fmt % ' '.join(parts))
|
290 |
|
291 |
|
292 | class Class(object):
|
293 |
|
294 | def __init__(self, cls, mod_name, mod_path):
|
295 | self.cls = cls
|
296 | self.mod_name = mod_name
|
297 | self.mod_path = mod_path
|
298 | self.methods = []
|
299 |
|
300 | def Name(self):
|
301 | return self.cls.__name__
|
302 |
|
303 | def AddMethod(self, m, mod_name, mod_path, line_num):
|
304 | # Just assume the method is in the same file as the class itself.
|
305 | self.methods.append((m, mod_name, mod_path, line_num))
|
306 |
|
307 | def Print(self):
|
308 | base_names = ' '.join(c.__name__ for c in self.cls.__bases__)
|
309 | print(' %s(%s)' % (self.cls.__name__, base_names))
|
310 |
|
311 | methods = [(m.__name__, m) for (m, _, _, _) in self.methods]
|
312 | methods.sort()
|
313 | for name, m in methods:
|
314 | print(' %s' % name)
|
315 | PrintSig(' %s', m)
|
316 |
|
317 | def __repr__(self):
|
318 | return '<Class %s %s %s %s>' % (
|
319 | self.cls, self.mod_name, self.mod_path, self.methods)
|
320 |
|
321 |
|
322 | class Symbols(object):
|
323 | """A sink for discovered symbols."""
|
324 |
|
325 | def __init__(self):
|
326 | self.seen = set()
|
327 |
|
328 | self.classes = {} # integer id() -> Class
|
329 | self.functions = [] # list of callables
|
330 |
|
331 | self.paths = {} # path -> list of functions
|
332 |
|
333 | def Seen(self, c):
|
334 | """c: a callable."""
|
335 | id_ = id(c)
|
336 | if id_ in self.seen:
|
337 | return True
|
338 | self.seen.add(id_)
|
339 | return False
|
340 |
|
341 | def Add(self, obj, cls, ref, mod_name, mod_path, line_num):
|
342 | """Could be a function, class Constructor, or method.
|
343 | Can also be native (C) or interpreted (Python with __code__ attribute.)
|
344 |
|
345 | Returns:
|
346 | True if we haven't yet seen it.
|
347 | """
|
348 | if mod_path is not None:
|
349 | mod_path = os.path.normpath(mod_path)
|
350 |
|
351 | if isinstance(obj, type):
|
352 | id_ = id(obj)
|
353 |
|
354 | # NOTE: Python's classes don't have a __code__ object, which appears to
|
355 | # be an irregularity. So we have to get location information from the
|
356 | # METHODS.
|
357 |
|
358 | # Exception: this type has a __code__ object that is not types.CodeType
|
359 | if obj is not types.FunctionType:
|
360 | assert not hasattr(obj, '__code__'), obj
|
361 | #assert path is None
|
362 | assert line_num is None
|
363 |
|
364 | self.classes[id_] = Class(obj, mod_name, mod_path)
|
365 |
|
366 | elif cls is not None:
|
367 | id_ = id(cls)
|
368 | descriptor = self.classes[id_]
|
369 | descriptor.AddMethod(obj, mod_name, mod_path, line_num)
|
370 |
|
371 | else:
|
372 | self.functions.append((obj, ref, mod_name, mod_path, line_num))
|
373 |
|
374 | return True
|
375 |
|
376 | def Report(self, f=sys.stdout):
|
377 | # Now categorize into files. We couldn't do that earlier because classes
|
378 | # don't know where they are located!
|
379 |
|
380 | py_srcs = collections.defaultdict(SourceFile)
|
381 | c_srcs = collections.defaultdict(SourceFile)
|
382 |
|
383 | for func, ref, mod_name, mod_path, line_num in self.functions:
|
384 | if mod_path:
|
385 | py_srcs[mod_path].functions.append((func, ref, line_num))
|
386 | else:
|
387 | c_srcs[mod_name].functions.append((func, ref, line_num))
|
388 |
|
389 | for cls in self.classes.values():
|
390 | #if cls.cls.__name__ == 'lex_mode_e':
|
391 | # log('!!! %s', cls)
|
392 |
|
393 | if cls.mod_path:
|
394 | py_srcs[cls.mod_path].classes.append(cls)
|
395 | else:
|
396 | c_srcs[cls.mod_name].classes.append(cls)
|
397 |
|
398 | prefix = os.getcwd()
|
399 | n = len(prefix) + 1
|
400 |
|
401 | #for name in sorted(py_srcs):
|
402 | # print(name)
|
403 |
|
404 | #return
|
405 |
|
406 | # Still missing: non-enum ASDL types? Why? CompoundObj?
|
407 | # command_e is there, but command and SimpleCommand aren't.
|
408 | # it's because we do
|
409 | # ast.command_e vs. ast.SimpleCommand
|
410 | # in both cases ast is a core.meta _AsdlModule?
|
411 |
|
412 | print('PYTHON CODE')
|
413 | print()
|
414 |
|
415 | for path in sorted(py_srcs):
|
416 | src = py_srcs[path]
|
417 |
|
418 | if path is not None and path.startswith(prefix):
|
419 | path = path[n:]
|
420 |
|
421 | print('%s' % path)
|
422 |
|
423 | for func, ref, _ in src.functions:
|
424 | third = func
|
425 | #third = ''
|
426 | #print(' %s [%s] %s' % (func.__name__, '.'.join(ref), third))
|
427 | try:
|
428 | print(' %s' % func.__name__)
|
429 | except AttributeError:
|
430 | # In the CI, when we don't have fastlex, we get _MatchOshToken_Slow()
|
431 | # instances
|
432 | print(" NOTE: %s found at top level, but it's not a simple function" % func)
|
433 |
|
434 | PrintSig(' %s', func)
|
435 |
|
436 | classes = [(c.Name(), c) for c in src.classes]
|
437 | classes.sort()
|
438 | for c in src.classes:
|
439 | c.Print()
|
440 |
|
441 | print()
|
442 |
|
443 | print('NATIVE CODE')
|
444 | print()
|
445 |
|
446 | for mod_name in sorted(c_srcs):
|
447 | src = c_srcs[mod_name]
|
448 |
|
449 | print('%s' % mod_name)
|
450 |
|
451 | for func, ref, _ in src.functions:
|
452 | #third = func
|
453 | third = ''
|
454 | #print(' %s [%s] %s' % (func.__name__, '.'.join(ref), third))
|
455 | print(' %s' % func.__name__)
|
456 |
|
457 | classes = [(c.Name(), c) for c in src.classes]
|
458 | classes.sort()
|
459 | for c in src.classes:
|
460 | c.Print()
|
461 |
|
462 | print()
|
463 |
|
464 |
|
465 | class SourceFile(object):
|
466 |
|
467 | def __init__(self):
|
468 | self.classes = []
|
469 | self.functions = []
|
470 |
|
471 |
|
472 | def Walk(main, modules):
|
473 | """Given a function main, finds all functions it transitively calls.
|
474 |
|
475 | Uses heuristic bytecode analysis. Limitations:
|
476 |
|
477 | - functions that are local variables might not work? But this should work:
|
478 |
|
479 | if x > 0:
|
480 | f = GlobalFunc
|
481 | else:
|
482 | f = OtherGlobalFunc
|
483 | f() # The LOAD_GLOBAL will be found.
|
484 |
|
485 | Args:
|
486 | main: function
|
487 | modules: Dict[str, module]
|
488 | """
|
489 | syms = Symbols()
|
490 | _Walk(main, None, ['main'], syms)
|
491 |
|
492 | # TODO:
|
493 | # - co_consts should be unified? So we know how big the const pool is.
|
494 |
|
495 | syms.Report()
|
496 |
|