OILS / opy / _regtest / src / opy / callgraph.py View on Github | oilshell.org

477 lines, 231 significant
1#!/usr/bin/python
2from __future__ import print_function
3"""
4callgraph.py
5"""
6
7import collections
8import dis
9import inspect
10import os
11import sys
12
13import __builtin__ # For looking up names
14import types
15
16from core import util
17log = util.log
18
19
20def Disassemble(co):
21 """Given a code object, yield instructions of interest.
22
23 Args:
24 co: __code__ attribute
25
26 Structure copied from misc/inspect_pyc.py, which was copied from
27 dis.disassemble().
28 """
29 code = co.co_code
30 extended_arg = 0 # Not used
31
32 i = 0
33 n = len(code)
34 #log('\tLENGTH OF CODE %s: %d', co.co_name, n)
35
36 while i < n:
37 #log('\ti = %d, n = %d', i, n)
38 op = ord(code[i])
39 i += 1
40
41 op_name = dis.opname[op]
42
43 # operation is 1 byte, argument is 2 bytes.
44 if op >= dis.HAVE_ARGUMENT:
45 oparg = ord(code[i]) + ord(code[i+1])*256 + extended_arg
46 extended_arg = 0
47
48 if op == dis.EXTENDED_ARG:
49 # Hm not used?
50 raise AssertionError
51 extended_arg = oparg*65536L
52
53 i += 2
54
55 const_name = None
56 var_name = None
57
58 if op in dis.hasconst:
59 const_name = co.co_consts[oparg]
60
61 elif op in dis.hasname:
62 try:
63 var_name = co.co_names[oparg]
64 except IndexError:
65 log('Error: %r cannot index %s with %d', op_name, co.co_names,
66 oparg)
67 raise
68
69 elif op in dis.hasjrel:
70 #raise AssertionError(op_name)
71 pass
72
73 elif op in dis.haslocal:
74 #raise AssertionError(op_name)
75 pass
76
77 elif op in dis.hascompare:
78 #raise AssertionError(op_name)
79 pass
80
81 elif op in dis.hasfree:
82 #raise AssertionError(op_name)
83 pass
84
85 yield op_name, const_name, var_name
86 #log('\t==> i = %d, n = %d', i, n)
87
88
89import sre_compile
90
91def _GetAttr(module, name):
92 # Hack for bug in _fixup_range() ! (No longer in Python 3.6 head.)
93 if module is sre_compile and name == 'l':
94 return None
95 # traceback.py has a hasattr() test
96 if module is sys and name == 'tracebacklimit':
97 return None
98
99 try:
100 val = getattr(module, name)
101 except AttributeError:
102 #log('%r not on %r', name, module)
103 # This could raise too
104 val = getattr(__builtin__, name)
105 return val
106
107
108def _Walk(obj, cls, ref, syms):
109 """
110 Discover statically what (globally-accessible) functions and classes are
111 used.
112
113 Args:
114 obj: a callable object
115 cls: an optional class that it was attached to, could be None
116 ref: the way the object was referenced from another place in the code
117 syms: output Symbols()
118
119 Something like this is OK:
120 # def Adder(x):
121 # def f(y):
122 # return x + y
123 # return f
124
125 Because we'll still have access to the inner code object. We probably won't
126 compile it though.
127 """
128 if syms.Seen(obj):
129 return
130
131 module_name = getattr(obj, '__module__', None)
132 # NOTE: We get duplicate objects like this, due to bound methods on different
133 # objects. Not sure how to fix it.
134 #
135 # OBJ <function Parse at 0x7fcd270b4de8>
136 # OBJ <function Parse at 0x7fcd2709e500>
137 # OBJ <built-in method get of dict object at 0x7fcd28c53280>
138 # OBJ <built-in method get of dict object at 0x7fcd28c53398>
139
140 #if obj.__name__ in ('write', 'get', 'Parse'):
141 # log('OBJ %s %d', obj, id(obj))
142 # pass
143
144 if module_name is None:
145 syms.Add(obj, None, ref, None, None, None)
146 return # Can't walk anything
147
148 module = sys.modules[obj.__module__]
149
150 #print(obj)
151 if hasattr(obj, '__code__'): # Builtins don't have bytecode.
152 co = obj.__code__
153 #path = co.co_filename
154
155 mod_name = None
156 mod_path = co.co_filename
157
158 syms.Add(obj, cls, ref, mod_name, mod_path, co.co_firstlineno)
159 else:
160 mod_name = module.__name__
161 try:
162 mod_path = module.__file__
163 if mod_path.endswith('.pyc'):
164 mod_path = mod_path[:-1]
165 except AttributeError:
166 mod_path = None
167
168 #if obj.__name__ == 'lex_mode_e':
169 # log('!!! %s name %s path %s', obj, mod.__name__, mod.__file__)
170
171 #mod_name = None
172 #mod_path = None
173
174 syms.Add(obj, cls, ref, mod_name, mod_path, None)
175 return
176
177 #log('\tNAME %s', val.__code__.co_name)
178 #log('\tNAMES %s', val.__code__.co_names)
179
180 # Most functions and classes we call are globals!
181 #log('\t_Walk %s %s', obj, module)
182 #log('\t%s', sorted(dir(module)))
183
184 # Have to account for foo.Bar(), which gives this sequence:
185 # 2 0 LOAD_GLOBAL 0 (foo)
186 # 3 LOAD_ATTR 1 (Bar)
187 # 6 CALL_FUNCTION 0
188 #
189 # Also: os.path.join().
190
191 try:
192 last_val = None # value from previous LOAD_GLOBAL or LOAD_ATTR
193 ref = []
194 g = Disassemble(obj.__code__)
195
196 while True:
197 op, const, var = g.next()
198
199 if op == 'LOAD_GLOBAL':
200 val = _GetAttr(module, var)
201 ref = [var] # reset it
202
203 elif op == 'LOAD_ATTR':
204 #if last_val is not None and isinstance(last_val, types.ModuleType):
205 if last_val is not None:
206 #log('%s %s', op, var)
207 val = _GetAttr(last_val, var)
208 ref.append(var)
209
210 # Crawl the methods below. Otherwise we get duplicate bound/unbound
211 # methods, which have unique addresses.
212 # Examples: WAIT_SPEC.Parse, sys.stdout.write
213
214 # BUG: os.fork and sys.stdout.write are the same?
215 # I thought os.fork is types.BuiltinFunctionType, and
216 # sys.stdout.write is types.BuiltinMethodType, but why not?
217
218 if isinstance(val, (types.MethodType, types.BuiltinMethodType)):
219 val = None
220 ref = []
221
222 else:
223 val = None
224 ref = []
225
226 else: # Some other opcode
227 val = None
228 ref = []
229
230 if callable(val):
231 #log('VAL %s module %s', val, val.__module__)
232 # Recursive call.
233 _Walk(val, None, ref, syms)
234
235 # If the value is a class, walk its methods. Note that we assume ALL
236 # methods are used. It's possible to narrow this down a bit and detect
237 # unused methods.
238 if isinstance(val, type):
239 cls = val
240 #log('type %s', val)
241 for name in dir(val):
242 # prevent error with __abstractmethods__ attribute
243 #if name.startswith('__'):
244 if name == '__abstractmethods__':
245 continue
246 field_val = getattr(val, name)
247 #log('field_val %s', field_val)
248 if isinstance(field_val, types.MethodType):
249 func_obj = field_val.im_func
250 _Walk(func_obj, cls, ref, syms)
251
252 last_val = val # Used on next iteration
253
254 except StopIteration:
255 pass
256
257 #log('\tDone _Walk %s %s', obj, module)
258
259
260def PrintSig(fmt, func):
261 if os.getenv('CALLGRAPH_SIG') != '1':
262 return
263 try:
264 argspec = inspect.getargspec(func)
265 except TypeError:
266 return
267 parts = [':%s' % ' '.join(argspec.args)]
268 # These are keyword-only args?
269 if argspec.varargs:
270 parts.append('varargs=%s' % (argspec.varargs,))
271 if argspec.keywords:
272 parts.append('kw=%s' % (argspec.keywords,))
273 # Hm the default of 'None' is bad -- you can't distinguish a default of None,
274 # which is very common!
275 # It's better to get this by parsing the AST.
276 if argspec.defaults and any(d is not None for d in argspec.defaults):
277 parts.append('defaults=%s' % (argspec.defaults,))
278
279 print(fmt % ' '.join(parts))
280
281
282class Class(object):
283
284 def __init__(self, cls, mod_name, mod_path):
285 self.cls = cls
286 self.mod_name = mod_name
287 self.mod_path = mod_path
288 self.methods = []
289
290 def Name(self):
291 return self.cls.__name__
292
293 def AddMethod(self, m, mod_name, mod_path, line_num):
294 # Just assume the method is in the same file as the class itself.
295 self.methods.append((m, mod_name, mod_path, line_num))
296
297 def Print(self):
298 base_names = ' '.join(c.__name__ for c in self.cls.__bases__)
299 print(' %s(%s)' % (self.cls.__name__, base_names))
300
301 methods = [(m.__name__, m) for (m, _, _, _) in self.methods]
302 methods.sort()
303 for name, m in methods:
304 print(' %s' % name)
305 PrintSig(' %s', m)
306
307 def __repr__(self):
308 return '<Class %s %s %s %s>' % (
309 self.cls, self.mod_name, self.mod_path, self.methods)
310
311
312class Symbols(object):
313 """A sink for discovered symbols."""
314
315 def __init__(self):
316 self.seen = set()
317
318 self.classes = {} # integer id() -> Class
319 self.functions = [] # list of callables
320
321 self.paths = {} # path -> list of functions
322
323 def Seen(self, c):
324 """c: a callable."""
325 id_ = id(c)
326 if id_ in self.seen:
327 return True
328 self.seen.add(id_)
329 return False
330
331 def Add(self, obj, cls, ref, mod_name, mod_path, line_num):
332 """Could be a function, class Constructor, or method.
333 Can also be native (C) or interpreted (Python with __code__ attribute.)
334
335 Returns:
336 True if we haven't yet seen it.
337 """
338 if mod_path is not None:
339 mod_path = os.path.normpath(mod_path)
340
341 if isinstance(obj, type):
342 id_ = id(obj)
343
344 # NOTE: Python's classes don't have a __code__ object, which appears to
345 # be an irregularity. So we have to get location information from the
346 # METHODS.
347 assert not hasattr(obj, '__code__'), obj
348 #assert path is None
349 assert line_num is None
350
351 self.classes[id_] = Class(obj, mod_name, mod_path)
352
353 elif cls is not None:
354 id_ = id(cls)
355 descriptor = self.classes[id_]
356 descriptor.AddMethod(obj, mod_name, mod_path, line_num)
357
358 else:
359 self.functions.append((obj, ref, mod_name, mod_path, line_num))
360
361 return True
362
363 def Report(self, f=sys.stdout):
364 # Now categorize into files. We couldn't do that earlier because classes
365 # don't know where they are located!
366
367 py_srcs = collections.defaultdict(SourceFile)
368 c_srcs = collections.defaultdict(SourceFile)
369
370 for func, ref, mod_name, mod_path, line_num in self.functions:
371 if mod_path:
372 py_srcs[mod_path].functions.append((func, ref, line_num))
373 else:
374 c_srcs[mod_name].functions.append((func, ref, line_num))
375
376 for cls in self.classes.values():
377 #if cls.cls.__name__ == 'lex_mode_e':
378 # log('!!! %s', cls)
379
380 if cls.mod_path:
381 py_srcs[cls.mod_path].classes.append(cls)
382 else:
383 c_srcs[cls.mod_name].classes.append(cls)
384
385 prefix = os.getcwd()
386 n = len(prefix) + 1
387
388 #for name in sorted(py_srcs):
389 # print(name)
390
391 #return
392
393 # Still missing: non-enum ASDL types? Why? CompoundObj?
394 # command_e is there, but command and SimpleCommand aren't.
395 # it's because we do
396 # ast.command_e vs. ast.SimpleCommand
397 # in both cases ast is a osh/meta _AsdlModule?
398
399 print('PYTHON CODE')
400 print()
401
402 for path in sorted(py_srcs):
403 src = py_srcs[path]
404
405 if path is not None and path.startswith(prefix):
406 path = path[n:]
407
408 print('%s' % path)
409
410 for func, ref, _ in src.functions:
411 #third = func
412 third = ''
413 #print(' %s [%s] %s' % (func.__name__, '.'.join(ref), third))
414 print(' %s' % func.__name__)
415 PrintSig(' %s', func)
416
417 classes = [(c.Name(), c) for c in src.classes]
418 classes.sort()
419 for c in src.classes:
420 c.Print()
421
422 print()
423
424 print('NATIVE CODE')
425 print()
426
427 for mod_name in sorted(c_srcs):
428 src = c_srcs[mod_name]
429
430 print('%s' % mod_name)
431
432 for func, ref, _ in src.functions:
433 #third = func
434 third = ''
435 #print(' %s [%s] %s' % (func.__name__, '.'.join(ref), third))
436 print(' %s' % func.__name__)
437
438 classes = [(c.Name(), c) for c in src.classes]
439 classes.sort()
440 for c in src.classes:
441 c.Print()
442
443 print()
444
445
446class SourceFile(object):
447
448 def __init__(self):
449 self.classes = []
450 self.functions = []
451
452
453def Walk(main, modules):
454 """Given a function main, finds all functions it transitively calls.
455
456 Uses heuristic bytecode analysis. Limitations:
457
458 - functions that are local variables might not work? But this should work:
459
460 if x > 0:
461 f = GlobalFunc
462 else:
463 f = OtherGlobalFunc
464 f() # The LOAD_GLOBAL will be found.
465
466 Args:
467 main: function
468 modules: Dict[str, module]
469 """
470 syms = Symbols()
471 _Walk(main, None, ['main'], syms)
472
473 # TODO:
474 # - co_consts should be unified? So we know how big the const pool is.
475
476 syms.Report()
477