| 1 | """
 | 
| 2 | resolve.py
 | 
| 3 | 
 | 
| 4 | TODO: Instead of pickling everything separately, copy sys.modules
 | 
| 5 | into a dict of dicts.
 | 
| 6 | 
 | 
| 7 | { 'osh.cmd_parse': { ... },  # dict instead of a module
 | 
| 8 |   'osh.bool_parse': { ... },
 | 
| 9 | }
 | 
| 10 | Then I think the sharing will work.
 | 
| 11 | 
 | 
| 12 | - Hook this up to oheap2.py?
 | 
| 13 |   - Instead of only pickling code objects (with string/tuple/int), you can
 | 
| 14 |     pickle a graph of user-defined classes after running?  Just like pickle.py
 | 
| 15 |     does it.
 | 
| 16 |   - Well I was thinking of doing that INSIDE OVM2, rather than in CPython.  But
 | 
| 17 |     either way would work.
 | 
| 18 | 
 | 
| 19 | - Combine callgraph.py and this module?
 | 
| 20 |   - resolve.py finds all functions/classes/globals via sys.modules
 | 
| 21 |   - callgraph.py finds all of them via use in the bytecode
 | 
| 22 | 
 | 
| 23 | - We should produce a unified report and double check.
 | 
| 24 | - Might also need to combine them with build/cpython-defs.py.
 | 
| 25 |   - We also need the mapping from filenames to modules, which is really in the
 | 
| 26 |     build system.  _build/oil/module_init.c has the names and extern
 | 
| 27 |     declarations.  We could manually make a list.
 | 
| 28 | """
 | 
| 29 | from __future__ import print_function
 | 
| 30 | 
 | 
| 31 | import sys
 | 
| 32 | import pickle
 | 
| 33 | import copy_reg  # dependency of pickle, exclude it
 | 
| 34 | import types
 | 
| 35 | 
 | 
| 36 | from opy.util import log
 | 
| 37 | from pylib import unpickle
 | 
| 38 | 
 | 
| 39 | import __builtin__  # this is not __builtins__
 | 
| 40 | 
 | 
| 41 | 
 | 
def banner(msg):
  """Log msg as a heading: an empty log line, then msg, then an empty line."""
  for line in ('', msg, ''):
    log(line)
 | 
| 46 | 
 | 
| 47 | 
 | 
def PrintVars(global_vars):
  """Log every collected global variable and tally the value types.

  Args:
    global_vars: list of (mod_name, name, obj) tuples.  It is sorted IN
      PLACE, so the caller's list is reordered (the module name is the first
      tuple field, hence the primary sort key).

  Returns:
    A collections.Counter mapping str(type(obj)) to the number of entries
    with that type.
  """
  import collections

  banner('VARIABLES')
  global_vars.sort()  # sort by module name

  # First pass: dump every entry and build the type histogram.
  type_counts = collections.Counter()
  for entry in global_vars:
    mod_name, name, obj = entry
    log('%-15s %-15s %r', mod_name, name, obj)
    type_counts[str(type(obj))] += 1

  # Second pass: repeat only the entries whose type name mentions IdSpec.
  # ID_SPEC is in core/meta and frontend/lex
  id_spec_entries = [
      entry for entry in global_vars if 'IdSpec' in str(type(entry[2]))
  ]
  for mod_name, name, obj in id_spec_entries:
    log('%-20s %-15s %r', mod_name, name, obj)
  log('')

  return type_counts
 | 
| 67 | 
 | 
| 68 | 
 | 
def PrintFuncs(funcs):
  """Log every collected function under a 'FUNCTIONS' banner.

  Args:
    funcs: list of (mod_name, name, obj) tuples.  It is sorted IN PLACE, so
      the caller's list is reordered (the module name is the first tuple
      field, hence the primary sort key).

  Returns:
    None.
  """
  banner('FUNCTIONS')
  funcs.sort()  # sort by module name

  for (mod_name, name, obj) in funcs:
    log('%-20s %-15s %r', mod_name, name, obj)
 | 
| 79 | 
 | 
| 80 | 
 | 
# Attribute names that PrintClasses skips when it walks dir() of a class.
OMITTED = (
    '__class__',
    '__dict__',
    '__doc__',
    '__getattribute__',
    '__module__',
    '__reduce__',
    '__slots__',
    '__subclasshook__',
)
 | 
| 84 | 
 | 
def PrintClasses(classes):
  """Log each collected class with its methods and constant-like attributes.

  Args:
    classes: list of (mod_name, name, cls) tuples.  It is sorted IN PLACE, so
      the caller's list is reordered.

  Returns:
    A (num_methods, type_hist) pair: num_methods is the total number of
    attribute names logged as methods (types.MethodType instances), and
    type_hist is a collections.Counter keyed by str(type(attr)) covering
    every attribute that survived the id()-based de-duplication.
  """
  import collections

  banner('CLASSES')
  classes.sort()  # sort by module name

  attr_type_counts = collections.Counter()
  method_total = 0

  # Keep ALL unbound methods, so that we force them to have different IDs!
  # https://stackoverflow.com/questions/13348031/ids-of-bound-and-unbound-method-objects-sometimes-the-same-for-different-o
  # If we remove this, then the de-duping doesn't work properly.  unbound
  # methods use the silly descriptor protocol.
  keep_alive = []
  ids_seen = set()

  for (mod_name, cls_name, cls) in classes:
    log('%-20s %-15s %r', mod_name, cls_name, cls)

    method_names = []
    for attr_name in dir(cls):
      if attr_name in OMITTED:
        continue

      attr = getattr(cls, attr_name)
      keep_alive.append(attr)  # must happen before id() is taken, see above

      # An attribute is processed on its first sighting; bool / int / None
      # values are processed every time, even when their id was seen before.
      attr_id = id(attr)
      first_sighting = attr_id not in ids_seen
      if not (first_sighting or
              isinstance(attr, (bool, int, types.NoneType))):
        continue
      ids_seen.add(attr_id)

      attr_type_counts[str(type(attr))] += 1

      # There are A LOT of other types.  Classes are complicated.
      if isinstance(attr, types.MethodType):
        method_names.append(attr_name)

      # user-defined class attributes shouldn't be used
      # None is the tag for SimpleObj.
      if isinstance(attr, (bool, int, long, tuple, list, dict, set, str, type, types.NoneType)):
        log('  (C) %s %s', attr_name, attr)

    for method_name in sorted(method_names):
      log('  %s', method_name)
    method_total += len(method_names)

  return method_total, attr_type_counts
 | 
| 139 | 
 | 
| 140 | 
 | 
def _ShouldSkipPickle(name, obj, mod_dict):
  """Return True if Walk should not try to pickle this module attribute.

  Args:
    name: attribute name inside the module.
    obj: the attribute value.
    mod_dict: the dict being walked; it is skipped when reached as a value.
  """
  # Modules (e.g. ASDL modules) and open files (types_asdl.pickle is opened).
  if isinstance(obj, (types.ModuleType, types.FileType)):
    return True

  skipped_names = (
      'Struct',  # struct module
      'InputType', 'OutputType', 'cStringIO_CAPI',  # cStringIO
      '_pattern_type',  # re
  )
  if name in skipped_names:
    return True

  skipped_objects = (
      __builtin__.Ellipsis,
      __builtin__.NotImplemented,
      types.BuiltinFunctionType,
      types.ClassType,
      mod_dict,  # circular!
  )
  # Identity comparison on purpose: never invoke __eq__ on arbitrary objects.
  return any(obj is skipped for skipped in skipped_objects)


def Walk(mod_dict):
  """
  Test if the objects in Oil can be pickled.

  Every non-dunder attribute of every module in mod_dict is visited once
  (de-duplicated by id()), pickled with pickle.dumps(), and then classified
  as a function, a class, or a global variable.  Summary counts and the
  class report from PrintClasses are written with log().

  Args:
    mod_dict: dict of module name -> module object (presumably a copy of
      sys.modules -- confirm against the caller).

  Returns:
    None.
  """
  # vars that are neither classes nor functions, which are presumed to be
  # constant
  global_vars = []
  classes = []  # user-defined classes
  funcs = []  # functions

  ids_seen = set()  # id() of every object already visited

  object_count = 0
  module_count = 0
  total_bytes = 0

  # Modules this tool itself depends on; walking them is pointless.
  excluded_mods = (pickle, copy_reg, unpickle, sys, types)

  # sys.modules gets polluted because of pickle, etc.
  noise_mod_names = ('codecs', 'encodings', 'encodings.aliases', 're',
                     'sre_constants', 'sre_compile', 'sre_parse')

  for mod_name, mod in mod_dict.iteritems():
    if any(mod is excluded for excluded in excluded_mods):
      continue

    attr_names = dir(mod)
    log('mod %s', mod)
    for name in attr_names:
      if name.startswith('__'):
        continue

      obj = getattr(mod, name)

      obj_id = id(obj)
      if obj_id in ids_seen:
        continue
      ids_seen.add(obj_id)

      log('%r = %r', name, obj)
      if _ShouldSkipPickle(name, obj, mod_dict):
        continue

      pickled = pickle.dumps(obj)

      # NOTE: this could be inefficient because it's a graph, not a tree.
      total_bytes += len(pickled)
      log('%d bytes', total_bytes)  # running total, not the size of this obj
      object_count += 1

      # Everything below only classifies; the object was already pickled and
      # counted above.
      if name == 'print_function':
        continue

      # get rid of _sre, _warnings, etc.
      # still might want _struct
      is_private_mod = (mod_name.startswith('_') and
                        not mod_name.startswith('_devbuild'))
      if is_private_mod or mod_name in noise_mod_names:
        continue

      # builtin functions can't be compiled, and they are not constants.
      if isinstance(obj, types.BuiltinFunctionType):
        continue

      entry = (mod_name, name, obj)
      if isinstance(obj, types.FunctionType):
        funcs.append(entry)
      elif isinstance(obj, type):
        # user-defined types, not types.ClassType which is old-style
        classes.append(entry)
      else:
        global_vars.append(entry)

    module_count += 1

  log('Pickled %d objects in %d modules', object_count, module_count)
  log('')

  # The 'if 0:' sections are debugging toggles.  The 'Global variable types'
  # section reads var_type_hist, so it needs the PrintVars toggle enabled too.
  if 0:
    var_type_hist = PrintVars(global_vars)
    log('')

  num_methods, attr_type_hist = PrintClasses(classes)
  log('')

  if 0:
    PrintFuncs(funcs)
    log('')

  if 0:
    log('Global variable types:')
    for type_str, count in var_type_hist.most_common():
      log('%10d %s', count, type_str)
    log('')

  log('Class attribute types:')
  for type_str, count in attr_type_hist.most_common():
    log('%10d %s', count, type_str)
  log('')

  # Audit what's at the top level.  int, dict, str, list are most common, then
  # FlagSpec.
  log('%d global vars', len(global_vars))
  log('%d user-defined classes, with %d total methods on them', len(classes),
      num_methods)
  log('%d user-defined functions', len(funcs))
 | 
| 269 | 
 | 
| 270 | 
 |