| 1 | #!/usr/bin/env python2
 | 
| 2 | from __future__ import print_function
 | 
| 3 | """inspect_pyc module
 | 
| 4 | 
 | 
| 5 | This is a refactor of a recipe from Ned Batchelder's blog.  He has
 | 
| 6 | given me permission to publish this.  You can find the post at the
 | 
| 7 | following URL:
 | 
| 8 | 
 | 
| 9 |   http://nedbatchelder.com/blog/200804/the_structure_of_pyc_files.html
 | 
| 10 | 
 | 
| 11 | You may use this module as a script: "./inspect_pyc.py <PYC_FILE>".
 | 
| 12 | 
 | 
| 13 | NOTE:
 | 
| 14 | You can also see bytecode with:
 | 
| 15 | import os, dis
 | 
| 16 | dis.dis(os)
 | 
| 17 | 
 | 
| 18 | But that doesn't give all the metadata.  It's also nicer than
 | 
| 19 | tools/dumppyc.py, which came with the 'compiler2' package.
 | 
| 20 | """
 | 
| 21 | 
 | 
| 22 | import marshal, struct, sys, time, types
 | 
| 23 | 
 | 
| 24 | import consts  # this package
 | 
| 25 | 
 | 
| 26 | from opy.lib import dis
 | 
| 27 | 
 | 
| 28 | 
 | 
# String repeated once per nesting level to indent printed output.
INDENT = '  '
# Number of bytes that to_hexstr() puts in each row of hex output.
MAX_HEX_LEN = 16
# Width that show_code() pads the "co_xxx:" attribute labels to.
NAME_OFFSET = 20
 | 
| 32 | 
 | 
| 33 | 
 | 
def to_hexstr(bytes_value, level=0, wrap=False):
    """Render a byte sequence as space-separated two-digit hex values.

    Args:
      bytes_value: sequence of bytes; items may be ints or 1-char strings
      level: nesting level; only used when wrap is true
      wrap: if true, emit rows of MAX_HEX_LEN bytes separated by newlines,
        each row prefixed with INDENT * level.  Otherwise return everything
        on a single line with no indentation.

    Returns:
      The formatted string.
    """
    try:
        cells = ["%02x" % item for item in bytes_value]
    except TypeError:
        # Items are characters rather than ints (e.g. a Python 2 str).
        cells = ["%02x" % ord(item) for item in bytes_value]

    rows = [
        " ".join(cells[start:start + MAX_HEX_LEN])
        for start in range(0, len(cells), MAX_HEX_LEN)
    ]

    if not wrap:
        return " ".join(rows)

    pad = INDENT * level
    return pad + ("\n" + pad).join(rows)
 | 
| 49 | 
 | 
| 50 | 
 | 
def ShowFlags(flags):
  """Format a flags integer as hex, followed by the names of the set bits.

  Names are looked up in consts.VALUE_TO_NAME and listed in ascending order
  of their bit value.

  Args:
    flags: integer bit field, e.g. co_flags of a code object

  Returns:
    A string like "0x00043 NAME1 NAME2", or just the hex value when no known
    bit is set.
  """
  hex_str = "0x%05x" % flags
  names = [
      consts.VALUE_TO_NAME[bit]
      for bit in sorted(consts.VALUE_TO_NAME)
      if flags & bit
  ]
  if not names:
    return hex_str
  return '%s %s' % (hex_str, ' '.join(names))
 | 
| 62 | 
 | 
| 63 | 
 | 
def unpack_pyc(f):
    """Read the header and the top-level object from an open .pyc file.

    The layout read here is: 4 bytes of magic number, 4 bytes of modification
    time, then a marshalled object (normally the module's code object).

    NOTE: marshal.load() is not safe against malicious data; only use this on
    .pyc files you trust.

    Args:
      f: binary file object positioned at the start of the .pyc data

    Returns:
      (magic, unixtime, timestamp, code) where magic is the raw 4 bytes,
      unixtime is the modification time as an integer, timestamp is that time
      formatted with time.asctime() in the local time zone, and code is the
      unmarshalled object.
    """
    magic = f.read(4)
    # The mtime field is stored little-endian regardless of platform.  A bare
    # "I" would decode it with the host's native byte order.
    unixtime = struct.unpack("<I", f.read(4))[0]
    timestamp = time.asctime(time.localtime(unixtime))
    code = marshal.load(f)
    return magic, unixtime, timestamp, code
 | 
| 70 | 
 | 
| 71 | 
 | 
| 72 | # Enhancements:
 | 
| 73 | # - Actually print the line of code!  That will be very helpful.
 | 
| 74 | 
 | 
def disassemble(co, indent, f):
  """Print a disassembly of a code object, one instruction per line.

  Copied from the dis module.

  Each line shows: the indent, the source line number (only on the first
  instruction of a source line), '>>' if the offset is a jump target, the
  byte offset, the opcode name, and for opcodes with an argument, the
  argument followed by a decoded form in parentheses where one is known.
  A blank line separates the instructions of different source lines.

  Instructions are decoded as one opcode byte, followed by a 16-bit
  little-endian argument for opcodes >= dis.HAVE_ARGUMENT.

  Args:
    co: code object
    indent: indentation to print with
    f: file-like object the listing is written to

  NOTE: byterun/pyobj.py:Frame.decode_next does something very similar.
  """
  def out(*args, **kwargs):
    print(*args, file=f, **kwargs)

  code = co.co_code
  # bytearray() yields ints for both str and bytes input, so individual bytes
  # don't need ord().
  ops = bytearray(code)
  labels = dis.findlabels(code)
  linestarts = dict(dis.findlinestarts(co))
  n = len(ops)
  i = 0
  extended_arg = 0
  free = None  # co_cellvars + co_freevars, built on first use

  while i < n:
    op = ops[i]

    if i in linestarts:
      if i > 0:
        out()
      prefix = linestarts[i]
    else:
      prefix = ''
    out('%s%4s' % (indent, prefix), end=' ')

    if i in labels:  # Jump targets get a special symbol
      arrow = '>>'
    else:
      arrow = '  '

    out(' %s %4r %-20s ' % (arrow, i, dis.opname[op]), end=' ')
    i += 1
    if op >= dis.HAVE_ARGUMENT:
      oparg = ops[i] + ops[i+1]*256 + extended_arg
      extended_arg = 0
      i += 2
      if op == dis.EXTENDED_ARG:
        # Plain int literal: a long literal here would make the next
        # instruction's argument print with an 'L' suffix on Python 2.
        extended_arg = oparg * 65536

      oparg_str = None

      if op in dis.hasconst:
        const = co.co_consts[oparg]
        if isinstance(const, types.CodeType):
          # %r prints a memory address, which inhibits diffing
          oparg_str = '(code object %s %s %s)' % (
              const.co_name, const.co_filename, const.co_firstlineno)
        else:
          oparg_str = '(%r)' % (const,)

      elif op in dis.hasname:
        oparg_str = '(%s)' % (co.co_names[oparg],)

      elif op in dis.hasjrel:
        # i already points past this instruction, so this is the target.
        oparg_str = '(to %r)' % (i + oparg,)

      elif op in dis.haslocal:
        oparg_str = '(%s)' % (co.co_varnames[oparg],)

      elif op in dis.hascompare:
        oparg_str = '(%s)' % (dis.cmp_op[oparg],)

      elif op in dis.hasfree:
        if free is None:
          free = co.co_cellvars + co.co_freevars
        oparg_str = '(%s)' % (free[oparg],)

      if oparg_str:
        out('%5r %s' % (oparg, oparg_str), end=' ')
      else:
        out('%5r' % oparg, end=' ')

    out()
 | 
| 155 | 
 | 
| 156 | 
 | 
def ParseOps(code):
  """A lightweight parser.  Does some of what disassemble() does.

  Instructions are decoded as one opcode byte, followed by a 16-bit
  little-endian argument for opcodes >= dis.HAVE_ARGUMENT.  The argument of
  an EXTENDED_ARG instruction supplies the high bits of the next
  instruction's argument.

  Args:
    code: bytecode string, e.g. co.co_code

  Yields:
    (opname, oparg) pairs in bytecode order.  oparg is None for opcodes that
    take no argument.
  """
  ops = bytearray(code)  # ints for both str and bytes input
  n = len(ops)
  i = 0
  extended_arg = 0

  while i < n:
    op = ops[i]
    i += 1

    # Reset for every instruction so that argument-less opcodes never report
    # the previous instruction's argument (or fail when they come first).
    oparg = None
    if op >= dis.HAVE_ARGUMENT:
      oparg = ops[i] + ops[i+1]*256 + extended_arg
      extended_arg = 0
      i += 2
      if op == dis.EXTENDED_ARG:
        extended_arg = oparg * 65536

    yield dis.opname[op], oparg
 | 
| 177 | 
 | 
| 178 | 
 | 
class Visitor(object):
  """Prints a readable listing of a .pyc file and its code objects to stdout.

  Nested code objects found in co_consts are printed recursively, indented by
  their nesting level.
  """

  def __init__(self, dis_bytecode=True, co_name=None):
    """
    Args:
      dis_bytecode: Whether to show disassembly.
      co_name: only print code object with exact name (and its children)
    """
    self.dis_bytecode = dis_bytecode
    # Name of thing to print
    self.co_name = co_name

  def show_consts(self, consts, level=0, filtered=True):
    """Print every constant, descending into nested code objects.

    Args:
      consts: sequence of constants, e.g. co_consts of a code object
      level: nesting level, used for indentation
      filtered: passed to show_code() for each nested code object
    """
    indent = INDENT * level
    for i, obj in enumerate(consts):
      if isinstance(obj, types.CodeType):
        print("%s%s (code object)" % (indent, i))
        # RECURSIVE CALL.
        self.show_code(obj, level=level+1, filtered=filtered)
      else:
        print("%s%s %r" % (indent, i, obj))

  def maybe_show_consts(self, consts, level=0):
    """Search nested code objects without printing the other constants.

    Used while looking for the code object named self.co_name: each code
    object in consts is handed to show_code(), which applies the name filter.
    """
    for obj in consts:
      if isinstance(obj, types.CodeType):
        self.show_code(obj, level=level+1)   # RECURSIVE CALL.

  def show_bytecode(self, code, level=0):
    """Print co_code as hex, and as a disassembly if self.dis_bytecode is set.

    Args:
      code: code object
      level: nesting level, used for indentation
    """
    indent = INDENT * level
    print(to_hexstr(code.co_code, level, wrap=True))

    if self.dis_bytecode:
      print(indent + "disassembled:")
      disassemble(code, indent, sys.stdout)

  def show_code(self, code, level=0, filtered=True):
    """Print a code object, e.g. metadata, bytecode, and consts.

    Args:
      code: code object
      level: nesting level, used for indentation
      filtered: if true and self.co_name is set, a code object with a
        different name is not printed; only its nested code objects are
        searched for a match.  Pass False to print unconditionally.
    """
    # Filter recursive call
    if filtered and self.co_name and code.co_name != self.co_name:
      self.maybe_show_consts(code.co_consts, level=level+1)
      return

    indent = INDENT * level

    for name in dir(code):
      if not name.startswith("co_"):
        continue
      if name in ("co_code", "co_consts"):
        continue  # These two get their own sections below.
      value = getattr(code, name)
      if isinstance(value, str):
        value = repr(value)
      elif name == "co_flags":
        value = ShowFlags(value)
      elif name == "co_lnotab":
        value = "0x(%s)" % to_hexstr(value)
      print("%s%s%s" % (indent, (name+":").ljust(NAME_OFFSET), value))

    # Show bytecode FIRST, and then consts.  There is nested bytecode in the
    # consts, so it's a 'top-down' order.
    print("%sco_code" % indent)
    self.show_bytecode(code, level=level+1)

    print("%sco_consts" % indent)
    # This code object is being printed, so its children are printed in full
    # too, whatever their names are.
    self.show_consts(code.co_consts, level=level+1, filtered=False)

  def Visit(self, f):
    """Write a readable listing of a .pyc file to stdout.

    Args:
      f: binary file object for the .pyc file, as expected by unpack_pyc()
    """
    magic, unixtime, timestamp, code = unpack_pyc(f)

    magic = "0x(%s)" % to_hexstr(magic)
    print("  ## inspecting pyc file ##")
    print("magic number: %s" % magic)
    print("timestamp:    %s (%s)" % (unixtime, timestamp))
    print("code")
    self.show_code(code, level=1)
    print("  ## done inspecting pyc file ##")
 |