| 1 | #! /usr/bin/env python
|
| 2 | from __future__ import print_function
|
| 3 | """inspect_pyc module
|
| 4 |
|
| 5 | This is a refactor of a recipe from Ned Batchelder's blog. He has
|
| 6 | given me permission to publish this. You can find the post at the
|
| 7 | following URL:
|
| 8 |
|
| 9 | http://nedbatchelder.com/blog/200804/the_structure_of_pyc_files.html
|
| 10 |
|
| 11 | You may use this module as a script: "./inspect_pyc.py <PYC_FILE>".
|
| 12 |
|
| 13 | NOTE:
|
| 14 | You can also see bytecode with:
|
| 15 | import os, dis
|
| 16 | dis.dis(os)
|
| 17 |
|
| 18 | But that doesn't give all the metadata. It's also nicer than
|
| 19 | tools/dumppyc.py, which came with the 'compiler2' package.
|
| 20 | """
|
| 21 |
|
| 22 | import collections, cStringIO, dis, marshal, struct, sys, time, types
|
| 23 | from ..compiler2 import consts
|
| 24 |
|
| 25 |
|
# String for one level of indentation; callers repeat it `level` times.
INDENT = ' '
# Maximum number of bytes to_hexstr() places on one line when wrapping.
MAX_HEX_LEN = 16
# Width that the "name:" labels printed by Visitor.show_code() are padded to.
NAME_OFFSET = 20
|
| 29 |
|
| 30 |
|
def to_hexstr(bytes_value, level=0, wrap=False):
    """Render a byte string as space-separated two-digit hex values.

    Args:
      bytes_value: the bytes to render.  Its items may be ints or
        one-character strings; both are handled.
      level: indentation level, in units of INDENT.  Only affects the result
        when wrap is true.
      wrap: if true, put at most MAX_HEX_LEN bytes on each line and start
        every line with the indent; otherwise return everything on one line.

    Returns:
      The formatted hex string.
    """
    full_rows, remainder = divmod(len(bytes_value), MAX_HEX_LEN)

    # One "%02x %02x ..." format per output row; the final row may be short.
    row_formats = [" ".join(["%02x"] * MAX_HEX_LEN)] * full_rows
    if remainder:
        row_formats.append(" ".join(["%02x"] * remainder))

    if wrap:
        prefix = INDENT * level
        template = prefix + ("\n" + prefix).join(row_formats)
    else:
        template = " ".join(row_formats)

    try:
        return template % tuple(bytes_value)
    except TypeError:
        # The items were one-character strings rather than ints.
        return template % tuple(ord(char) for char in bytes_value)
|
| 46 |
|
| 47 |
|
| 48 | # TODO: Do this in a cleaner way. Right now I'm avoiding modifying the
|
| 49 | # consts module.
|
def build_flags_def(consts, co_flags_def):
    """Copy every CO_* attribute of `consts` into the dict `co_flags_def`.

    Args:
      consts: object (e.g. a module) whose attribute names are listed with
        dir().
      co_flags_def: dict updated in place with name -> attribute value.
    """
    flag_names = [attr for attr in dir(consts) if attr.startswith('CO_')]
    for attr in flag_names:
        co_flags_def[attr] = getattr(consts, attr)
|
| 54 |
|
| 55 |
|
# Maps each CO_* name found in the consts module to its value.  Filled in
# once at import time and read by show_flags().
_CO_FLAGS_DEF = {}
build_flags_def(consts, _CO_FLAGS_DEF)
|
| 58 |
|
| 59 |
|
def show_flags(value):
    """Format a flags bit field as hex, followed by the names of its set bits.

    Args:
      value: integer bit field, e.g. a code object's co_flags.

    Returns:
      The "0x%05x" rendering of value.  If any flag in _CO_FLAGS_DEF has a
      bit in common with value, the matching names are appended after a
      space, separated by spaces, in the dict's iteration order.
    """
    set_names = [name for name, bit in _CO_FLAGS_DEF.items() if value & bit]
    hex_part = "0x%05x" % value
    if not set_names:
        return hex_part
    return '%s %s' % (hex_part, ' '.join(set_names))
|
| 71 |
|
| 72 |
|
def unpack_pyc(f):
    """Read the header and the marshalled payload of a .pyc file.

    The file is expected to start with an 8-byte header: a 4-byte magic
    number followed by a 4-byte modification time.  The marshalled code
    object follows immediately after.

    Args:
      f: file object opened in binary mode, positioned at the start of the
        .pyc data.

    Returns:
      A (magic, unixtime, timestamp, code) tuple: the raw 4 magic bytes, the
      modification time as an integer, the same time formatted by
      time.asctime() in local time, and the object returned by marshal.load().

    Raises:
      struct.error: if fewer than 4 bytes are available for the timestamp.
    """
    magic = f.read(4)
    # The mtime is written little-endian regardless of the host.  A bare "I"
    # would use the native byte order and misread it on big-endian machines.
    (unixtime,) = struct.unpack("<I", f.read(4))
    timestamp = time.asctime(time.localtime(unixtime))
    code = marshal.load(f)
    return magic, unixtime, timestamp, code
|
| 79 |
|
| 80 |
|
| 81 | # NOTE:
|
| 82 | # - We could change this into a bytecode visitor. It's a tree of code
|
| 83 | # objects. Each code object contains constants, and a constant can be another
|
| 84 | # code object.
|
| 85 |
|
| 86 | # Enhancements:
|
| 87 | # - Actually print the line of code! That will be very helpful.
|
| 88 | # - Print a histogram of byte codes. Print total number of bytecodes.
|
| 89 | # - Copy of the
|
| 90 |
|
def disassemble(co, indent, op_counts, f):
    """Print an indented disassembly of a code object and count its opcodes.

    Adapted from dis.disassemble(), which cannot prefix each line with an
    indent.  The decoding loop assumes the variable-length bytecode layout:
    a 1-byte opcode, followed by a 2-byte little-endian argument when the
    opcode is >= dis.HAVE_ARGUMENT.

    Args:
      co: code object to disassemble
      indent: string printed at the start of every instruction line
      op_counts: mapping from opcode number to count, incremented once per
        decoded instruction.  It must yield 0 for missing keys, e.g. a
        collections.Counter.
      f: file-like object the listing is written to
    """
    def out(*args, **kwargs):
        print(*args, file=f, **kwargs)

    raw_code = co.co_code
    # Indexing a bytearray yields ints whether co_code is a Python 2 str or a
    # Python 3 bytes object, so no ord() calls are needed below.
    code = bytearray(raw_code)
    # A set gives O(1) membership tests; findlabels() returns a list.
    labels = set(dis.findlabels(raw_code))
    linestarts = dict(dis.findlinestarts(co))
    n = len(code)
    i = 0
    extended_arg = 0
    free = None  # co_cellvars + co_freevars, built on first use.

    while i < n:
        op = code[i]
        op_counts[op] += 1

        if i in linestarts:
            if i > 0:
                out()  # Blank line between source lines.
            prefix = linestarts[i]
        else:
            prefix = ''
        out('%s%4s' % (indent, prefix), end=' ')

        if i in labels:  # Jump targets get a special symbol
            arrow = '>>'
        else:
            arrow = ' '

        out(' %s %4r %-20s ' % (arrow, i, dis.opname[op]), end=' ')
        i += 1
        if op >= dis.HAVE_ARGUMENT:
            oparg = code[i] + code[i + 1] * 256 + extended_arg
            extended_arg = 0
            i += 2
            if op == dis.EXTENDED_ARG:
                # Supplies the high 16 bits of the next instruction's
                # argument.  A plain int literal is valid on Python 2 and 3.
                extended_arg = oparg * 65536

            oparg_str = None

            if op in dis.hasconst:
                const = co.co_consts[oparg]
                if isinstance(const, types.CodeType):
                    # %r prints a memory address, which inhibits diffing
                    oparg_str = '(code object %s %s %s)' % (
                        const.co_name, const.co_filename,
                        const.co_firstlineno)
                else:
                    oparg_str = '(%r)' % (const,)

            elif op in dis.hasname:
                oparg_str = '(%s)' % (co.co_names[oparg],)

            elif op in dis.hasjrel:
                # i already points past this instruction, so the relative
                # jump is measured from the next one.
                oparg_str = '(to %r)' % (i + oparg,)

            elif op in dis.haslocal:
                oparg_str = '(%s)' % (co.co_varnames[oparg],)

            elif op in dis.hascompare:
                oparg_str = '(%s)' % (dis.cmp_op[oparg],)

            elif op in dis.hasfree:
                if free is None:
                    free = co.co_cellvars + co.co_freevars
                oparg_str = '(%s)' % (free[oparg],)

            if oparg_str:
                out('%5r %s' % (oparg, oparg_str), end=' ')
            else:
                out('%5r' % oparg, end=' ')

        out()
|
| 173 |
|
| 174 |
|
class Visitor(object):
    """Prints a readable listing of a .pyc file and tallies its opcodes.

    Call Visit() with an open .pyc file to print the listing to stdout, then
    Report() to print a histogram of the opcodes seen in disassembled code.
    """

    def __init__(self, dis_bytecode=True):
        """
        Args:
          dis_bytecode: whether show_bytecode() prints a disassembly in
            addition to the hex dump.
        """
        self.dis_bytecode = dis_bytecode  # Whether to show disassembly.
        # Opcode number -> number of occurrences; updated by disassemble().
        self.op_counts = collections.Counter()

    def show_consts(self, consts, level=0):
        """Print each constant with its index, recursing into code objects.

        Args:
          consts: sequence of constants, e.g. a code object's co_consts.
          level: indentation level, in units of INDENT.
        """
        indent = INDENT * level
        for i, obj in enumerate(consts):
            if isinstance(obj, types.CodeType):
                print(indent + "%s (code object)" % i)
                # RECURSIVE CALL.
                self.show_code(obj, level=level + 1)
            else:
                print(indent + "%s %r" % (i, obj))

    def show_bytecode(self, code, level=0):
        """Print a hex dump of code.co_code, optionally followed by its
        disassembly.

        The disassembly is produced by this module's disassemble(), which
        also updates self.op_counts.  It is only printed when
        self.dis_bytecode is true.

        Args:
          code: code object whose bytecode is shown.
          level: indentation level, in units of INDENT.
        """
        indent = INDENT * level
        print(to_hexstr(code.co_code, level, wrap=True))

        if self.dis_bytecode:
            print(indent + "disassembled:")
            disassemble(code, indent, self.op_counts, sys.stdout)

    def show_code(self, code, level=0):
        """Print a code object, e.g. metadata, bytecode, and consts.

        Args:
          code: code object to print.
          level: indentation level, in units of INDENT.
        """
        indent = INDENT * level

        for name in dir(code):
            if not name.startswith("co_"):
                continue
            # These two get their own sections below.
            if name in ("co_code", "co_consts"):
                continue
            value = getattr(code, name)
            if isinstance(value, str):
                value = repr(value)
            elif name == "co_flags":
                value = show_flags(value)
            elif name == "co_lnotab":
                value = "0x(%s)" % to_hexstr(value)
            print("%s%s%s" % (indent, (name + ":").ljust(NAME_OFFSET), value))

        print("%sco_consts" % indent)
        self.show_consts(code.co_consts, level=level + 1)

        print("%sco_code" % indent)
        self.show_bytecode(code, level=level + 1)

    def Visit(self, f):
        """Write a readable listing of a .pyc file to stdout.

        Args:
          f: open .pyc file object, as accepted by unpack_pyc().
        """
        magic, unixtime, timestamp, code = unpack_pyc(f)

        magic = "0x(%s)" % to_hexstr(magic)
        print(" ## inspecting pyc file ##")
        print("magic number: %s" % magic)
        print("timestamp: %s (%s)" % (unixtime, timestamp))
        print("code")
        self.show_code(code, level=1)
        print(" ## done inspecting pyc file ##")

    def Report(self, f=sys.stdout):
        """Print a histogram of the opcodes counted so far, most common first.

        Args:
          f: file-like object to write the report to.
        """
        # The leading blank line goes to f like everything else.  It used to
        # go to stdout even when the report was redirected.
        print(file=f)
        print('Opcode Histogram:', file=f)
        for op, count in self.op_counts.most_common():
            print('%5d %s' % (count, dis.opname[op]), file=f)

        print('', file=f)
        print('%d unique opcodes' % len(self.op_counts), file=f)
|