1 | #! /usr/bin/env python
|
2 | from __future__ import print_function
|
3 | """inspect_pyc module
|
4 |
|
5 | This is a refactor of a recipe from Ned Batchelder's blog. He has
|
6 | given me permission to publish this. You can find the post at the
|
7 | following URL:
|
8 |
|
9 | http://nedbatchelder.com/blog/200804/the_structure_of_pyc_files.html
|
10 |
|
11 | You may use this module as a script: "./inspect_pyc.py <PYC_FILE>".
|
12 |
|
13 | NOTE:
|
14 | You can also see bytecode with:
|
15 | import os, dis
|
16 | dis.dis(os)
|
17 |
|
18 | But that doesn't give all the metadata. It's also nicer than
|
19 | tools/dumppyc.py, which came with the 'compiler2' package.
|
20 | """
|
21 |
|
22 | import collections, cStringIO, dis, marshal, struct, sys, time, types
|
23 | from ..compiler2 import consts
|
24 |
|
25 |
|
# Indentation unit; repeated once per nesting level when printing.
INDENT = ' '
# Number of bytes rendered per full row of hex output in to_hexstr().
MAX_HEX_LEN = 16
# Width the "name:" label is left-justified to in Visitor.show_code().
NAME_OFFSET = 20
|
29 |
|
30 |
|
def to_hexstr(bytes_value, level=0, wrap=False):
    """Render a byte sequence as space-separated two-digit hex values.

    Args:
      bytes_value: sized sequence of bytes.  Items may be integers or
        one-character strings; the latter are converted with ord().
      level: nesting depth; only affects the output when ``wrap`` is true.
      wrap: when true, emit MAX_HEX_LEN bytes per row, each row prefixed with
        ``INDENT * level`` and rows separated by newlines.  When false,
        everything is returned on a single unindented line.

    Returns:
      The formatted hex string.
    """
    pad = INDENT * level

    # Items that are already integers format directly; one-character strings
    # make '%02x' raise TypeError, in which case we go through ord().
    try:
        octets = ["%02x" % (item,) for item in bytes_value]
    except TypeError:
        octets = ["%02x" % (ord(item),) for item in bytes_value]

    if not wrap:
        return " ".join(octets)

    rows = []
    for start in range(0, len(octets), MAX_HEX_LEN):
        rows.append(" ".join(octets[start:start + MAX_HEX_LEN]))
    return pad + ("\n" + pad).join(rows)
|
46 |
|
47 |
|
48 | # TODO: Do this in a cleaner way. Right now I'm avoiding modifying the
|
49 | # consts module.
|
def build_flags_def(consts, co_flags_def):
    """Copy every ``CO_*`` attribute of ``consts`` into ``co_flags_def``.

    Args:
      consts: any object (typically a module) whose attributes are scanned.
      co_flags_def: mapping updated in place; each matching attribute name
        becomes a key mapped to that attribute's value.
    """
    flag_names = [attr for attr in dir(consts) if attr.startswith('CO_')]
    for attr in flag_names:
        co_flags_def[attr] = getattr(consts, attr)
|
54 |
|
55 |
|
# Maps each CO_* name found in the consts module to its value.  Populated once
# at import time and read by show_flags().
_CO_FLAGS_DEF = {}
build_flags_def(consts, _CO_FLAGS_DEF)
|
58 |
|
59 |
|
def show_flags(value):
    """Format a co_flags value as hex followed by the names of the set flags.

    Args:
      value: integer flags word of a code object.

    Returns:
      ``"0x%05x" % value``, followed by a space and the space-separated names
      of every entry in _CO_FLAGS_DEF whose bit is set in ``value`` (if any).
    """
    matched = [name for name, bit in _CO_FLAGS_DEF.items() if value & bit]
    hex_repr = "0x%05x" % value
    if not matched:
        return hex_repr
    return '%s %s' % (hex_repr, ' '.join(matched))
|
71 |
|
72 |
|
def unpack_pyc(f):
    """Read the header and code object from an open .pyc file.

    Assumes the header layout of 4 magic bytes followed by a 4-byte
    modification time, immediately followed by the marshalled code object.
    Interpreters that write extra header fields are not handled here.

    Args:
      f: binary file object positioned at the start of the .pyc data.

    Returns:
      A tuple ``(magic, unixtime, timestamp, code)``: the raw magic bytes, the
      modification time as an integer, the same time formatted with
      time.asctime() in local time, and the unmarshalled code object.

    Raises:
      struct.error: if fewer than 4 bytes are available for the timestamp.
    """
    magic = f.read(4)
    # The timestamp is stored little-endian regardless of the host CPU, so
    # force the byte order instead of relying on the native one ("I").
    unixtime = struct.unpack("<I", f.read(4))[0]
    timestamp = time.asctime(time.localtime(unixtime))
    code = marshal.load(f)
    return magic, unixtime, timestamp, code
|
79 |
|
80 |
|
81 | # NOTE:
|
82 | # - We could change this into a bytecode visitor. It's a tree of code
|
83 | # objects. Each code object contains constants, and a constant can be another
|
84 | # code object.
|
85 |
|
86 | # Enhancements:
|
87 | # - Actually print the line of code! That will be very helpful.
|
# - Print a histogram of byte codes. Print total number of bytecodes.
|
89 | # - Copy of the
|
90 |
|
def disassemble(co, indent, op_counts, f):
    """Print an indented disassembly of a code object.

    Copied from the dis module, whose own disassemble() doesn't do the
    indent we want.

    The decoder expects one opcode byte, followed by a two-byte little-endian
    argument for opcodes >= dis.HAVE_ARGUMENT.  It calls ord() on each element
    of ``co.co_code``, so indexing the bytecode must yield one-character
    strings.

    Args:
      co: code object
      indent: indentation string printed at the start of every line
      op_counts: mapping from opcode number to count, incremented once per
        decoded instruction (e.g. a collections.Counter)
      f: file-like object the listing is printed to
    """
    def out(*args, **kwargs):
        print(*args, file=f, **kwargs)

    code = co.co_code
    labels = dis.findlabels(code)
    linestarts = dict(dis.findlinestarts(co))
    n = len(code)
    i = 0
    extended_arg = 0
    free = None  # co_cellvars + co_freevars, built lazily on first use

    while i < n:
        op = ord(code[i])

        op_counts[op] += 1

        # First column: source line number, only on the first instruction of
        # each line; a blank line separates consecutive source lines.
        if i in linestarts:
            if i > 0:
                out()
            prefix = linestarts[i]
        else:
            prefix = ''
        out('%s%4s' % (indent, prefix), end=' ')

        if i in labels:  # Jump targets get a special symbol
            arrow = '>>'
        else:
            arrow = ' '

        out(' %s %4r %-20s ' % (arrow, i, dis.opname[op]), end=' ')
        i += 1
        if op >= dis.HAVE_ARGUMENT:
            oparg = ord(code[i]) + ord(code[i+1])*256 + extended_arg
            extended_arg = 0
            i += 2
            if op == dis.EXTENDED_ARG:
                # Supplies the high 16 bits of the next instruction's
                # argument.  A plain int literal is used here: the 'L' suffix
                # is a syntax error on Python 3 and unnecessary on Python 2.
                extended_arg = oparg*65536

            oparg_str = None

            if op in dis.hasconst:
                const = co.co_consts[oparg]
                if isinstance(const, types.CodeType):
                    # %r prints a memory address, which inhibits diffing
                    oparg_str = '(code object %s %s %s)' % (
                        const.co_name, const.co_filename,
                        const.co_firstlineno)
                else:
                    oparg_str = '(%r)' % (const,)

            elif op in dis.hasname:
                oparg_str = '(%s)' % (co.co_names[oparg],)

            elif op in dis.hasjrel:
                # Relative jumps are measured from the next instruction.
                oparg_str = '(to %r)' % (i + oparg,)

            elif op in dis.haslocal:
                oparg_str = '(%s)' % (co.co_varnames[oparg],)

            elif op in dis.hascompare:
                oparg_str = '(%s)' % (dis.cmp_op[oparg],)

            elif op in dis.hasfree:
                if free is None:
                    free = co.co_cellvars + co.co_freevars
                oparg_str = '(%s)' % (free[oparg],)

            if oparg_str:
                out('%5r %s' % (oparg, oparg_str), end=' ')
            else:
                out('%5r' % oparg, end=' ')

        out()
|
173 |
|
174 |
|
class Visitor(object):
    """Prints a readable listing of a .pyc file and tallies its opcodes."""

    def __init__(self, dis_bytecode=True):
        self.dis_bytecode = dis_bytecode  # Whether to show disassembly.
        # opcode number -> number of times it was disassembled
        self.op_counts = collections.Counter()

    def show_consts(self, consts, level=0):
        """Print each constant with its index, recursing into code objects.

        Args:
          consts: sequence of constants (e.g. a code object's co_consts).
          level: nesting depth used for indentation.
        """
        indent = INDENT * level
        for i, obj in enumerate(consts):
            if isinstance(obj, types.CodeType):
                print(indent+"%s (code object)" % i)
                # RECURSIVE CALL.
                self.show_code(obj, level=level+1)
            else:
                print(indent+"%s %r" % (i, obj))

    def show_bytecode(self, code, level=0):
        """Print the raw bytecode as hex and, if enabled, its disassembly.

        The disassembly comes from this module's disassemble(), which also
        updates self.op_counts.
        """
        indent = INDENT * level
        print(to_hexstr(code.co_code, level, wrap=True))

        if self.dis_bytecode:
            print(indent + "disassembled:")
            disassemble(code, indent, self.op_counts, sys.stdout)

    def show_code(self, code, level=0):
        """Print a code object, e.g. metadata, bytecode, and consts."""

        indent = INDENT * level

        for name in dir(code):
            if not name.startswith("co_"):
                continue
            # These two get dedicated sections below.
            if name in ("co_code", "co_consts"):
                continue
            value = getattr(code, name)
            # Check the attribute name before the generic string case:
            # co_lnotab may itself be a str, and it must still be shown as
            # hex rather than as a repr.
            if name == "co_flags":
                value = show_flags(value)
            elif name == "co_lnotab":
                value = "0x(%s)" % to_hexstr(value)
            elif isinstance(value, str):
                value = repr(value)
            print("%s%s%s" % (indent, (name+":").ljust(NAME_OFFSET), value))

        print("%sco_consts" % indent)
        self.show_consts(code.co_consts, level=level+1)

        print("%sco_code" % indent)
        self.show_bytecode(code, level=level+1)

    def Visit(self, f):
        """Write a readable listing of a .pyc file to stdout.

        Args:
          f: open binary file object for the .pyc file.
        """
        magic, unixtime, timestamp, code = unpack_pyc(f)

        magic = "0x(%s)" % to_hexstr(magic)
        print(" ## inspecting pyc file ##")
        print("magic number: %s" % magic)
        print("timestamp: %s (%s)" % (unixtime, timestamp))
        print("code")
        self.show_code(code, level=1)
        print(" ## done inspecting pyc file ##")

    def Report(self, f=sys.stdout):
        """Print a histogram of the opcodes counted so far, most common first.

        Args:
          f: file-like object to write the whole report to.
        """
        # The leading blank line belongs to the report, so it goes to f too.
        print(file=f)
        print('Opcode Histogram:', file=f)
        for op, count in self.op_counts.most_common():
            print('%5d %s' % (count, dis.opname[op]), file=f)

        print('', file=f)
        print('%d unique opcodes' % len(self.op_counts), file=f)
|