1 | #!/usr/bin/env python2
|
2 | from __future__ import print_function
|
3 | """inspect_pyc module
|
4 |
|
5 | This is a refactor of a recipe from Ned Batchelder's blog. He has
|
6 | given me permission to publish this. You can find the post at the
|
7 | following URL:
|
8 |
|
9 | http://nedbatchelder.com/blog/200804/the_structure_of_pyc_files.html
|
10 |
|
11 | You may use this module as a script: "./inspect_pyc.py <PYC_FILE>".
|
12 |
|
13 | NOTE:
|
14 | You can also see bytecode with:
|
15 | import os, dis
|
16 | dis.dis(os)
|
17 |
|
18 | But that doesn't give all the metadata. It's also nicer than
|
19 | tools/dumppyc.py, which came with the 'compiler2' package.
|
20 | """
|
21 |
|
22 | import marshal, struct, sys, time, types
|
23 |
|
24 | import consts # this package
|
25 |
|
26 | from opy.lib import dis
|
27 |
|
28 |
|
# Indentation unit for nested output; repeated once per nesting level.
INDENT = ' '
# Maximum number of bytes to_hexstr() puts on one row of a hex dump.
MAX_HEX_LEN = 16
# Width that "name:" labels are left-justified to when printing code object
# attributes.
NAME_OFFSET = 20
|
32 |
|
33 |
|
def to_hexstr(bytes_value, level=0, wrap=False):
    """Render a byte sequence as space-separated two-digit hex values.

    Args:
      bytes_value: sized sequence of ints, or of one-character strings.
      level: nesting depth; the indentation it produces is only applied when
        wrap is true.
      wrap: if true, put at most MAX_HEX_LEN values on each row, start every
        row with the indentation, and separate rows with newlines.  Otherwise
        return everything on one line.

    Returns:
      The formatted string.
    """
    pad = INDENT * level
    count = len(bytes_value)

    try:
        octets = ["%02x" % item for item in bytes_value]
    except TypeError:
        # Items are one-character strings rather than ints.
        octets = ["%02x" % ord(item) for item in bytes_value]

    rows = [" ".join(octets[start:start + MAX_HEX_LEN])
            for start in range(0, count, MAX_HEX_LEN)]

    if not wrap:
        return " ".join(rows)
    return pad + ("\n" + pad).join(rows)
|
49 |
|
50 |
|
def ShowFlags(flags):
    """Format a flags bit field as hex plus the names of the bits that are set.

    Args:
      flags: integer bit field, e.g. a code object's co_flags.

    Returns:
      The value formatted with '0x%05x'.  If any bit listed in
      consts.VALUE_TO_NAME is set, the matching names follow, separated by
      spaces, in ascending order of bit value.
    """
    set_names = [consts.VALUE_TO_NAME[bit]
                 for bit in sorted(consts.VALUE_TO_NAME)
                 if flags & bit]

    hex_str = "0x%05x" % flags
    if not set_names:
        return hex_str
    return '%s %s' % (hex_str, ' '.join(set_names))
|
62 |
|
63 |
|
def unpack_pyc(f):
    """Read the header and the marshalled payload of an open .pyc file.

    Args:
      f: binary file object positioned at the start of the .pyc data.

    Returns:
      A (magic, unixtime, timestamp, code) tuple: the 4 raw magic bytes, the
      timestamp field as an integer, that timestamp formatted in local time
      by time.asctime(), and whatever marshal.load() returns for the rest of
      the file.

    Raises:
      struct.error: if fewer than 4 bytes are available for the timestamp.
    """
    magic = f.read(4)
    # The timestamp is a 4-byte little-endian field.  A bare "I" would use the
    # host's native byte order and size, and misread it on big-endian hosts.
    unixtime = struct.unpack("<I", f.read(4))[0]
    timestamp = time.asctime(time.localtime(unixtime))
    code = marshal.load(f)
    return magic, unixtime, timestamp, code
|
70 |
|
71 |
|
72 | # Enhancements:
|
73 | # - Actually print the line of code! That will be very helpful.
|
74 |
|
75 | def disassemble(co, indent, f):
|
76 | """Copied from dis module.
|
77 |
|
78 | Args:
|
79 | co: code object
|
80 | indent: indentation to print with
|
81 |
|
82 | NOTE: byterun/pyobj.py:Frame.decode_next does something very similar.
|
83 | """
|
84 | def out(*args, **kwargs):
|
85 | print(*args, file=f, **kwargs)
|
86 |
|
87 | code = co.co_code
|
88 | labels = dis.findlabels(code)
|
89 | linestarts = dict(dis.findlinestarts(co))
|
90 | n = len(code)
|
91 | i = 0
|
92 | extended_arg = 0
|
93 | free = None
|
94 |
|
95 | while i < n:
|
96 | c = code[i]
|
97 | op = ord(c)
|
98 |
|
99 | if i in linestarts:
|
100 | if i > 0:
|
101 | out()
|
102 | prefix = linestarts[i]
|
103 | else:
|
104 | prefix = ''
|
105 | out('%s%4s' % (indent, prefix), end=' ')
|
106 |
|
107 | if i in labels: # Jump targets get a special symbol
|
108 | arrow = '>>'
|
109 | else:
|
110 | arrow = ' '
|
111 |
|
112 | out(' %s %4r %-20s ' % (arrow, i, dis.opname[op]), end=' ')
|
113 | i += 1
|
114 | if op >= dis.HAVE_ARGUMENT:
|
115 | oparg = ord(code[i]) + ord(code[i+1])*256 + extended_arg
|
116 | extended_arg = 0
|
117 | i += 2
|
118 | if op == dis.EXTENDED_ARG:
|
119 | extended_arg = oparg*65536L
|
120 |
|
121 | oparg_str = None
|
122 |
|
123 | if op in dis.hasconst:
|
124 | c = co.co_consts[oparg]
|
125 | if isinstance(c, types.CodeType):
|
126 | # %r prints a memory address, which inhibits diffing
|
127 | oparg_str = '(code object %s %s %s)' % (
|
128 | c.co_name, c.co_filename, c.co_firstlineno)
|
129 | else:
|
130 | oparg_str = '(%r)' % (c,)
|
131 |
|
132 | elif op in dis.hasname:
|
133 | oparg_str = '(%s)' % (co.co_names[oparg],)
|
134 |
|
135 | elif op in dis.hasjrel:
|
136 | oparg_str = '(to %r)' % (i + oparg,)
|
137 |
|
138 | elif op in dis.haslocal:
|
139 | oparg_str = '(%s)' % (co.co_varnames[oparg],)
|
140 |
|
141 | elif op in dis.hascompare:
|
142 | oparg_str = '(%s)' % (dis.cmp_op[oparg],)
|
143 |
|
144 | elif op in dis.hasfree:
|
145 | if free is None:
|
146 | free = co.co_cellvars + co.co_freevars
|
147 | oparg_str = '(%s)' % (free[oparg],)
|
148 |
|
149 | if oparg_str:
|
150 | out('%5r %s' % (oparg, oparg_str), end=' ')
|
151 | else:
|
152 | out('%5r' % oparg, end=' ')
|
153 |
|
154 | out()
|
155 |
|
156 |
|
def ParseOps(code):
    """A lightweight parser.  Does some of what disassemble() does.

    Args:
      code: raw bytecode (a str or bytes object such as a code object's
        co_code) in which an opcode >= dis.HAVE_ARGUMENT is followed by a
        two-byte operand, low byte first.

    Yields:
      (opname, oparg) pairs in instruction order.  oparg is None for opcodes
      below dis.HAVE_ARGUMENT, which have no operand.

    Raises:
      IndexError: if the bytecode ends in the middle of an operand.
    """
    # Indexing a bytearray gives ints whether the input was str or bytes.
    ops = bytearray(code)
    n = len(ops)
    i = 0
    extended_arg = 0

    while i < n:
        op = ops[i]
        i += 1

        # Reset for every instruction, so that an opcode without an operand
        # does not report the previous instruction's operand.
        oparg = None
        if op >= dis.HAVE_ARGUMENT:
            oparg = ops[i] + ops[i+1]*256 + extended_arg
            extended_arg = 0
            i += 2
            if op == dis.EXTENDED_ARG:
                # Supplies the high bits of the next instruction's operand.
                extended_arg = oparg*65536

        yield dis.opname[op], oparg
|
177 |
|
178 |
|
class Visitor(object):
    """Prints a readable listing of a .pyc file's code objects to stdout."""

    def __init__(self, dis_bytecode=True, co_name=None):
        """
        Args:
          dis_bytecode: Whether to show disassembly.
          co_name: if set, only print code objects whose co_name is exactly
            this.  Other code objects are skipped, but the code objects
            among their constants are still visited.
            NOTE(review): the same filter is applied to the constants of a
            printed code object, so nested code objects with a different
            name are not printed -- confirm that this is intended.
        """
        self.dis_bytecode = dis_bytecode
        # Name of thing to print
        self.co_name = co_name

    def show_consts(self, consts, level=0):
        """Print every constant with its index, descending into code objects."""
        indent = INDENT * level
        for i, obj in enumerate(consts):
            if isinstance(obj, types.CodeType):
                print("%s%s (code object)" % (indent, i))
                # RECURSIVE CALL.
                self.show_code(obj, level=level+1)
            else:
                print("%s%s %r" % (indent, i, obj))

    def maybe_show_consts(self, consts, level=0):
        """Visit the code objects among consts without printing the rest."""
        for obj in consts:
            if isinstance(obj, types.CodeType):
                self.show_code(obj, level=level+1)  # RECURSIVE CALL.

    def show_bytecode(self, code, level=0):
        """Print co_code as hex and, if dis_bytecode is set, disassembled."""

        indent = INDENT * level
        print(to_hexstr(code.co_code, level, wrap=True))

        if self.dis_bytecode:
            print(indent + "disassembled:")
            disassemble(code, indent, sys.stdout)

    def show_code(self, code, level=0):
        """Print a code object, e.g. metadata, bytecode, and consts."""

        # Filter recursive call
        if self.co_name and code.co_name != self.co_name:
            self.maybe_show_consts(code.co_consts, level=level+1)
            return

        indent = INDENT * level

        for name in dir(code):
            if not name.startswith("co_"):
                continue
            # These two are printed in their own sections below.
            if name in ("co_code", "co_consts"):
                continue
            value = getattr(code, name)
            # Check the specially formatted attributes before the generic str
            # case: on Python 2 co_lnotab is itself a str, so testing
            # isinstance() first would make its hex branch unreachable.
            if name == "co_flags":
                value = ShowFlags(value)
            elif name == "co_lnotab":
                value = "0x(%s)" % to_hexstr(value)
            elif isinstance(value, str):
                value = repr(value)
            print("%s%s%s" % (indent, (name+":").ljust(NAME_OFFSET), value))

        # Show bytecode FIRST, and then consts.  There is nested bytecode in
        # the consts, so it's a 'top-down' order.
        print("%sco_code" % indent)
        self.show_bytecode(code, level=level+1)

        print("%sco_consts" % indent)
        self.show_consts(code.co_consts, level=level+1)

    def Visit(self, f):
        """Write a readable listing of a .pyc file to stdout.

        Args:
          f: binary file object positioned at the start of the .pyc data.
        """

        magic, unixtime, timestamp, code = unpack_pyc(f)

        magic = "0x(%s)" % to_hexstr(magic)
        print(" ## inspecting pyc file ##")
        print("magic number: %s" % magic)
        print("timestamp: %s (%s)" % (unixtime, timestamp))
        print("code")
        self.show_code(code, level=1)
        print(" ## done inspecting pyc file ##")
|