OILS / opy / _regtest / src / opy / misc / inspect_pyc.py View on Github | oilshell.org

248 lines, 156 significant
1#! /usr/bin/env python
2from __future__ import print_function
3"""inspect_pyc module
4
5This is a refactor of a recipe from Ned Batchelder's blog. He has
6given me permission to publish this. You can find the post at the
7following URL:
8
9 http://nedbatchelder.com/blog/200804/the_structure_of_pyc_files.html
10
11You may use this module as a script: "./inspect_pyc.py <PYC_FILE>".
12
13NOTE:
14You can also see bytecode with:
15import os, dis
16dis.dis(os)
17
18But that doesn't give all the metadata. It's also nicer than
19tools/dumppyc.py, which came with the 'compiler2' package.
20"""
21
22import collections, cStringIO, dis, marshal, struct, sys, time, types
23from ..compiler2 import consts
24
25
# Text repeated once per nesting level to indent the listing.
# NOTE(review): whitespace in this copy of the file may have been collapsed;
# confirm the width of this literal against the upstream source.
INDENT = ' '

# Number of bytes rendered on one row of a hex dump (see to_hexstr).
MAX_HEX_LEN = 16

# Width that "name:" labels are left-justified to when printing code object
# attributes (see Visitor.show_code).
NAME_OFFSET = 20
29
30
def to_hexstr(bytes_value, level=0, wrap=False):
    """Render a byte string as space-separated two-digit hex values.

    Args:
      bytes_value: sequence of bytes; either integers or one-character
        strings (the latter are converted with ord()).
      level: nesting level; the row prefix is INDENT repeated this many
        times.  Only used in the output when wrap is true.
      wrap: when true, put MAX_HEX_LEN bytes on each row, prefix every row
        with the indentation and join rows with newlines.  When false, all
        bytes are joined with single spaces on one line.

    Returns:
      The formatted string.
    """
    prefix = INDENT * level

    # One "%02x" placeholder per byte: full rows first, then a shorter
    # trailing row if the length is not a multiple of MAX_HEX_LEN.
    full_rows, leftover = divmod(len(bytes_value), MAX_HEX_LEN)
    row_formats = [" ".join(["%02x"] * MAX_HEX_LEN)] * full_rows
    if leftover:
        row_formats.append(" ".join(["%02x"] * leftover))

    if wrap:
        template = prefix + ("\n" + prefix).join(row_formats)
    else:
        template = " ".join(row_formats)

    try:
        # Works directly when iterating the value yields integers.
        return template % tuple(bytes_value)
    except TypeError:
        # Iteration yielded one-character strings; convert each to its code.
        return template % tuple(ord(ch) for ch in bytes_value)
46
47
48# TODO: Do this in a cleaner way. Right now I'm avoiding modifying the
49# consts module.
def build_flags_def(consts, co_flags_def):
    """Copy every attribute of `consts` whose name starts with 'CO_'.

    Args:
      consts: object (normally a module) to scan with dir().
      co_flags_def: dict updated in place with name -> attribute value.

    Returns:
      None; the result is delivered through `co_flags_def`.
    """
    flag_names = [attr for attr in dir(consts) if attr.startswith('CO_')]
    for flag_name in flag_names:
        co_flags_def[flag_name] = getattr(consts, flag_name)
54
55
# Maps each CO_* name found in the compiler2 consts module to its value.
# Filled once at import time and read by show_flags().
_CO_FLAGS_DEF = {}
build_flags_def(consts, _CO_FLAGS_DEF)
58
59
def show_flags(value):
    """Format a flags integer as hex, followed by the names of set flags.

    Args:
      value: integer flags word (Visitor.show_code passes co_flags).

    Returns:
      "0x%05x" of the value; when any entry of _CO_FLAGS_DEF shares a bit
      with it, a space and the space-separated matching names are appended.
      Names appear in the iteration order of _CO_FLAGS_DEF.
    """
    matching = [name for name, bit in _CO_FLAGS_DEF.items() if value & bit]

    hex_text = "0x%05x" % value
    if not matching:
        return hex_text
    return '%s %s' % (hex_text, ' '.join(matching))
71
72
def unpack_pyc(f):
    """Read the header and top-level object of a .pyc file.

    The layout read here is: 4 bytes of magic number, a 4-byte unsigned
    modification time, then a marshalled object.

    Args:
      f: binary file-like object positioned at the start of the .pyc data.

    Returns:
      A tuple (magic, unixtime, timestamp, code) where magic is the raw
      4 bytes, unixtime is the integer modification time, timestamp is that
      time formatted by time.asctime() in local time, and code is whatever
      marshal.load() returns for the rest of the stream.

    Raises:
      struct.error: if fewer than 4 bytes are available for the timestamp.
    """
    magic = f.read(4)
    # The timestamp is stored little-endian regardless of the host, so spell
    # the byte order out instead of relying on the native order of "I".
    (unixtime,) = struct.unpack("<I", f.read(4))
    timestamp = time.asctime(time.localtime(unixtime))
    code = marshal.load(f)
    return magic, unixtime, timestamp, code
79
80
81# NOTE:
82# - We could change this into a bytecode visitor. It's a tree of code
83# objects. Each code object contains constants, and a constant can be another
84# code object.
85
86# Enhancements:
87# - Actually print the line of code! That will be very helpful.
88# - Print a histogram of byte codes. Print total number of bytecodes.
89# - Copy of the
90
91def disassemble(co, indent, op_counts, f):
92 """Copied from dis module.
93
94 Args:
95 co: code object
96 indent: indentation to print with
97
98 It doesn't do the indent we want.
99 """
100 def out(*args, **kwargs):
101 print(*args, file=f, **kwargs)
102
103 code = co.co_code
104 labels = dis.findlabels(code)
105 linestarts = dict(dis.findlinestarts(co))
106 n = len(code)
107 i = 0
108 extended_arg = 0
109 free = None
110
111 while i < n:
112 c = code[i]
113 op = ord(c)
114
115 op_counts[op] += 1
116
117 if i in linestarts:
118 if i > 0:
119 out()
120 prefix = linestarts[i]
121 else:
122 prefix = ''
123 out('%s%4s' % (indent, prefix), end=' ')
124
125 if i in labels: # Jump targets get a special symbol
126 arrow = '>>'
127 else:
128 arrow = ' '
129
130 out(' %s %4r %-20s ' % (arrow, i, dis.opname[op]), end=' ')
131 i += 1
132 if op >= dis.HAVE_ARGUMENT:
133 oparg = ord(code[i]) + ord(code[i+1])*256 + extended_arg
134 extended_arg = 0
135 i += 2
136 if op == dis.EXTENDED_ARG:
137 extended_arg = oparg*65536L
138
139 oparg_str = None
140
141 if op in dis.hasconst:
142 c = co.co_consts[oparg]
143 if isinstance(c, types.CodeType):
144 # %r prints a memory address, which inhibits diffing
145 oparg_str = '(code object %s %s %s)' % (
146 c.co_name, c.co_filename, c.co_firstlineno)
147 else:
148 oparg_str = '(%r)' % (c,)
149
150 elif op in dis.hasname:
151 oparg_str = '(%s)' % (co.co_names[oparg],)
152
153 elif op in dis.hasjrel:
154 oparg_str = '(to %r)' % (i + oparg,)
155
156 elif op in dis.haslocal:
157 oparg_str = '(%s)' % (co.co_varnames[oparg],)
158
159 elif op in dis.hascompare:
160 oparg_str = '(%s)' % (dis.cmp_op[oparg],)
161
162 elif op in dis.hasfree:
163 if free is None:
164 free = co.co_cellvars + co.co_freevars
165 oparg_str = '(%s)' % (free[oparg],)
166
167 if oparg_str:
168 out('%5r %s' % (oparg, oparg_str), end=' ')
169 else:
170 out('%5r' % oparg, end=' ')
171
172 out()
173
174
class Visitor(object):
    """Prints a readable listing of a .pyc file and tallies its opcodes.

    Call Visit() with an open .pyc file to print the listing to stdout, then
    Report() to print a histogram of the opcodes that were disassembled.
    """

    def __init__(self, dis_bytecode=True):
        """
        Args:
          dis_bytecode: whether show_bytecode() also prints a disassembly
            after the hex dump.
        """
        self.dis_bytecode = dis_bytecode  # Whether to show disassembly.
        # Opcode number -> occurrences, filled in by disassemble().
        self.op_counts = collections.Counter()

    def show_consts(self, consts, level=0):
        """Print each constant with its index, recursing into code objects.

        Args:
          consts: sequence of constants (show_code passes co_consts).
          level: nesting level used for indentation.
        """
        indent = INDENT * level
        for i, obj in enumerate(consts):
            if isinstance(obj, types.CodeType):
                print(indent+"%s (code object)" % i)
                # RECURSIVE CALL.
                self.show_code(obj, level=level+1)
            else:
                print(indent+"%s %r" % (i, obj))

    def show_bytecode(self, code, level=0):
        """Print co_code as a hex dump and, if enabled, its disassembly.

        Uses this module's disassemble() so that the output is indented and
        the opcodes are counted in self.op_counts.

        Args:
          code: code object whose bytecode is shown.
          level: nesting level used for indentation.
        """
        indent = INDENT * level
        print(to_hexstr(code.co_code, level, wrap=True))

        if self.dis_bytecode:
            print(indent + "disassembled:")
            disassemble(code, indent, self.op_counts, sys.stdout)

    def show_code(self, code, level=0):
        """Print a code object, e.g. metadata, bytecode, and consts.

        Every co_* attribute except co_code and co_consts is printed as a
        "name: value" line; those two are printed afterwards in their own
        sections.

        Args:
          code: code object to print.
          level: nesting level used for indentation.
        """
        indent = INDENT * level

        for name in dir(code):
            if not name.startswith("co_"):
                continue
            if name in ("co_code", "co_consts"):
                continue
            value = getattr(code, name)
            # Check the specially formatted attributes before the generic
            # str case: when co_lnotab is a str, testing isinstance() first
            # would make its hex formatting unreachable.
            if name == "co_flags":
                value = show_flags(value)
            elif name == "co_lnotab":
                value = "0x(%s)" % to_hexstr(value)
            elif isinstance(value, str):
                value = repr(value)
            print("%s%s%s" % (indent, (name+":").ljust(NAME_OFFSET), value))

        print("%sco_consts" % indent)
        self.show_consts(code.co_consts, level=level+1)

        print("%sco_code" % indent)
        self.show_bytecode(code, level=level+1)

    def Visit(self, f):
        """Write a readable listing of a .pyc file to stdout.

        Args:
          f: binary file-like object positioned at the start of a .pyc file.
        """
        magic, unixtime, timestamp, code = unpack_pyc(f)

        magic = "0x(%s)" % to_hexstr(magic)
        print(" ## inspecting pyc file ##")
        print("magic number: %s" % magic)
        print("timestamp: %s (%s)" % (unixtime, timestamp))
        print("code")
        self.show_code(code, level=1)
        print(" ## done inspecting pyc file ##")

    def Report(self, f=sys.stdout):
        """Write the opcode histogram gathered so far to `f`.

        Args:
          f: file-like object to write to; defaults to the sys.stdout that
            was current when this class was defined.
        """
        # The leading blank line goes to `f` like the rest of the report, not
        # unconditionally to stdout.
        print(file=f)
        print('Opcode Histogram:', file=f)
        for op, count in self.op_counts.most_common():
            print('%5d %s' % (count, dis.opname[op]), file=f)

        print('', file=f)
        print('%d unique opcodes' % len(self.op_counts), file=f)