OILS / opy / compiler2 / dis_tool.py View on Github | oilshell.org

259 lines, 163 significant
1#!/usr/bin/env python2
2from __future__ import print_function
3"""inspect_pyc module
4
5This is a refactor of a recipe from Ned Batchelder's blog. He has
6given me permission to publish this. You can find the post at the
7following URL:
8
9 http://nedbatchelder.com/blog/200804/the_structure_of_pyc_files.html
10
11You may use this module as a script: "./inspect_pyc.py <PYC_FILE>".
12
13NOTE:
14You can also see bytecode with:
15import os, dis
16dis.dis(os)
17
18But that doesn't give all the metadata. It's also nicer than
19tools/dumppyc.py, which came with the 'compiler2' package.
20"""
21
22import marshal, struct, sys, time, types
23
24import consts # this package
25
26from opy.lib import dis
27
28
29INDENT = ' '
30MAX_HEX_LEN = 16
31NAME_OFFSET = 20
32
33
def to_hexstr(bytes_value, level=0, wrap=False):
    """Format a byte string as space-separated two-digit hex values.

    Args:
      bytes_value: the bytes to format.  Items may be ints or one-character
        strings; the latter are converted with ord().
      level: indentation level; only affects the output when wrap is true.
      wrap: if true, emit MAX_HEX_LEN bytes per line, each line prefixed with
        INDENT * level.  Otherwise return everything on a single line.

    Returns:
      The formatted string.
    """
    pad = INDENT * level
    full_rows, remainder = divmod(len(bytes_value), MAX_HEX_LEN)

    # Build one big %-template: a "%02x" slot for every byte, grouped in rows.
    row_templates = [" ".join(["%02x"] * MAX_HEX_LEN)] * full_rows
    if remainder:
        row_templates.append(" ".join(["%02x"] * remainder))

    if wrap:
        template = pad + ("\n" + pad).join(row_templates)
    else:
        template = " ".join(row_templates)

    try:
        # Works directly when iterating bytes_value yields integers.
        return template % tuple(bytes_value)
    except TypeError:
        # Iteration yielded one-character strings; convert them first.
        return template % tuple(ord(ch) for ch in bytes_value)
49
50
def ShowFlags(flags):
    """Render code object flags as hex, followed by the names of the set bits.

    Args:
      flags: integer bit field, e.g. co.co_flags.

    Returns:
      A string like "0x00043 NAME1 NAME2", or just the hex value when none of
      the bits listed in consts.VALUE_TO_NAME are set.
    """
    # Walk the known bits in ascending order so the output is deterministic.
    set_names = [
        consts.VALUE_TO_NAME[bit]
        for bit in sorted(consts.VALUE_TO_NAME)
        if flags & bit
    ]

    hex_repr = "0x%05x" % flags
    if not set_names:
        return hex_repr
    return '%s %s' % (hex_repr, ' '.join(set_names))
62
63
def unpack_pyc(f):
    """Read the header and top-level object from a .pyc file.

    The layout read here is: 4 bytes of magic number, a 4-byte modification
    time, then a marshalled object.

    Args:
      f: binary file object positioned at the start of the .pyc data.

    Returns:
      A tuple (magic, unixtime, timestamp, code):
        magic: the first 4 bytes, unmodified.
        unixtime: the modification time as an integer.
        timestamp: unixtime formatted by time.asctime() in local time.
        code: whatever marshal.load() returns for the rest of the stream.

    Raises:
      struct.error: if fewer than 4 bytes are available for the time field.
    """
    magic = f.read(4)
    # The mtime field is written little-endian on every platform.  A bare "I"
    # would decode it with the host's native byte order, which is wrong on
    # big-endian machines.
    unixtime = struct.unpack("<I", f.read(4))[0]
    timestamp = time.asctime(time.localtime(unixtime))
    # NOTE: marshal is not hardened against malicious data; only point this
    # tool at .pyc files you trust.
    code = marshal.load(f)
    return magic, unixtime, timestamp, code
70
71
72# Enhancements:
73# - Actually print the line of code! That will be very helpful.
74
75def disassemble(co, indent, f):
76 """Copied from dis module.
77
78 Args:
79 co: code object
80 indent: indentation to print with
81
82 NOTE: byterun/pyobj.py:Frame.decode_next does something very similar.
83 """
84 def out(*args, **kwargs):
85 print(*args, file=f, **kwargs)
86
87 code = co.co_code
88 labels = dis.findlabels(code)
89 linestarts = dict(dis.findlinestarts(co))
90 n = len(code)
91 i = 0
92 extended_arg = 0
93 free = None
94
95 while i < n:
96 c = code[i]
97 op = ord(c)
98
99 if i in linestarts:
100 if i > 0:
101 out()
102 prefix = linestarts[i]
103 else:
104 prefix = ''
105 out('%s%4s' % (indent, prefix), end=' ')
106
107 if i in labels: # Jump targets get a special symbol
108 arrow = '>>'
109 else:
110 arrow = ' '
111
112 out(' %s %4r %-20s ' % (arrow, i, dis.opname[op]), end=' ')
113 i += 1
114 if op >= dis.HAVE_ARGUMENT:
115 oparg = ord(code[i]) + ord(code[i+1])*256 + extended_arg
116 extended_arg = 0
117 i += 2
118 if op == dis.EXTENDED_ARG:
119 extended_arg = oparg*65536L
120
121 oparg_str = None
122
123 if op in dis.hasconst:
124 c = co.co_consts[oparg]
125 if isinstance(c, types.CodeType):
126 # %r prints a memory address, which inhibits diffing
127 oparg_str = '(code object %s %s %s)' % (
128 c.co_name, c.co_filename, c.co_firstlineno)
129 else:
130 oparg_str = '(%r)' % (c,)
131
132 elif op in dis.hasname:
133 oparg_str = '(%s)' % (co.co_names[oparg],)
134
135 elif op in dis.hasjrel:
136 oparg_str = '(to %r)' % (i + oparg,)
137
138 elif op in dis.haslocal:
139 oparg_str = '(%s)' % (co.co_varnames[oparg],)
140
141 elif op in dis.hascompare:
142 oparg_str = '(%s)' % (dis.cmp_op[oparg],)
143
144 elif op in dis.hasfree:
145 if free is None:
146 free = co.co_cellvars + co.co_freevars
147 oparg_str = '(%s)' % (free[oparg],)
148
149 if oparg_str:
150 out('%5r %s' % (oparg, oparg_str), end=' ')
151 else:
152 out('%5r' % oparg, end=' ')
153
154 out()
155
156
def ParseOps(code):
    """A lightweight parser.  Does some of what disassemble() does.

    Args:
      code: raw bytecode (e.g. co.co_code) in which every opcode
        >= dis.HAVE_ARGUMENT is followed by a 2-byte little-endian argument.

    Yields:
      (opname, oparg) for each instruction, in order.  oparg is the integer
      argument, including the contribution of a preceding EXTENDED_ARG, or
      None for opcodes that take no argument.

    Raises:
      IndexError: if the bytecode ends in the middle of an argument.
    """
    # A bytearray yields integers whether 'code' is a str or a bytes object,
    # so no per-byte ord() is needed.
    code = bytearray(code)
    n = len(code)
    i = 0
    extended_arg = 0

    while i < n:
        op = code[i]
        i += 1

        # Reset per instruction so that argument-less opcodes never report
        # the argument of an earlier instruction (or an unbound name).
        oparg = None
        if op >= dis.HAVE_ARGUMENT:
            oparg = code[i] + code[i+1]*256 + extended_arg
            extended_arg = 0
            i += 2
            if op == dis.EXTENDED_ARG:
                # Supplies the high 16 bits of the next instruction's argument.
                extended_arg = oparg * 65536

        yield dis.opname[op], oparg
177
178
class Visitor(object):
    """Prints a readable listing of a .pyc file and its code objects to stdout."""

    def __init__(self, dis_bytecode=True, co_name=None):
        """
        Args:
          dis_bytecode: Whether to show disassembly.
          co_name: if set, only code objects whose co_name equals this value
            are printed.  Other code objects are skipped, but the code
            objects nested in their constants are still searched.
        """
        self.dis_bytecode = dis_bytecode
        # Name of thing to print
        self.co_name = co_name

    def show_consts(self, consts, level=0):
        """Print each constant with its index, descending into code objects.

        Args:
          consts: sequence of constants, e.g. co.co_consts.
          level: indentation level for the printed lines.
        """
        indent = INDENT * level
        for i, obj in enumerate(consts):
            if isinstance(obj, types.CodeType):
                print("%s%s (code object)" % (indent, i))
                # RECURSIVE CALL.
                self.show_code(obj, level=level+1)
            else:
                print("%s%s %r" % (indent, i, obj))

    def maybe_show_consts(self, consts, level=0):
        """Descend into nested code objects without printing other constants.

        Used by show_code() for a code object that the co_name filter skipped.
        """
        for obj in consts:
            if isinstance(obj, types.CodeType):
                self.show_code(obj, level=level+1)  # RECURSIVE CALL.

    def show_bytecode(self, code, level=0):
        """Print co_code as a hex dump, then optionally its disassembly.

        The disassembly comes from this module's disassemble() function and is
        only printed when self.dis_bytecode is true.
        """
        indent = INDENT * level
        print(to_hexstr(code.co_code, level, wrap=True))

        if self.dis_bytecode:
            print(indent + "disassembled:")
            disassemble(code, indent, sys.stdout)

    def show_code(self, code, level=0):
        """Print a code object, e.g. metadata, bytecode, and consts."""

        # Filter recursive call
        if self.co_name and code.co_name != self.co_name:
            self.maybe_show_consts(code.co_consts, level=level+1)
            return

        indent = INDENT * level

        for name in dir(code):
            if not name.startswith("co_"):
                continue
            # These two get their own sections below.
            if name in ("co_code", "co_consts"):
                continue
            value = getattr(code, name)
            # Attribute-specific formatting has to be tested before the
            # generic string case: co_lnotab is itself a str on Python 2, so
            # checking isinstance(value, str) first would make its hex
            # formatting unreachable.
            if name == "co_flags":
                value = ShowFlags(value)
            elif name == "co_lnotab":
                value = "0x(%s)" % to_hexstr(value)
            elif isinstance(value, str):
                value = repr(value)
            print("%s%s%s" % (indent, (name+":").ljust(NAME_OFFSET), value))

        # Show bytecode FIRST, and then consts.  There is nested bytecode in
        # the consts, so it's a 'top-down' order.
        print("%sco_code" % indent)
        self.show_bytecode(code, level=level+1)

        print("%sco_consts" % indent)
        self.show_consts(code.co_consts, level=level+1)

    def Visit(self, f):
        """Write a readable listing of a .pyc file to stdout.

        Args:
          f: binary file object positioned at the start of the .pyc data.
        """
        magic, unixtime, timestamp, code = unpack_pyc(f)

        magic = "0x(%s)" % to_hexstr(magic)
        print(" ## inspecting pyc file ##")
        print("magic number: %s" % magic)
        print("timestamp: %s (%s)" % (unixtime, timestamp))
        print("code")
        self.show_code(code, level=1)
        print(" ## done inspecting pyc file ##")