OILS / demo / old / gen_oheap_cpp.py View on Github | oilshell.org

395 lines, 211 significant
1#!/usr/bin/env python2
2"""
3asdl_cpp.py
4
5Turn an ASDL schema into C++ code that reads the OHeap V1 format.
6
7TODO:
8- Optional fields
9 - in osh, it's only used in two places:
10 - arith_expr? for slice length
11 - word? for var replace
12 - So you're already using pointers, can encode the NULL pointer.
13
14- Change everything to use references instead of pointers? Non-nullable.
15- Unify ClassDefVisitor and MethodBodyVisitor.
16 - Whether you need a separate method body should be a flag.
17 - offset calculations are duplicated
18- generate a C++ pretty-printer
19
Technically we don't even need alignment?  I guess the reason for it is to
increase the address space: references are decoded as 3-byte integers, so with
an alignment of 1 we can address 16 MiB, and with an alignment of 4 we can
address 64 MiB.

Everything is decoded on the fly, or is a char*, which I don't think has to be
aligned (because the natural alignment would be 1 byte anyway).
25"""
26from __future__ import print_function
27
28import sys
29
30from asdl import asdl_ as asdl
31from asdl import front_end
32from asdl import meta
33from asdl import runtime
34from asdl import visitor
35from misc.old import encode
36
class ChainOfVisitors:
  """Fan a single VisitModule() call out to several visitors, in order."""

  def __init__(self, *visitors):
    # Stored as the tuple that *args produces; it is only ever iterated.
    self.visitors = visitors

  def VisitModule(self, module):
    """Run each visitor over `module`, in the order given to __init__."""
    for each in self.visitors:
      each.VisitModule(module)
44
45
46_BUILTINS = {
47 'string': 'char*', # A read-only string is a char*
48 'int': 'int',
49 'bool': 'bool',
50 'id': 'Id', # Application specific hack for now
51}
52
class ForwardDeclareVisitor(visitor.AsdlVisitor):
  """Emit a C++ forward declaration for each compound sum and product type.

  An ASDL schema may refer to a type before it is defined, but C++ needs the
  class name declared first.
  """

  def VisitCompoundSum(self, sum, name, depth):
    """Declare the base class generated for a compound sum type."""
    self.Emit("class %s_t;" % (name,), depth)

  def VisitProduct(self, product, name, depth):
    """Declare the class generated for a product type."""
    self.Emit("class %s_t;" % (name,), depth)

  def EmitFooter(self):
    """Finish the section with a single blank line."""
    self.Emit("", 0)
66
67
class ClassDefVisitor(visitor.AsdlVisitor):
  """Generate C++ classes and type-safe enums.

  Class declarations are emitted while the schema is visited.  Accessor bodies
  that need a static_cast<> to a generated class are queued in self.footer and
  written out by EmitFooter().
  """

  def __init__(self, f, enc_params, type_lookup, enum_types=None):
    """
    Args:
      f: output stream, handed to the base visitor.
      enc_params: encoding parameters; ref_width and pointer_type are read.
      type_lookup: mapping from ASDL type name to its definition.
      enum_types: optional collection of C++ type names that should be read
        as enums even though they don't end in '_e'.
    """
    visitor.AsdlVisitor.__init__(self, f)
    self.ref_width = enc_params.ref_width
    self.type_lookup = type_lookup
    self.enum_types = enum_types or {}
    self.pointer_type = enc_params.pointer_type
    self.footer = []  # lines

  def _GetCppType(self, field):
    """Return a string for the C++ name of the type."""
    type_name = field.type

    cpp_type = _BUILTINS.get(type_name)
    if cpp_type is not None:
      return cpp_type

    typ = self.type_lookup[type_name]
    if isinstance(typ, asdl.Sum) and asdl.is_simple(typ):
      # Use the enum instead of the class.
      return "%s_e" % type_name

    # - Pointer for optional type.
    # - ints and strings should generally not be optional?  We don't have them
    #   in osh yet, so leave it out for now.
    if field.opt:
      return "%s_t*" % type_name

    return "%s_t&" % type_name

  def EmitFooter(self):
    """Write out the accessor bodies queued by VisitField()."""
    for line in self.footer:
      self.f.write(line)

  def _EmitEnum(self, sum, name, depth):
    """Emit 'enum class <name>_e' with one enumerator per variant of `sum`."""
    # Tags start at 1 because zero is reserved.
    enumerators = [
        "%s = %d" % (variant.name, tag)
        for tag, variant in enumerate(sum.types, 1)
    ]

    self.Emit("enum class %s_e : uint8_t {" % name, depth)
    self.Emit(", ".join(enumerators), depth + 1)
    self.Emit("};", depth)
    self.Emit("", depth)

  def VisitSimpleSum(self, sum, name, depth):
    """A simple sum (no fields on any variant) is just an enum."""
    self._EmitEnum(sum, name, depth)

  def VisitCompoundSum(self, sum, name, depth):
    """Emit the tag enum, the <name>_t base class, and one class per variant."""
    self._EmitEnum(sum, name, depth)

    self.Emit("class %s_t : public Obj {" % name, depth)
    self.Emit(" public:", depth)
    # All sum types have a tag
    self.Emit("%s_e tag() const {" % name, depth + 1)
    self.Emit("return static_cast<%s_e>(bytes_[0]);" % name, depth + 2)
    self.Emit("}", depth + 1)
    self.Emit("};", depth)
    self.Emit("", depth)

    # TODO: This should be replaced with a call to the generic
    # self.VisitChildren()
    super_name = "%s_t" % name
    for t in sum.types:
      self.VisitConstructor(t, super_name, depth)

    # rudimentary attribute handling
    for field in sum.attributes:
      type_name = str(field.type)
      assert type_name in runtime.BUILTIN_TYPES, type_name
      self.Emit("%s %s;" % (type_name, field.name), depth + 1)

  def VisitConstructor(self, cons, def_name, depth):
    """Emit a subclass of `def_name` for a variant; skipped if it has no fields."""
    if not cons.fields:
      return

    self.Emit("class %s : public %s {" % (cons.name, def_name), depth)
    self.Emit(" public:", depth)
    offset = 1  # for the ID
    for f in cons.fields:
      self.VisitField(f, cons.name, offset, depth + 1)
      offset += self.ref_width
    self.Emit("};", depth)
    self.Emit("", depth)

  def VisitProduct(self, product, name, depth):
    """Emit the <name>_t class for a product type, one accessor per field."""
    class_name = "%s_t" % name

    self.Emit("class %s : public Obj {" % class_name, depth)
    self.Emit(" public:", depth)
    offset = 0  # unlike a sum variant, no byte is skipped before the fields
    for f in product.fields:
      self.VisitField(f, class_name, offset, depth + 1)
      offset += self.ref_width

    for field in product.attributes:
      # rudimentary attribute handling
      type_name = str(field.type)
      assert type_name in runtime.BUILTIN_TYPES, type_name
      self.Emit("%s %s;" % (type_name, field.name), depth + 1)
    self.Emit("};", depth)
    self.Emit("", depth)

  def VisitField(self, field, type_name, offset, depth):
    """Emit the accessor(s) for one field of the class `type_name`.

    Even though they are inline, some of them can't be in the class {}, because
    static_cast<> requires inheritance relationships to be already declared.  We
    have to print all the classes first, then all the bodies that might use
    static_cast<>.

    http://stackoverflow.com/questions/5808758/why-is-a-static-cast-from-a-pointer-to-base-to-a-pointer-to-derived-invalid

    Args:
      field: ASDL field; its name, type, seq and opt attributes are read.
      type_name: C++ class owning the accessor, used to qualify the name of an
        out-of-line body ('BoolBinary::left' rather than 'left').
      offset: byte offset of the field inside the object.
      depth: indentation depth for the lines emitted inside the class.
    """
    ctype = self._GetCppType(field)
    name = field.name
    pointer_type = self.pointer_type

    if field.seq:  # Array/repeated
      # For size accessor, follow the ref, and then it's the first integer.
      self.Emit(
          'inline int %s_size(const %s* base) const {' % (name, pointer_type),
          depth)
      self.Emit('return Ref(base, %d).Int(0);' % offset, depth + 1)
      self.Emit('}', depth)

      params = 'const %s* base, int index' % pointer_type
      # Slot 0 of the array holds its size, hence index+1.
      prelude = 'int a = (index+1) * 3;'
      int_expr = 'Ref(base, %d).Int(a)' % offset
      str_expr = 'Ref(base, %d).Str(base, a)' % offset
      ref_expr = 'Ref(base, %d).Ref(base, a)' % offset
    else:  # not repeated
      params = 'const %s* base' % pointer_type
      prelude = None
      int_expr = 'Int(%d)' % offset
      str_expr = 'Str(base, %d)' % offset
      reader = 'Optional' if field.opt else 'Ref'
      ref_expr = '%s(base, %d)' % (reader, offset)

    def Signature(func_name):
      """Accessor signature that takes the base pointer (no trailing ' {')."""
      return 'inline const %s %s(%s) const' % (ctype, func_name, params)

    if field.seq:
      # Every array accessor needs the base pointer to follow the array ref.
      value_header = Signature(name) + ' {'
    else:
      # Ints and enums are stored in the object itself: no base pointer.
      value_header = 'inline %s %s() const {' % (ctype, name)

    if ctype in ('bool', 'int'):
      header = value_header
      result = int_expr

    elif ctype.endswith('_e') or ctype in self.enum_types:
      header = value_header
      result = 'static_cast<const %s>(%s)' % (ctype, int_expr)

    elif ctype == 'char*':
      header = Signature(name) + ' {'
      result = str_expr

    else:
      # Write function prototype now; write body later.  This static_cast<>
      # (downcast) causes problems if put within "class {}".
      qualified_name = '%s::%s' % (type_name, name)
      func_def = Signature(qualified_name) + ' {'
      func_body = 'return static_cast<const %s>(%s);' % (ctype, ref_expr)

      # depth 0 for bodies
      self.footer.extend(visitor.FormatLines(func_def, 0))
      if prelude:
        self.footer.extend(visitor.FormatLines(prelude, 1))
      self.footer.extend(visitor.FormatLines(func_body, 1))
      self.footer.append('}\n\n')

      self.Emit(Signature(name) + ';', depth)
      return

    self.Emit(header, depth)
    if prelude:
      self.Emit(prelude, depth + 1)
    self.Emit('return %s;' % result, depth + 1)
    self.Emit('}', depth)
290
291
def main(argv):
  """Command line entry point.

  Usage: <prog> cpp SCHEMA_PATH

  Loads the ASDL schema at SCHEMA_PATH and writes a C++ header to stdout: the
  Obj base class, forward declarations, the generated classes, and finally the
  inline Obj method bodies.

  Args:
    argv: full argument vector, including the program name at index 0.

  Raises:
    RuntimeError: if the action or the schema path is missing, or the action
      is not 'cpp'.
  """
  try:
    action = argv[1]
  except IndexError:
    raise RuntimeError('Action required')

  # TODO: Also generate a switch/static_cast<> pretty printer in C++!  For
  # debugging.  Might need to detect cycles though.
  if action == 'cpp':
    # Report a missing path the same way as a missing action, so the caller's
    # RuntimeError handler can print it instead of an IndexError traceback.
    try:
      schema_path = argv[2]
    except IndexError:
      raise RuntimeError('Schema path required')

    # NOTE: This import can't be at the top level osh/asdl_gen.py depends on
    # this gen_cpp.py module.  We should move all the main() functions out of
    # asdl/ and into command line tools.

    from core.meta import Id
    app_types = {'id': meta.UserType('id_kind_asdl', 'Id_t')}
    with open(schema_path) as input_f:
      module, type_lookup = front_end.LoadSchema(input_f, app_types)

    # TODO: gen_cpp.py should be a library and the application should add Id?
    # Or we should enable ASDL metaprogramming, and let Id be a metaprogrammed
    # simple sum type.

    f = sys.stdout

    # How do mutation of strings, arrays, etc. work?  Are they like C++
    # containers, or their own?  I think they mirror the oil language
    # semantics.
    # Every node should have a mirror.  MutableObj.  MutableRef (pointer).
    # MutableArithVar -- has std::string.  The mirrors are heap allocated.
    # All the mutable ones should support Dump()/Encode()?
    # You can just write more at the end... don't need to disturb existing
    # nodes?  Rewrite pointers.

    alignment = 4
    enc = encode.Params(alignment)
    d = {'pointer_type': enc.pointer_type}

    f.write("""\
#include <cstdint>

class Obj {
 public:
  // Decode a 3 byte integer from little endian
  inline int Int(int n) const;

  inline const Obj& Ref(const %(pointer_type)s* base, int n) const;

  inline const Obj* Optional(const %(pointer_type)s* base, int n) const;

  // NUL-terminated
  inline const char* Str(const %(pointer_type)s* base, int n) const;

 protected:
  uint8_t bytes_[1];  // first is ID; rest are a payload
};

""" % d)

    # Id should be treated as an enum.
    c = ChainOfVisitors(
        ForwardDeclareVisitor(f),
        ClassDefVisitor(f, enc, type_lookup, enum_types=['Id']))
    c.VisitModule(module)

    f.write("""\
inline int Obj::Int(int n) const {
  return bytes_[n] + (bytes_[n+1] << 8) + (bytes_[n+2] << 16);
}

inline const Obj& Obj::Ref(const %(pointer_type)s* base, int n) const {
  int offset = Int(n);
  return reinterpret_cast<const Obj&>(base[offset]);
}

inline const Obj* Obj::Optional(const %(pointer_type)s* base, int n) const {
  int offset = Int(n);
  if (offset) {
    return reinterpret_cast<const Obj*>(base + offset);
  } else {
    return nullptr;
  }
}

inline const char* Obj::Str(const %(pointer_type)s* base, int n) const {
  int offset = Int(n);
  return reinterpret_cast<const char*>(base + offset);
}
""" % d)
    # uint32_t* and char*/Obj* aren't related, so we need to use
    # reinterpret_cast<>.
    # http://stackoverflow.com/questions/10151834/why-cant-i-static-cast-between-char-and-unsigned-char

  else:
    raise RuntimeError('Invalid action %r' % action)
388
389
if __name__ == '__main__':
  # Usage errors surface as RuntimeError; report them in one line and exit 1.
  try:
    main(sys.argv)
  except RuntimeError as err:
    sys.stderr.write('FATAL: %s\n' % err)
    sys.exit(1)