OILS / opy / _regtest / src / asdl / gen_cpp.py View on Github | oilshell.org

399 lines, 213 significant
1#!/usr/bin/env python
2from __future__ import print_function
3"""
4asdl_cpp.py
5
6Turn an ASDL schema into C++ code.
7
8TODO:
9- Optional fields
10 - in osh, it's only used in two places:
11 - arith_expr? for slice length
12 - word? for var replace
13 - So you're already using pointers, can encode the NULL pointer.
14
15- Change everything to use references instead of pointers? Non-nullable.
16- Unify ClassDefVisitor and MethodBodyVisitor.
17 - Whether you need a separate method body should be a flag.
18 - offset calculations are duplicated
19- generate a C++ pretty-printer
20
21Technically we don't even need alignment? I guess the reason is to increase
22address space. If 1, then we have 16MiB of code. If 4, then we have 64 MiB.
23
24Everything is decoded on the fly, or is a char*, which I don't think has to be
25aligned (because the natural alignment would be 1 byte anyway.)
26"""
27
28import sys
29
30from asdl import asdl_ as asdl
31from asdl import encode
32from asdl import visitor
33
34from osh.meta import Id
35
36class ChainOfVisitors:
37 def __init__(self, *visitors):
38 self.visitors = visitors
39
40 def VisitModule(self, module):
41 for v in self.visitors:
42 v.VisitModule(module)
43
44
45_BUILTINS = {
46 'string': 'char*', # A read-only string is a char*
47 'int': 'int',
48 'bool': 'bool',
49 'id': 'Id', # Application specific hack for now
50}
51
52class ForwardDeclareVisitor(visitor.AsdlVisitor):
53 """Print forward declarations.
54
55 ASDL allows forward references of types, but C++ doesn't.
56 """
57 def VisitCompoundSum(self, sum, name, depth):
58 self.Emit("class %(name)s_t;" % locals(), depth)
59
60 def VisitProduct(self, product, name, depth):
61 self.Emit("class %(name)s_t;" % locals(), depth)
62
63 def EmitFooter(self):
64 self.Emit("", 0) # blank line
65
66
67class ClassDefVisitor(visitor.AsdlVisitor):
68 """Generate C++ classes and type-safe enums."""
69
70 def __init__(self, f, enc_params, type_lookup, enum_types=None):
71 visitor.AsdlVisitor.__init__(self, f)
72 self.ref_width = enc_params.ref_width
73 self.type_lookup = type_lookup
74 self.enum_types = enum_types or {}
75 self.pointer_type = enc_params.pointer_type
76 self.footer = [] # lines
77
78 def _GetCppType(self, field):
79 """Return a string for the C++ name of the type."""
80 type_name = field.type
81
82 cpp_type = _BUILTINS.get(type_name)
83 if cpp_type is not None:
84 return cpp_type
85
86 typ = self.type_lookup.ByTypeName(type_name)
87 if isinstance(typ, asdl.Sum) and asdl.is_simple(typ):
88 # Use the enum instead of the class.
89 return "%s_e" % type_name
90
91 # - Pointer for optional type.
92 # - ints and strings should generally not be optional? We don't have them
93 # in osh yet, so leave it out for now.
94 if field.opt:
95 return "%s_t*" % type_name
96
97 return "%s_t&" % type_name
98
99 def EmitFooter(self):
100 for line in self.footer:
101 self.f.write(line)
102
103 def _EmitEnum(self, sum, name, depth):
104 enum = []
105 for i in range(len(sum.types)):
106 type = sum.types[i]
107 enum.append("%s = %d" % (type.name, i + 1)) # zero is reserved
108
109 self.Emit("enum class %s_e : uint8_t {" % name, depth)
110 self.Emit(", ".join(enum), depth + 1)
111 self.Emit("};", depth)
112 self.Emit("", depth)
113
114 def VisitSimpleSum(self, sum, name, depth):
115 self._EmitEnum(sum, name, depth)
116
117 def VisitCompoundSum(self, sum, name, depth):
118 # This is a sign that Python needs string interpolation!!!
119 def Emit(s, depth=depth):
120 self.Emit(s % sys._getframe(1).f_locals, depth)
121
122 self._EmitEnum(sum, name, depth)
123
124 Emit("class %(name)s_t : public Obj {")
125 Emit(" public:")
126 # All sum types have a tag
127 Emit("%(name)s_e tag() const {", depth + 1)
128 Emit("return static_cast<%(name)s_e>(bytes_[0]);", depth + 2)
129 Emit("}", depth + 1)
130 Emit("};")
131 Emit("")
132
133 # TODO: This should be replaced with a call to the generic
134 # self.VisitChildren()
135 super_name = "%s_t" % name
136 for t in sum.types:
137 self.VisitConstructor(t, super_name, depth)
138
139 # rudimentary attribute handling
140 for field in sum.attributes:
141 type = str(field.type)
142 assert type in asdl.builtin_types, type
143 Emit("%s %s;" % (type, field.name), depth + 1)
144
145 def VisitConstructor(self, cons, def_name, depth):
146 #print(dir(cons))
147 if cons.fields:
148 self.Emit("class %s : public %s {" % (cons.name, def_name), depth)
149 self.Emit(" public:", depth)
150 offset = 1 # for the ID
151 for f in cons.fields:
152 self.VisitField(f, cons.name, offset, depth + 1)
153 offset += self.ref_width
154 self.Emit("};", depth)
155 self.Emit("", depth)
156
157 def VisitProduct(self, product, name, depth):
158 self.Emit("class %(name)s_t : public Obj {" % locals(), depth)
159 self.Emit(" public:", depth)
160 offset = 0
161 for f in product.fields:
162 type_name = '%s_t' % name
163 self.VisitField(f, type_name, offset, depth + 1)
164 offset += self.ref_width
165
166 for field in product.attributes:
167 # rudimentary attribute handling
168 type = str(field.type)
169 assert type in asdl.builtin_types, type
170 self.Emit("%s %s;" % (type, field.name), depth + 1)
171 self.Emit("};", depth)
172 self.Emit("", depth)
173
174 def VisitField(self, field, type_name, offset, depth):
175 """
176 Even though they are inline, some of them can't be in the class {}, because
177 static_cast<> requires inheritance relationships to be already declared. We
178 have to print all the classes first, then all the bodies that might use
179 static_cast<>.
180
181 http://stackoverflow.com/questions/5808758/why-is-a-static-cast-from-a-pointer-to-base-to-a-pointer-to-derived-invalid
182 """
183 ctype = self._GetCppType(field)
184 name = field.name
185 pointer_type = self.pointer_type
186 # Either 'left' or 'BoolBinary::left', depending on whether it's inline.
187 # Mutated later.
188 maybe_qual_name = name
189
190 func_proto = None
191 func_header = None
192 body_line1 = None
193 inline_body = None
194
195 if field.seq: # Array/repeated
196 # For size accessor, follow the ref, and then it's the first integer.
197 size_header = (
198 'inline int %(name)s_size(const %(pointer_type)s* base) const {')
199 size_body = "return Ref(base, %(offset)d).Int(0);"
200
201 self.Emit(size_header % locals(), depth)
202 self.Emit(size_body % locals(), depth + 1)
203 self.Emit("}", depth)
204
205 ARRAY_OFFSET = 'int a = (index+1) * 3;'
206 A_POINTER = (
207 'inline const %(ctype)s %(maybe_qual_name)s('
208 'const %(pointer_type)s* base, int index) const')
209
210 if ctype in ('bool', 'int'):
211 func_header = A_POINTER + ' {'
212 body_line1 = ARRAY_OFFSET
213 inline_body = 'return Ref(base, %(offset)d).Int(a);'
214
215 elif ctype.endswith('_e') or ctype in self.enum_types:
216 func_header = A_POINTER + ' {'
217 body_line1 = ARRAY_OFFSET
218 inline_body = (
219 'return static_cast<const %(ctype)s>(Ref(base, %(offset)d).Int(a));')
220
221 elif ctype == 'char*':
222 func_header = A_POINTER + ' {'
223 body_line1 = ARRAY_OFFSET
224 inline_body = 'return Ref(base, %(offset)d).Str(base, a);'
225
226 else:
227 # Write function prototype now; write body later.
228 func_proto = A_POINTER + ';'
229
230 maybe_qual_name = '%s::%s' % (type_name, name)
231 func_def = A_POINTER + ' {'
232 # This static_cast<> (downcast) causes problems if put within "class
233 # {}".
234 func_body = (
235 'return static_cast<const %(ctype)s>('
236 'Ref(base, %(offset)d).Ref(base, a));')
237
238 self.footer.extend(visitor.FormatLines(func_def % locals(), 0))
239 self.footer.extend(visitor.FormatLines(ARRAY_OFFSET, 1))
240 self.footer.extend(visitor.FormatLines(func_body % locals(), 1))
241 self.footer.append('}\n\n')
242 maybe_qual_name = name # RESET for later
243
244 else: # not repeated
245 SIMPLE = "inline %(ctype)s %(maybe_qual_name)s() const {"
246 POINTER = (
247 'inline const %(ctype)s %(maybe_qual_name)s('
248 'const %(pointer_type)s* base) const')
249
250 if ctype in ('bool', 'int'):
251 func_header = SIMPLE
252 inline_body = 'return Int(%(offset)d);'
253
254 elif ctype.endswith('_e') or ctype in self.enum_types:
255 func_header = SIMPLE
256 inline_body = 'return static_cast<const %(ctype)s>(Int(%(offset)d));'
257
258 elif ctype == 'char*':
259 func_header = POINTER + " {"
260 inline_body = 'return Str(base, %(offset)d);'
261
262 else:
263 # Write function prototype now; write body later.
264 func_proto = POINTER + ";"
265
266 maybe_qual_name = '%s::%s' % (type_name, name)
267 func_def = POINTER + ' {'
268 if field.opt:
269 func_body = (
270 'return static_cast<const %(ctype)s>(Optional(base, %(offset)d));')
271 else:
272 func_body = (
273 'return static_cast<const %(ctype)s>(Ref(base, %(offset)d));')
274
275 # depth 0 for bodies
276 self.footer.extend(visitor.FormatLines(func_def % locals(), 0))
277 self.footer.extend(visitor.FormatLines(func_body % locals(), 1))
278 self.footer.append('}\n\n')
279 maybe_qual_name = name # RESET for later
280
281 if func_proto:
282 self.Emit(func_proto % locals(), depth)
283 else:
284 self.Emit(func_header % locals(), depth)
285 if body_line1:
286 self.Emit(body_line1, depth + 1)
287 self.Emit(inline_body % locals(), depth + 1)
288 self.Emit("}", depth)
289
290
291# Used by osh/ast_gen.py
292class CEnumVisitor(visitor.AsdlVisitor):
293
294 def VisitSimpleSum(self, sum, name, depth):
295 # Just use #define, since enums aren't namespaced.
296 for i, variant in enumerate(sum.types):
297 self.Emit('#define %s__%s %d' % (name, variant.name, i + 1), depth)
298 self.Emit("", depth)
299
300
301def main(argv):
302 try:
303 action = argv[1]
304 except IndexError:
305 raise RuntimeError('Action required')
306
307 # TODO: Also generate a switch/static_cast<> pretty printer in C++! For
308 # debugging. Might need to detect cycles though.
309 if action == 'cpp':
310 schema_path = argv[2]
311
312 app_types = {'id': asdl.UserType(Id)}
313 with open(schema_path) as input_f:
314 module, type_lookup = asdl.LoadSchema(input_f, app_types)
315
316 # TODO: gen_cpp.py should be a library and the application should add Id?
317 # Or we should enable ASDL metaprogramming, and let Id be a metaprogrammed
318 # simple sum type.
319
320 f = sys.stdout
321
322 # How do mutation of strings, arrays, etc. work? Are they like C++
323 # containers, or their own? I think they mirror the oil language
324 # semantics.
325 # Every node should have a mirror. MutableObj. MutableRef (pointer).
326 # MutableArithVar -- has std::string. The mirrors are heap allocated.
327 # All the mutable ones should support Dump()/Encode()?
328 # You can just write more at the end... don't need to disturb existing
329 # nodes? Rewrite pointers.
330
331 alignment = 4
332 enc = encode.Params(alignment)
333 d = {'pointer_type': enc.pointer_type}
334
335 f.write("""\
336#include <cstdint>
337
338class Obj {
339 public:
340 // Decode a 3 byte integer from little endian
341 inline int Int(int n) const;
342
343 inline const Obj& Ref(const %(pointer_type)s* base, int n) const;
344
345 inline const Obj* Optional(const %(pointer_type)s* base, int n) const;
346
347 // NUL-terminated
348 inline const char* Str(const %(pointer_type)s* base, int n) const;
349
350 protected:
351 uint8_t bytes_[1]; // first is ID; rest are a payload
352};
353
354""" % d)
355
356 # Id should be treated as an enum.
357 c = ChainOfVisitors(
358 ForwardDeclareVisitor(f),
359 ClassDefVisitor(f, enc, type_lookup, enum_types=['Id']))
360 c.VisitModule(module)
361
362 f.write("""\
363inline int Obj::Int(int n) const {
364 return bytes_[n] + (bytes_[n+1] << 8) + (bytes_[n+2] << 16);
365}
366
367inline const Obj& Obj::Ref(const %(pointer_type)s* base, int n) const {
368 int offset = Int(n);
369 return reinterpret_cast<const Obj&>(base[offset]);
370}
371
372inline const Obj* Obj::Optional(const %(pointer_type)s* base, int n) const {
373 int offset = Int(n);
374 if (offset) {
375 return reinterpret_cast<const Obj*>(base + offset);
376 } else {
377 return nullptr;
378 }
379}
380
381inline const char* Obj::Str(const %(pointer_type)s* base, int n) const {
382 int offset = Int(n);
383 return reinterpret_cast<const char*>(base + offset);
384}
385""" % d)
386 # uint32_t* and char*/Obj* aren't related, so we need to use
387 # reinterpret_cast<>.
388 # http://stackoverflow.com/questions/10151834/why-cant-i-static-cast-between-char-and-unsigned-char
389
390 else:
391 raise RuntimeError('Invalid action %r' % action)
392
393
394if __name__ == '__main__':
395 try:
396 main(sys.argv)
397 except RuntimeError as e:
398 print('FATAL: %s' % e, file=sys.stderr)
399 sys.exit(1)