1 | #!/usr/bin/env python2
|
2 | """
|
3 | asdl_cpp.py
|
4 |
|
5 | Turn an ASDL schema into C++ code that reads the OHeap V1 format.
|
6 |
|
7 | TODO:
|
8 | - Optional fields
|
9 | - in osh, it's only used in two places:
|
10 | - arith_expr? for slice length
|
11 | - word? for var replace
|
12 | - So you're already using pointers, can encode the NULL pointer.
|
13 |
|
14 | - Change everything to use references instead of pointers? Non-nullable.
|
15 | - Unify ClassDefVisitor and MethodBodyVisitor.
|
16 | - Whether you need a separate method body should be a flag.
|
17 | - offset calculations are duplicated
|
18 | - generate a C++ pretty-printer
|
19 |
|
20 | Technically we don't even need alignment? I guess the reason is to increase
|
21 | address space. If 1, then we have 16MiB of code. If 4, then we have 64 MiB.
|
22 |
|
23 | Everything is decoded on the fly, or is a char*, which I don't think has to be
|
24 | aligned (because the natural alignment would be 1 byte anyway.)
|
25 | """
|
26 | from __future__ import print_function
|
27 |
|
28 | import sys
|
29 |
|
30 | from asdl import asdl_ as asdl
|
31 | from asdl import front_end
|
32 | from asdl import meta
|
33 | from asdl import runtime
|
34 | from asdl import visitor
|
35 | from misc.old import encode
|
36 |
|
class ChainOfVisitors:
  """Runs several visitors over the same module, one after another."""

  def __init__(self, *visitors):
    # Stored as the tuple of positional arguments, in call order.
    self.visitors = visitors

  def VisitModule(self, module):
    """Call VisitModule(module) on every visitor, in the order given."""
    for each_visitor in self.visitors:
      each_visitor.VisitModule(module)
|
44 |
|
45 |
|
46 | _BUILTINS = {
|
47 | 'string': 'char*', # A read-only string is a char*
|
48 | 'int': 'int',
|
49 | 'bool': 'bool',
|
50 | 'id': 'Id', # Application specific hack for now
|
51 | }
|
52 |
|
class ForwardDeclareVisitor(visitor.AsdlVisitor):
  """Emit a C++ forward declaration for each compound sum and product type.

  ASDL lets a type be referenced before it is defined; C++ does not, so the
  class names are declared up front.
  """

  def _EmitDeclaration(self, name, depth):
    """Emit 'class <name>_t;' at the given depth."""
    self.Emit("class %s_t;" % name, depth)

  def VisitCompoundSum(self, sum, name, depth):
    self._EmitDeclaration(name, depth)

  def VisitProduct(self, product, name, depth):
    self._EmitDeclaration(name, depth)

  def EmitFooter(self):
    # One blank line after the declarations.
    self.Emit("", 0)
|
66 |
|
67 |
|
class ClassDefVisitor(visitor.AsdlVisitor):
  """Generate C++ classes and type-safe enums.

  Each simple sum becomes an 'enum class <name>_e'.  Each compound sum becomes
  that enum plus a base class '<name>_t' with a tag() accessor, and one
  subclass per constructor that has fields.  Each product becomes a class
  '<name>_t'.

  Accessors that return another generated class are only declared inside the
  class body; their definitions are collected in self.footer and written by
  EmitFooter().  (Presumably the base visitor calls EmitFooter() after all
  types have been visited -- that call is not in this class.)
  """

  # Statement that turns an element index into an offset inside an encoded
  # array.  index+1 skips the array's first integer, which the <name>_size()
  # accessor reads as the length.
  # NOTE(review): the 3 is hard-coded; it presumably is the encoded integer
  # width (Obj::Int decodes 3 bytes) -- confirm whether it should track
  # ref_width.
  _ARRAY_OFFSET = 'int a = (index+1) * 3;'

  # Accessor signatures, without the trailing ' {' or ';'.
  _ARRAY_ACCESSOR = (
      'inline const %(ctype)s %(maybe_qual_name)s('
      'const %(pointer_type)s* base, int index) const')
  _POINTER_ACCESSOR = (
      'inline const %(ctype)s %(maybe_qual_name)s('
      'const %(pointer_type)s* base) const')
  _SIMPLE_ACCESSOR = 'inline %(ctype)s %(maybe_qual_name)s() const'

  def __init__(self, f, enc_params, type_lookup, enum_types=None):
    """
    Args:
      f: output file object; handed to AsdlVisitor.__init__.  EmitFooter()
        writes to self.f, which the base class presumably sets from it.
      enc_params: encoding parameters; only ref_width and pointer_type are
        read.
      type_lookup: mapping from ASDL type name to its definition.
      enum_types: optional container of extra C++ type names that should be
        decoded like enums.  Only membership tests are performed on it.
    """
    visitor.AsdlVisitor.__init__(self, f)
    self.ref_width = enc_params.ref_width
    self.type_lookup = type_lookup
    self.enum_types = enum_types or {}
    self.pointer_type = enc_params.pointer_type
    self.footer = []  # lines

  def _GetCppType(self, field):
    """Return a string for the C++ name of the type of the field.

    Builtins map through _BUILTINS, simple sums to '<type>_e', optional fields
    to a pointer '<type>_t*', and everything else to a reference '<type>_t&'.

    Raises:
      KeyError: if the type is neither a builtin nor in self.type_lookup.
    """
    type_name = field.type

    cpp_type = _BUILTINS.get(type_name)
    if cpp_type is not None:
      return cpp_type

    typ = self.type_lookup[type_name]
    if isinstance(typ, asdl.Sum) and asdl.is_simple(typ):
      # Use the enum instead of the class.
      return "%s_e" % type_name

    # - Pointer for optional type.
    # - ints and strings should generally not be optional?  We don't have them
    #   in osh yet, so leave it out for now.
    if field.opt:
      return "%s_t*" % type_name

    return "%s_t&" % type_name

  def EmitFooter(self):
    """Write the deferred out-of-class accessor definitions."""
    for line in self.footer:
      self.f.write(line)

  def _EmitEnum(self, sum, name, depth):
    """Emit 'enum class <name>_e' with one enumerator per constructor."""
    # Tags are numbered from 1; zero is reserved.
    enumerators = [
        "%s = %d" % (variant.name, tag)
        for tag, variant in enumerate(sum.types, 1)
    ]

    self.Emit("enum class %s_e : uint8_t {" % name, depth)
    self.Emit(", ".join(enumerators), depth + 1)
    self.Emit("};", depth)
    self.Emit("", depth)

  def VisitSimpleSum(self, sum, name, depth):
    self._EmitEnum(sum, name, depth)

  def VisitCompoundSum(self, sum, name, depth):
    """Emit the tag enum, the base class, and one class per constructor."""
    self._EmitEnum(sum, name, depth)

    super_name = "%s_t" % name
    enum_name = "%s_e" % name

    self.Emit("class %s : public Obj {" % super_name, depth)
    self.Emit(" public:", depth)
    # All sum types have a tag
    self.Emit("%s tag() const {" % enum_name, depth + 1)
    self.Emit("return static_cast<%s>(bytes_[0]);" % enum_name, depth + 2)
    self.Emit("}", depth + 1)
    self.Emit("};", depth)
    self.Emit("", depth)

    # TODO: This should be replaced with a call to the generic
    # self.VisitChildren()
    for t in sum.types:
      self.VisitConstructor(t, super_name, depth)

    # rudimentary attribute handling
    # NOTE(review): these declarations are emitted after every class above
    # has been closed, and use the ASDL type name rather than a C++ type --
    # confirm this is intended.
    for field in sum.attributes:
      type_name = str(field.type)
      assert type_name in runtime.BUILTIN_TYPES, type_name
      self.Emit("%s %s;" % (type_name, field.name), depth + 1)

  def VisitConstructor(self, cons, def_name, depth):
    """Emit a subclass of def_name for a constructor that has fields.

    Nothing is emitted for a constructor without fields.
    """
    if not cons.fields:
      return

    self.Emit("class %s : public %s {" % (cons.name, def_name), depth)
    self.Emit(" public:", depth)
    offset = 1  # for the ID
    for f in cons.fields:
      self.VisitField(f, cons.name, offset, depth + 1)
      offset += self.ref_width
    self.Emit("};", depth)
    self.Emit("", depth)

  def VisitProduct(self, product, name, depth):
    """Emit class '<name>_t' with one accessor per field."""
    type_name = '%s_t' % name

    self.Emit("class %s : public Obj {" % type_name, depth)
    self.Emit(" public:", depth)
    offset = 0
    for f in product.fields:
      self.VisitField(f, type_name, offset, depth + 1)
      offset += self.ref_width

    for field in product.attributes:
      # rudimentary attribute handling
      attr_type = str(field.type)
      assert attr_type in runtime.BUILTIN_TYPES, attr_type
      self.Emit("%s %s;" % (attr_type, field.name), depth + 1)
    self.Emit("};", depth)
    self.Emit("", depth)

  def _DeferDefinition(self, func_def, body_lines):
    """Queue an out-of-class function definition for EmitFooter().

    Args:
      func_def: the already formatted first line, ending in ' {'.
      body_lines: the already formatted statements of the body.
    """
    # depth 0 for the definition line, depth 1 for its body
    self.footer.extend(visitor.FormatLines(func_def, 0))
    for line in body_lines:
      self.footer.extend(visitor.FormatLines(line, 1))
    self.footer.append('}\n\n')

  def VisitField(self, field, type_name, offset, depth):
    """Emit the accessor(s) for one field of the class type_name.

    Even though they are inline, some of them can't be in the class {}, because
    static_cast<> requires inheritance relationships to be already declared. We
    have to print all the classes first, then all the bodies that might use
    static_cast<>.

    http://stackoverflow.com/questions/5808758/why-is-a-static-cast-from-a-pointer-to-base-to-a-pointer-to-derived-invalid

    Args:
      field: the ASDL field; its name, type, seq and opt attributes are read.
      type_name: C++ class name used to qualify out-of-class definitions.
      offset: integer offset of the field, formatted into the accessor body.
      depth: indentation depth passed to Emit().
    """
    ctype = self._GetCppType(field)
    name = field.name

    # Substitutions for code inside the class body: plain member name.
    inline_vars = {
        'ctype': ctype,
        'name': name,
        'offset': offset,
        'pointer_type': self.pointer_type,
        'maybe_qual_name': name,
    }
    # Substitutions for out-of-class definitions: 'Type::member'.
    qualified_vars = dict(
        inline_vars, maybe_qual_name='%s::%s' % (type_name, name))

    is_number = ctype in ('bool', 'int')
    is_enum = ctype.endswith('_e') or ctype in self.enum_types
    is_string = ctype == 'char*'

    body_line1 = None  # optional statement placed before the return
    deferred = False  # True: declare here, define in the footer

    if field.seq:  # Array/repeated
      # For size accessor, follow the ref, and then it's the first integer.
      self.Emit(
          'inline int %(name)s_size(const %(pointer_type)s* base) const {'
          % inline_vars, depth)
      self.Emit("return Ref(base, %(offset)d).Int(0);" % inline_vars,
                depth + 1)
      self.Emit("}", depth)

      signature = self._ARRAY_ACCESSOR
      body_line1 = self._ARRAY_OFFSET

      if is_number:
        body = 'return Ref(base, %(offset)d).Int(a);'
      elif is_enum:
        body = (
            'return static_cast<const %(ctype)s>'
            '(Ref(base, %(offset)d).Int(a));')
      elif is_string:
        body = 'return Ref(base, %(offset)d).Str(base, a);'
      else:
        # This static_cast<> (downcast) causes problems if put within "class
        # {}".
        deferred = True
        body = (
            'return static_cast<const %(ctype)s>('
            'Ref(base, %(offset)d).Ref(base, a));')

    else:  # not repeated
      if is_number:
        signature = self._SIMPLE_ACCESSOR
        body = 'return Int(%(offset)d);'
      elif is_enum:
        signature = self._SIMPLE_ACCESSOR
        body = 'return static_cast<const %(ctype)s>(Int(%(offset)d));'
      elif is_string:
        signature = self._POINTER_ACCESSOR
        body = 'return Str(base, %(offset)d);'
      else:
        deferred = True
        signature = self._POINTER_ACCESSOR
        if field.opt:
          body = (
              'return static_cast<const %(ctype)s>'
              '(Optional(base, %(offset)d));')
        else:
          body = (
              'return static_cast<const %(ctype)s>(Ref(base, %(offset)d));')

    if deferred:
      # Write function prototype now; write body later.
      body_lines = [body % qualified_vars]
      if body_line1:
        body_lines.insert(0, body_line1)
      self._DeferDefinition((signature + ' {') % qualified_vars, body_lines)
      self.Emit((signature + ';') % inline_vars, depth)
    else:
      self.Emit((signature + ' {') % inline_vars, depth)
      if body_line1:
        self.Emit(body_line1, depth + 1)
      self.Emit(body % inline_vars, depth + 1)
      self.Emit("}", depth)
|
290 |
|
291 |
|
def main(argv):
  """Command line entry point.

  Usage: <prog> cpp SCHEMA_PATH

  For the 'cpp' action, loads the ASDL schema at SCHEMA_PATH and writes C++
  source to stdout: the Obj base class, the forward declarations and class
  definitions produced by the visitors, and finally the inline bodies of
  Obj's decoding methods.

  Args:
    argv: full argument list.  argv[1] is the action and, for 'cpp', argv[2]
      is the path of the schema file.

  Raises:
    RuntimeError: if the action is missing or unknown, or if 'cpp' is given
      without a schema path.
  """
  try:
    action = argv[1]
  except IndexError:
    raise RuntimeError('Action required')

  # TODO: Also generate a switch/static_cast<> pretty printer in C++!  For
  # debugging.  Might need to detect cycles though.
  if action != 'cpp':
    raise RuntimeError('Invalid action %r' % action)

  # A missing path is reported like a missing action, so that the caller's
  # RuntimeError handler prints it instead of an IndexError traceback.
  try:
    schema_path = argv[2]
  except IndexError:
    raise RuntimeError('Schema path required')

  # NOTE: This import can't be at the top level because osh/asdl_gen.py
  # depends on this gen_cpp.py module.  We should move all the main()
  # functions out of asdl/ and into command line tools.
  # NOTE(review): Id is not referenced below; the import is kept in case it is
  # needed for its side effects -- confirm.

  from core.meta import Id
  app_types = {'id': meta.UserType('id_kind_asdl', 'Id_t')}
  with open(schema_path) as input_f:
    module, type_lookup = front_end.LoadSchema(input_f, app_types)

  # TODO: gen_cpp.py should be a library and the application should add Id?
  # Or we should enable ASDL metaprogramming, and let Id be a metaprogrammed
  # simple sum type.

  f = sys.stdout

  # How do mutation of strings, arrays, etc. work?  Are they like C++
  # containers, or their own?  I think they mirror the oil language
  # semantics.
  # Every node should have a mirror.  MutableObj.  MutableRef (pointer).
  # MutableArithVar -- has std::string.  The mirrors are heap allocated.
  # All the mutable ones should support Dump()/Encode()?
  # You can just write more at the end... don't need to disturb existing
  # nodes?  Rewrite pointers.

  alignment = 4
  enc = encode.Params(alignment)
  d = {'pointer_type': enc.pointer_type}

  f.write("""\
#include <cstdint>

class Obj {
 public:
  // Decode a 3 byte integer from little endian
  inline int Int(int n) const;

  inline const Obj& Ref(const %(pointer_type)s* base, int n) const;

  inline const Obj* Optional(const %(pointer_type)s* base, int n) const;

  // NUL-terminated
  inline const char* Str(const %(pointer_type)s* base, int n) const;

 protected:
  uint8_t bytes_[1]; // first is ID; rest are a payload
};

""" % d)

  # Id should be treated as an enum.
  c = ChainOfVisitors(
      ForwardDeclareVisitor(f),
      ClassDefVisitor(f, enc, type_lookup, enum_types=['Id']))
  c.VisitModule(module)

  f.write("""\
inline int Obj::Int(int n) const {
  return bytes_[n] + (bytes_[n+1] << 8) + (bytes_[n+2] << 16);
}

inline const Obj& Obj::Ref(const %(pointer_type)s* base, int n) const {
  int offset = Int(n);
  return reinterpret_cast<const Obj&>(base[offset]);
}

inline const Obj* Obj::Optional(const %(pointer_type)s* base, int n) const {
  int offset = Int(n);
  if (offset) {
    return reinterpret_cast<const Obj*>(base + offset);
  } else {
    return nullptr;
  }
}

inline const char* Obj::Str(const %(pointer_type)s* base, int n) const {
  int offset = Int(n);
  return reinterpret_cast<const char*>(base + offset);
}
""" % d)
  # uint32_t* and char*/Obj* aren't related, so we need to use
  # reinterpret_cast<>.
  # http://stackoverflow.com/questions/10151834/why-cant-i-static-cast-between-char-and-unsigned-char
|
388 |
|
389 |
|
if __name__ == '__main__':
  # Script entry point.  A RuntimeError raised by main() is reported as a
  # single 'FATAL: ...' line on stderr and the process exits with status 1.
  # Other exception types are not caught here.
  try:
    main(sys.argv)
  except RuntimeError as e:
    print('FATAL: %s' % e, file=sys.stderr)
    sys.exit(1)
|