1 | #!/usr/bin/env python2
2 | """
3 | asdl_cpp.py
4 |
5 | Turn an ASDL schema into C++ code that reads the OHeap V1 format.
6 |
7 | TODO:
8 | - Optional fields
9 | - in osh, it's only used in two places:
10 | - arith_expr? for slice length
11 | - word? for var replace
12 | - So you're already using pointers, can encode the NULL pointer.
13 |
14 | - Change everything to use references instead of pointers? Non-nullable.
15 | - Unify ClassDefVisitor and MethodBodyVisitor.
16 | - Whether you need a separate method body should be a flag.
17 | - offset calculations are duplicated
18 | - generate a C++ pretty-printer
19 |
20 | Technically we don't even need alignment? I guess the reason is to increase
21 | address space. If 1, then we have 16MiB of code. If 4, then we have 64 MiB.
22 |
23 | Everything is decoded on the fly, or is a char*, which I don't think has to be
24 | aligned (because the natural alignment would be 1 byte anyway.)
25 | """
26 | from __future__ import print_function
27 |
28 | import sys
29 |
30 | from asdl import asdl_ as asdl
31 | from asdl import front_end
32 | from asdl import meta
33 | from asdl import runtime
34 | from asdl import visitor
35 | from misc.old import encode
36 |
class ChainOfVisitors:
    """Run several visitors, one after another, over the same module."""

    def __init__(self, *visitors):
        # Kept in the order given; VisitModule() walks them in that order.
        self.visitors = visitors

    def VisitModule(self, module):
        """Hand `module` to every visitor in turn."""
        for each in self.visitors:
            each.VisitModule(module)
44 |
45 |
# ASDL builtin type name -> C++ type used in the generated accessors.
_BUILTINS = dict(
    string='char*',  # A read-only string is a char*
    int='int',
    bool='bool',
    id='Id',  # Application specific hack for now
)
52 |
class ForwardDeclareVisitor(visitor.AsdlVisitor):
    """Emit a C++ forward declaration for each generated class.

    An ASDL schema may refer to a type before defining it; C++ needs the
    name declared first.
    """

    def _Declare(self, name, depth):
        # Every generated class is named <asdl type>_t.
        self.Emit("class %s_t;" % name, depth)

    def VisitCompoundSum(self, sum, name, depth):
        self._Declare(name, depth)

    def VisitProduct(self, product, name, depth):
        self._Declare(name, depth)

    def EmitFooter(self):
        # Blank line after the declarations.
        self.Emit("", 0)
66 |
67 |
class ClassDefVisitor(visitor.AsdlVisitor):
    """Emit C++ accessor classes and scoped enums for the ASDL types.

    Class declarations are written right away through self.Emit().  Accessors
    whose body needs a static_cast<> to another generated class are only
    declared inside the class; their definitions are queued in self.footer
    and written out by EmitFooter().
    """

    def __init__(self, f, enc_params, type_lookup, enum_types=None):
        visitor.AsdlVisitor.__init__(self, f)
        self.type_lookup = type_lookup
        self.enum_types = enum_types or {}
        self.ref_width = enc_params.ref_width
        self.pointer_type = enc_params.pointer_type
        self.footer = []  # lines

    def _GetCppType(self, field):
        """Return the C++ type name (a string) for an ASDL field."""
        type_name = field.type

        builtin = _BUILTINS.get(type_name)
        if builtin is not None:
            return builtin

        resolved = self.type_lookup[type_name]
        if isinstance(resolved, asdl.Sum) and asdl.is_simple(resolved):
            # A simple sum is represented by its enum, not by a class.
            return "%s_e" % type_name

        # Optional fields are pointers; everything else is a reference.
        # Optional ints and strings aren't handled: osh doesn't have any yet.
        template = "%s_t*" if field.opt else "%s_t&"
        return template % type_name

    def EmitFooter(self):
        """Write out the queued out-of-line accessor definitions."""
        write = self.f.write
        for chunk in self.footer:
            write(chunk)

    def _EmitEnum(self, sum, name, depth):
        """Emit 'enum class <name>_e : uint8_t' with one tag per variant."""
        # Tags count from 1 because zero is reserved.
        tags = ["%s = %d" % (variant.name, tag)
                for tag, variant in enumerate(sum.types, 1)]

        self.Emit("enum class %s_e : uint8_t {" % name, depth)
        self.Emit(", ".join(tags), depth + 1)
        self.Emit("};", depth)
        self.Emit("", depth)

    def _EmitAttributes(self, attributes, depth):
        """Rudimentary attribute handling: one '<type> <name>;' line each."""
        for attr in attributes:
            attr_type = str(attr.type)
            assert attr_type in runtime.BUILTIN_TYPES, attr_type
            self.Emit("%s %s;" % (attr_type, attr.name), depth)

    def VisitSimpleSum(self, sum, name, depth):
        self._EmitEnum(sum, name, depth)

    def VisitCompoundSum(self, sum, name, depth):
        """Emit the tag enum, the <name>_t base class, then each variant."""
        self._EmitEnum(sum, name, depth)

        base_class = "%s_t" % name
        enum_name = "%s_e" % name
        # (text, extra depth) for the base class; all sum types have a tag.
        base_class_lines = [
            ("class %s : public Obj {" % base_class, 0),
            (" public:", 0),
            ("%s tag() const {" % enum_name, 1),
            ("return static_cast<%s>(bytes_[0]);" % enum_name, 2),
            ("}", 1),
            ("};", 0),
            ("", 0),
        ]
        for text, extra in base_class_lines:
            self.Emit(text, depth + extra)

        # TODO: This should be replaced with a call to the generic
        # self.VisitChildren()
        for variant in sum.types:
            self.VisitConstructor(variant, base_class, depth)

        self._EmitAttributes(sum.attributes, depth + 1)

    def VisitConstructor(self, cons, def_name, depth):
        """Emit a subclass of def_name for a variant that has fields."""
        if not cons.fields:
            return

        self.Emit("class %s : public %s {" % (cons.name, def_name), depth)
        self.Emit(" public:", depth)
        # Offsets start at 1: the first position is used for the ID.
        for i, field in enumerate(cons.fields):
            self.VisitField(field, cons.name, 1 + i * self.ref_width,
                            depth + 1)
        self.Emit("};", depth)
        self.Emit("", depth)

    def VisitProduct(self, product, name, depth):
        """Emit the <name>_t class for a product type."""
        class_name = "%s_t" % name
        self.Emit("class %s : public Obj {" % class_name, depth)
        self.Emit(" public:", depth)
        for i, field in enumerate(product.fields):
            self.VisitField(field, class_name, i * self.ref_width, depth + 1)

        self._EmitAttributes(product.attributes, depth + 1)
        self.Emit("};", depth)
        self.Emit("", depth)

    def VisitField(self, field, type_name, offset, depth):
        """Emit the accessor (plus a _size accessor for repeated fields).

        Accessors are all inline, but not all of them can live inside the
        class {}: static_cast<> needs the inheritance relationship to be
        declared already.  So for fields that refer to another generated
        class, only the prototype is emitted here; the definition is queued
        in self.footer, to be printed after all the classes.

        http://stackoverflow.com/questions/5808758/why-is-a-static-cast-from-a-pointer-to-base-to-a-pointer-to-derived-invalid
        """
        ctype = self._GetCppType(field)
        name = field.name

        # Substitutions for text inside the class body: plain name, 'left'.
        in_class = {
            'ctype': ctype,
            'name': name,
            'maybe_qual_name': name,
            'pointer_type': self.pointer_type,
            'offset': offset,
        }
        # Same, for definitions outside the class: 'BoolBinary::left'.
        out_of_line = dict(in_class)
        out_of_line['maybe_qual_name'] = '%s::%s' % (type_name, name)

        if ctype in ('bool', 'int'):
            kind = 'int'
        elif ctype.endswith('_e') or ctype in self.enum_types:
            kind = 'enum'
        elif ctype == 'char*':
            kind = 'str'
        else:
            kind = 'ref'  # another generated class; needs static_cast<>

        if field.seq:  # Array/repeated
            # Size accessor: follow the ref, and then it's the first integer.
            self.Emit(
                'inline int %(name)s_size(const %(pointer_type)s* base) const {'
                % in_class, depth)
            self.Emit('return Ref(base, %(offset)d).Int(0);' % in_class,
                      depth + 1)
            self.Emit("}", depth)

            signature = (
                'inline const %(ctype)s %(maybe_qual_name)s('
                'const %(pointer_type)s* base, int index) const')
            inline_header = signature + ' {'
            index_line = 'int a = (index+1) * 3;'
            bodies = {
                'int': 'return Ref(base, %(offset)d).Int(a);',
                'enum': (
                    'return static_cast<const %(ctype)s>('
                    'Ref(base, %(offset)d).Int(a));'),
                'str': 'return Ref(base, %(offset)d).Str(base, a);',
                'ref': (
                    'return static_cast<const %(ctype)s>('
                    'Ref(base, %(offset)d).Ref(base, a));'),
            }
        else:  # not repeated
            signature = (
                'inline const %(ctype)s %(maybe_qual_name)s('
                'const %(pointer_type)s* base) const')
            if kind in ('int', 'enum'):
                # Decoded in place; no base pointer needed.
                inline_header = "inline %(ctype)s %(maybe_qual_name)s() const {"
            else:
                inline_header = signature + ' {'
            index_line = None
            if field.opt:
                ref_body = (
                    'return static_cast<const %(ctype)s>('
                    'Optional(base, %(offset)d));')
            else:
                ref_body = (
                    'return static_cast<const %(ctype)s>('
                    'Ref(base, %(offset)d));')
            bodies = {
                'int': 'return Int(%(offset)d);',
                'enum': 'return static_cast<const %(ctype)s>(Int(%(offset)d));',
                'str': 'return Str(base, %(offset)d);',
                'ref': ref_body,
            }

        body = bodies[kind]

        if kind == 'ref':
            # Queue the definition (depth 0 for bodies), then emit only the
            # prototype inside the class.
            footer = self.footer
            footer.extend(
                visitor.FormatLines((signature + ' {') % out_of_line, 0))
            if index_line:
                footer.extend(visitor.FormatLines(index_line, 1))
            footer.extend(visitor.FormatLines(body % out_of_line, 1))
            footer.append('}\n\n')

            self.Emit((signature + ';') % in_class, depth)
        else:
            self.Emit(inline_header % in_class, depth)
            if index_line:
                self.Emit(index_line, depth + 1)
            self.Emit(body % in_class, depth + 1)
            self.Emit("}", depth)
290 |
291 |
def main(argv):
    """Command-line entry point: generate C++ readers for an ASDL schema.

    Usage: <prog> cpp SCHEMA_PATH

    For the 'cpp' action, the schema at SCHEMA_PATH is loaded and the
    generated C++ (the Obj base class, forward declarations, class
    definitions, and the Obj method bodies) is written to stdout.

    Args:
      argv: The full argument vector; argv[0] is the program name.

    Raises:
      RuntimeError: If the action is missing or unknown, or if the 'cpp'
        action is given without a schema path.
    """
    try:
        action = argv[1]
    except IndexError:
        raise RuntimeError('Action required')

    # TODO: Also generate a switch/static_cast<> pretty printer in C++! For
    # debugging. Might need to detect cycles though.
    if action == 'cpp':
        # Report a missing path the same way as a missing action, so the
        # caller's RuntimeError handler prints it instead of a traceback.
        try:
            schema_path = argv[2]
        except IndexError:
            raise RuntimeError('Schema path required')

        # NOTE: This import can't be at the top level because osh/asdl_gen.py
        # depends on this gen_cpp.py module. We should move all the main()
        # functions out of asdl/ and into command line tools.
        # NOTE(review): Id is not referenced below; the import is kept in
        # case it is needed for its side effects -- confirm before removing.
        from core.meta import Id
        app_types = {'id': meta.UserType('id_kind_asdl', 'Id_t')}
        with open(schema_path) as input_f:
            module, type_lookup = front_end.LoadSchema(input_f, app_types)

        # TODO: gen_cpp.py should be a library and the application should add
        # Id? Or we should enable ASDL metaprogramming, and let Id be a
        # metaprogrammed simple sum type.

        f = sys.stdout

        # How do mutation of strings, arrays, etc. work? Are they like C++
        # containers, or their own? I think they mirror the oil language
        # semantics.
        # Every node should have a mirror. MutableObj. MutableRef (pointer).
        # MutableArithVar -- has std::string. The mirrors are heap allocated.
        # All the mutable ones should support Dump()/Encode()?
        # You can just write more at the end... don't need to disturb existing
        # nodes? Rewrite pointers.

        alignment = 4
        enc = encode.Params(alignment)
        d = {'pointer_type': enc.pointer_type}

        # Declaration of the Obj base class that every generated class
        # derives from.
        f.write("""\
#include <cstdint>

class Obj {
public:
// Decode a 3 byte integer from little endian
inline int Int(int n) const;

inline const Obj& Ref(const %(pointer_type)s* base, int n) const;

inline const Obj* Optional(const %(pointer_type)s* base, int n) const;

// NUL-terminated
inline const char* Str(const %(pointer_type)s* base, int n) const;

protected:
uint8_t bytes_[1]; // first is ID; rest are a payload
};

""" % d)

        # Id should be treated as an enum.
        c = ChainOfVisitors(
            ForwardDeclareVisitor(f),
            ClassDefVisitor(f, enc, type_lookup, enum_types=['Id']))
        c.VisitModule(module)

        # Definitions of the Obj methods declared above.
        f.write("""\
inline int Obj::Int(int n) const {
return bytes_[n] + (bytes_[n+1] << 8) + (bytes_[n+2] << 16);
}

inline const Obj& Obj::Ref(const %(pointer_type)s* base, int n) const {
int offset = Int(n);
return reinterpret_cast<const Obj&>(base[offset]);
}

inline const Obj* Obj::Optional(const %(pointer_type)s* base, int n) const {
int offset = Int(n);
if (offset) {
return reinterpret_cast<const Obj*>(base + offset);
} else {
return nullptr;
}
}

inline const char* Obj::Str(const %(pointer_type)s* base, int n) const {
int offset = Int(n);
return reinterpret_cast<const char*>(base + offset);
}
""" % d)
        # uint32_t* and char*/Obj* aren't related, so we need to use
        # reinterpret_cast<>.
        # http://stackoverflow.com/questions/10151834/why-cant-i-static-cast-between-char-and-unsigned-char

    else:
        raise RuntimeError('Invalid action %r' % action)
388 |
389 |
if __name__ == '__main__':
    # Turn expected failures into a one-line message and exit status 1.
    try:
        main(sys.argv)
    except RuntimeError as err:
        sys.stderr.write('FATAL: %s\n' % err)
        sys.exit(1)