| 1 | #!/usr/bin/env python2
 | 
| 2 | """
 | 
| 3 | asdl_cpp.py
 | 
| 4 | 
 | 
| 5 | Turn an ASDL schema into C++ code that reads the OHeap V1 format.
 | 
| 6 | 
 | 
| 7 | TODO:
 | 
| 8 | - Optional fields
 | 
| 9 |   - in osh, it's only used in two places:
 | 
| 10 |   - arith_expr? for slice length
 | 
| 11 |   - word? for var replace
 | 
| 12 |   - So you're already using pointers, can encode the NULL pointer.
 | 
| 13 | 
 | 
| 14 | - Change everything to use references instead of pointers?  Non-nullable.
 | 
| 15 | - Unify ClassDefVisitor and MethodBodyVisitor.
 | 
| 16 |   - Whether you need a separate method body should be a flag.
 | 
| 17 |   - offset calculations are duplicated
 | 
| 18 | - generate a C++ pretty-printer
 | 
| 19 | 
 | 
| 20 | Technically we don't even need alignment?  I guess the reason is to increase
 | 
| 21 | address space.  If 1, then we have 16MiB of code.  If 4, then we have 64 MiB.
 | 
| 22 | 
 | 
| 23 | Everything is decoded on the fly, or is a char*, which I don't think has to be
 | 
| 24 | aligned (because the natural alignment would be 1 byte anyway.)
 | 
| 25 | """
 | 
| 26 | from __future__ import print_function
 | 
| 27 | 
 | 
| 28 | import sys
 | 
| 29 | 
 | 
| 30 | from asdl import asdl_ as asdl
 | 
| 31 | from asdl import front_end
 | 
| 32 | from asdl import meta
 | 
| 33 | from asdl import runtime
 | 
| 34 | from asdl import visitor
 | 
| 35 | from misc.old import encode
 | 
| 36 | 
 | 
class ChainOfVisitors:
  """Fan a single VisitModule() call out to several visitors, in order."""

  def __init__(self, *visitors):
    # Stored exactly as received: the tuple of positional arguments.
    self.visitors = visitors

  def VisitModule(self, module):
    """Call VisitModule(module) on each wrapped visitor, first to last."""
    for each_visitor in self.visitors:
      each_visitor.VisitModule(module)
 | 
| 44 | 
 | 
| 45 | 
 | 
# Maps an ASDL builtin type name to the C++ type spelled in generated
# accessors.  Field types found here are returned as-is by
# ClassDefVisitor._GetCppType(), before any schema type lookup happens.
_BUILTINS = {
    'string': 'char*',  # A read-only string is a char*
    'int': 'int',
    'bool': 'bool',
    'id': 'Id',  # Application specific hack for now
}
 | 
| 52 | 
 | 
class ForwardDeclareVisitor(visitor.AsdlVisitor):
  """Emit a forward declaration for each generated C++ class.

  ASDL allows forward references of types, but C++ doesn't, so every
  compound sum and product type is declared up front.
  """

  def _Declare(self, name, depth):
    # One "class <name>_t;" line at the requested indentation depth.
    self.Emit("class %s_t;" % (name,), depth)

  def VisitCompoundSum(self, sum, name, depth):
    """Declare the base class of a compound sum type."""
    self._Declare(name, depth)

  def VisitProduct(self, product, name, depth):
    """Declare the class of a product type."""
    self._Declare(name, depth)

  def EmitFooter(self):
    """Finish the declarations with a single blank line."""
    self.Emit("", 0)
 | 
| 66 | 
 | 
| 67 | 
 | 
class ClassDefVisitor(visitor.AsdlVisitor):
  """Generate C++ classes and type-safe enums.

  Class definitions are emitted as the schema is visited.  Accessors whose
  bodies need a static_cast<> between generated classes are only declared
  inside the class; their definitions are collected in self.footer and
  written by EmitFooter().
  """

  def __init__(self, f, enc_params, type_lookup, enum_types=None):
    """
    Args:
      f: output file object, handed to visitor.AsdlVisitor.
      enc_params: encoding parameters; ref_width and pointer_type are read.
      type_lookup: maps an ASDL type name to its schema definition.
      enum_types: optional container of C++ type names that must be treated
        as enums even though they don't end in '_e'.
    """
    visitor.AsdlVisitor.__init__(self, f)
    self.ref_width = enc_params.ref_width
    self.type_lookup = type_lookup
    self.enum_types = enum_types or {}
    self.pointer_type = enc_params.pointer_type
    self.footer = []  # lines of out-of-class accessor definitions

  def _GetCppType(self, field):
    """Return a string for the C++ name of the field's type."""
    type_name = field.type

    cpp_type = _BUILTINS.get(type_name)
    if cpp_type is not None:
      return cpp_type

    typ = self.type_lookup[type_name]
    if isinstance(typ, asdl.Sum) and asdl.is_simple(typ):
      # Use the enum instead of the class.
      return "%s_e" % type_name

    # - Pointer for optional type.
    # - ints and strings should generally not be optional?  We don't have them
    # in osh yet, so leave it out for now.
    if field.opt:
      return "%s_t*" % type_name

    return "%s_t&" % type_name

  def EmitFooter(self):
    """Write the deferred accessor definitions collected by VisitField()."""
    for line in self.footer:
      self.f.write(line)

  def _EmitEnum(self, sum, name, depth):
    """Emit 'enum class <name>_e' with one member per variant of the sum."""
    members = [
        "%s = %d" % (variant.name, tag)
        for tag, variant in enumerate(sum.types, 1)  # zero is reserved
    ]

    self.Emit("enum class %s_e : uint8_t {" % name, depth)
    self.Emit(", ".join(members), depth + 1)
    self.Emit("};", depth)
    self.Emit("", depth)

  def _EmitAttributes(self, attributes, depth):
    """Emit one '<type> <name>;' line per attribute (rudimentary handling).

    Only attributes whose type is in runtime.BUILTIN_TYPES are supported;
    anything else trips the assertion.
    """
    for attr in attributes:
      type_name = str(attr.type)
      assert type_name in runtime.BUILTIN_TYPES, type_name
      self.Emit("%s %s;" % (type_name, attr.name), depth)

  def VisitSimpleSum(self, sum, name, depth):
    """A simple sum (no fields on any variant) is just an enum."""
    self._EmitEnum(sum, name, depth)

  def VisitCompoundSum(self, sum, name, depth):
    """Emit the tag enum, the <name>_t base class, and one class per variant."""
    self._EmitEnum(sum, name, depth)

    self.Emit("class %s_t : public Obj {" % name, depth)
    self.Emit(" public:", depth)
    # All sum types have a tag
    self.Emit("%s_e tag() const {" % name, depth + 1)
    self.Emit("return static_cast<%s_e>(bytes_[0]);" % name, depth + 2)
    self.Emit("}", depth + 1)
    self.Emit("};", depth)
    self.Emit("", depth)

    # TODO: This should be replaced with a call to the generic
    # self.VisitChildren()
    super_name = "%s_t" % name
    for variant in sum.types:
      self.VisitConstructor(variant, super_name, depth)

    # NOTE(review): these lines come out after every class above has been
    # closed, i.e. not inside <name>_t -- confirm that this is intended.
    self._EmitAttributes(sum.attributes, depth + 1)

  def VisitConstructor(self, cons, def_name, depth):
    """Emit a subclass of def_name for a variant; skipped if it has no fields."""
    if not cons.fields:
      return

    self.Emit("class %s : public %s {" % (cons.name, def_name), depth)
    self.Emit(" public:", depth)
    offset = 1  #  for the ID
    for f in cons.fields:
      self.VisitField(f, cons.name, offset, depth + 1)
      offset += self.ref_width
    self.Emit("};", depth)
    self.Emit("", depth)

  def VisitProduct(self, product, name, depth):
    """Emit the <name>_t class for a product type."""
    class_name = '%s_t' % name

    self.Emit("class %s : public Obj {" % class_name, depth)
    self.Emit(" public:", depth)
    offset = 0  # products start at byte 0; no tag offset is applied
    for f in product.fields:
      self.VisitField(f, class_name, offset, depth + 1)
      offset += self.ref_width

    self._EmitAttributes(product.attributes, depth + 1)
    self.Emit("};", depth)
    self.Emit("", depth)

  def VisitField(self, field, type_name, offset, depth):
    """Emit the accessor for one field of the class named type_name.

    Even though they are inline, some of them can't be in the class {}, because
    static_cast<> requires inheritance relationships to be already declared.  We
    have to print all the classes first, then all the bodies that might use
    static_cast<>.

    http://stackoverflow.com/questions/5808758/why-is-a-static-cast-from-a-pointer-to-base-to-a-pointer-to-derived-invalid

    Args:
      field: schema field; its name, type, seq and opt attributes are read.
      type_name: C++ class name used to qualify out-of-class definitions.
      offset: byte offset of the field inside the object.
      depth: indentation depth of the enclosing class body.
    """
    ctype = self._GetCppType(field)
    name = field.name
    pointer_type = self.pointer_type
    cast = 'static_cast<const %s>' % ctype

    if field.seq:  # Array/repeated
      # For size accessor, follow the ref, and then it's the first integer.
      self.Emit(
          'inline int %s_size(const %s* base) const {' % (name, pointer_type),
          depth)
      self.Emit('return Ref(base, %d).Int(0);' % offset, depth + 1)
      self.Emit('}', depth)

      params = 'const %s* base, int index' % pointer_type
      # First statement of every element accessor: index -> offset 'a'.
      prelude = 'int a = (index+1) * 3;'
      array = 'Ref(base, %d)' % offset
      int_expr = array + '.Int(a)'
      str_expr = array + '.Str(base, a)'
      obj_expr = array + '.Ref(base, a)'
    else:  # not repeated
      params = 'const %s* base' % pointer_type
      prelude = None
      int_expr = 'Int(%d)' % offset
      str_expr = 'Str(base, %d)' % offset
      getter = 'Optional' if field.opt else 'Ref'
      obj_expr = '%s(base, %d)' % (getter, offset)

    def Decl(qual_name):
      # Either 'left' or 'BoolBinary::left', depending on whether it's inline.
      return 'inline const %s %s(%s) const' % (ctype, qual_name, params)

    if field.seq:
      scalar_decl = Decl(name)
    else:
      # A single int/bool/enum is decoded in place, so no base pointer.
      scalar_decl = 'inline %s %s() const' % (ctype, name)

    if ctype in ('bool', 'int'):
      header, result = scalar_decl, int_expr

    elif ctype.endswith('_e') or ctype in self.enum_types:
      header, result = scalar_decl, '%s(%s)' % (cast, int_expr)

    elif ctype == 'char*':
      header, result = Decl(name), str_expr

    else:
      # Write function prototype now; write body later.  This static_cast<>
      # (downcast) causes problems if put within "class {}".
      qualified = '%s::%s' % (type_name, name)
      # depth 0 for bodies
      self.footer.extend(visitor.FormatLines(Decl(qualified) + ' {', 0))
      if prelude:
        self.footer.extend(visitor.FormatLines(prelude, 1))
      self.footer.extend(
          visitor.FormatLines('return %s(%s);' % (cast, obj_expr), 1))
      self.footer.append('}\n\n')

      self.Emit(Decl(name) + ';', depth)
      return

    self.Emit(header + ' {', depth)
    if prelude:
      self.Emit(prelude, depth + 1)
    self.Emit('return %s;' % result, depth + 1)
    self.Emit('}', depth)
 | 
| 290 | 
 | 
| 291 | 
 | 
def main(argv):
  """Command line entry point: write generated C++ for a schema to stdout.

  Usage: <prog> cpp SCHEMA_PATH

  Args:
    argv: full argument vector, including the program name at index 0.

  Raises:
    RuntimeError: if the action is missing or unknown, or if the 'cpp' action
      is given without a schema path.
  """
  try:
    action = argv[1]
  except IndexError:
    raise RuntimeError('Action required')

  # TODO: Also generate a switch/static_cast<> pretty printer in C++!  For
  # debugging.  Might need to detect cycles though.
  if action != 'cpp':
    raise RuntimeError('Invalid action %r' % action)

  # Report a missing path the same way as a missing action, so the caller
  # sees a RuntimeError rather than a bare IndexError.
  try:
    schema_path = argv[2]
  except IndexError:
    raise RuntimeError('Schema path required')

  # NOTE: This import can't be at the top level because osh/asdl_gen.py
  # depends on this gen_cpp.py module.  We should move all the main()
  # functions out of asdl/ and into command line tools.
  #
  # NOTE(review): Id is not referenced below; the import is kept in case it
  # is needed for its side effects -- confirm.
  from core.meta import Id
  app_types = {'id': meta.UserType('id_kind_asdl', 'Id_t')}
  with open(schema_path) as input_f:
    module, type_lookup = front_end.LoadSchema(input_f, app_types)

  # TODO: gen_cpp.py should be a library and the application should add Id?
  # Or we should enable ASDL metaprogramming, and let Id be a metaprogrammed
  # simple sum type.

  f = sys.stdout

  # How do mutation of strings, arrays, etc.  work?  Are they like C++
  # containers, or their own?  I think they mirror the oil language
  # semantics.
  # Every node should have a mirror.  MutableObj.  MutableRef (pointer).
  # MutableArithVar -- has std::string.  The mirrors are heap allocated.
  # All the mutable ones should support Dump()/Encode()?
  # You can just write more at the end... don't need to disturb existing
  # nodes?  Rewrite pointers.

  alignment = 4
  enc = encode.Params(alignment)
  d = {'pointer_type': enc.pointer_type}

  # Prologue: the Obj base class every generated class derives from.
  f.write("""\
#include <cstdint>

class Obj {
 public:
  // Decode a 3 byte integer from little endian
  inline int Int(int n) const;

  inline const Obj& Ref(const %(pointer_type)s* base, int n) const;

  inline const Obj* Optional(const %(pointer_type)s* base, int n) const;

  // NUL-terminated
  inline const char* Str(const %(pointer_type)s* base, int n) const;

 protected:
  uint8_t bytes_[1];  // first is ID; rest are a payload
};

""" % d)

  # Id should be treated as an enum.
  c = ChainOfVisitors(
      ForwardDeclareVisitor(f),
      ClassDefVisitor(f, enc, type_lookup, enum_types=['Id']))
  c.VisitModule(module)

  # Epilogue: definitions of the Obj decoding helpers declared above.
  #
  # uint32_t* and char*/Obj* aren't related, so we need to use
  # reinterpret_cast<>.
  # http://stackoverflow.com/questions/10151834/why-cant-i-static-cast-between-char-and-unsigned-char
  f.write("""\
inline int Obj::Int(int n) const {
  return bytes_[n] + (bytes_[n+1] << 8) + (bytes_[n+2] << 16);
}

inline const Obj& Obj::Ref(const %(pointer_type)s* base, int n) const {
  int offset = Int(n);
  return reinterpret_cast<const Obj&>(base[offset]);
}

inline const Obj* Obj::Optional(const %(pointer_type)s* base, int n) const {
  int offset = Int(n);
  if (offset) {
    return reinterpret_cast<const Obj*>(base + offset);
  } else {
    return nullptr;
  }
}

inline const char* Obj::Str(const %(pointer_type)s* base, int n) const {
  int offset = Int(n);
  return reinterpret_cast<const char*>(base + offset);
}
""" % d)
 | 
| 388 | 
 | 
| 389 | 
 | 
if __name__ == '__main__':
  # main() raises RuntimeError for a missing or invalid action; report that
  # on stderr and exit with status 1 instead of showing a traceback.
  try:
    main(sys.argv)
  except RuntimeError as e:
    print('FATAL: %s' % e, file=sys.stderr)
    sys.exit(1)
 |