| 1 | # yaks - idea to rewrite mycpp.
 | 
| 2 | # 
 | 
| 3 | # MAYBE:
 | 
| 4 | # - Can we generate value types in C++, to reduce GC pressure?  Is that done
 | 
| 5 | #   with a new ASDL keyword?
 | 
| 6 | #
 | 
| 7 | # The Yaks IR can be generated from:
 | 
| 8 | # 1. typed Python 2, like mycpp.  This can be done with the Python 3.8 AST,
 | 
| 9 | #    like pea/
 | 
| 10 | #    - we have a speed test of parsing in parallel in 300 ms or so
 | 
| 11 | #
 | 
| 12 | # Much later:
 | 
| 13 | #
 | 
| 14 | # 1. Some kind of Tea/ typed YSH syntax?  To write builtins in YSH, with typed
 | 
| 15 | #   'case' syntax?
 | 
| 16 | #   - better context manager / constructor-destructor syntax?
 | 
| 17 | #   - value types?
 | 
| 18 | #   - but there is no syntax for data types in YSH.
 | 
| 19 | #     - I think those are DYNAMIC SCHEMAS
 | 
| 20 | #     - We can keep ASDL though.
 | 
| 21 | # 2. If mycpp has a universal string type, and maybe double support:
 | 
| 22 | #    - TypeScript/JavaScript.  See oilshell/yaks experiment.
 | 
| 23 | #    - Python 3 with unicode string/byte semantics
 | 
| 24 | #      - we have a supported interpreter and tooling
 | 
| 25 | #    - Universal string type would use the PyPy utf-8 and utf-16 indices:
 | 
| 26 | #      64-byte blocks.
 | 
| 27 | #
 | 
| 28 | # Sketch of compilation pipeline:
 | 
| 29 | # 1. Create a CST language that can be hand-written or generated
 | 
| 30 | # 2. It can be transformed to this yaks.asdl, in the style of
 | 
| 31 | #    oilshell/yaks/transform.ts.  Precedence is unambiguous, e.g. (+ 1 2)
 | 
| 32 | # 3. Go through the stages in mycpp/README.md, but PER MODULE
 | 
| 33 | #    a. const pass for GLOBAL_STR -- immutable literals
 | 
| 34 | #    b. forward decl pass
 | 
| 35 | #    c. declaration/function prototype pass
 | 
| 36 | #       - GC Masks are generated from this typed IR.  It knows about pointers
 | 
| 37 | #       and so forth
 | 
| 38 | #    d. implementation pass
 | 
| 39 | # 4. Then all the C++ goes into Ninja, as usual.
 | 
| 40 | 
 | 
| 41 | # TODO: where do we put location info, so the type checker has something to
 | 
| 42 | # blame?  I think it's on every CST node?
 | 
| 43 | # Or I guess it's all the Call() nodes then?
 | 
| 44 | 
 | 
| 45 | module yaks
 | 
| 46 | {
 | 
| 47 |   # Token record.  path is for error messages.
 | 
| 48 |   # We may also map from path -> newline locations, for printing lines and
 | 
| 49 |   # columns.  NOTE(review): start/length presumably index into chunk -- confirm.
 | 
| 50 |   Token = (str path, str chunk, int start, int length)
 | 
| 51 | 
 | 
| 52 |   Bool = (bool b, Token loc)  # e.g. true false; loc is the associated Token
 | 
| 53 |   Int = (int i, Token loc)  # e.g. 42; loc is the associated Token
 | 
| 54 |   Str = (str s, Token loc)  # e.g. "\n"; loc is the associated Token
 | 
| 55 | 
 | 
| 56 |   # Multi-line ''' strings like YSH, e.g. for embedding re2c blocks?
 | 
| 57 |   # Idea: strip leading whitespace.  NOTE(review): presumably one Token per line -- confirm.
 | 
| 58 |   MultiStr = (List[Token] lines)
 | 
| 59 | 
 | 
| 60 |   #
 | 
| 61 |   # Heterogeneous Tree - make illegal states unrepresentable (compared to
 | 
| 62 |   # homogeneous tree)
 | 
| 63 |   #
 | 
| 64 | 
 | 
| 65 |   op =  # NOTE(review): no field in this module uses 'op' as a type; Unary/Binary/Ternary carry 'Token op'
 | 
| 66 |     Plus
 | 
| 67 |   | Minus
 | 
| 68 | 
 | 
| 69 |   kexpr =  # expression nodes
 | 
| 70 |     Bool %Bool  # the '%' variants reuse the top-level product types of the same name
 | 
| 71 |   | Int %Int
 | 
| 72 |   | Str %Str
 | 
| 73 |   | MultiStr %MultiStr
 | 
| 74 | 
 | 
| 75 |     # e.g. (not true)
 | 
| 76 |   | Unary(Token op, kexpr child)
 | 
| 77 |     # e.g. (+ 1 2)
 | 
| 78 |     #      (a i)  is a[i]
 | 
| 79 |     #      (d key)  is d[key]
 | 
| 80 |   | Binary(Token op, kexpr left, kexpr right)
 | 
| 81 | 
 | 
| 82 |     # e.g. 'x' if mybool else 'y'  (presumably left='x', cond=mybool, right='y' -- confirm)
 | 
| 83 |   | Ternary(Token op, kexpr left, kexpr cond, kexpr right)
 | 
| 84 | 
 | 
| 85 |     # List[Int] is (List Int) I guess.
 | 
| 86 |     # This means we don't confuse Python and C++ precedence?  Everything is
 | 
| 87 |     # parenthesized.
 | 
| 88 |   | Call(kexpr f, List[kexpr] args)
 | 
| 89 | 
 | 
| 90 |   # Trailing '_' avoids a conflict with hnode_asdl.Field (TODO: should have namespaces)
 | 
| 91 |   Field_ = (str name, ktype typ)  # a name and its type; same shape as NameType
 | 
| 92 | 
 | 
| 93 |   # Do we have first-class / shared variants in this language?  We need it for
 | 
| 94 |   # Oils, not sure about Yaks itself.
 | 
| 95 |   variant = (List[Field_] fields)  # element type of ktype.Enum; List[...] spelling as elsewhere in this module
 | 
| 96 | 
 | 
| 97 |   # Types.  Is this representation type checked directly?  I guess that would help.
 | 
| 98 |   # But Pea is also type checked?
 | 
| 99 |   ktype =
 | 
| 100 |     Bool  # field-less constructors; they share names with the top-level Bool/Int/Str product types
 | 
| 101 |   | Int
 | 
| 102 |   | Str
 | 
| 103 |   | List(ktype T)
 | 
| 104 |   | Dict(ktype K, ktype V)
 | 
| 105 | 
 | 
| 106 |     # How do we reference the symbol table?
 | 
| 107 |   | Class(str name)
 | 
| 108 | 
 | 
| 109 |     # This is like ASDL.  Or do we do type checking on CLASSES only, and this
 | 
| 110 |     # is SUGAR?
 | 
| 111 |   | Data(List[Field_] fields)  # List[...] spelling, consistent with the other list fields in this module
 | 
| 112 |   | Enum(List[variant] variants)
 | 
| 113 | 
 | 
| 114 |   NameType = (str name, ktype typ)  # a name and its type; used by Signature.params and mod_def.Global
 | 
| 115 | 
 | 
| 116 |   stmt =  # statements; used as the body of mod_def.Func.  Constructors with () have no fields defined yet.
 | 
| 117 |     VarDecl(Token keyword)        # (var i Int 42)  =>  int i = 42;
 | 
| 118 |   | PlaceMutation(Token keyword)  # (setvar x y)    =>  x = y;
 | 
| 119 | 
 | 
| 120 |   | If()
 | 
| 121 |   | Switch()  # generated by 'with tagswitch'
 | 
| 122 |   | For()
 | 
| 123 |   | While()
 | 
| 124 | 
 | 
| 125 |   | Break
 | 
| 126 |   | Continue
 | 
| 127 |   | Return(kexpr e)
 | 
| 128 | 
 | 
| 129 |     # LATER
 | 
| 130 |   | Try()
 | 
| 131 |   | With()  # Scoped destruction
 | 
| 132 | 
 | 
| 133 |   # Definitions allowed in a class (see mod_def.Class).  No fields are defined yet.
 | 
| 134 |   class_def = 
 | 
| 135 |     Constructor()
 | 
| 136 |   | Destructor()  # context managers generate this
 | 
| 137 |   | Method()
 | 
| 138 |   | Field()  # NOTE(review): distinct from the top-level Field_ product type
 | 
| 139 | 
 | 
| 140 |   Signature = (List[NameType] params, ktype return_type)  # used by mod_def.Func
 | 
| 141 |   # Definitions allowed in a module (see Module.defs).
 | 
| 142 |   mod_def =
 | 
| 143 |     # These are different because we know the size at compile time.
 | 
| 144 |     # GLOBAL_STR()  GLOBAL_LIST()  GLOBAL_DICT()
 | 
| 145 |     #
 | 
| 146 |     # There's also the idea of taking an entire parsed ASDL structure and
 | 
| 147 |     # making it a global!
 | 
| 148 |     # I guess if you have these 3, it's not that hard.  You just need
 | 
| 149 |     # GLOBAL_CLASS(MyClass, {...}).  It will probably bloat the binary
 | 
| 150 |     # considerably, but could be the best solution.
 | 
| 151 |     Global(NameType name_type)  # GLOBAL_STR
 | 
| 152 | 
 | 
| 153 |   | Func(str name, Signature sig, List[stmt] statements)
 | 
| 154 |     # what we're generating
 | 
| 155 |   | Class(str name, List[class_def] defs)
 | 
| 156 | 
 | 
| 157 |     # Python import (no fields defined yet)
 | 
| 158 |   | Import()
 | 
| 159 | 
 | 
| 160 |     # include <stdio.h>
 | 
| 161 |     # Also need names to type check?  We may need the equivalent of .pyi files.
 | 
| 162 |     # How do we express this in Python -- maybe some special comments?
 | 
| 163 |   | Include(str path)
 | 
| 164 | 
 | 
| 165 |     # Is this SUGAR on top of classes?  (No fields yet; compare ktype.Data / ktype.Enum.)
 | 
| 166 |   | Data()
 | 
| 167 |   | Enum()
 | 
| 168 | 
 | 
| 169 | 
 | 
| 170 |   # This is a list of globals, class/method definitions, and functions.
 | 
| 171 |   # It has DEPS (to be worked out).
 | 
| 172 |   # e.g. (module "core/runtime_asdl" (data ...))
 | 
| 173 |   Module = (str name, List[mod_def] defs)
 | 
| 174 | 
 | 
| 175 |   # A program is a collection of modules to link together, along with a main
 | 
| 176 |   # module like oils_for_unix, in bin/oils_for_unix.py.
 | 
| 177 |   #
 | 
| 178 |   # The main module contains a main function like oils_for_unix::main().
 | 
| 179 |   #
 | 
| 180 |   # TODO: do we need any notion of package, like osh/ and core/ ?
 | 
| 181 | 
 | 
| 182 |   Program = (str main_module, List[Module] modules)  # main_module: name of the main module
 | 
| 183 | }
 | 
| 184 | 
 | 
| 185 | # vim: sw=2
 |