| 1 | # yaks - idea to rewrite mycpp.
|
| 2 | #
|
| 3 | # MAYBE:
|
| 4 | # - Can we generate value types in C++, to reduce GC pressure? Is that done
|
| 5 | # with a new ASDL keyword?
|
| 6 | #
|
| 7 | # The Yaks IR can be generated from:
|
| 8 | # 1. typed Python 2, like mycpp. This can be done with the Python 3.8 AST,
|
| 9 | # like pea/
|
| 10 | # - we have a speed test of parsing in parallel in 300 ms or so
|
| 11 | #
|
| 12 | # Much later:
|
| 13 | #
|
| 14 | # 1. Some kind of Tea/ typed YSH syntax? To write builtins in YSH, with typed
|
| 15 | # 'case' syntax?
|
| 16 | # - better context manager / constructor-destructor syntax?
|
| 17 | # - value types?
|
| 18 | # - but there is no syntax for data types in YSH.
|
| 19 | # - I think those are DYNAMIC SCHEMAS
|
| 20 | # - We can keep ASDL though.
|
| 21 | # 2. If mycpp has a universal string type, and maybe double support:
|
| 22 | # - TypeScript/JavaScript. See oilshell/yaks experiment.
|
| 23 | # - Python 3 with unicode string/byte semantics
|
| 24 | # - we have a supported interpreter and tooling
|
| 25 | # - Universal string type would use the PyPy utf-8 and utf-16 indices:
|
| 26 | # 64-byte blocks.
|
| 27 | #
|
| 28 | # Sketch of compilation pipeline:
|
| 29 | # 1. Create a CST language that can be hand-written or generated
|
| 30 | # 2. It can be transformed to this yaks.asdl, in the style of
|
| 31 | # oilshell/yaks/transform.ts. Precedence is unambiguous, e.g. (+ 1 2)
|
| 32 | # 3. Go through the stages in mycpp/README.md, but PER MODULE
|
| 33 | # a. const pass for GLOBAL_STR -- immutable literals
|
| 34 | # b. forward decl pass
|
| 35 | # c. declaration/function prototype pass
|
| 36 | # - GC Masks are generated from this typed IR. It knows about pointers
|
| 37 | # and so forth
|
| 38 | # d. implementation pass
|
| 39 | # 4. Then all the C++ goes into Ninja, as usual.
|
| 40 |
|
| 41 | # TODO: where do we put location info for the type checker to blame? I think
|
| 42 | # it's on every CST node?
|
| 43 | # I guess it's all the Call() nodes then?
|
| 44 |
|
| 45 | module yaks
|
| 46 | {
|
| 47 | # Source location record. 'path' is for error messages.
|
| 48 | # We may also map from path -> newline locations, for printing lines and
|
| 49 | # columns. NOTE(review): start/length presumably delimit a span of 'chunk' -- confirm.
|
| 50 | Token = (str path, str chunk, int start, int length)
|
| 51 |
|
| 52 | Bool = (bool b, Token loc) # literal value plus its location Token, e.g. true false
|
| 53 | Int = (int i, Token loc) # e.g. 42
|
| 54 | Str = (str s, Token loc) # e.g. "\n"
|
| 55 |
|
| 56 | # Multi-line ''' strings, like YSH -- for embedding, say, re2c blocks?
|
| 57 | # Leading whitespace is to be stripped. Each line is a Token; there is no separate 'loc' field.
|
| 58 | MultiStr = (List[Token] lines)
|
| 59 |
|
| 60 | #
|
| 61 | # Heterogeneous Tree - make illegal states unrepresentable (compared to
|
| 62 | # homogeneous tree)
|
| 63 | #
|
| 64 |
|
| 65 | op = # NOTE(review): not referenced by any other definition in this module; Unary/Binary/Ternary take 'Token op'
|
| 66 | Plus
|
| 67 | | Minus
|
| 68 |
|
| 69 | kexpr = # expression nodes; the first four variants reuse the product types of the same name via %
|
| 70 | Bool %Bool
|
| 71 | | Int %Int
|
| 72 | | Str %Str
|
| 73 | | MultiStr %MultiStr
|
| 74 |
|
| 75 | # Prefix operator applied to one child, e.g. (not true)
|
| 76 | | Unary(Token op, kexpr child)
|
| 77 | # Operator applied to two children, e.g. (+ 1 2)
|
| 78 | # (a i) is a[i]
|
| 79 | # (d key) is d[key]
|
| 80 | | Binary(Token op, kexpr left, kexpr right)
|
| 81 |
|
| 82 | # 'x' if mybool else 'y' -- presumably left='x', cond=mybool, right='y'; confirm
|
| 83 | | Ternary(Token op, kexpr left, kexpr cond, kexpr right)
|
| 84 |
|
| 85 | # Application of f to args. List[Int] is (List Int) I guess.
|
| 86 | # This means we don't confuse Python and C++ precedence? Everything is
|
| 87 | # parenthesized.
|
| 88 | | Call(kexpr f, List[kexpr] args)
|
| 89 |
|
| 90 | # A named, typed field. Trailing underscore avoids a conflict with hnode_asdl.Field (TODO: should have namespaces)
|
| 91 | Field_ = (str name, ktype typ)
|
| 92 |
|
| 93 | # Do we have first-class / shared variants in this language? We need it for
|
| 94 | # Oils, not sure about Yaks itself.
|
| 95 | variant = (List[Field_] fields) # one alternative of ktype.Enum: its named, typed fields
|
| 96 |
|
| 97 | # Is this representation type checked directly? I guess that would help.
|
| 98 | # But Pea is also type checked?
|
| 99 | ktype = # type expressions: three primitives, two parameterized containers, then user-defined types
|
| 100 | Bool
|
| 101 | | Int
|
| 102 | | Str
|
| 103 | | List(ktype T)
|
| 104 | | Dict(ktype K, ktype V)
|
| 105 |
|
| 106 | # A class referred to by name. How do we reference the symbol table?
|
| 107 | | Class(str name)
|
| 108 |
|
| 109 | # This is like ASDL. Or do we do type checking on CLASSES only, and this
|
| 110 | # is SUGAR?
|
| 111 | | Data(List[Field_] fields)
|
| 112 | | Enum(List[variant] variants)
|
| 113 |
|
| 114 | NameType = (str name, ktype typ) # name/type pair used by Signature params and mod_def.Global; same shape as Field_
|
| 115 |
|
| 116 | stmt = # statements; variants written with empty () have no fields defined yet
|
| 117 | VarDecl(Token keyword) # (var i Int 42) => int i = 42; NOTE(review): only the keyword is modeled so far
|
| 118 | | PlaceMutation(Token keyword) # (setvar x y) => x = y; NOTE(review): only the keyword is modeled so far
|
| 119 |
|
| 120 | | If()
|
| 121 | | Switch() # generated by 'with tagswitch'
|
| 122 | | For()
|
| 123 | | While()
|
| 124 |
|
| 125 | | Break
|
| 126 | | Continue
|
| 127 | | Return(kexpr e)
|
| 128 |
|
| 129 | # LATER: not needed for a first version
|
| 130 | | Try()
|
| 131 | | With() # Scoped destruction
|
| 132 |
|
| 133 | # Definitions allowed in a class. All four variants are field-less placeholders for now.
|
| 134 | class_def =
|
| 135 | Constructor()
|
| 136 | | Destructor() # context managers generate this
|
| 137 | | Method()
|
| 138 | | Field() # distinct from the top-level Field_ product type
|
| 139 |
|
| 140 | Signature = (List[NameType] params, ktype return_type) # typed parameters plus return type; used by mod_def.Func
|
| 141 | # Definitions allowed in a module.
|
| 142 | mod_def =
|
| 143 | # These are different because we know the size at compile time.
|
| 144 | # GLOBAL_STR() GLOBAL_LIST() GLOBAL_DICT()
|
| 145 | #
|
| 146 | # There's also the idea of taking an entire parsed ASDL structure and
|
| 147 | # making it a global!
|
| 148 | # I guess if you have these 3, it's not that hard. You just need
|
| 149 | # GLOBAL_CLASS(MyClass, {...}). It will probably bloat the binary
|
| 150 | # considerably, but could be the best solution.
|
| 151 | Global(NameType name_type) # GLOBAL_STR
|
| 152 |
|
| 153 | | Func(str name, Signature sig, List[stmt] statements)
|
| 154 | # what we're generating
|
| 155 | | Class(str name, List[class_def] defs)
|
| 156 |
|
| 157 | # Python import (no fields defined yet)
|
| 158 | | Import()
|
| 159 |
|
| 160 | # include <stdio.h>
|
| 161 | # We also need names to type check? We may need the equivalent of .pyi files.
|
| 162 | # How do we express this in Python, maybe some special comments?
|
| 163 | | Include(str path)
|
| 164 |
|
| 165 | # Is this SUGAR on top of classes? (no fields defined yet)
|
| 166 | | Data()
|
| 167 | | Enum()
|
| 168 |
|
| 169 |
|
| 170 | # This is a list of globals, class/method definitions, and functions.
|
| 171 | # It has DEPS (to be worked out)
|
| 172 | # (module "core/runtime_asdl" (data ...))
|
| 173 | Module = (str name, List[mod_def] defs)
|
| 174 |
|
| 175 | # A program is a collection of modules to link together, along with a main
|
| 176 | # module like oils_for_unix, in bin/oils_for_unix.py.
|
| 177 | #
|
| 178 | # The main module contains a main function like oils_for_unix::main().
|
| 179 | #
|
| 180 | # TODO: do we need any notion of package, like osh/ and core/ ?
|
| 181 |
|
| 182 | Program = (str main_module, List[Module] modules) # NOTE(review): main_module presumably names one of 'modules' -- confirm
|
| 183 | }
|
| 184 |
|
| 185 | # vim: sw=2
|