1 | # yaks - idea to rewrite mycpp.
|
2 | #
|
3 | # MAYBE:
|
4 | # - Can we generate value types in C++, to reduce GC pressure? Is that done
|
5 | # with a new ASDL keyword?
|
6 | #
|
7 | # The Yaks IR can be generated from:
|
8 | # 1. typed Python 2, like mycpp. This can be done with the Python 3.8 AST,
|
9 | # like pea/
|
10 | # - we have a speed test of parsing in parallel in 300 ms or so
|
11 | #
|
12 | # Much later:
|
13 | #
|
14 | # 1. Some kind of Tea/ typed YSH syntax? To write builtins in YSH, with typed
|
15 | # 'case' syntax?
|
16 | # - better context manager / constructor-destructor syntax?
|
17 | # - value types?
|
18 | # - but there is no syntax for data types in YSH.
|
19 | # - I think those are DYNAMIC SCHEMAS
|
20 | # - We can keep ASDL though.
|
21 | # 2. If mycpp has a universal string type, and maybe double support:
|
22 | # - TypeScript/JavaScript. See oilshell/yaks experiment.
|
23 | # - Python 3 with unicode string/byte semantics
|
24 | # - we have a supported interpreter and tooling
|
25 | # - Universal string type would use the PyPy utf-8 and utf-16 indices:
|
26 | # 64-byte blocks.
|
27 | #
|
28 | # Sketch of compilation pipeline:
|
29 | # 1. Create a CST language that can be hand-written or generated
|
#   2. It can be transformed to this yaks.asdl, in the style of
#      oilshell/yaks/transform.ts.  Precedence is unambiguous, e.g. (+ 1 2)
|
32 | # 3. Go through the stages in mycpp/README.md, but PER MODULE
|
33 | # a. const pass for GLOBAL_STR -- immutable literals
|
34 | # b. forward decl pass
|
35 | # c. declaration/function prototype pass
|
36 | # - GC Masks are generated from this typed IR. It knows about pointers
|
37 | # and so forth
|
38 | # d. implementation pass
|
39 | # 4. Then all the C++ goes into Ninja, as usual.
|
40 |
|
# TODO: where do we put location info, so the type checker has something to
# blame?  I think it's on every CST node?
# I guess it's all the Call() nodes then?
|
44 |
|
module yaks
{
  # Schema for the Yaks IR.  Definitions appear in this order: literal/token
  # product types, expressions (kexpr), types (ktype), statements (stmt),
  # class-level and module-level definitions, then Module and Program.
  #
  # Sequences are written List[T] everywhere, for consistency.

  # A span of source text.
  #
  # path is for error messages
  # We may also map from path -> newline locations, for printing lines and
  # columns
  #
  # NOTE(review): start/length presumably index into 'chunk' -- confirm.
  Token = (str path, str chunk, int start, int length)

  # Literals.  Each one carries the Token it came from in 'loc'.
  Bool = (bool b, Token loc)  # true false
  Int = (int i, Token loc)    # 42
  Str = (str s, Token loc)    # "\n"

  # ''' strings like YSH, for embedding say re2c blocks?
  # strip leading whitespace
  MultiStr = (List[Token] lines)

  #
  # Heterogeneous Tree - make illegal states unrepresentable (compared to
  # homogeneous tree)
  #

  # NOTE(review): nothing in this module references 'op' yet; Unary, Binary
  # and Ternary below carry a Token for their operator instead.
  op =
    Plus
  | Minus

  # Expressions.  The first four variants reuse the literal product types
  # declared above via the %Type syntax.
  kexpr =
    Bool %Bool
  | Int %Int
  | Str %Str
  | MultiStr %MultiStr

    # e.g. (not true)
  | Unary(Token op, kexpr child)

    # e.g. (+ 1 2)
    # (a i) is a[i]
    # (d key) is d[key]
  | Binary(Token op, kexpr left, kexpr right)

    # 'x' if mybool else 'y'
    # Field order follows that Python spelling: left, cond, right.
  | Ternary(Token op, kexpr left, kexpr cond, kexpr right)

    # List[Int] is (List Int) I guess
    # This means we don't confuse Python and C++ precedence?  Everything is
    # parenthesized.
  | Call(kexpr f, List[kexpr] args)

  # A named, typed field.
  # Avoid conflict with hnode_asdl.Field (TODO: should have namespaces)
  Field_ = (str name, ktype typ)

  # Do we have first-class / shared variants in this language?  We need it for
  # Oils, not sure about Yaks itself.
  variant = (List[Field_] fields)

  # Types.
  #
  # Is this representation type checked directly?  I guess that would help.
  # But Pea is also type checked?
  ktype =
    Bool
  | Int
  | Str
  | List(ktype T)
  | Dict(ktype K, ktype V)

    # How do we reference the symbol table?
  | Class(str name)

    # This is like ASDL.  Or do we do type checking on CLASSES only, and this
    # is SUGAR?
  | Data(List[Field_] fields)
  | Enum(List[variant] variants)

  # A name paired with its type; used for function params and globals below.
  # NOTE(review): same shape as Field_ -- confirm whether both are needed.
  NameType = (str name, ktype typ)

  # Statements.  Variants written with empty parens have no fields yet.
  stmt =
    VarDecl(Token keyword)        # (var i Int 42) => int i = 42;
  | PlaceMutation(Token keyword)  # (setvar x y) => x = y;

  | If()
  | Switch()  # generated by 'with tagswitch'
  | For()
  | While()

  | Break
  | Continue
  | Return(kexpr e)

    # LATER
  | Try()
  | With()  # Scoped destruction

  # definitions allowed in a class
  # (all variants are field-less placeholders for now)
  class_def =
    Constructor()
  | Destructor()  # context managers generate this
  | Method()
  | Field()

  # Parameter list and return type of a function.
  Signature = (List[NameType] params, ktype return_type)

  # definitions allowed in a module
  mod_def =
    # These are different because we know the size at compile time.
    # GLOBAL_STR() GLOBAL_LIST() GLOBAL_DICT()
    #
    # There's also the idea of taking an entire parsed ASDL structure and
    # making it a global!
    # I guess if you have these 3, it's not that hard.  You just need
    # GLOBAL_CLASS(MyClass, {...}).  It will probably bloat the binary
    # considerably, but could be the best solution.
    Global(NameType name_type)  # GLOBAL_STR

  | Func(str name, Signature sig, List[stmt] statements)
    # what we're generating
  | Class(str name, List[class_def] defs)

    # Python import
  | Import()

    # include <stdio.h>
    # also need names to type check?  we may need the equivalent of .pyi files
    # how do we express this in Python, maybe some special comments?
  | Include(str path)

    # Is this SUGAR on top of classes?
  | Data()
  | Enum()


  # This is list of globals, class/method definitions, and functions.
  # It has DEPS (to be worked out)
  # (module "core/runtime_asdl" (data ...)
  Module = (str name, List[mod_def] defs)

  # A program is a collection of modules to link together, along with a main
  # module like oils_for_unix, in bin/oils_for_unix.py.
  #
  # It contains a main function like oils_for_unix::main().
  #
  # TODO: do we need any notion of package, like osh/ and core/ ?

  Program = (str main_module, List[Module] modules)
}
|
184 |
|
185 | # vim: sw=2
|