1 | // yaks_runtime_test.cc
|
2 | //
|
3 | // Related to small_str_test.cc
|
4 |
|
5 | #include <inttypes.h>
|
6 | #include <limits.h> // HOST_NAME_MAX
|
7 | #include <unistd.h> // gethostname()
|
8 |
|
9 | #include <new> // placement new
|
10 |
|
11 | #include "mycpp/common.h"
|
12 | #include "mycpp/gc_obj.h" // ObjHeader
|
13 | #include "mycpp/runtime.h"
|
14 | #include "vendor/greatest.h"
|
15 |
|
16 | // namespace yaks {
|
17 |
|
18 | /*
|
19 |
|
20 | To Do
|
21 | =====
|
22 |
|
23 | - prototype rooting
|
24 | - register YaksValue only, not other primitive types
|
25 | - Is there maybe a ImmediateVariant type?
|
26 | - it can never be a HeapObj, and thus doesn't need to be rooted
|
27 | - how common is this? Probably not very
|
28 |
|
29 | - What are the ASDL schema language changes you need?
|
30 | - defining tags and so forth
|
31 | - shared tags ACROSS MODULES -- I ran into this with %LiteralBlock
|
32 | - and probably %Name for (Token tok, str name)
|
33 |
|
34 | - Does this make sense?
|
35 |
|
36 | alias CompoundWord = List[word_part]
|
37 |
|
38 | - What about
|
39 |
|
40 | tagged value =
|
41 | Int %Int
|
42 | | Frame %Dict[str, str]
|
43 |
|
44 | - What's the penalty in Python?
|
45 | - I think it's mainly mymember() bar() syntax everywhere
|
46 | - but then do you need to generate lots of tiny classes
|
47 | - or use getattr() on the list of known field names to simulate it
|
48 | - but that won't pass MyPy
|
49 | - damn you would need a ton of type annotations then ... every wrapper
|
50 | would get a type annotation
|
51 | - but this is only for public fields? Maybe it's not that bad
|
52 | - or do you need public self(), like perhaps
|
53 | - cmd_ev.self().word_ev
|
54 | - cmd_ev.member().foo
|
55 | - hm that's possible
|
56 | - self()->length_ = 32; // this can be direct? At least within a class
|
57 |
|
58 |
|
59 | ## Use Cases
|
60 |
|
61 | - write out value_t class more
|
62 | - value.Int is
|
63 | - immediate int32_t
|
64 | - or what about the 53 bit idea? (matching double precision)
|
65 | - 64-3 bits = 61, or 64-11 = 53
|
66 | - value.BigStr is BigStr and SmallStr
|
67 | - value.BashAssoc is Dict[str, str]
|
68 | - value.Dict is Dict[str, value]
|
69 | - value.Frame is Dict[str, cell]
|
70 |
|
71 | - CompoundWord is List[word_part] parts
|
72 |
|
73 | - a_index = BigStr(str s) | Int(int i)
|
74 | - this is not immdiate
|
75 |
|
76 | - Custom Token VALUE type as 16 bytes?
|
77 | - this makes GC scanning harder
|
78 | - can't have Token on the stack, but can have it embedded in other objects?
|
79 | - but then you can't have any of those on the stack either
|
80 | - There could be a restriction, hm
|
81 |
|
82 | ## Problems
|
83 |
|
84 | ### Code gen issue: -> vs .
|
85 |
|
86 | Is it possible for mycpp classes to use
|
87 |
|
88 | this->token = token;
|
89 | this->args = args;
|
90 |
|
91 | While ASDL uses
|
92 |
|
93 | w.parts()[0].tok
|
94 |
|
95 | How would you distinguish the two things?
|
96 |
|
97 | Plus they will be mixed, like:
|
98 |
|
99 | this->cmd_ev->w.parts()
|
100 |
|
101 | Cheap hack is to come up with a naming convention for the types
|
102 |
|
103 | Actually we already have it - types look like this. See visit_member_expr:
|
104 |
|
105 | lhs_type core.ui.ErrorFormatter expr NameExpr(errfmt [l]) name
|
106 | PrettyPrintError lhs_type core.util.UserExit expr NameExpr(e [l]) name status
|
107 | lhs_type osh.cmd_eval.CommandEvaluator expr NameExpr(cmd_ev [l]) name
|
108 | MaybeRunExitTrap lhs_type _devbuild.gen.syntax_asdl.IntParamBox expr
|
109 | NameExpr(mut_status [l]) name i
|
110 |
|
111 | So we can look for syntax_asdl / runtime_asdl
|
112 |
|
113 | ## Design
|
114 |
|
115 | - YaksValue contains a uint64_t ?
|
116 | - this means any BigStr, List, Dict, Tuple, or class
|
117 | - can List and Dict have type tag in addition to pointer?
|
118 |
|
119 | These are less than 8 bytes:
|
120 |
|
121 | - int32_t
|
122 | - float
|
123 | - enum class value_e {}
|
124 |
|
125 | Features that mycpp runtime doesn't have:
|
126 |
|
127 | - More efficient rooting, either:
|
128 | - our StackRoots({&a, &b, ...}) is slow
|
129 | - our StackRoot takes up a lot of code space!
|
130 | - Either
|
131 | - Shadow Stack for Garbage Collection
|
132 | - It's better to spill pointers to a separate frame
|
133 | - Hybrid rooting
|
134 | - ParamRoot, ParamRoot, SortedPointerRoots,
|
135 |
|
136 | - Not just more EFFICIENT, but LESS ROOTING
|
137 | - call graph analysis for stuff above the stack
|
138 |
|
139 | Tagged Pointers ("Boxless optimization")
|
140 |
|
141 | - Immediate Values - 8 bytes for variant type tag
|
142 | - Zero-arg constructors are integers
|
143 | - int32_t
|
144 | - float
|
145 | - Small BigStr
|
146 | - 4 bits len
|
147 | - how is this shared with type_tag?
|
148 | - are we reserving half of this space for small_str?
|
149 | - 6 bytes data
|
150 | - 1 byte nullptr
|
151 | - Pointer
|
152 |
|
153 | Later:
|
154 |
|
155 | - Value types? How does the GC scan them on the stack??? That is hard
|
156 |
|
157 | */
|
158 |
|
159 | // A GC heap value or an immediate value
|
160 | //
|
161 | // Can also be used for say:
|
162 | //
|
163 | // tagged num = yaks.Int | yaks.Float
|
164 | //
|
165 | // There is more than enough space for a tag!
|
166 | // A double would hae to be heap-allocated.
|
167 |
|
168 | // Note that Yaks is statically typed, and you can have a simple double, float,
|
169 | // or int64_t. (maybe i32 i64 f32 f64 like WASM.)
|
170 | //
|
171 | // You would only use a YaksValue for an Int if it's part of a variant type.
|
172 |
|
173 | // Kinds of layouts:
|
174 | //
|
175 | // - primitive i32 i64 f32 f64
|
176 | // - enum class scope_e
|
177 | // - YaksValue that MAY have a heap_obj (which is any BigStr value)
|
178 | // - then you have only 7 possible tags
|
179 | // - YaksValue that does NOT have a heap_obj -- then you have more room for tags
|
180 | // - you could have variatn of Bool, Int, and some other enum_class_e
|
181 |
|
182 | #define ASDL_NAMES struct
|
183 |
|
184 | ASDL_NAMES ytag_e {
|
185 | enum no_name {
|
186 | HeapObj = 0,
|
187 | HeapStr = 1,
|
188 | SmallStr = 2,
|
189 |
|
190 | // Do these make sense, or would you want to use Bool, Float for something
|
191 | // else? Bool is questionable.
|
192 | Bool = 3,
|
193 | Int = 4,
|
194 | Float = 5,
|
195 |
|
196 | // Note: Is this POSSIBLE? NO
|
197 | // EmptyList = 6, // much more common
|
198 | // EmptyDict = 7, // less common
|
199 |
|
200 | // Potential optimization:
|
201 | // - the minute they are append() or set(), i.e. non-empty, they become
|
202 | // HeapObj
|
203 | // - but you can iterate over an EmptyList or Dict
|
204 |
|
205 | // MUTABILITY breaks it
|
206 | //
|
207 | // var mylist = []
|
208 | // myfunc(mylist) // can't copy by value, must be by reference
|
209 | // print(len(mylist)) // must be mutated
|
210 | };
|
211 | };
|
212 |
|
213 | // Picture here
|
214 | // https://bernsteinbear.com/blog/compiling-a-lisp-2/#pointer-tagging-scheme
|
215 |
|
216 | union YaksValue {
|
217 | int ytag() const {
|
218 | // 0 for SmallStr -- any value can be a string
|
219 | // 1..7 for immediate values
|
220 | // for heap values: look in heap_obj->tag()
|
221 | return ytag_e::HeapObj;
|
222 | }
|
223 |
|
224 | void* heap_obj;
|
225 | bool b;
|
226 | float f;
|
227 |
|
228 | int32_t i;
|
229 |
|
230 | // It's 8 bytes on 32 bit systems too -- I guess we need this for small str?
|
231 | uint64_t whole;
|
232 |
|
233 | // TODO: where do we put
|
234 | // - 1 byte type_tag? Or do we have fewer than that? Maybe 5 bits, since 3
|
235 | // are for small_str len?
|
236 | // - that's 32 immediate types, and then you can overflow into heap?
|
237 | // - NO WE only have THREE BITS because our allocator is 24 or 48 byte
|
238 | // aligned
|
239 | // - so we have at most 8 immediate values, and the rest heap values?
|
240 | // - but SmallStr always takes up some space
|
241 |
|
242 | // - NUL terminator for SmallStr
|
243 | };
|
244 |
|
245 | /* Class translation
|
246 |
|
247 | class Slice:
|
248 | def __init__(self, s: str, start: int, length: int):
|
249 | self.s = s
|
250 | self.start = start
|
251 | self.length = length
|
252 |
|
253 | def get(self) -> str:
|
254 | return self.s[self.start : self.start + self.length]
|
255 | */
|
256 |
|
257 | class Slice {
|
258 | public:
|
259 | Slice(BigStr* s, int start, int length) {
|
260 | // TODO: allocate Members
|
261 | self_.heap_obj = Alloc<Members>();
|
262 |
|
263 | self()->s_ = s;
|
264 | self()->start_ = start;
|
265 | self()->length_ = length;
|
266 | }
|
267 |
|
268 | struct Members {
|
269 | BigStr* s_;
|
270 | int start_;
|
271 | int length_;
|
272 |
|
273 | static constexpr ObjHeader obj_header() {
|
274 | return ObjHeader::ClassFixed(field_mask(), sizeof(Members));
|
275 | }
|
276 | static constexpr uint32_t field_mask() {
|
277 | return maskbit(offsetof(Members, s_));
|
278 | }
|
279 | };
|
280 |
|
281 | Members* self() {
|
282 | return static_cast<Members*>(self_.heap_obj);
|
283 | }
|
284 |
|
285 | int start() {
|
286 | return self()->start_;
|
287 | }
|
288 |
|
289 | // Field Accessors -- this will make the generated code a lot longer?
|
290 | // Naming convention is like protobuf
|
291 |
|
292 | int length() {
|
293 | return self()->length_;
|
294 | }
|
295 |
|
296 | // Methods
|
297 | BigStr* Get() {
|
298 | // return StrFromC("yo");
|
299 |
|
300 | int n = self()->length_;
|
301 |
|
302 | log("start = %d", self()->start_);
|
303 | log("n = %d", n);
|
304 |
|
305 | BigStr* result = NewStr(n);
|
306 |
|
307 | memcpy(result->data_, self()->s_->data_ + self()->start_, n);
|
308 |
|
309 | result->data_[n] = '\0';
|
310 |
|
311 | log("result->data %s", result->data_);
|
312 |
|
313 | return result;
|
314 | }
|
315 |
|
316 | YaksValue self_;
|
317 | };
|
318 |
|
319 | void SliceFunc(Slice myslice) {
|
320 | BigStr* s = myslice.Get();
|
321 | log("val = %s", s->data_);
|
322 | print(s);
|
323 |
|
324 | log("start %d length %d", myslice.start(), myslice.length());
|
325 | }
|
326 |
|
327 | // Based on _gen/core/runtime.asdl.h
|
328 |
|
329 | // TODO: This can have a typetag() method
|
330 | //
|
331 | // - It will look in the immediate YaksValue for the common cases?
|
332 | // - and in the case you only have immediates
|
333 | // And then look in the GC header of the heap allocated object for the other
|
334 | // cases?
|
335 |
|
336 | class value_t {
|
337 | protected:
|
338 | value_t() {
|
339 | }
|
340 |
|
341 | public:
|
342 | int typetag() const {
|
343 | // Look if it's an integer or string
|
344 |
|
345 | // Look for heap object
|
346 | int ytag = self_.ytag();
|
347 | if (ytag == ytag_e::HeapObj) {
|
348 | return 0;
|
349 | }
|
350 | return 1;
|
351 |
|
352 | // return ObjHeader::FromObject(this)->type_tag;
|
353 | }
|
354 |
|
355 | // All variants have this.
|
356 | YaksValue self_;
|
357 |
|
358 | // hnode_t* PrettyTree();
|
359 | DISALLOW_COPY_AND_ASSIGN(value_t)
|
360 | };
|
361 |
|
362 | class value__Int : public value_t {
|
363 | public:
|
364 | int typetag() const {
|
365 | // Reuse the primitive tag
|
366 | return ytag_e::Int;
|
367 | }
|
368 | };
|
369 |
|
370 | class value__Str : public value_t {
|
371 | public:
|
372 | int typetag() const {
|
373 | // int ytag = self_.ytag();
|
374 | // CHECK(ytag == ytag_e::SmallStr || ytag == ytag_e::HeapStr) {
|
375 |
|
376 | // TODO: return value_e::Str
|
377 | return 0;
|
378 | }
|
379 | };
|
380 |
|
381 | TEST yaks_test() {
|
382 | Slice myslice(StrFromC("hello"), 1, 3);
|
383 |
|
384 | log("myslice %p", &myslice);
|
385 |
|
386 | SliceFunc(myslice);
|
387 |
|
388 | // TODO: constructor
|
389 | value__Int i;
|
390 | log(" i.typetag = %d", i.typetag());
|
391 |
|
392 | value__Str s;
|
393 | log(" s.typetag = %d", s.typetag());
|
394 |
|
395 | PASS();
|
396 | }
|
397 |
|
398 | //} // namespace small_str_test
|
399 |
|
400 | GREATEST_MAIN_DEFS();
|
401 |
|
402 | int main(int argc, char** argv) {
|
403 | // gHeap.Init();
|
404 |
|
405 | GREATEST_MAIN_BEGIN();
|
406 |
|
407 | // RUN_TEST(yaks::yaks_test);
|
408 | RUN_TEST(yaks_test);
|
409 |
|
410 | // gHeap.CleanProcessExit();
|
411 |
|
412 | GREATEST_MAIN_END();
|
413 | return 0;
|
414 | }
|