OILS / yaks / yaks_runtime_test.cc View on Github | oilshell.org

414 lines, 103 significant
1// yaks_runtime_test.cc
2//
3// Related to small_str_test.cc
4
5#include <inttypes.h>
6#include <limits.h> // HOST_NAME_MAX
7#include <unistd.h> // gethostname()
8
9#include <new> // placement new
10
11#include "mycpp/common.h"
12#include "mycpp/gc_obj.h" // ObjHeader
13#include "mycpp/runtime.h"
14#include "vendor/greatest.h"
15
16// namespace yaks {
17
18/*
19
20To Do
21=====
22
23- prototype rooting
24 - register YaksValue only, not other primitive types
25 - Is there maybe a ImmediateVariant type?
26 - it can never be a HeapObj, and thus doesn't need to be rooted
27 - how common is this? Probably not very
28
29- What are the ASDL schema language changes you need?
30 - defining tags and so forth
31 - shared tags ACROSS MODULES -- I ran into this with %LiteralBlock
32 - and probably %Name for (Token tok, str name)
33
34- Does this make sense?
35
36 alias CompoundWord = List[word_part]
37
38- What about
39
40 tagged value =
41 Int %Int
42 | Frame %Dict[str, str]
43
44- What's the penalty in Python?
45 - I think it's mainly mymember() bar() syntax everywhere
46 - but then do you need to generate lots of tiny classes
47 - or use getattr() on the list of known field names to simulate it
48 - but that won't pass MyPy
49 - damn you would need a ton of type annotations then ... every wrapper
50 would get a type annotation
51 - but this is only for public fields? Maybe it's not that bad
52 - or do you need public self(), like perhaps
53 - cmd_ev.self().word_ev
54 - cmd_ev.member().foo
55 - hm that's possible
56 - self()->length_ = 32; // this can be direct? At least within a class
57
58
59## Use Cases
60
61- write out value_t class more
62 - value.Int is
63 - immediate int32_t
64 - or what about the 53 bit idea? (matching double precision)
65 - 64-3 bits = 61, or 64-11 = 53
66 - value.BigStr is BigStr and SmallStr
67 - value.BashAssoc is Dict[str, str]
68 - value.Dict is Dict[str, value]
69 - value.Frame is Dict[str, cell]
70
71- CompoundWord is List[word_part] parts
72
73- a_index = BigStr(str s) | Int(int i)
 - this is not immediate
75
76- Custom Token VALUE type as 16 bytes?
77 - this makes GC scanning harder
78 - can't have Token on the stack, but can have it embedded in other objects?
79 - but then you can't have any of those on the stack either
80 - There could be a restriction, hm
81
82## Problems
83
84### Code gen issue: -> vs .
85
86Is it possible for mycpp classes to use
87
88 this->token = token;
89 this->args = args;
90
91While ASDL uses
92
93 w.parts()[0].tok
94
95How would you distinguish the two things?
96
97Plus they will be mixed, like:
98
99 this->cmd_ev->w.parts()
100
101Cheap hack is to come up with a naming convention for the types
102
103Actually we already have it - types look like this. See visit_member_expr:
104
105 lhs_type core.ui.ErrorFormatter expr NameExpr(errfmt [l]) name
106PrettyPrintError lhs_type core.util.UserExit expr NameExpr(e [l]) name status
107 lhs_type osh.cmd_eval.CommandEvaluator expr NameExpr(cmd_ev [l]) name
108MaybeRunExitTrap lhs_type _devbuild.gen.syntax_asdl.IntParamBox expr
109NameExpr(mut_status [l]) name i
110
111So we can look for syntax_asdl / runtime_asdl
112
113## Design
114
115- YaksValue contains a uint64_t ?
116 - this means any BigStr, List, Dict, Tuple, or class
117 - can List and Dict have type tag in addition to pointer?
118
119These are less than 8 bytes:
120
121- int32_t
122- float
123- enum class value_e {}
124
125Features that mycpp runtime doesn't have:
126
127- More efficient rooting, either:
128 - our StackRoots({&a, &b, ...}) is slow
129 - our StackRoot takes up a lot of code space!
130- Either
131 - Shadow Stack for Garbage Collection
132 - It's better to spill pointers to a separate frame
133 - Hybrid rooting
134 - ParamRoot, ParamRoot, SortedPointerRoots,
135
136- Not just more EFFICIENT, but LESS ROOTING
137 - call graph analysis for stuff above the stack
138
139Tagged Pointers ("Boxless optimization")
140
141- Immediate Values - 8 bytes for variant type tag
142 - Zero-arg constructors are integers
143 - int32_t
144 - float
145 - Small BigStr
146 - 4 bits len
147 - how is this shared with type_tag?
148 - are we reserving half of this space for small_str?
149 - 6 bytes data
150 - 1 byte nullptr
151 - Pointer
152
153Later:
154
155- Value types? How does the GC scan them on the stack??? That is hard
156
157*/
158
159// A GC heap value or an immediate value
160//
161// Can also be used for say:
162//
163// tagged num = yaks.Int | yaks.Float
164//
165// There is more than enough space for a tag!
// A double would have to be heap-allocated.
167
168// Note that Yaks is statically typed, and you can have a simple double, float,
169// or int64_t. (maybe i32 i64 f32 f64 like WASM.)
170//
171// You would only use a YaksValue for an Int if it's part of a variant type.
172
173// Kinds of layouts:
174//
175// - primitive i32 i64 f32 f64
176// - enum class scope_e
177// - YaksValue that MAY have a heap_obj (which is any BigStr value)
178// - then you have only 7 possible tags
179// - YaksValue that does NOT have a heap_obj -- then you have more room for tags
// - you could have a variant of Bool, Int, and some other enum_class_e
181
#define ASDL_NAMES struct

// Tags for a YaksValue.  The enum is wrapped in a struct so that callers
// spell the tags as ytag_e::HeapObj, ytag_e::Int, and so on.
ASDL_NAMES ytag_e {
  enum no_name {
    HeapObj = 0,
    HeapStr = 1,
    SmallStr = 2,

    // Open question: do these make sense as tags, or should Bool and Float be
    // kept free for something else?  Bool is questionable.
    Bool = 3,
    Int = 4,
    Float = 5,

    // Considered and rejected:
    //
    //   EmptyList = 6,  // much more common
    //   EmptyDict = 7,  // less common
    //
    // The idea was that a container becomes a HeapObj the minute it is
    // append()ed or set() to, i.e. non-empty, while an EmptyList or EmptyDict
    // can still be iterated over.
    //
    // MUTABILITY breaks it:
    //
    //   var mylist = []
    //   myfunc(mylist)      // can't copy by value, must be by reference
    //   print(len(mylist))  // must be mutated
  };
};
212
213// Picture here
214// https://bernsteinbear.com/blog/compiling-a-lisp-2/#pointer-tagging-scheme
215
216union YaksValue {
217 int ytag() const {
218 // 0 for SmallStr -- any value can be a string
219 // 1..7 for immediate values
220 // for heap values: look in heap_obj->tag()
221 return ytag_e::HeapObj;
222 }
223
224 void* heap_obj;
225 bool b;
226 float f;
227
228 int32_t i;
229
230 // It's 8 bytes on 32 bit systems too -- I guess we need this for small str?
231 uint64_t whole;
232
233 // TODO: where do we put
234 // - 1 byte type_tag? Or do we have fewer than that? Maybe 5 bits, since 3
235 // are for small_str len?
236 // - that's 32 immediate types, and then you can overflow into heap?
237 // - NO WE only have THREE BITS because our allocator is 24 or 48 byte
238 // aligned
239 // - so we have at most 8 immediate values, and the rest heap values?
240 // - but SmallStr always takes up some space
241
242 // - NUL terminator for SmallStr
243};
244
245/* Class translation
246
247class Slice:
248 def __init__(self, s: str, start: int, length: int):
249 self.s = s
250 self.start = start
251 self.length = length
252
253 def get(self) -> str:
254 return self.s[self.start : self.start + self.length]
255*/
256
257class Slice {
258 public:
259 Slice(BigStr* s, int start, int length) {
260 // TODO: allocate Members
261 self_.heap_obj = Alloc<Members>();
262
263 self()->s_ = s;
264 self()->start_ = start;
265 self()->length_ = length;
266 }
267
268 struct Members {
269 BigStr* s_;
270 int start_;
271 int length_;
272
273 static constexpr ObjHeader obj_header() {
274 return ObjHeader::ClassFixed(field_mask(), sizeof(Members));
275 }
276 static constexpr uint32_t field_mask() {
277 return maskbit(offsetof(Members, s_));
278 }
279 };
280
281 Members* self() {
282 return static_cast<Members*>(self_.heap_obj);
283 }
284
285 int start() {
286 return self()->start_;
287 }
288
289 // Field Accessors -- this will make the generated code a lot longer?
290 // Naming convention is like protobuf
291
292 int length() {
293 return self()->length_;
294 }
295
296 // Methods
297 BigStr* Get() {
298 // return StrFromC("yo");
299
300 int n = self()->length_;
301
302 log("start = %d", self()->start_);
303 log("n = %d", n);
304
305 BigStr* result = NewStr(n);
306
307 memcpy(result->data_, self()->s_->data_ + self()->start_, n);
308
309 result->data_[n] = '\0';
310
311 log("result->data %s", result->data_);
312
313 return result;
314 }
315
316 YaksValue self_;
317};
318
319void SliceFunc(Slice myslice) {
320 BigStr* s = myslice.Get();
321 log("val = %s", s->data_);
322 print(s);
323
324 log("start %d length %d", myslice.start(), myslice.length());
325}
326
327// Based on _gen/core/runtime.asdl.h
328
329// TODO: This can have a typetag() method
330//
331// - It will look in the immediate YaksValue for the common cases?
332// - and in the case you only have immediates
333// And then look in the GC header of the heap allocated object for the other
334// cases?
335
336class value_t {
337 protected:
338 value_t() {
339 }
340
341 public:
342 int typetag() const {
343 // Look if it's an integer or string
344
345 // Look for heap object
346 int ytag = self_.ytag();
347 if (ytag == ytag_e::HeapObj) {
348 return 0;
349 }
350 return 1;
351
352 // return ObjHeader::FromObject(this)->type_tag;
353 }
354
355 // All variants have this.
356 YaksValue self_;
357
358 // hnode_t* PrettyTree();
359 DISALLOW_COPY_AND_ASSIGN(value_t)
360};
361
362class value__Int : public value_t {
363 public:
364 int typetag() const {
365 // Reuse the primitive tag
366 return ytag_e::Int;
367 }
368};
369
370class value__Str : public value_t {
371 public:
372 int typetag() const {
373 // int ytag = self_.ytag();
374 // CHECK(ytag == ytag_e::SmallStr || ytag == ytag_e::HeapStr) {
375
376 // TODO: return value_e::Str
377 return 0;
378 }
379};
380
381TEST yaks_test() {
382 Slice myslice(StrFromC("hello"), 1, 3);
383
384 log("myslice %p", &myslice);
385
386 SliceFunc(myslice);
387
388 // TODO: constructor
389 value__Int i;
390 log(" i.typetag = %d", i.typetag());
391
392 value__Str s;
393 log(" s.typetag = %d", s.typetag());
394
395 PASS();
396}
397
// }  // namespace yaks
399
400GREATEST_MAIN_DEFS();
401
402int main(int argc, char** argv) {
403 // gHeap.Init();
404
405 GREATEST_MAIN_BEGIN();
406
407 // RUN_TEST(yaks::yaks_test);
408 RUN_TEST(yaks_test);
409
410 // gHeap.CleanProcessExit();
411
412 GREATEST_MAIN_END();
413 return 0;
414}