| 1 | // yaks_runtime_test.cc
 | 
| 2 | //
 | 
| 3 | // Related to small_str_test.cc
 | 
| 4 | 
 | 
| 5 | #include <inttypes.h>
 | 
| 6 | #include <limits.h>  // HOST_NAME_MAX
 | 
| 7 | #include <unistd.h>  // gethostname()
 | 
| 8 | 
 | 
| 9 | #include <new>  // placement new
 | 
| 10 | 
 | 
| 11 | #include "mycpp/common.h"
 | 
| 12 | #include "mycpp/gc_obj.h"  // ObjHeader
 | 
| 13 | #include "mycpp/runtime.h"
 | 
| 14 | #include "vendor/greatest.h"
 | 
| 15 | 
 | 
| 16 | // namespace yaks {
 | 
| 17 | 
 | 
| 18 | /*
 | 
| 19 | 
 | 
| 20 | To Do
 | 
| 21 | =====
 | 
| 22 | 
 | 
| 23 | - prototype rooting
 | 
| 24 |   - register YaksValue only, not other primitive types
 | 
| 25 |   - Is there maybe a ImmediateVariant type?
 | 
| 26 |     - it can never be a HeapObj, and thus doesn't need to be rooted
 | 
| 27 |     - how common is this?  Probably not very
 | 
| 28 | 
 | 
| 29 | - What are the ASDL schema language changes you need?
 | 
| 30 |   - defining tags and so forth
 | 
| 31 |     - shared tags ACROSS MODULES -- I ran into this with %LiteralBlock
 | 
| 32 |     - and probably %Name for (Token tok, str name)
 | 
| 33 | 
 | 
| 34 | - Does this make sense?
 | 
| 35 | 
 | 
| 36 |     alias CompoundWord = List[word_part]
 | 
| 37 | 
 | 
| 38 | - What about
 | 
| 39 | 
 | 
| 40 |     tagged value =
 | 
| 41 |       Int %Int
 | 
| 42 |     | Frame %Dict[str, str]
 | 
| 43 | 
 | 
| 44 | - What's the penalty in Python?
 | 
| 45 |   - I think it's mainly mymember() bar() syntax everywhere
 | 
| 46 |     - but then do you need to generate lots of tiny classes
 | 
| 47 |       - or use getattr() on the list of known field names to simulate it
 | 
| 48 |       - but that won't pass MyPy
 | 
| 49 |       - damn you would need a ton of type annotations then ... every wrapper
 | 
| 50 |       would get a type annotation
 | 
| 51 |       - but this is only for public fields?  Maybe it's not that bad
 | 
| 52 |     - or do you need public self(), like perhaps
 | 
| 53 |       - cmd_ev.self().word_ev
 | 
| 54 |       - cmd_ev.member().foo
 | 
| 55 |         - hm that's possible
 | 
| 56 |   - self()->length_ = 32;  // this can be direct?  At least within a class
 | 
| 57 | 
 | 
| 58 | 
 | 
| 59 | ## Use Cases
 | 
| 60 | 
 | 
| 61 | - write out value_t class more
 | 
| 62 |   - value.Int is
 | 
| 63 |     - immediate int32_t
 | 
| 64 |     - or what about the 53 bit idea? (matching double precision)
 | 
| 65 |       - 64-3 bits = 61, or 64-11 = 53
 | 
| 66 |   - value.BigStr is BigStr and SmallStr
 | 
| 67 |   - value.BashAssoc is Dict[str, str]
 | 
| 68 |   - value.Dict is Dict[str, value]
 | 
| 69 |   - value.Frame is Dict[str, cell]
 | 
| 70 | 
 | 
| 71 | - CompoundWord is List[word_part] parts
 | 
| 72 | 
 | 
| 73 | - a_index = BigStr(str s) | Int(int i)
 | 
| 74 |   - this is not immdiate
 | 
| 75 | 
 | 
| 76 | - Custom Token VALUE type as 16 bytes?
 | 
| 77 |   - this makes GC scanning harder
 | 
| 78 |   - can't have Token on the stack, but can have it embedded in other objects?
 | 
| 79 |     - but then you can't have any of those on the stack either
 | 
| 80 |   - There could be a restriction, hm
 | 
| 81 | 
 | 
| 82 | ## Problems
 | 
| 83 | 
 | 
| 84 | ### Code gen issue: -> vs .
 | 
| 85 | 
 | 
| 86 | Is it possible for mycpp classes to use
 | 
| 87 | 
 | 
| 88 |   this->token = token;
 | 
| 89 |   this->args = args;
 | 
| 90 | 
 | 
| 91 | While ASDL uses
 | 
| 92 | 
 | 
| 93 |   w.parts()[0].tok
 | 
| 94 | 
 | 
| 95 | How would you distinguish the two things?
 | 
| 96 | 
 | 
| 97 | Plus they will be mixed, like:
 | 
| 98 | 
 | 
| 99 |   this->cmd_ev->w.parts()
 | 
| 100 | 
 | 
| 101 | Cheap hack is to come up with a naming convention for the types
 | 
| 102 | 
 | 
| 103 | Actually we already have it - types look like this.  See visit_member_expr:
 | 
| 104 | 
 | 
| 105 |       lhs_type core.ui.ErrorFormatter expr NameExpr(errfmt [l]) name
 | 
| 106 | PrettyPrintError lhs_type core.util.UserExit expr NameExpr(e [l]) name status
 | 
| 107 |   lhs_type osh.cmd_eval.CommandEvaluator expr NameExpr(cmd_ev [l]) name
 | 
| 108 | MaybeRunExitTrap lhs_type _devbuild.gen.syntax_asdl.IntParamBox expr
 | 
| 109 | NameExpr(mut_status [l]) name i
 | 
| 110 | 
 | 
| 111 | So we can look for syntax_asdl / runtime_asdl
 | 
| 112 | 
 | 
| 113 | ## Design
 | 
| 114 | 
 | 
| 115 | - YaksValue contains a uint64_t  ?
 | 
| 116 |   - this means any BigStr, List, Dict, Tuple, or class
 | 
| 117 |   - can List and Dict have type tag in addition to pointer?
 | 
| 118 | 
 | 
| 119 | These are less than 8 bytes:
 | 
| 120 | 
 | 
| 121 | - int32_t
 | 
| 122 | - float
 | 
| 123 | - enum class value_e {}
 | 
| 124 | 
 | 
| 125 | Features that mycpp runtime doesn't have:
 | 
| 126 | 
 | 
| 127 | - More efficient rooting, either:
 | 
| 128 |   - our StackRoots({&a, &b, ...}) is slow
 | 
| 129 |   - our StackRoot takes up a lot of code space!
 | 
| 130 | - Either
 | 
| 131 |   - Shadow Stack for Garbage Collection
 | 
| 132 |     - It's better to spill pointers to a separate frame
 | 
| 133 |   - Hybrid rooting
 | 
| 134 |     - ParamRoot, ParamRoot, SortedPointerRoots,
 | 
| 135 | 
 | 
| 136 | - Not just more EFFICIENT, but LESS ROOTING
 | 
| 137 |   - call graph analysis for stuff above the stack
 | 
| 138 | 
 | 
| 139 | Tagged Pointers ("Boxless optimization")
 | 
| 140 | 
 | 
| 141 | - Immediate Values - 8 bytes for variant type tag
 | 
| 142 |   - Zero-arg constructors are integers
 | 
| 143 |   - int32_t
 | 
| 144 |   - float
 | 
| 145 |   - Small BigStr
 | 
| 146 |     - 4 bits len
 | 
| 147 |       - how is this shared with type_tag?
 | 
| 148 |       - are we reserving half of this space for small_str?
 | 
| 149 |     - 6 bytes data
 | 
| 150 |     - 1 byte nullptr
 | 
| 151 |   - Pointer
 | 
| 152 | 
 | 
| 153 | Later:
 | 
| 154 | 
 | 
| 155 | - Value types?  How does the GC scan them on the stack???  That is hard
 | 
| 156 | 
 | 
| 157 | */
 | 
| 158 | 
 | 
| 159 | // A GC heap value or an immediate value
 | 
| 160 | //
 | 
| 161 | // Can also be used for say:
 | 
| 162 | //
 | 
| 163 | // tagged num = yaks.Int | yaks.Float
 | 
| 164 | //
 | 
| 165 | // There is more than enough space for a tag!
 | 
| 166 | // A double would hae to be heap-allocated.
 | 
| 167 | 
 | 
| 168 | // Note that Yaks is statically typed, and you can have a simple double, float,
 | 
| 169 | // or int64_t.  (maybe i32 i64 f32 f64 like WASM.)
 | 
| 170 | //
 | 
| 171 | // You would only use a YaksValue for an Int if it's part of a variant type.
 | 
| 172 | 
 | 
| 173 | // Kinds of layouts:
 | 
| 174 | //
 | 
| 175 | // - primitive i32 i64 f32 f64
 | 
| 176 | // - enum class scope_e
 | 
| 177 | // - YaksValue that MAY have a heap_obj (which is any BigStr value)
 | 
| 178 | //   - then you have only 7 possible tags
 | 
| 179 | // - YaksValue that does NOT have a heap_obj -- then you have more room for tags
 | 
| 180 | //   - you could have variatn of Bool, Int, and some other enum_class_e
 | 
| 181 | 
 | 
| 182 | #define ASDL_NAMES struct
 | 
| 183 | 
 | 
| 184 | ASDL_NAMES ytag_e {
 | 
| 185 |   enum no_name {
 | 
| 186 |     HeapObj = 0,
 | 
| 187 |     HeapStr = 1,
 | 
| 188 |     SmallStr = 2,
 | 
| 189 | 
 | 
| 190 |     // Do these make sense, or would you want to use Bool, Float for something
 | 
| 191 |     // else?  Bool is questionable.
 | 
| 192 |     Bool = 3,
 | 
| 193 |     Int = 4,
 | 
| 194 |     Float = 5,
 | 
| 195 | 
 | 
| 196 |     // Note: Is this POSSIBLE?   NO
 | 
| 197 |     // EmptyList = 6,  // much more common
 | 
| 198 |     // EmptyDict = 7,  // less common
 | 
| 199 | 
 | 
| 200 |     // Potential optimization:
 | 
| 201 |     // - the minute they are append() or set(), i.e. non-empty, they become
 | 
| 202 |     // HeapObj
 | 
| 203 |     // - but you can iterate over an EmptyList or Dict
 | 
| 204 | 
 | 
| 205 |     // MUTABILITY breaks it
 | 
| 206 |     //
 | 
| 207 |     // var mylist = []
 | 
| 208 |     // myfunc(mylist)  // can't copy by value, must be by reference
 | 
| 209 |     // print(len(mylist))  // must be mutated
 | 
| 210 |   };
 | 
| 211 | };
 | 
| 212 | 
 | 
| 213 | // Picture here
 | 
| 214 | // https://bernsteinbear.com/blog/compiling-a-lisp-2/#pointer-tagging-scheme
 | 
| 215 | 
 | 
| 216 | union YaksValue {
 | 
| 217 |   int ytag() const {
 | 
| 218 |     // 0 for SmallStr -- any value can be a string
 | 
| 219 |     // 1..7 for immediate values
 | 
| 220 |     // for heap values: look in heap_obj->tag()
 | 
| 221 |     return ytag_e::HeapObj;
 | 
| 222 |   }
 | 
| 223 | 
 | 
| 224 |   void* heap_obj;
 | 
| 225 |   bool b;
 | 
| 226 |   float f;
 | 
| 227 | 
 | 
| 228 |   int32_t i;
 | 
| 229 | 
 | 
| 230 |   // It's 8 bytes on 32 bit systems too -- I guess we need this for small str?
 | 
| 231 |   uint64_t whole;
 | 
| 232 | 
 | 
| 233 |   // TODO: where do we put
 | 
| 234 |   // - 1 byte type_tag?  Or do we have fewer than that?  Maybe 5 bits, since 3
 | 
| 235 |   // are for small_str len?
 | 
| 236 |   //   - that's 32 immediate types, and then you can overflow into heap?
 | 
| 237 |   //   - NO WE only have THREE BITS because our allocator is 24 or 48 byte
 | 
| 238 |   //   aligned
 | 
| 239 |   //   - so we have at most 8 immediate values, and the rest heap values?
 | 
| 240 |   //   - but SmallStr always takes up some space
 | 
| 241 | 
 | 
| 242 |   // - NUL terminator for SmallStr
 | 
| 243 | };
 | 
| 244 | 
 | 
| 245 | /* Class translation
 | 
| 246 | 
 | 
| 247 | class Slice:
 | 
| 248 |   def __init__(self, s: str, start: int, length: int):
 | 
| 249 |     self.s = s
 | 
| 250 |     self.start = start
 | 
| 251 |     self.length = length
 | 
| 252 | 
 | 
| 253 |   def get(self) -> str:
 | 
| 254 |     return self.s[self.start : self.start + self.length]
 | 
| 255 | */
 | 
| 256 | 
 | 
| 257 | class Slice {
 | 
| 258 |  public:
 | 
| 259 |   Slice(BigStr* s, int start, int length) {
 | 
| 260 |     // TODO: allocate Members
 | 
| 261 |     self_.heap_obj = Alloc<Members>();
 | 
| 262 | 
 | 
| 263 |     self()->s_ = s;
 | 
| 264 |     self()->start_ = start;
 | 
| 265 |     self()->length_ = length;
 | 
| 266 |   }
 | 
| 267 | 
 | 
| 268 |   struct Members {
 | 
| 269 |     BigStr* s_;
 | 
| 270 |     int start_;
 | 
| 271 |     int length_;
 | 
| 272 | 
 | 
| 273 |     static constexpr ObjHeader obj_header() {
 | 
| 274 |       return ObjHeader::ClassFixed(field_mask(), sizeof(Members));
 | 
| 275 |     }
 | 
| 276 |     static constexpr uint32_t field_mask() {
 | 
| 277 |       return maskbit(offsetof(Members, s_));
 | 
| 278 |     }
 | 
| 279 |   };
 | 
| 280 | 
 | 
| 281 |   Members* self() {
 | 
| 282 |     return static_cast<Members*>(self_.heap_obj);
 | 
| 283 |   }
 | 
| 284 | 
 | 
| 285 |   int start() {
 | 
| 286 |     return self()->start_;
 | 
| 287 |   }
 | 
| 288 | 
 | 
| 289 |   // Field Accessors -- this will make the generated code a lot longer?
 | 
| 290 |   // Naming convention is like protobuf
 | 
| 291 | 
 | 
| 292 |   int length() {
 | 
| 293 |     return self()->length_;
 | 
| 294 |   }
 | 
| 295 | 
 | 
| 296 |   // Methods
 | 
| 297 |   BigStr* Get() {
 | 
| 298 |     // return StrFromC("yo");
 | 
| 299 | 
 | 
| 300 |     int n = self()->length_;
 | 
| 301 | 
 | 
| 302 |     log("start = %d", self()->start_);
 | 
| 303 |     log("n = %d", n);
 | 
| 304 | 
 | 
| 305 |     BigStr* result = NewStr(n);
 | 
| 306 | 
 | 
| 307 |     memcpy(result->data_, self()->s_->data_ + self()->start_, n);
 | 
| 308 | 
 | 
| 309 |     result->data_[n] = '\0';
 | 
| 310 | 
 | 
| 311 |     log("result->data %s", result->data_);
 | 
| 312 | 
 | 
| 313 |     return result;
 | 
| 314 |   }
 | 
| 315 | 
 | 
| 316 |   YaksValue self_;
 | 
| 317 | };
 | 
| 318 | 
 | 
| 319 | void SliceFunc(Slice myslice) {
 | 
| 320 |   BigStr* s = myslice.Get();
 | 
| 321 |   log("val = %s", s->data_);
 | 
| 322 |   print(s);
 | 
| 323 | 
 | 
| 324 |   log("start %d length %d", myslice.start(), myslice.length());
 | 
| 325 | }
 | 
| 326 | 
 | 
| 327 | // Based on _gen/core/runtime.asdl.h
 | 
| 328 | 
 | 
| 329 | // TODO: This can have a typetag() method
 | 
| 330 | //
 | 
| 331 | // - It will look in the immediate YaksValue for the common cases?
 | 
| 332 | //   - and in the case you only have immediates
 | 
| 333 | // And then look in the GC header of the heap allocated object for the other
 | 
| 334 | // cases?
 | 
| 335 | 
 | 
| 336 | class value_t {
 | 
| 337 |  protected:
 | 
| 338 |   value_t() {
 | 
| 339 |   }
 | 
| 340 | 
 | 
| 341 |  public:
 | 
| 342 |   int typetag() const {
 | 
| 343 |     // Look if it's an integer or string
 | 
| 344 | 
 | 
| 345 |     // Look for heap object
 | 
| 346 |     int ytag = self_.ytag();
 | 
| 347 |     if (ytag == ytag_e::HeapObj) {
 | 
| 348 |       return 0;
 | 
| 349 |     }
 | 
| 350 |     return 1;
 | 
| 351 | 
 | 
| 352 |     // return ObjHeader::FromObject(this)->type_tag;
 | 
| 353 |   }
 | 
| 354 | 
 | 
| 355 |   // All variants have this.
 | 
| 356 |   YaksValue self_;
 | 
| 357 | 
 | 
| 358 |   // hnode_t* PrettyTree();
 | 
| 359 |   DISALLOW_COPY_AND_ASSIGN(value_t)
 | 
| 360 | };
 | 
| 361 | 
 | 
| 362 | class value__Int : public value_t {
 | 
| 363 |  public:
 | 
| 364 |   int typetag() const {
 | 
| 365 |     // Reuse the primitive tag
 | 
| 366 |     return ytag_e::Int;
 | 
| 367 |   }
 | 
| 368 | };
 | 
| 369 | 
 | 
| 370 | class value__Str : public value_t {
 | 
| 371 |  public:
 | 
| 372 |   int typetag() const {
 | 
| 373 |     // int ytag = self_.ytag();
 | 
| 374 |     // CHECK(ytag == ytag_e::SmallStr || ytag == ytag_e::HeapStr) {
 | 
| 375 | 
 | 
| 376 |     // TODO: return value_e::Str
 | 
| 377 |     return 0;
 | 
| 378 |   }
 | 
| 379 | };
 | 
| 380 | 
 | 
| 381 | TEST yaks_test() {
 | 
| 382 |   Slice myslice(StrFromC("hello"), 1, 3);
 | 
| 383 | 
 | 
| 384 |   log("myslice %p", &myslice);
 | 
| 385 | 
 | 
| 386 |   SliceFunc(myslice);
 | 
| 387 | 
 | 
| 388 |   // TODO: constructor
 | 
| 389 |   value__Int i;
 | 
| 390 |   log(" i.typetag = %d", i.typetag());
 | 
| 391 | 
 | 
| 392 |   value__Str s;
 | 
| 393 |   log(" s.typetag = %d", s.typetag());
 | 
| 394 | 
 | 
| 395 |   PASS();
 | 
| 396 | }
 | 
| 397 | 
 | 
| 398 | //}  // namespace small_str_test
 | 
| 399 | 
 | 
| 400 | GREATEST_MAIN_DEFS();
 | 
| 401 | 
 | 
| 402 | int main(int argc, char** argv) {
 | 
| 403 |   // gHeap.Init();
 | 
| 404 | 
 | 
| 405 |   GREATEST_MAIN_BEGIN();
 | 
| 406 | 
 | 
| 407 |   // RUN_TEST(yaks::yaks_test);
 | 
| 408 |   RUN_TEST(yaks_test);
 | 
| 409 | 
 | 
| 410 |   // gHeap.CleanProcessExit();
 | 
| 411 | 
 | 
| 412 |   GREATEST_MAIN_END();
 | 
| 413 |   return 0;
 | 
| 414 | }
 |