1 | #include <assert.h>
|
2 | #include <stdarg.h> // va_list, etc.
|
3 | #include <stdio.h>
|
4 | #include <stdint.h>
|
5 | #include <stdlib.h>
|
6 | #include <string.h> // memcmp
|
7 | #include <vector>
|
8 | #include <unordered_map>
|
9 |
|
10 | #include "opcode.h"
|
11 |
|
12 | #define VERBOSE_OPS 0
|
13 | #define VERBOSE_NAMES 0
|
14 | #define VERBOSE_VALUE_STACK 0
|
15 | #define VERBOSE_ALLOC 0 // for New*() functions
|
16 |
|
17 | using std::vector;
|
18 | using std::unordered_map;
|
19 |
|
20 | typedef int32_t Handle;
|
21 |
|
22 | typedef vector<Handle> Args;
|
23 | typedef vector<Handle> Rets;
|
24 |
|
25 | // Like enum why_code in ceval.c.
|
26 | enum class Why {
|
27 | Not,
|
28 | Exception,
|
29 | Reraise,
|
30 | Return,
|
31 | Break,
|
32 | Continue,
|
33 | Yield,
|
34 | };
|
35 |
|
36 | enum CompareOp {
|
37 | LT,
|
38 | LE,
|
39 | EQ,
|
40 | NE,
|
41 | GT,
|
42 | GE,
|
43 | IS,
|
44 | IS_NOT,
|
45 | };
|
46 |
|
47 | //
|
48 | // Forward declarations
|
49 | //
|
50 |
|
51 | class OHeap;
|
52 |
|
53 | //
|
54 | // Prototypes
|
55 | //
|
56 |
|
57 | Why func_print(const OHeap& heap, const Args& args, Rets* rets);
|
58 |
|
59 | //
|
60 | // Constants
|
61 | //
|
62 |
|
63 | // TODO: Generate this?
|
64 | const int TAG_NONE = -1;
|
65 | const int TAG_BOOL = -2;
|
66 | const int TAG_INT = -3;
|
67 | const int TAG_FLOAT = -4;
|
68 | const int TAG_STR = -5;
|
69 | const int TAG_TUPLE = -6;
|
70 | const int TAG_CODE = -7;
|
71 | const int TAG_FUNC = -8;
|
72 |
|
73 | // Should this be zero? Positive are user defined, negative are native, 0 is
|
74 | // invalid? Useful for NewCell() to return on allocation failure. And
|
75 | // uninitialized handles should be in an invalid state.
|
76 | const int kInvalidHandle = -10;
|
77 |
|
78 | // TODO: These should be generated
|
79 | const int kTrueHandle = -11;
|
80 | const int kFalseHandle = -12;
|
81 |
|
82 | const char* kTagDebugString[] = {
|
83 | "",
|
84 | "None",
|
85 | "bool",
|
86 | "int",
|
87 | "float",
|
88 | "str",
|
89 | "tuple",
|
90 | "code",
|
91 | };
|
92 |
|
93 | const char* TagDebugString(int tag) {
|
94 | return kTagDebugString[-tag];
|
95 | }
|
96 |
|
97 | //
|
98 | // Utilities
|
99 | //
|
100 |
|
101 | // Log messages to stdout.
|
102 | void log(const char* fmt, ...) {
|
103 | va_list args;
|
104 | va_start(args, fmt);
|
105 | vprintf(fmt, args);
|
106 | va_end(args);
|
107 | printf("\n");
|
108 | }
|
109 |
|
110 | // Implement hash and equality functors for unordered_map.
|
111 | struct NameHash {
|
112 | int operator() (const char* s) const {
|
113 | // DJB hash: http://www.cse.yorku.ca/~oz/hash.html
|
114 | int h = 5381;
|
115 |
|
116 | while (char c = *s++) {
|
117 | h = (h << 5) + h + c;
|
118 | }
|
119 | return h;
|
120 | }
|
121 | };
|
122 |
|
123 | struct NameEq {
|
124 | bool operator() (const char* x, const char* y) const {
|
125 | return strcmp(x, y) == 0;
|
126 | }
|
127 | };
|
128 |
|
129 | // Dictionary of names (char*) to value (Handle).
|
130 | //
|
131 | // TODO: if we de-dupe all the names in OHeap, and there's no runtime code
|
132 | // generation, each variable name string will have exactly one address. So
|
133 | // then can we use pointer comparison for equality / hashing? Would be nice.
|
134 | typedef unordered_map<const char*, Handle, NameHash, NameEq> NameLookup;
|
135 |
|
136 |
|
137 | // 16 bytes
|
138 | struct Cell {
|
139 | int16_t tag;
|
140 | uint8_t is_slab;
|
141 | uint8_t small_len; // end first 4 bytes
|
142 |
|
143 | union {
|
144 | // following TWELVE bytes, for small string, tuple, etc.
|
145 | uint8_t small_val[1];
|
146 | int32_t big_len; // length of slab. TODO: Use this.
|
147 | };
|
148 |
|
149 | union {
|
150 | // The wire format
|
151 | struct {
|
152 | uint8_t pad[4];
|
153 | int32_t offset;
|
154 | } slab_wire;
|
155 |
|
156 | // Resolved memory format
|
157 | uint8_t* ptr; // should be 8 bytes on 64-bit systems
|
158 | int64_t i;
|
159 | double d;
|
160 | };
|
161 | };
|
162 |
|
163 | // Same interface for big or small strings.
|
164 | // What about hash code? That could be stored with big strings.
|
165 | struct Str {
|
166 | int32_t len;
|
167 | const char* data; // NUL-terminated, but can also contain NUL.
|
168 | // should not be mutated.
|
169 | };
|
170 |
|
171 | struct Tuple {
|
172 | int32_t len;
|
173 | const Handle* handles;
|
174 | };
|
175 |
|
176 |
|
177 | // Dicts require special consideration in these cases:
|
178 | // - When deserializing, we have to create a new DictIndex from the DictItems
|
179 | // array. We compute the size of the index from the number of items.
|
180 | // - When garbage collecting, we iterate over DictItems and mark 'key' and
|
181 | // 'value' Handles, skipping over the hash value.
|
182 | //
|
183 | // Another possibility: Why isn't the hash stored with the key itself rather
|
184 | // than in the items array? I guess it could be both.
|
185 |
|
186 | struct DictIndex {
|
187 | int size; // power of 2
|
188 | int num_used; // is this the same as the number of items?
|
189 | // For the load factor.
|
190 |
|
191 | // The slab first has sparse indices, and then dense items, like CPython.
|
192 |
|
193 | // Using the same approach as CPython.
|
194 | //
|
195 | // NOTE PyDict_MINSIZE == 8
|
196 | // "8 allows dicts with no more than 5 active entries; experiments suggested
|
197 | // this suffices for the majority of dicts (consisting mostly of
|
198 | // usually-small dicts created to pass keyword arguments)."
|
199 | // This is always a power of 2 (see dictresize() in dictobject.c).
|
200 | // So it goes 8, 16, 32 ...
|
201 | //
|
202 | // Optimization: DictIndex could be shared among different hash tables!
|
203 | // As long as they have the exact same set of keys. But how would you
|
204 | // determine that?
|
205 |
|
206 | // Doesn't this produce a lot of unpredictable branches? Maybe as a
|
207 | // compromise we could just use options for 2 bytes and 4 bytes? Dicts up
|
208 | // to 2**32 should be fine.
|
209 | /*
|
210 | The size in bytes of an indice depends on dk_size:
|
211 |
|
212 | - 1 byte if dk_size <= 0xff (char*)
|
213 | - 2 bytes if dk_size <= 0xffff (int16_t*)
|
214 | - 4 bytes if dk_size <= 0xffffffff (int32_t*)
|
215 | - 8 bytes otherwise (int64_t*)
|
216 | */
|
217 | union {
|
218 | int8_t as_1[8];
|
219 | int16_t as_2[4];
|
220 | int32_t as_4[2];
|
221 | #if SIZEOF_VOID_P > 4
|
222 | int64_t as_8[1];
|
223 | #endif
|
224 | } dk_indices;
|
225 | };
|
226 |
|
227 | struct DictSlab {
|
228 | // number of items is in Cell big_len
|
229 | int items_offset; // offset to later in the slab?
|
230 |
|
231 | int indices_size; // how many we can have without reallocating
|
232 | int indices_used; //
|
233 |
|
234 | };
|
235 |
|
236 | struct DictItem {
|
237 | uint64_t hash;
|
238 | Handle key;
|
239 | Handle value;
|
240 | };
|
241 |
|
242 | // Wire format for dicts: a hole for the index, and then an array of DictItem.
|
243 | struct DictSlabWire {
|
244 | union {
|
245 | uint8_t pad[8];
|
246 | DictIndex* index;
|
247 | };
|
248 | // DictItems here. Length is stored in the cell?
|
249 | };
|
250 |
|
251 | class Code {
|
252 | public:
|
253 | Code(OHeap* heap, Cell* self)
|
254 | : heap_(heap),
|
255 | self_(self) {
|
256 | }
|
257 | // Assume the pointers are patched below
|
258 | int64_t argcount() const {
|
259 | return FieldAsInt(1);
|
260 | }
|
261 | int64_t nlocals() const {
|
262 | return FieldAsInt(2);
|
263 | }
|
264 | int64_t stacksize() const {
|
265 | return FieldAsInt(3);
|
266 | }
|
267 | int64_t flags() const {
|
268 | return FieldAsInt(4);
|
269 | }
|
270 | int64_t firstlineno() const {
|
271 | return FieldAsInt(5);
|
272 | }
|
273 | Str name() const {
|
274 | return FieldAsStr(6);
|
275 | }
|
276 | Str filename() const {
|
277 | return FieldAsStr(7);
|
278 | }
|
279 | Str code() const {
|
280 | return FieldAsStr(8);
|
281 | }
|
282 | Tuple names() const {
|
283 | return FieldAsTuple(9);
|
284 | }
|
285 | Tuple varnames() const {
|
286 | return FieldAsTuple(10);
|
287 | }
|
288 | Tuple consts() const {
|
289 | return FieldAsTuple(11);
|
290 | }
|
291 |
|
292 | private:
|
293 | inline Handle GetField(int field_index) const {
|
294 | int32_t* slab = reinterpret_cast<int32_t*>(self_->ptr);
|
295 | return slab[field_index];
|
296 | }
|
297 |
|
298 | inline int64_t FieldAsInt(int field_index) const;
|
299 | inline Str FieldAsStr(int field_index) const;
|
300 | inline Tuple FieldAsTuple(int field_index) const;
|
301 |
|
302 | OHeap* heap_;
|
303 | Cell* self_;
|
304 | };
|
305 |
|
306 | // A convenient "view" on a function object. To create a function, you create
|
307 | // the cell and the slab directly!
|
308 | //
|
309 | // LATER: This may have a closure pointer too.
|
310 | class Func {
|
311 | public:
|
312 | Func(OHeap* heap, Cell* self)
|
313 | : heap_(heap),
|
314 | self_(self) {
|
315 | }
|
316 | // Code is copyable?
|
317 | #if 0
|
318 | Code code() const {
|
319 | Handle h = 0; // TODO: Field access for handle
|
320 | Code c(heap_, heap_->cells_ + h);
|
321 | return c;
|
322 | }
|
323 | #endif
|
324 | Tuple defaults() const {
|
325 | Tuple t;
|
326 | return t;
|
327 | //return FieldAsTuple(1);
|
328 | }
|
329 | // Fields: code, globals, defaults, __doc__,
|
330 | // And note that we have to SET them too.
|
331 |
|
332 | private:
|
333 | OHeap* heap_;
|
334 | Cell* self_;
|
335 | };
|
336 |
|
337 | class OHeap {
|
338 | public:
|
339 | OHeap() : slabs_(nullptr), num_cells_(0), max_cells_(0), cells_(nullptr) {
|
340 | }
|
341 |
|
342 | ~OHeap() {
|
343 | if (slabs_) {
|
344 | free(slabs_);
|
345 | }
|
346 | if (cells_) {
|
347 | free(cells_);
|
348 | }
|
349 | }
|
350 |
|
351 | uint8_t* AllocPermanentSlabs(int total_slab_size) {
|
352 | slabs_ = static_cast<uint8_t*>(malloc(total_slab_size));
|
353 | return slabs_;
|
354 | }
|
355 |
|
356 | Cell* AllocInitialCells(int num_cells) {
|
357 | num_cells_ = num_cells;
|
358 | // Allocate 2x the number of cells to account for growth.
|
359 | //max_cells_ = num_cells * 2;
|
360 | max_cells_ = num_cells * 10;
|
361 | cells_ = static_cast<Cell*>(malloc(sizeof(Cell) * max_cells_));
|
362 | return cells_;
|
363 | }
|
364 |
|
365 | bool AsInt(Handle h, int64_t* out) const {
|
366 | assert(h >= 0);
|
367 | const Cell& cell = cells_[h];
|
368 | if (cell.tag != TAG_INT) {
|
369 | return false;
|
370 | }
|
371 | *out = cell.i;
|
372 | return true;
|
373 | }
|
374 |
|
375 | // C string. NULL if the cell isn't a string.
|
376 | // NOTE: Shouldn't modify this?
|
377 | const char* AsStr0(Handle h) const {
|
378 | assert(h >= 0);
|
379 | const Cell& cell = cells_[h];
|
380 | if (cell.tag != TAG_STR) {
|
381 | log("AsStr0 expected string but got tag %d", cell.tag);
|
382 | return nullptr;
|
383 | }
|
384 | if (cell.is_slab) {
|
385 | int32_t* str_slab = reinterpret_cast<int32_t*>(cell.ptr);
|
386 | // everything after len
|
387 | return reinterpret_cast<const char*>(str_slab + 1);
|
388 | } else {
|
389 | return reinterpret_cast<const char*>(&cell.small_val);
|
390 | }
|
391 | }
|
392 | // Sets str and len. Returns false if the Cell isn't a string.
|
393 | bool AsStr(Handle h, Str* out) const {
|
394 | assert(h >= 0);
|
395 | const Cell& cell = cells_[h];
|
396 | if (cell.tag != TAG_STR) {
|
397 | return false;
|
398 | }
|
399 | if (cell.is_slab) {
|
400 | int32_t* str_slab = reinterpret_cast<int32_t*>(cell.ptr);
|
401 | out->len = *str_slab;
|
402 | // everything after len
|
403 | out->data = reinterpret_cast<const char*>(str_slab + 1);
|
404 | } else {
|
405 | out->len = cell.small_len; // in bytes
|
406 | out->data = reinterpret_cast<const char*>(&cell.small_val);
|
407 | }
|
408 | return true;
|
409 | }
|
410 |
|
411 | bool AsTuple(Handle h, Tuple* out) {
|
412 | assert(h >= 0);
|
413 | const Cell& cell = cells_[h];
|
414 | if (cell.tag != TAG_TUPLE) {
|
415 | return false;
|
416 | }
|
417 | if (cell.is_slab) {
|
418 | int32_t* tuple_slab = reinterpret_cast<int32_t*>(cell.ptr);
|
419 | out->len = *tuple_slab;
|
420 | // everything after len
|
421 | out->handles = reinterpret_cast<const Handle*>(tuple_slab + 1);
|
422 | } else {
|
423 | out->len = cell.small_len; // in entries
|
424 | out->handles = reinterpret_cast<const Handle*>(&cell.small_val);
|
425 | }
|
426 | return true;
|
427 | };
|
428 |
|
429 | // TODO: How do we bounds check?
|
430 | Code AsCode(Handle h) {
|
431 | assert(h >= 0);
|
432 | log("tag = %d", cells_[h].tag);
|
433 | assert(cells_[h].tag == TAG_CODE);
|
434 | return Code(this, cells_ + h);
|
435 | }
|
436 |
|
437 | // Returns whether the value is truthy, according to Python's rules.
|
438 | // Should we unify this with the bool() constructor?
|
439 | bool Truthy(Handle h) {
|
440 | assert(h >= 0);
|
441 | const Cell& cell = cells_[h];
|
442 | switch (cell.tag) {
|
443 | case TAG_NONE:
|
444 | return false;
|
445 | case TAG_BOOL:
|
446 | return cell.i != 0; // True or False
|
447 | case TAG_INT:
|
448 | return cell.i != 0; // nonzero
|
449 | case TAG_FLOAT:
|
450 | return cell.d != 0.0; // Is this correct?
|
451 | case TAG_STR: {
|
452 | Str s;
|
453 | AsStr(h, &s);
|
454 | return s.len != 0;
|
455 | }
|
456 | case TAG_TUPLE:
|
457 | assert(0); // TODO
|
458 | break;
|
459 | case TAG_CODE:
|
460 | return true; // always truthy
|
461 |
|
462 | // NOTE: Instances don't get to override nonzero? They are always true.
|
463 |
|
464 | default:
|
465 | assert(0); // TODO
|
466 | }
|
467 | }
|
468 |
|
469 | // For now just append to end. Later we have to look at the free list.
|
470 | Handle NewCell() {
|
471 | // TODO: Should we reserve handle 0 for NULL, an allocation failure? The
|
472 | // file format will bloat by 16 bytes?
|
473 | if (num_cells_ == max_cells_) {
|
474 | log("Allocation failure: num_cells_ = %d", num_cells_);
|
475 | assert(0);
|
476 | }
|
477 | return num_cells_++;
|
478 | }
|
479 |
|
480 | // TODO: append to cells_.
|
481 | // Zero out the 16 bytes first, and then set cell.i?
|
482 | Handle NewInt(int64_t i) {
|
483 | Handle h = NewCell();
|
484 | memset(cells_ + h, 0, sizeof(Cell));
|
485 | cells_[h].tag = TAG_INT;
|
486 | cells_[h].i = i;
|
487 | #if VERBOSE_ALLOC
|
488 | log("new int <id = %d> %d", h, i);
|
489 | #endif
|
490 | return h;
|
491 | }
|
492 | Handle NewStr0(const char* s) {
|
493 | Handle h = NewCell();
|
494 | memset(cells_ + h, 0, sizeof(Cell));
|
495 | cells_[h].tag = TAG_STR;
|
496 |
|
497 | // TODO: Determine if its big or small.
|
498 | assert(0);
|
499 | return h;
|
500 | }
|
501 |
|
502 | Handle NewTuple(int initial_size) {
|
503 | assert(0);
|
504 | return kInvalidHandle;
|
505 | }
|
506 |
|
507 | Handle NewFunc(Handle code, NameLookup* globals) {
|
508 | Handle h = NewCell();
|
509 | memset(cells_ + h, 0, sizeof(Cell));
|
510 | cells_[h].tag = TAG_FUNC;
|
511 |
|
512 | // NOTE: This should be a Cell because we want to freeze it!
|
513 |
|
514 | // This should be a pointer to a slab. TODO: So we need a function to
|
515 | // allocate a slab with 3 fields? code, globals, defaults are essential.
|
516 | // THen it could be small.
|
517 | //
|
518 | // BUT we also want a docstring? That will be useful for running some code.
|
519 | // So it needs to be a slab.
|
520 | //
|
521 | // Should there be indirection with "globals"? It should be its own handle?
|
522 | // Yes I think it's a handle to an entry of sys.modules?
|
523 |
|
524 | cells_[h].ptr = nullptr;
|
525 |
|
526 | assert(0);
|
527 | return kInvalidHandle;
|
528 | }
|
529 |
|
530 | int Last() {
|
531 | return num_cells_ - 1;
|
532 | }
|
533 |
|
534 | void DebugString(Handle h) {
|
535 | const Cell& cell = cells_[h];
|
536 |
|
537 | fprintf(stderr, " <id %d> ", h);
|
538 |
|
539 | switch (cell.tag) {
|
540 | case TAG_NONE:
|
541 | log("None");
|
542 | break;
|
543 | case TAG_BOOL:
|
544 | log("Bool");
|
545 | break;
|
546 | case TAG_INT: {
|
547 | int64_t i;
|
548 | AsInt(h, &i);
|
549 | log("Int %d", i);
|
550 | break;
|
551 | }
|
552 | case TAG_FLOAT:
|
553 | log("Float");
|
554 | break;
|
555 | case TAG_STR:
|
556 | log("Str %s", AsStr0(h));
|
557 | break;
|
558 | default:
|
559 | log("%s", TagDebugString(cell.tag));
|
560 | }
|
561 | }
|
562 |
|
563 | // Getter
|
564 | inline Cell* cells() {
|
565 | return cells_;
|
566 | }
|
567 | private:
|
568 | uint8_t* slabs_; // so we can free it, not used directly
|
569 | int num_cells_;
|
570 | int max_cells_;
|
571 | Cell* cells_;
|
572 | };
|
573 |
|
574 |
|
575 | //
|
576 | // Code implementation. Must come after OHeap declaration.
|
577 | //
|
578 |
|
579 | inline int64_t Code::FieldAsInt(int field_index) const {
|
580 | Handle h = GetField(field_index);
|
581 | int64_t i;
|
582 | assert(heap_->AsInt(h, &i)); // invalid bytecode not handled
|
583 | return i;
|
584 | }
|
585 |
|
586 | inline Str Code::FieldAsStr(int field_index) const {
|
587 | Handle h = GetField(field_index);
|
588 |
|
589 | Str s;
|
590 | assert(heap_->AsStr(h, &s)); // invalid bytecode not handled
|
591 | return s;
|
592 | }
|
593 |
|
594 | inline Tuple Code::FieldAsTuple(int field_index) const {
|
595 | Handle h = GetField(field_index);
|
596 |
|
597 | Tuple t;
|
598 | assert(heap_->AsTuple(h, &t)); // invalid bytecode not handled
|
599 | return t;
|
600 | }
|
601 |
|
602 | //
|
603 | // File I/O
|
604 | //
|
605 |
|
606 | const char* kHeader = "OHP2";
|
607 | const int kHeaderLen = 4;
|
608 |
|
609 | bool ReadHeader(FILE* f) {
|
610 | char buf[kHeaderLen];
|
611 | if (fread(buf, kHeaderLen, 1, f) != 1) {
|
612 | log("Couldn't read OHeap header");
|
613 | return false;
|
614 | }
|
615 | if (memcmp(buf, kHeader, kHeaderLen) != 0) {
|
616 | log("Error: expected '%s' in OHeap header", kHeader);
|
617 | return false;
|
618 | }
|
619 | return true;
|
620 | }
|
621 |
|
622 | bool Load(FILE* f, OHeap* heap) {
|
623 | if (!ReadHeader(f)) {
|
624 | return false;
|
625 | }
|
626 |
|
627 | int32_t total_slab_size = 0;
|
628 | if (fread(&total_slab_size, sizeof total_slab_size, 1, f) != 1) {
|
629 | log("Error reading total_slab_size");
|
630 | return false;
|
631 | }
|
632 | log("total_slab_size = %d", total_slab_size);
|
633 |
|
634 | int32_t num_cells = 0;
|
635 | if (fread(&num_cells, sizeof num_cells, 1, f) != 1) {
|
636 | log("Error reading num_cells");
|
637 | return false;
|
638 | }
|
639 | log("num_cells = %d", num_cells);
|
640 |
|
641 | int32_t num_read;
|
642 |
|
643 | // TODO: Limit total size of slabs?
|
644 | uint8_t* slabs = heap->AllocPermanentSlabs(total_slab_size);
|
645 | num_read = fread(slabs, 1, total_slab_size, f);
|
646 | if (num_read != total_slab_size) {
|
647 | log("Error reading slabs");
|
648 | return false;
|
649 | }
|
650 |
|
651 | size_t pos = ftell(f);
|
652 | log("pos after reading slabs = %d", pos);
|
653 |
|
654 | Cell* cells = heap->AllocInitialCells(num_cells);
|
655 | num_read = fread(cells, sizeof(Cell), num_cells, f);
|
656 | if (num_read != num_cells) {
|
657 | log("Error: expected %d cells, got %d", num_cells, num_read);
|
658 | return false;
|
659 | }
|
660 |
|
661 | // Patch the offsets into pointers.
|
662 | int num_slabs = 0;
|
663 | for (int i = 0; i < num_cells; ++i) {
|
664 | const Cell& cell = cells[i];
|
665 | if (cell.is_slab) {
|
666 | num_slabs++;
|
667 | int32_t slab_offset = cell.slab_wire.offset;
|
668 | //log("i = %d, slab offset = %d", i, slab_offset);
|
669 | cells[i].ptr = slabs + slab_offset;
|
670 | //log("ptr = %p", cell.ptr);
|
671 | }
|
672 | }
|
673 | log("Patched %d slabs", num_slabs);
|
674 |
|
675 | // Print out all the slab lengths for verification.
|
676 | for (int i = 0; i < num_cells; ++i) {
|
677 | const Cell& cell = cells[i];
|
678 | if (cell.is_slab) {
|
679 | //log("i = %d", i);
|
680 | //log("ptr = %p", cell.ptr);
|
681 | int32_t* start = reinterpret_cast<int32_t*>(cell.ptr);
|
682 | //log("start = %p", start);
|
683 | int32_t len = *start;
|
684 | log("slab len = %d", len);
|
685 | }
|
686 | }
|
687 |
|
688 | return true;
|
689 | }
|
690 |
|
691 | enum class BlockType : uint8_t {
|
692 | Loop,
|
693 | Except,
|
694 | Finally,
|
695 | With,
|
696 | };
|
697 |
|
698 | // Like PyTryBlock in frameobject.h
|
699 | struct Block {
|
700 | BlockType type;
|
701 | uint8_t level; // VALUE stack level to pop to.
|
702 | uint16_t jump_target; // Called 'handler' in CPython.
|
703 | };
|
704 |
|
705 | class Frame {
|
706 | public:
|
707 | // TODO: Reserve the right size for these stacks?
|
708 | // from co.stacksize
|
709 | Frame(const Code& co)
|
710 | : co_(co),
|
711 | value_stack_(),
|
712 | block_stack_(),
|
713 | last_i_(0),
|
714 | globals_(),
|
715 | locals_() {
|
716 | }
|
717 | // Take the handle of a string, and return a handle of a value.
|
718 | Handle LoadName(const char* name) {
|
719 | #if VERBOSE_NAMES
|
720 | log("-- Looking up %s", name);
|
721 | #endif
|
722 |
|
723 | auto it = locals_.find(name);
|
724 | if (it != locals_.end()) {
|
725 | return it->second;
|
726 | }
|
727 |
|
728 | if (strcmp(name, "print") == 0) {
|
729 | return -1; // special value for a C function?
|
730 | }
|
731 |
|
732 | return 0; // should this be a specal value?
|
733 | }
|
734 | void StoreName(const char* name, Handle value_h) {
|
735 | locals_[name] = value_h;
|
736 | }
|
737 | inline void JumpTo(int dest) {
|
738 | last_i_ = dest;
|
739 | }
|
740 | inline void JumpRelative(int offset) {
|
741 | last_i_ += offset; // Is this correct?
|
742 | }
|
743 | const Code& co_; // public for now
|
744 | vector<Handle> value_stack_;
|
745 | vector<Block> block_stack_;
|
746 | int last_i_; // index into bytecode (which is variable length)
|
747 | NameLookup globals_;
|
748 | private:
|
749 | NameLookup locals_;
|
750 | };
|
751 |
|
752 | class VM {
|
753 | public:
|
754 | VM(OHeap* heap)
|
755 | : heap_(heap) {
|
756 | }
|
757 | ~VM() {
|
758 | for (auto* frame : call_stack_) {
|
759 | delete frame;
|
760 | }
|
761 | }
|
762 |
|
763 | // Like PyEval_EvalFrameEx. It has to be on the VM object in order to create
|
764 | Why RunFrame(Frame* frame);
|
765 |
|
766 | // Treat the last object on the heap as a code object to run.
|
767 | Why RunMain();
|
768 |
|
769 | private:
|
770 | void DebugHandleArray(const vector<Handle>& handles);
|
771 |
|
772 | OHeap* heap_;
|
773 | vector<Frame*> call_stack_; // call stack
|
774 | Handle modules; // like sys.modules. A dictionary of globals.
|
775 |
|
776 | // See PyThreadState for other stuff that goes here.
|
777 | // Exception info, profiling, tracing, counters, etc.
|
778 |
|
779 | // PyInterpreterState: modules, sysdict, builtins, module reloading
|
780 | // OVM won't have overridable builtins.
|
781 | };
|
782 |
|
783 | void VM::DebugHandleArray(const vector<Handle>& handles) {
|
784 | printf("(%zu) [ ", handles.size());
|
785 | for (Handle h : handles) {
|
786 | printf("%d ", h);
|
787 | }
|
788 | printf("]\n");
|
789 |
|
790 | printf(" [ ");
|
791 | for (Handle h : handles) {
|
792 | if (h < 0) {
|
793 | printf("(native) ");
|
794 | } else {
|
795 | int tag = heap_->cells()[h].tag;
|
796 | printf("%s ", TagDebugString(tag));
|
797 | }
|
798 | }
|
799 | printf("]\n");
|
800 |
|
801 | }
|
802 |
|
803 | void CodeDebugString(const Code& co, OHeap* heap) {
|
804 | log("argcount = %d", co.argcount());
|
805 | log("nlocals = %d", co.nlocals());
|
806 | log("stacksize = %d", co.stacksize());
|
807 | log("flags = %d", co.flags());
|
808 | log("firstlineno = %d", co.firstlineno());
|
809 |
|
810 | log("name = %s", co.name().data);
|
811 | log("filename = %s", co.filename().data);
|
812 | log("len(code) = %d", co.code().len);
|
813 |
|
814 | log("len(names) = %d", co.names().len);
|
815 | log("len(varnames) = %d", co.varnames().len);
|
816 | Tuple consts = co.consts();
|
817 |
|
818 | log("len(consts) = %d", consts.len);
|
819 |
|
820 | log("consts {");
|
821 | for (int i = 0; i < consts.len; ++i) {
|
822 | heap->DebugString(consts.handles[i]);
|
823 | }
|
824 | log("}");
|
825 | log("-----");
|
826 | }
|
827 |
|
828 | Why VM::RunFrame(Frame* frame) {
|
829 | const Code& co = frame->co_;
|
830 |
|
831 | Tuple names = co.names();
|
832 | //Tuple varnames = co.varnames();
|
833 | Tuple consts = co.consts();
|
834 |
|
835 | vector<Handle>& value_stack = frame->value_stack_;
|
836 | vector<Block>& block_stack = frame->block_stack_;
|
837 |
|
838 | CodeDebugString(co, heap_); // Show what code we're running.
|
839 |
|
840 | Why why = Why::Not;
|
841 | Handle retval = kInvalidHandle;
|
842 |
|
843 | Str b = co.code();
|
844 | int code_len = b.len;
|
845 | const uint8_t* bytecode = reinterpret_cast<const uint8_t*>(b.data);
|
846 |
|
847 | int inst_count = 0;
|
848 |
|
849 | while (true) {
|
850 | assert(0 <= frame->last_i_);
|
851 | assert(frame->last_i_ < code_len);
|
852 |
|
853 | uint8_t op = bytecode[frame->last_i_];
|
854 | int oparg;
|
855 | frame->last_i_++;
|
856 | #if VERBOSE_OPS
|
857 | printf("%20s", kOpcodeNames[op]);
|
858 | #endif
|
859 |
|
860 | if (op >= HAVE_ARGUMENT) {
|
861 | int i = frame->last_i_;
|
862 | oparg = bytecode[i] + (bytecode[i+1] << 8);
|
863 | #if VERBOSE_OPS
|
864 | printf(" %5d (last_i_ = %d)", oparg, i);
|
865 | if (oparg < 0) {
|
866 | log(" oparg bytes: %d %d", bytecode[i], bytecode[i+1]);
|
867 | }
|
868 | #endif
|
869 | frame->last_i_ += 2;
|
870 | }
|
871 | #if VERBOSE_OPS
|
872 | printf("\n");
|
873 | #endif
|
874 |
|
875 | switch(op) {
|
876 | case LOAD_CONST:
|
877 | //log("load_const handle = %d", consts.handles[oparg]);
|
878 | // NOTE: bounds check?
|
879 | value_stack.push_back(consts.handles[oparg]);
|
880 | break;
|
881 | case LOAD_NAME: {
|
882 | Handle name_h = names.handles[oparg];
|
883 | const char* name = heap_->AsStr0(name_h);
|
884 | assert(name != nullptr); // Invalid bytecode not handled
|
885 |
|
886 | //log("load_name handle = %d", names.handles[oparg]);
|
887 | Handle h = frame->LoadName(name);
|
888 | value_stack.push_back(h);
|
889 | break;
|
890 | }
|
891 | case STORE_NAME: {
|
892 | Handle name_h = names.handles[oparg];
|
893 | const char* name = heap_->AsStr0(name_h);
|
894 | assert(name != nullptr); // Invalid bytecode not handled
|
895 |
|
896 | frame->StoreName(name, value_stack.back());
|
897 | value_stack.pop_back();
|
898 | break;
|
899 | }
|
900 | case POP_TOP:
|
901 | value_stack.pop_back();
|
902 | break;
|
903 | case CALL_FUNCTION: {
|
904 | int num_args = oparg & 0xff;
|
905 | //int num_kwargs = (oparg >> 8) & 0xff; // copied from CPython
|
906 | //log("num_args %d", num_args);
|
907 |
|
908 | #if VERBOSE_VALUE_STACK
|
909 | log("value stack on CALL_FUNCTION");
|
910 | DebugHandleArray(value_stack);
|
911 | #endif
|
912 |
|
913 | vector<Handle> args;
|
914 | args.reserve(num_args); // reserve the right size
|
915 |
|
916 | // Pop num_args off. TODO: Could print() builtin do this itself to avoid
|
917 | // copying?
|
918 | for (int i = 0; i < num_args; ++i ) {
|
919 | args.push_back(value_stack.back());
|
920 | value_stack.pop_back();
|
921 | }
|
922 | #if VERBOSE_VALUE_STACK
|
923 | log("Popped args:");
|
924 | DebugHandleArray(args);
|
925 |
|
926 | log("Value stack after popping args:");
|
927 | DebugHandleArray(value_stack);
|
928 | #endif
|
929 |
|
930 | // Pop the function itself off
|
931 | Handle func_handle = value_stack.back();
|
932 | value_stack.pop_back();
|
933 |
|
934 | //log("func handle %d", func_handle);
|
935 |
|
936 | vector<Handle> rets;
|
937 | if (func_handle < 0) {
|
938 | // TODO: dispatch table for native functions.
|
939 | // Call func_print for now.
|
940 |
|
941 | why = func_print(*heap_, args, &rets);
|
942 | if (why != Why::Not) {
|
943 | log("EXCEPTION after calling native function");
|
944 | break;
|
945 | }
|
946 | } else {
|
947 | //Func func; // has CodeObject and more?
|
948 | //heap_->AsFunc(func_handle, &func);
|
949 | //CallFunction(func, args, &rets);
|
950 | rets.push_back(0);
|
951 | }
|
952 |
|
953 | // Now push return values.
|
954 | assert(rets.size() == 1);
|
955 | value_stack.push_back(rets[0]);
|
956 | break;
|
957 | }
|
958 |
|
959 | // Computation
|
960 | case COMPARE_OP: {
|
961 | Handle w = value_stack.back();
|
962 | value_stack.pop_back();
|
963 | Handle v = value_stack.back();
|
964 | value_stack.pop_back();
|
965 |
|
966 | // CPython inlines cmp(int, int) too.
|
967 | int64_t a, b;
|
968 | bool result;
|
969 | if (heap_->AsInt(v, &a) && heap_->AsInt(w, &b)) {
|
970 | switch (oparg) {
|
971 | case CompareOp::LT: result = a < b; break;
|
972 | case CompareOp::LE: result = a <= b; break;
|
973 | case CompareOp::EQ: result = a == b; break;
|
974 | case CompareOp::NE: result = a != b; break;
|
975 | case CompareOp::GT: result = a > b; break;
|
976 | case CompareOp::GE: result = a >= b; break;
|
977 | //case CompareOp::IS: result = v == w; break;
|
978 | //case CompareOp::IS_NOT: result = v != w; break;
|
979 | default:
|
980 | log("Unhandled compare %d", oparg);
|
981 | assert(0);
|
982 | }
|
983 | // TODO: Avoid stack movement by SET_TOP().
|
984 |
|
985 | // Use canonical handles rather than allocating bools.
|
986 | value_stack.push_back(result ? kTrueHandle : kFalseHandle);
|
987 | } else {
|
988 | assert(0);
|
989 | }
|
990 | break;
|
991 | }
|
992 |
|
993 | case BINARY_ADD: {
|
994 | Handle w = value_stack.back();
|
995 | value_stack.pop_back();
|
996 | Handle v = value_stack.back();
|
997 | value_stack.pop_back();
|
998 |
|
999 | int64_t a, b, result;
|
1000 | if (heap_->AsInt(w, &a) && heap_->AsInt(v, &b)) {
|
1001 | result = a + b;
|
1002 | } else {
|
1003 | // TODO: Concatenate strings, tuples, lists
|
1004 | assert(0);
|
1005 | }
|
1006 |
|
1007 | Handle result_h = heap_->NewInt(result);
|
1008 | value_stack.push_back(result_h);
|
1009 | break;
|
1010 | }
|
1011 |
|
1012 | case BINARY_MODULO: {
|
1013 | Handle w = value_stack.back();
|
1014 | value_stack.pop_back();
|
1015 | Handle v = value_stack.back();
|
1016 | value_stack.pop_back();
|
1017 |
|
1018 | Str s;
|
1019 | if (heap_->AsStr(v, &s)) {
|
1020 | // TODO: Do string formatting
|
1021 | assert(0);
|
1022 | }
|
1023 |
|
1024 | int64_t a, b, result;
|
1025 | if (heap_->AsInt(v, &a) && heap_->AsInt(w, &b)) {
|
1026 | result = a % b;
|
1027 | Handle result_h = heap_->NewInt(result);
|
1028 | value_stack.push_back(result_h);
|
1029 | break;
|
1030 | }
|
1031 |
|
1032 | // TODO: TypeError
|
1033 | assert(0);
|
1034 |
|
1035 | break;
|
1036 | }
|
1037 |
|
1038 | //
|
1039 | // Jumps
|
1040 | //
|
1041 | case JUMP_ABSOLUTE:
|
1042 | frame->JumpTo(oparg);
|
1043 | break;
|
1044 |
|
1045 | case JUMP_FORWARD:
|
1046 | frame->JumpRelative(oparg);
|
1047 | break;
|
1048 |
|
1049 | case POP_JUMP_IF_FALSE: {
|
1050 | Handle w = value_stack.back();
|
1051 | value_stack.pop_back();
|
1052 |
|
1053 | // Special case for Py_True / Py_False like CPython.
|
1054 | if (w == kTrueHandle) {
|
1055 | break;
|
1056 | }
|
1057 | if (w == kFalseHandle || !heap_->Truthy(w)) {
|
1058 | frame->JumpTo(oparg);
|
1059 | }
|
1060 | break;
|
1061 | }
|
1062 |
|
1063 | //
|
1064 | // Control Flow
|
1065 | //
|
1066 |
|
1067 | case SETUP_LOOP: {
|
1068 | Block b;
|
1069 | b.type = BlockType::Loop;
|
1070 | b.level = value_stack.size();
|
1071 | b.jump_target = frame->last_i_ + oparg; // oparg is relative jump target
|
1072 | block_stack.push_back(b);
|
1073 | break;
|
1074 | }
|
1075 |
|
1076 | case POP_BLOCK:
|
1077 | block_stack.pop_back();
|
1078 | break;
|
1079 |
|
1080 | case BREAK_LOOP:
|
1081 | why = Why::Break;
|
1082 | break;
|
1083 |
|
1084 | case RETURN_VALUE:
|
1085 | // TODO: Set return value here. It's just a Handle I guess.
|
1086 | retval = value_stack.back();
|
1087 | value_stack.pop_back();
|
1088 | why = Why::Return;
|
1089 | break;
|
1090 |
|
1091 | case MAKE_FUNCTION: {
|
1092 | Handle code = value_stack.back();
|
1093 | value_stack.pop_back();
|
1094 | // TODO: default arguments are on the stack.
|
1095 | if (oparg) {
|
1096 | //Handle defaults = heap_->NewTuple(oparg); // initial size
|
1097 | for (int i = 0; i < oparg; ++i) {
|
1098 | value_stack.pop_back();
|
1099 | }
|
1100 | }
|
1101 | // the function is run with the same globals as the frame it was defined in
|
1102 | NameLookup* globals = &frame->globals_;
|
1103 | Handle func = heap_->NewFunc(code, globals);
|
1104 | value_stack.push_back(func);
|
1105 | }
|
1106 |
|
1107 | default:
|
1108 | log("Unhandled instruction");
|
1109 | break;
|
1110 |
|
1111 | }
|
1112 |
|
1113 | while (why != Why::Not && block_stack.size()) {
|
1114 | assert(why != Why::Yield);
|
1115 | Block b = block_stack.back();
|
1116 |
|
1117 | // TODO: This code appears to be unused! continue compiles as
|
1118 | // POP_JUMP_IF_FALSE!
|
1119 | if (b.type == BlockType::Loop && why == Why::Continue) {
|
1120 | assert(0);
|
1121 | // TODO: retval? I guess it's popped off the stack.
|
1122 | frame->JumpTo(retval);
|
1123 | }
|
1124 | block_stack.pop_back();
|
1125 |
|
1126 | // Unwind value stack to the saved level.
|
1127 | while (value_stack.size() > b.level) {
|
1128 | value_stack.pop_back();
|
1129 | }
|
1130 |
|
1131 | if (b.type == BlockType::Loop && why == Why::Break) {
|
1132 | why = Why::Not;
|
1133 | frame->JumpTo(b.jump_target);
|
1134 | }
|
1135 |
|
1136 | if (b.type == BlockType::Finally ||
|
1137 | b.type == BlockType::Except && why == Why::Exception ||
|
1138 | b.type == BlockType::With) {
|
1139 | assert(0);
|
1140 | }
|
1141 | }
|
1142 |
|
1143 | // TODO: Handle the block stack. Break should JUMP to the location in the
|
1144 | // block handler!
|
1145 | if (why != Why::Not) { // return, yield, continue, etc.
|
1146 | break;
|
1147 | }
|
1148 | inst_count++;
|
1149 | }
|
1150 |
|
1151 | log("Processed %d instructions", inst_count);
|
1152 | return why;
|
1153 | }
|
1154 |
|
1155 | Why VM::RunMain() {
|
1156 | Code co = heap_->AsCode(heap_->Last());
|
1157 |
|
1158 | Frame* frame = new Frame(co);
|
1159 | call_stack_.push_back(frame);
|
1160 |
|
1161 | log("co = %p", co);
|
1162 |
|
1163 | return RunFrame(frame);
|
1164 | }
|
1165 |
|
1166 | // Need a VM to be able to convert args to Cell?
|
1167 | Why func_print(const OHeap& heap, const Args& args, Rets* rets) {
|
1168 | Str s;
|
1169 | if (heap.AsStr(args[0], &s)) {
|
1170 | //printf("PRINTING\n");
|
1171 | fwrite(s.data, sizeof(char), s.len, stdout); // make sure to write NUL bytes!
|
1172 | puts("\n");
|
1173 |
|
1174 | // This is like Py_RETURN_NONE?
|
1175 | rets->push_back(0);
|
1176 | return Why::Not;
|
1177 | }
|
1178 |
|
1179 | // TODO: We should really call the str() constructor here, which will call
|
1180 | // __str__ on user-defined instances.
|
1181 | int64_t i;
|
1182 | if (heap.AsInt(args[0], &i)) {
|
1183 | printf("%ld\n", i);
|
1184 |
|
1185 | rets->push_back(0);
|
1186 | return Why::Not;
|
1187 | }
|
1188 |
|
1189 | // TODO: Set TypeError
|
1190 | // I guess you need the VM argument here.
|
1191 | return Why::Exception;
|
1192 | }
|
1193 |
|
1194 | int main(int argc, char **argv) {
|
1195 | if (argc == 0) {
|
1196 | log("Expected filename\n");
|
1197 | return 1;
|
1198 | }
|
1199 | FILE *f = fopen(argv[1], "rb");
|
1200 | if (!f) {
|
1201 | log("Error opening %s", argv[1]);
|
1202 | return 1;
|
1203 | }
|
1204 |
|
1205 | assert(sizeof(Cell) == 16);
|
1206 |
|
1207 | OHeap heap;
|
1208 | if (!Load(f, &heap)) {
|
1209 | log("Error loading '%s'", argv[1]);
|
1210 | return 1;
|
1211 | }
|
1212 |
|
1213 | VM vm(&heap);
|
1214 | vm.RunMain();
|
1215 | return 0;
|
1216 | }
|