OILS / demo / old / ovm2 / main.cc View on Github | oilshell.org

1216 lines, 734 significant
1#include <assert.h>
2#include <stdarg.h> // va_list, etc.
3#include <stdio.h>
4#include <stdint.h>
5#include <stdlib.h>
6#include <string.h> // memcmp
7#include <vector>
8#include <unordered_map>
9
10#include "opcode.h"
11
12#define VERBOSE_OPS 0
13#define VERBOSE_NAMES 0
14#define VERBOSE_VALUE_STACK 0
15#define VERBOSE_ALLOC 0 // for New*() functions
16
17using std::vector;
18using std::unordered_map;
19
// A Handle is an index into OHeap's cell array. Negative values are
// reserved for special entities (native functions, canonical True/False).
typedef int32_t Handle;

// Argument and return-value lists passed to/from function calls.
typedef vector<Handle> Args;
typedef vector<Handle> Rets;
24
// Like enum why_code in ceval.c.
// Reason the bytecode loop stopped or is unwinding.
enum class Why {
  Not,        // no unwinding in progress (normal execution)
  Exception,  // an exception was raised
  Reraise,    // re-raise a pending exception
  Return,     // RETURN_VALUE was executed
  Break,      // BREAK_LOOP was executed
  Continue,   // a continue statement
  Yield,      // a generator yield
};
35
// Operand values for the COMPARE_OP instruction, in the same order as
// CPython's cmp_op: <, <=, ==, !=, >, >=, is, is not.
enum CompareOp {
  LT,
  LE,
  EQ,
  NE,
  GT,
  GE,
  IS,
  IS_NOT,
};
46
47//
48// Forward declarations
49//
50
51class OHeap;
52
53//
54// Prototypes
55//
56
57Why func_print(const OHeap& heap, const Args& args, Rets* rets);
58
59//
60// Constants
61//
62
// TODO: Generate this?
// Type tags stored in Cell::tag. Negative so they are distinguishable from
// (future) non-negative user-defined type IDs.
const int TAG_NONE = -1;
const int TAG_BOOL = -2;
const int TAG_INT = -3;
const int TAG_FLOAT = -4;
const int TAG_STR = -5;
const int TAG_TUPLE = -6;
const int TAG_CODE = -7;
const int TAG_FUNC = -8;

// Should this be zero? Positive are user defined, negative are native, 0 is
// invalid? Useful for NewCell() to return on allocation failure. And
// uninitialized handles should be in an invalid state.
const int kInvalidHandle = -10;

// TODO: These should be generated
// NOTE(review): these canonical handles are negative, and CALL_FUNCTION
// treats any negative handle as a native function -- confirm True/False can
// never reach the call path.
const int kTrueHandle = -11;
const int kFalseHandle = -12;
81
// Debug names for each TAG_* constant, indexed by the NEGATED tag
// (TAG_NONE == -1 -> index 1, ..., TAG_FUNC == -8 -> index 8).
const char* kTagDebugString[] = {
  "",
  "None",
  "bool",
  "int",
  "float",
  "str",
  "tuple",
  "code",
  "func",  // BUG FIX: entry was missing, so TAG_FUNC read out of bounds
};

// Return a static debug name for a (negative) type tag.
const char* TagDebugString(int tag) {
  int index = -tag;
  // Bounds-check so a corrupt tag can't read off the end of the table.
  assert(index > 0 &&
         index < static_cast<int>(sizeof(kTagDebugString) /
                                  sizeof(kTagDebugString[0])));
  return kTagDebugString[index];
}
96
97//
98// Utilities
99//
100
// printf-style logging helper: formats to stdout and appends one newline.
void log(const char* fmt, ...) {
  va_list ap;
  va_start(ap, fmt);
  vprintf(fmt, ap);
  va_end(ap);
  putchar('\n');
}
109
// Hash functor so unordered_map can key on C strings directly.
struct NameHash {
  int operator() (const char* s) const {
    // DJB hash: http://www.cse.yorku.ca/~oz/hash.html
    int h = 5381;
    for (; *s != '\0'; ++s) {
      h = (h << 5) + h + *s;
    }
    return h;
  }
};
122
// Equality functor so unordered_map compares C-string contents, not pointers.
struct NameEq {
  bool operator() (const char* a, const char* b) const {
    return !strcmp(a, b);
  }
};
128
// Dictionary of names (char*) to value (Handle).
// NOTE: keys are borrowed pointers into heap slabs; the map does NOT own
// or copy the strings.
//
// TODO: if we de-dupe all the names in OHeap, and there's no runtime code
// generation, each variable name string will have exactly one address. So
// then can we use pointer comparison for equality / hashing? Would be nice.
typedef unordered_map<const char*, Handle, NameHash, NameEq> NameLookup;
135
136
// 16 bytes
// The representation of every value, both on disk and in memory. Small
// payloads are stored inline in small_val; big payloads live in a slab that
// the last union points to (as a file offset on disk, patched to a real
// pointer by Load()). Layout is a wire-format contract: main() asserts
// sizeof(Cell) == 16.
struct Cell {
  int16_t tag;         // TAG_* constant identifying the type
  uint8_t is_slab;     // nonzero if the payload lives in a slab
  uint8_t small_len;   // inline payload length // end first 4 bytes

  union {
    // following TWELVE bytes, for small string, tuple, etc.
    uint8_t small_val[1];
    int32_t big_len;  // length of slab. TODO: Use this.
  };

  union {
    // The wire format
    struct {
      uint8_t pad[4];
      int32_t offset;  // byte offset of the slab within the slab buffer
    } slab_wire;

    // Resolved memory format
    uint8_t* ptr;  // should be 8 bytes on 64-bit systems
    int64_t i;     // TAG_INT / TAG_BOOL payload
    double d;      // TAG_FLOAT payload
  };
};
162
// Same interface for big or small strings.
// A non-owning view of string data stored in a Cell or slab.
// What about hash code? That could be stored with big strings.
struct Str {
  int32_t len;
  const char* data; // NUL-terminated, but can also contain NUL.
                    // should not be mutated.
};
170
// A non-owning view of a tuple: a length-prefixed array of element handles.
struct Tuple {
  int32_t len;
  const Handle* handles;
};
175
176
// Dicts require special consideration in these cases:
// - When deserializing, we have to create a new DictIndex from the DictItems
//   array.  We compute the size of the index from the number of items.
// - When garbage collecting, we iterate over DictItems and mark 'key' and
//   'value' Handles, skipping over the hash value.
//
// Another possibility: Why isn't the hash stored with the key itself rather
// than in the items array?  I guess it could be both.

// Sparse hash index for a dict (design sketch; not yet used at runtime).
struct DictIndex {
  int size;      // power of 2
  int num_used;  // is this the same as the number of items?
                 // For the load factor.

  // The slab first has sparse indices, and then dense items, like CPython.

  // Using the same approach as CPython.
  //
  // NOTE PyDict_MINSIZE == 8
  // "8 allows dicts with no more than 5 active entries; experiments suggested
  // this suffices for the majority of dicts (consisting mostly of
  // usually-small dicts created to pass keyword arguments)."
  // This is always a power of 2 (see dictresize() in dictobject.c).
  // So it goes 8, 16, 32 ...
  //
  // Optimization: DictIndex could be shared among different hash tables!
  // As long as they have the exact same set of keys.  But how would you
  // determine that?

  // Doesn't this produce a lot of unpredictable branches?  Maybe as a
  // compromise we could just use options for 2 bytes and 4 bytes?  Dicts up
  // to 2**32 should be fine.
/*
   The size in bytes of an indice depends on dk_size:

   - 1 byte if dk_size <= 0xff (char*)
   - 2 bytes if dk_size <= 0xffff (int16_t*)
   - 4 bytes if dk_size <= 0xffffffff (int32_t*)
   - 8 bytes otherwise (int64_t*)
*/
  union {
    int8_t as_1[8];
    int16_t as_2[4];
    int32_t as_4[2];
#if SIZEOF_VOID_P > 4
    int64_t as_8[1];
#endif
  } dk_indices;
};
226
// Slab layout header for a dict (design sketch; not yet used at runtime).
struct DictSlab {
  // number of items is in Cell big_len
  int items_offset;  // offset to later in the slab?

  int indices_size;  // how many we can have without reallocating
  int indices_used;  //

};
235
// One dense dict entry: cached hash plus key/value handles.
struct DictItem {
  uint64_t hash;
  Handle key;
  Handle value;
};
241
// Wire format for dicts: a hole for the index, and then an array of DictItem.
struct DictSlabWire {
  union {
    uint8_t pad[8];     // on-disk placeholder for the pointer below
    DictIndex* index;   // filled in after deserialization
  };
  // DictItems here. Length is stored in the cell?
};
250
// A read-only "view" over a TAG_CODE cell. The cell's slab is an array of
// field handles; the hard-coded indices (1..11) are a contract with the
// serializer's field order -- do not reorder.
class Code {
 public:
  // Borrows both pointers; does not take ownership.
  Code(OHeap* heap, Cell* self)
      : heap_(heap),
        self_(self) {
  }
  // Assume the pointers are patched below
  int64_t argcount() const {
    return FieldAsInt(1);
  }
  int64_t nlocals() const {
    return FieldAsInt(2);
  }
  int64_t stacksize() const {
    return FieldAsInt(3);
  }
  int64_t flags() const {
    return FieldAsInt(4);
  }
  int64_t firstlineno() const {
    return FieldAsInt(5);
  }
  Str name() const {
    return FieldAsStr(6);
  }
  Str filename() const {
    return FieldAsStr(7);
  }
  // The raw bytecode string.
  Str code() const {
    return FieldAsStr(8);
  }
  Tuple names() const {
    return FieldAsTuple(9);
  }
  Tuple varnames() const {
    return FieldAsTuple(10);
  }
  Tuple consts() const {
    return FieldAsTuple(11);
  }

 private:
  // Read the field_index'th handle out of this code object's slab.
  inline Handle GetField(int field_index) const {
    int32_t* slab = reinterpret_cast<int32_t*>(self_->ptr);
    return slab[field_index];
  }

  // Defined after OHeap, since they call into it.
  inline int64_t FieldAsInt(int field_index) const;
  inline Str FieldAsStr(int field_index) const;
  inline Tuple FieldAsTuple(int field_index) const;

  OHeap* heap_;
  Cell* self_;
};
305
// A convenient "view" on a function object.  To create a function, you create
// the cell and the slab directly!
//
// LATER: This may have a closure pointer too.
// NOTE(review): mostly a stub -- code() is disabled and defaults() returns
// an empty Tuple.
class Func {
 public:
  // Borrows both pointers; does not take ownership.
  Func(OHeap* heap, Cell* self)
      : heap_(heap),
        self_(self) {
  }
  // Code is copyable?
#if 0
  Code code() const {
    Handle h = 0;  // TODO: Field access for handle
    Code c(heap_, heap_->cells_ + h);
    return c;
  }
#endif
  Tuple defaults() const {
    Tuple t;
    return t;
    //return FieldAsTuple(1);
  }
  // Fields: code, globals, defaults, __doc__,
  // And note that we have to SET them too.

 private:
  OHeap* heap_;
  Cell* self_;
};
336
337class OHeap {
338 public:
339 OHeap() : slabs_(nullptr), num_cells_(0), max_cells_(0), cells_(nullptr) {
340 }
341
342 ~OHeap() {
343 if (slabs_) {
344 free(slabs_);
345 }
346 if (cells_) {
347 free(cells_);
348 }
349 }
350
351 uint8_t* AllocPermanentSlabs(int total_slab_size) {
352 slabs_ = static_cast<uint8_t*>(malloc(total_slab_size));
353 return slabs_;
354 }
355
356 Cell* AllocInitialCells(int num_cells) {
357 num_cells_ = num_cells;
358 // Allocate 2x the number of cells to account for growth.
359 //max_cells_ = num_cells * 2;
360 max_cells_ = num_cells * 10;
361 cells_ = static_cast<Cell*>(malloc(sizeof(Cell) * max_cells_));
362 return cells_;
363 }
364
365 bool AsInt(Handle h, int64_t* out) const {
366 assert(h >= 0);
367 const Cell& cell = cells_[h];
368 if (cell.tag != TAG_INT) {
369 return false;
370 }
371 *out = cell.i;
372 return true;
373 }
374
375 // C string. NULL if the cell isn't a string.
376 // NOTE: Shouldn't modify this?
377 const char* AsStr0(Handle h) const {
378 assert(h >= 0);
379 const Cell& cell = cells_[h];
380 if (cell.tag != TAG_STR) {
381 log("AsStr0 expected string but got tag %d", cell.tag);
382 return nullptr;
383 }
384 if (cell.is_slab) {
385 int32_t* str_slab = reinterpret_cast<int32_t*>(cell.ptr);
386 // everything after len
387 return reinterpret_cast<const char*>(str_slab + 1);
388 } else {
389 return reinterpret_cast<const char*>(&cell.small_val);
390 }
391 }
392 // Sets str and len. Returns false if the Cell isn't a string.
393 bool AsStr(Handle h, Str* out) const {
394 assert(h >= 0);
395 const Cell& cell = cells_[h];
396 if (cell.tag != TAG_STR) {
397 return false;
398 }
399 if (cell.is_slab) {
400 int32_t* str_slab = reinterpret_cast<int32_t*>(cell.ptr);
401 out->len = *str_slab;
402 // everything after len
403 out->data = reinterpret_cast<const char*>(str_slab + 1);
404 } else {
405 out->len = cell.small_len; // in bytes
406 out->data = reinterpret_cast<const char*>(&cell.small_val);
407 }
408 return true;
409 }
410
411 bool AsTuple(Handle h, Tuple* out) {
412 assert(h >= 0);
413 const Cell& cell = cells_[h];
414 if (cell.tag != TAG_TUPLE) {
415 return false;
416 }
417 if (cell.is_slab) {
418 int32_t* tuple_slab = reinterpret_cast<int32_t*>(cell.ptr);
419 out->len = *tuple_slab;
420 // everything after len
421 out->handles = reinterpret_cast<const Handle*>(tuple_slab + 1);
422 } else {
423 out->len = cell.small_len; // in entries
424 out->handles = reinterpret_cast<const Handle*>(&cell.small_val);
425 }
426 return true;
427 };
428
429 // TODO: How do we bounds check?
430 Code AsCode(Handle h) {
431 assert(h >= 0);
432 log("tag = %d", cells_[h].tag);
433 assert(cells_[h].tag == TAG_CODE);
434 return Code(this, cells_ + h);
435 }
436
437 // Returns whether the value is truthy, according to Python's rules.
438 // Should we unify this with the bool() constructor?
439 bool Truthy(Handle h) {
440 assert(h >= 0);
441 const Cell& cell = cells_[h];
442 switch (cell.tag) {
443 case TAG_NONE:
444 return false;
445 case TAG_BOOL:
446 return cell.i != 0; // True or False
447 case TAG_INT:
448 return cell.i != 0; // nonzero
449 case TAG_FLOAT:
450 return cell.d != 0.0; // Is this correct?
451 case TAG_STR: {
452 Str s;
453 AsStr(h, &s);
454 return s.len != 0;
455 }
456 case TAG_TUPLE:
457 assert(0); // TODO
458 break;
459 case TAG_CODE:
460 return true; // always truthy
461
462 // NOTE: Instances don't get to override nonzero? They are always true.
463
464 default:
465 assert(0); // TODO
466 }
467 }
468
469 // For now just append to end. Later we have to look at the free list.
470 Handle NewCell() {
471 // TODO: Should we reserve handle 0 for NULL, an allocation failure? The
472 // file format will bloat by 16 bytes?
473 if (num_cells_ == max_cells_) {
474 log("Allocation failure: num_cells_ = %d", num_cells_);
475 assert(0);
476 }
477 return num_cells_++;
478 }
479
480 // TODO: append to cells_.
481 // Zero out the 16 bytes first, and then set cell.i?
482 Handle NewInt(int64_t i) {
483 Handle h = NewCell();
484 memset(cells_ + h, 0, sizeof(Cell));
485 cells_[h].tag = TAG_INT;
486 cells_[h].i = i;
487#if VERBOSE_ALLOC
488 log("new int <id = %d> %d", h, i);
489#endif
490 return h;
491 }
492 Handle NewStr0(const char* s) {
493 Handle h = NewCell();
494 memset(cells_ + h, 0, sizeof(Cell));
495 cells_[h].tag = TAG_STR;
496
497 // TODO: Determine if its big or small.
498 assert(0);
499 return h;
500 }
501
502 Handle NewTuple(int initial_size) {
503 assert(0);
504 return kInvalidHandle;
505 }
506
507 Handle NewFunc(Handle code, NameLookup* globals) {
508 Handle h = NewCell();
509 memset(cells_ + h, 0, sizeof(Cell));
510 cells_[h].tag = TAG_FUNC;
511
512 // NOTE: This should be a Cell because we want to freeze it!
513
514 // This should be a pointer to a slab. TODO: So we need a function to
515 // allocate a slab with 3 fields? code, globals, defaults are essential.
516 // THen it could be small.
517 //
518 // BUT we also want a docstring? That will be useful for running some code.
519 // So it needs to be a slab.
520 //
521 // Should there be indirection with "globals"? It should be its own handle?
522 // Yes I think it's a handle to an entry of sys.modules?
523
524 cells_[h].ptr = nullptr;
525
526 assert(0);
527 return kInvalidHandle;
528 }
529
530 int Last() {
531 return num_cells_ - 1;
532 }
533
534 void DebugString(Handle h) {
535 const Cell& cell = cells_[h];
536
537 fprintf(stderr, " <id %d> ", h);
538
539 switch (cell.tag) {
540 case TAG_NONE:
541 log("None");
542 break;
543 case TAG_BOOL:
544 log("Bool");
545 break;
546 case TAG_INT: {
547 int64_t i;
548 AsInt(h, &i);
549 log("Int %d", i);
550 break;
551 }
552 case TAG_FLOAT:
553 log("Float");
554 break;
555 case TAG_STR:
556 log("Str %s", AsStr0(h));
557 break;
558 default:
559 log("%s", TagDebugString(cell.tag));
560 }
561 }
562
563 // Getter
564 inline Cell* cells() {
565 return cells_;
566 }
567 private:
568 uint8_t* slabs_; // so we can free it, not used directly
569 int num_cells_;
570 int max_cells_;
571 Cell* cells_;
572};
573
574
575//
576// Code implementation. Must come after OHeap declaration.
577//
578
579inline int64_t Code::FieldAsInt(int field_index) const {
580 Handle h = GetField(field_index);
581 int64_t i;
582 assert(heap_->AsInt(h, &i)); // invalid bytecode not handled
583 return i;
584}
585
586inline Str Code::FieldAsStr(int field_index) const {
587 Handle h = GetField(field_index);
588
589 Str s;
590 assert(heap_->AsStr(h, &s)); // invalid bytecode not handled
591 return s;
592}
593
594inline Tuple Code::FieldAsTuple(int field_index) const {
595 Handle h = GetField(field_index);
596
597 Tuple t;
598 assert(heap_->AsTuple(h, &t)); // invalid bytecode not handled
599 return t;
600}
601
602//
603// File I/O
604//
605
606const char* kHeader = "OHP2";
607const int kHeaderLen = 4;
608
609bool ReadHeader(FILE* f) {
610 char buf[kHeaderLen];
611 if (fread(buf, kHeaderLen, 1, f) != 1) {
612 log("Couldn't read OHeap header");
613 return false;
614 }
615 if (memcmp(buf, kHeader, kHeaderLen) != 0) {
616 log("Error: expected '%s' in OHeap header", kHeader);
617 return false;
618 }
619 return true;
620}
621
622bool Load(FILE* f, OHeap* heap) {
623 if (!ReadHeader(f)) {
624 return false;
625 }
626
627 int32_t total_slab_size = 0;
628 if (fread(&total_slab_size, sizeof total_slab_size, 1, f) != 1) {
629 log("Error reading total_slab_size");
630 return false;
631 }
632 log("total_slab_size = %d", total_slab_size);
633
634 int32_t num_cells = 0;
635 if (fread(&num_cells, sizeof num_cells, 1, f) != 1) {
636 log("Error reading num_cells");
637 return false;
638 }
639 log("num_cells = %d", num_cells);
640
641 int32_t num_read;
642
643 // TODO: Limit total size of slabs?
644 uint8_t* slabs = heap->AllocPermanentSlabs(total_slab_size);
645 num_read = fread(slabs, 1, total_slab_size, f);
646 if (num_read != total_slab_size) {
647 log("Error reading slabs");
648 return false;
649 }
650
651 size_t pos = ftell(f);
652 log("pos after reading slabs = %d", pos);
653
654 Cell* cells = heap->AllocInitialCells(num_cells);
655 num_read = fread(cells, sizeof(Cell), num_cells, f);
656 if (num_read != num_cells) {
657 log("Error: expected %d cells, got %d", num_cells, num_read);
658 return false;
659 }
660
661 // Patch the offsets into pointers.
662 int num_slabs = 0;
663 for (int i = 0; i < num_cells; ++i) {
664 const Cell& cell = cells[i];
665 if (cell.is_slab) {
666 num_slabs++;
667 int32_t slab_offset = cell.slab_wire.offset;
668 //log("i = %d, slab offset = %d", i, slab_offset);
669 cells[i].ptr = slabs + slab_offset;
670 //log("ptr = %p", cell.ptr);
671 }
672 }
673 log("Patched %d slabs", num_slabs);
674
675 // Print out all the slab lengths for verification.
676 for (int i = 0; i < num_cells; ++i) {
677 const Cell& cell = cells[i];
678 if (cell.is_slab) {
679 //log("i = %d", i);
680 //log("ptr = %p", cell.ptr);
681 int32_t* start = reinterpret_cast<int32_t*>(cell.ptr);
682 //log("start = %p", start);
683 int32_t len = *start;
684 log("slab len = %d", len);
685 }
686 }
687
688 return true;
689}
690
// Kind of a try/loop block on the block stack (cf. SETUP_* opcodes).
enum class BlockType : uint8_t {
  Loop,
  Except,
  Finally,
  With,
};
697
// Like PyTryBlock in frameobject.h
struct Block {
  BlockType type;
  uint8_t level;         // VALUE stack level to pop to.
  uint16_t jump_target;  // Called 'handler' in CPython.
};
704
// One function activation: bytecode position, value/block stacks, and name
// bindings (roughly CPython's PyFrameObject).
class Frame {
 public:
  // TODO: Reserve the right size for these stacks?
  // from co.stacksize
  Frame(const Code& co)
      : co_(co),
        value_stack_(),
        block_stack_(),
        last_i_(0),
        globals_(),
        locals_() {
  }
  // Take the handle of a string, and return a handle of a value.
  // Looks in locals only; "print" resolves to the hard-coded native handle
  // -1; any other unknown name returns 0 (see TODO below).
  Handle LoadName(const char* name) {
#if VERBOSE_NAMES
    log("-- Looking up %s", name);
#endif

    auto it = locals_.find(name);
    if (it != locals_.end()) {
      return it->second;
    }

    if (strcmp(name, "print") == 0) {
      return -1;  // special value for a C function?
    }

    return 0;  // should this be a special value?
  }
  // Bind 'name' to a value handle in the local scope.
  void StoreName(const char* name, Handle value_h) {
    locals_[name] = value_h;
  }
  // Absolute jump: set the next bytecode index.
  inline void JumpTo(int dest) {
    last_i_ = dest;
  }
  // Relative jump from the current bytecode index.
  inline void JumpRelative(int offset) {
    last_i_ += offset;  // Is this correct?
  }
  const Code& co_;  // public for now
  vector<Handle> value_stack_;
  vector<Block> block_stack_;
  int last_i_;  // index into bytecode (which is variable length)
  NameLookup globals_;
 private:
  NameLookup locals_;
};
751
// The interpreter: owns the call stack and executes frames against an
// OHeap (roughly PyThreadState plus the eval loop).
class VM {
 public:
  // Borrows 'heap'; does not take ownership.
  VM(OHeap* heap)
      : heap_(heap) {
  }
  ~VM() {
    // Frames are heap-allocated in RunMain(); free whatever remains.
    for (auto* frame : call_stack_) {
      delete frame;
    }
  }

  // Like PyEval_EvalFrameEx. It has to be on the VM object in order to create
  Why RunFrame(Frame* frame);

  // Treat the last object on the heap as a code object to run.
  Why RunMain();

 private:
  // Print raw handle values, then each cell's tag name (debug aid).
  void DebugHandleArray(const vector<Handle>& handles);

  OHeap* heap_;               // borrowed, not owned
  vector<Frame*> call_stack_; // call stack
  Handle modules;             // like sys.modules. A dictionary of globals.

  // See PyThreadState for other stuff that goes here.
  // Exception info, profiling, tracing, counters, etc.

  // PyInterpreterState: modules, sysdict, builtins, module reloading
  // OVM won't have overridable builtins.
};
782
783void VM::DebugHandleArray(const vector<Handle>& handles) {
784 printf("(%zu) [ ", handles.size());
785 for (Handle h : handles) {
786 printf("%d ", h);
787 }
788 printf("]\n");
789
790 printf(" [ ");
791 for (Handle h : handles) {
792 if (h < 0) {
793 printf("(native) ");
794 } else {
795 int tag = heap_->cells()[h].tag;
796 printf("%s ", TagDebugString(tag));
797 }
798 }
799 printf("]\n");
800
801}
802
803void CodeDebugString(const Code& co, OHeap* heap) {
804 log("argcount = %d", co.argcount());
805 log("nlocals = %d", co.nlocals());
806 log("stacksize = %d", co.stacksize());
807 log("flags = %d", co.flags());
808 log("firstlineno = %d", co.firstlineno());
809
810 log("name = %s", co.name().data);
811 log("filename = %s", co.filename().data);
812 log("len(code) = %d", co.code().len);
813
814 log("len(names) = %d", co.names().len);
815 log("len(varnames) = %d", co.varnames().len);
816 Tuple consts = co.consts();
817
818 log("len(consts) = %d", consts.len);
819
820 log("consts {");
821 for (int i = 0; i < consts.len; ++i) {
822 heap->DebugString(consts.handles[i]);
823 }
824 log("}");
825 log("-----");
826}
827
828Why VM::RunFrame(Frame* frame) {
829 const Code& co = frame->co_;
830
831 Tuple names = co.names();
832 //Tuple varnames = co.varnames();
833 Tuple consts = co.consts();
834
835 vector<Handle>& value_stack = frame->value_stack_;
836 vector<Block>& block_stack = frame->block_stack_;
837
838 CodeDebugString(co, heap_); // Show what code we're running.
839
840 Why why = Why::Not;
841 Handle retval = kInvalidHandle;
842
843 Str b = co.code();
844 int code_len = b.len;
845 const uint8_t* bytecode = reinterpret_cast<const uint8_t*>(b.data);
846
847 int inst_count = 0;
848
849 while (true) {
850 assert(0 <= frame->last_i_);
851 assert(frame->last_i_ < code_len);
852
853 uint8_t op = bytecode[frame->last_i_];
854 int oparg;
855 frame->last_i_++;
856#if VERBOSE_OPS
857 printf("%20s", kOpcodeNames[op]);
858#endif
859
860 if (op >= HAVE_ARGUMENT) {
861 int i = frame->last_i_;
862 oparg = bytecode[i] + (bytecode[i+1] << 8);
863#if VERBOSE_OPS
864 printf(" %5d (last_i_ = %d)", oparg, i);
865 if (oparg < 0) {
866 log(" oparg bytes: %d %d", bytecode[i], bytecode[i+1]);
867 }
868#endif
869 frame->last_i_ += 2;
870 }
871#if VERBOSE_OPS
872 printf("\n");
873#endif
874
875 switch(op) {
876 case LOAD_CONST:
877 //log("load_const handle = %d", consts.handles[oparg]);
878 // NOTE: bounds check?
879 value_stack.push_back(consts.handles[oparg]);
880 break;
881 case LOAD_NAME: {
882 Handle name_h = names.handles[oparg];
883 const char* name = heap_->AsStr0(name_h);
884 assert(name != nullptr); // Invalid bytecode not handled
885
886 //log("load_name handle = %d", names.handles[oparg]);
887 Handle h = frame->LoadName(name);
888 value_stack.push_back(h);
889 break;
890 }
891 case STORE_NAME: {
892 Handle name_h = names.handles[oparg];
893 const char* name = heap_->AsStr0(name_h);
894 assert(name != nullptr); // Invalid bytecode not handled
895
896 frame->StoreName(name, value_stack.back());
897 value_stack.pop_back();
898 break;
899 }
900 case POP_TOP:
901 value_stack.pop_back();
902 break;
903 case CALL_FUNCTION: {
904 int num_args = oparg & 0xff;
905 //int num_kwargs = (oparg >> 8) & 0xff; // copied from CPython
906 //log("num_args %d", num_args);
907
908#if VERBOSE_VALUE_STACK
909 log("value stack on CALL_FUNCTION");
910 DebugHandleArray(value_stack);
911#endif
912
913 vector<Handle> args;
914 args.reserve(num_args); // reserve the right size
915
916 // Pop num_args off. TODO: Could print() builtin do this itself to avoid
917 // copying?
918 for (int i = 0; i < num_args; ++i ) {
919 args.push_back(value_stack.back());
920 value_stack.pop_back();
921 }
922#if VERBOSE_VALUE_STACK
923 log("Popped args:");
924 DebugHandleArray(args);
925
926 log("Value stack after popping args:");
927 DebugHandleArray(value_stack);
928#endif
929
930 // Pop the function itself off
931 Handle func_handle = value_stack.back();
932 value_stack.pop_back();
933
934 //log("func handle %d", func_handle);
935
936 vector<Handle> rets;
937 if (func_handle < 0) {
938 // TODO: dispatch table for native functions.
939 // Call func_print for now.
940
941 why = func_print(*heap_, args, &rets);
942 if (why != Why::Not) {
943 log("EXCEPTION after calling native function");
944 break;
945 }
946 } else {
947 //Func func; // has CodeObject and more?
948 //heap_->AsFunc(func_handle, &func);
949 //CallFunction(func, args, &rets);
950 rets.push_back(0);
951 }
952
953 // Now push return values.
954 assert(rets.size() == 1);
955 value_stack.push_back(rets[0]);
956 break;
957 }
958
959 // Computation
960 case COMPARE_OP: {
961 Handle w = value_stack.back();
962 value_stack.pop_back();
963 Handle v = value_stack.back();
964 value_stack.pop_back();
965
966 // CPython inlines cmp(int, int) too.
967 int64_t a, b;
968 bool result;
969 if (heap_->AsInt(v, &a) && heap_->AsInt(w, &b)) {
970 switch (oparg) {
971 case CompareOp::LT: result = a < b; break;
972 case CompareOp::LE: result = a <= b; break;
973 case CompareOp::EQ: result = a == b; break;
974 case CompareOp::NE: result = a != b; break;
975 case CompareOp::GT: result = a > b; break;
976 case CompareOp::GE: result = a >= b; break;
977 //case CompareOp::IS: result = v == w; break;
978 //case CompareOp::IS_NOT: result = v != w; break;
979 default:
980 log("Unhandled compare %d", oparg);
981 assert(0);
982 }
983 // TODO: Avoid stack movement by SET_TOP().
984
985 // Use canonical handles rather than allocating bools.
986 value_stack.push_back(result ? kTrueHandle : kFalseHandle);
987 } else {
988 assert(0);
989 }
990 break;
991 }
992
993 case BINARY_ADD: {
994 Handle w = value_stack.back();
995 value_stack.pop_back();
996 Handle v = value_stack.back();
997 value_stack.pop_back();
998
999 int64_t a, b, result;
1000 if (heap_->AsInt(w, &a) && heap_->AsInt(v, &b)) {
1001 result = a + b;
1002 } else {
1003 // TODO: Concatenate strings, tuples, lists
1004 assert(0);
1005 }
1006
1007 Handle result_h = heap_->NewInt(result);
1008 value_stack.push_back(result_h);
1009 break;
1010 }
1011
1012 case BINARY_MODULO: {
1013 Handle w = value_stack.back();
1014 value_stack.pop_back();
1015 Handle v = value_stack.back();
1016 value_stack.pop_back();
1017
1018 Str s;
1019 if (heap_->AsStr(v, &s)) {
1020 // TODO: Do string formatting
1021 assert(0);
1022 }
1023
1024 int64_t a, b, result;
1025 if (heap_->AsInt(v, &a) && heap_->AsInt(w, &b)) {
1026 result = a % b;
1027 Handle result_h = heap_->NewInt(result);
1028 value_stack.push_back(result_h);
1029 break;
1030 }
1031
1032 // TODO: TypeError
1033 assert(0);
1034
1035 break;
1036 }
1037
1038 //
1039 // Jumps
1040 //
1041 case JUMP_ABSOLUTE:
1042 frame->JumpTo(oparg);
1043 break;
1044
1045 case JUMP_FORWARD:
1046 frame->JumpRelative(oparg);
1047 break;
1048
1049 case POP_JUMP_IF_FALSE: {
1050 Handle w = value_stack.back();
1051 value_stack.pop_back();
1052
1053 // Special case for Py_True / Py_False like CPython.
1054 if (w == kTrueHandle) {
1055 break;
1056 }
1057 if (w == kFalseHandle || !heap_->Truthy(w)) {
1058 frame->JumpTo(oparg);
1059 }
1060 break;
1061 }
1062
1063 //
1064 // Control Flow
1065 //
1066
1067 case SETUP_LOOP: {
1068 Block b;
1069 b.type = BlockType::Loop;
1070 b.level = value_stack.size();
1071 b.jump_target = frame->last_i_ + oparg; // oparg is relative jump target
1072 block_stack.push_back(b);
1073 break;
1074 }
1075
1076 case POP_BLOCK:
1077 block_stack.pop_back();
1078 break;
1079
1080 case BREAK_LOOP:
1081 why = Why::Break;
1082 break;
1083
1084 case RETURN_VALUE:
1085 // TODO: Set return value here. It's just a Handle I guess.
1086 retval = value_stack.back();
1087 value_stack.pop_back();
1088 why = Why::Return;
1089 break;
1090
1091 case MAKE_FUNCTION: {
1092 Handle code = value_stack.back();
1093 value_stack.pop_back();
1094 // TODO: default arguments are on the stack.
1095 if (oparg) {
1096 //Handle defaults = heap_->NewTuple(oparg); // initial size
1097 for (int i = 0; i < oparg; ++i) {
1098 value_stack.pop_back();
1099 }
1100 }
1101 // the function is run with the same globals as the frame it was defined in
1102 NameLookup* globals = &frame->globals_;
1103 Handle func = heap_->NewFunc(code, globals);
1104 value_stack.push_back(func);
1105 }
1106
1107 default:
1108 log("Unhandled instruction");
1109 break;
1110
1111 }
1112
1113 while (why != Why::Not && block_stack.size()) {
1114 assert(why != Why::Yield);
1115 Block b = block_stack.back();
1116
1117 // TODO: This code appears to be unused! continue compiles as
1118 // POP_JUMP_IF_FALSE!
1119 if (b.type == BlockType::Loop && why == Why::Continue) {
1120 assert(0);
1121 // TODO: retval? I guess it's popped off the stack.
1122 frame->JumpTo(retval);
1123 }
1124 block_stack.pop_back();
1125
1126 // Unwind value stack to the saved level.
1127 while (value_stack.size() > b.level) {
1128 value_stack.pop_back();
1129 }
1130
1131 if (b.type == BlockType::Loop && why == Why::Break) {
1132 why = Why::Not;
1133 frame->JumpTo(b.jump_target);
1134 }
1135
1136 if (b.type == BlockType::Finally ||
1137 b.type == BlockType::Except && why == Why::Exception ||
1138 b.type == BlockType::With) {
1139 assert(0);
1140 }
1141 }
1142
1143 // TODO: Handle the block stack. Break should JUMP to the location in the
1144 // block handler!
1145 if (why != Why::Not) { // return, yield, continue, etc.
1146 break;
1147 }
1148 inst_count++;
1149 }
1150
1151 log("Processed %d instructions", inst_count);
1152 return why;
1153}
1154
1155Why VM::RunMain() {
1156 Code co = heap_->AsCode(heap_->Last());
1157
1158 Frame* frame = new Frame(co);
1159 call_stack_.push_back(frame);
1160
1161 log("co = %p", co);
1162
1163 return RunFrame(frame);
1164}
1165
1166// Need a VM to be able to convert args to Cell?
1167Why func_print(const OHeap& heap, const Args& args, Rets* rets) {
1168 Str s;
1169 if (heap.AsStr(args[0], &s)) {
1170 //printf("PRINTING\n");
1171 fwrite(s.data, sizeof(char), s.len, stdout); // make sure to write NUL bytes!
1172 puts("\n");
1173
1174 // This is like Py_RETURN_NONE?
1175 rets->push_back(0);
1176 return Why::Not;
1177 }
1178
1179 // TODO: We should really call the str() constructor here, which will call
1180 // __str__ on user-defined instances.
1181 int64_t i;
1182 if (heap.AsInt(args[0], &i)) {
1183 printf("%ld\n", i);
1184
1185 rets->push_back(0);
1186 return Why::Not;
1187 }
1188
1189 // TODO: Set TypeError
1190 // I guess you need the VM argument here.
1191 return Why::Exception;
1192}
1193
1194int main(int argc, char **argv) {
1195 if (argc == 0) {
1196 log("Expected filename\n");
1197 return 1;
1198 }
1199 FILE *f = fopen(argv[1], "rb");
1200 if (!f) {
1201 log("Error opening %s", argv[1]);
1202 return 1;
1203 }
1204
1205 assert(sizeof(Cell) == 16);
1206
1207 OHeap heap;
1208 if (!Load(f, &heap)) {
1209 log("Error loading '%s'", argv[1]);
1210 return 1;
1211 }
1212
1213 VM vm(&heap);
1214 vm.RunMain();
1215 return 0;
1216}