OILS / mycpp / gc_mylib.h View on Github | oilshell.org

391 lines, 202 significant
1// gc_mylib.h - corresponds to mycpp/mylib.py
2
3#ifndef MYCPP_GC_MYLIB_H
4#define MYCPP_GC_MYLIB_H
5
6#include <limits.h> // CHAR_BIT
7
8#include "mycpp/gc_alloc.h" // gHeap
9#include "mycpp/gc_dict.h" // for dict_erase()
10#include "mycpp/gc_tuple.h"
11
12template <class K, class V>
13class Dict;
14
// Size of a stack buffer large enough to format any `int`, including the
// sign and the trailing NUL.
//
// Derivation:
// https://stackoverflow.com/questions/3919995/determining-sprintf-buffer-size-whats-the-standard/11092994#11092994
//
// Notes:
// - Python 2.7's intobject.c has an erroneous +6
// - With a 32-bit int this evaluates to 13, while len('-2147483648') is 11,
//   so 12 bytes might be enough.
// - The formula is also valid for octal(), because each octal digit encodes
//   3 bits (2^3 = 8).

constexpr int kIntBufSize = CHAR_BIT * sizeof(int) / 3 + 3;
22
23namespace mylib {
24
25void InitCppOnly();
26
27// Wrappers around our C++ APIs
28
29inline void MaybeCollect() {
30 gHeap.MaybeCollect();
31}
32
33void print_stderr(BigStr* s);
34
35inline int ByteAt(BigStr* s, int i) {
36 DCHECK(0 <= i);
37 DCHECK(i <= len(s));
38
39 return static_cast<unsigned char>(s->data_[i]);
40}
41
42inline int ByteEquals(int byte, BigStr* ch) {
43 DCHECK(0 <= byte);
44 DCHECK(byte < 256);
45
46 DCHECK(len(ch) == 1);
47
48 return byte == static_cast<unsigned char>(ch->data_[0]);
49}
50
51inline int ByteInSet(int byte, BigStr* byte_set) {
52 DCHECK(0 <= byte);
53 DCHECK(byte < 256);
54
55 int n = len(byte_set);
56 for (int i = 0; i < n; ++i) {
57 int b = static_cast<unsigned char>(byte_set->data_[i]);
58 if (byte == b) {
59 return true;
60 }
61 }
62 return false;
63}
64
65BigStr* JoinBytes(List<int>* byte_list);
66
67// const int kStdout = 1;
68// const int kStderr = 2;
69
70// void writeln(BigStr* s, int fd = kStdout);
71
72Tuple2<BigStr*, BigStr*> split_once(BigStr* s, BigStr* delim);
73
74template <typename K, typename V>
75void dict_erase(Dict<K, V>* haystack, K needle) {
76 DCHECK(haystack->obj_header().heap_tag != HeapTag::Global);
77
78 int pos = haystack->hash_and_probe(needle);
79 if (pos == kTooSmall) {
80 return;
81 }
82 DCHECK(pos >= 0);
83 int kv_index = haystack->index_->items_[pos];
84 if (kv_index < 0) {
85 return;
86 }
87
88 int last_kv_index = haystack->len_ - 1;
89 DCHECK(kv_index <= last_kv_index);
90
91 // Swap the target entry with the most recently inserted one before removing
92 // it. This has two benefits.
93 // (1) It keeps the entry arrays compact. All valid entries occupy a
94 // contiguous region in memory.
95 // (2) It prevents holes in the entry arrays. This makes iterating over
96 // entries (e.g. in keys() or DictIter()) trivial and doesn't require
97 // any extra validity state (like a bitset of unusable slots). This is
98 // important because keys and values wont't always be pointers, so we
99 // can't rely on NULL checks for validity. We also can't wrap the slab
100 // entry types in some other type without modifying the garbage
101 // collector to trace through unmanaged types (or paying the extra
102 // allocations for the outer type).
103 if (kv_index != last_kv_index) {
104 K last_key = haystack->keys_->items_[last_kv_index];
105 V last_val = haystack->values_->items_[last_kv_index];
106 int last_pos = haystack->hash_and_probe(last_key);
107 DCHECK(last_pos != kNotFound);
108 haystack->keys_->items_[kv_index] = last_key;
109 haystack->values_->items_[kv_index] = last_val;
110 haystack->index_->items_[last_pos] = kv_index;
111 }
112
113 // Zero out for GC. These could be nullptr or 0
114 haystack->keys_->items_[last_kv_index] = 0;
115 haystack->values_->items_[last_kv_index] = 0;
116 haystack->index_->items_[pos] = kDeletedEntry;
117 haystack->len_--;
118 DCHECK(haystack->len_ < haystack->capacity_);
119}
120
121// NOTE: Can use OverAllocatedStr for all of these, rather than copying
122
123inline BigStr* hex_lower(int i) {
124 char buf[kIntBufSize];
125 int len = snprintf(buf, kIntBufSize, "%x", i);
126 return ::StrFromC(buf, len);
127}
128
129inline BigStr* hex_upper(int i) {
130 char buf[kIntBufSize];
131 int len = snprintf(buf, kIntBufSize, "%X", i);
132 return ::StrFromC(buf, len);
133}
134
135inline BigStr* octal(int i) {
136 char buf[kIntBufSize];
137 int len = snprintf(buf, kIntBufSize, "%o", i);
138 return ::StrFromC(buf, len);
139}
140
141// Abstract type: Union of LineReader and Writer
142class File {
143 public:
144 File() {
145 }
146 // Writer
147 virtual void write(BigStr* s) = 0;
148 virtual void flush() = 0;
149
150 // Reader
151 virtual BigStr* readline() = 0;
152
153 // Both
154 virtual bool isatty() = 0;
155 virtual void close() = 0;
156
157 static constexpr ObjHeader obj_header() {
158 return ObjHeader::ClassFixed(field_mask(), sizeof(File));
159 }
160
161 static constexpr uint32_t field_mask() {
162 return kZeroMask;
163 }
164};
165
166// Wrap a FILE* for read and write
167class CFile : public File {
168 public:
169 explicit CFile(FILE* f) : File(), f_(f) {
170 }
171 // Writer
172 void write(BigStr* s) override;
173 void flush() override;
174
175 // Reader
176 BigStr* readline() override;
177
178 // Both
179 bool isatty() override;
180 void close() override;
181
182 static constexpr ObjHeader obj_header() {
183 return ObjHeader::ClassFixed(field_mask(), sizeof(CFile));
184 }
185
186 static constexpr uint32_t field_mask() {
187 // not mutating field_mask because FILE* isn't a GC object
188 return File::field_mask();
189 }
190
191 private:
192 FILE* f_;
193
194 DISALLOW_COPY_AND_ASSIGN(CFile)
195};
196
197// Abstract File we can only read from.
198// TODO: can we get rid of DCHECK() and reinterpret_cast?
199class LineReader : public File {
200 public:
201 LineReader() : File() {
202 }
203 void write(BigStr* s) override {
204 CHECK(false); // should not happen
205 }
206 void flush() override {
207 CHECK(false); // should not happen
208 }
209
210 static constexpr ObjHeader obj_header() {
211 return ObjHeader::ClassFixed(field_mask(), sizeof(LineReader));
212 }
213
214 static constexpr uint32_t field_mask() {
215 return kZeroMask;
216 }
217};
218
219class BufLineReader : public LineReader {
220 public:
221 explicit BufLineReader(BigStr* s) : LineReader(), s_(s), pos_(0) {
222 }
223 virtual BigStr* readline();
224 virtual bool isatty() {
225 return false;
226 }
227 virtual void close() {
228 }
229
230 BigStr* s_;
231 int pos_;
232
233 static constexpr ObjHeader obj_header() {
234 return ObjHeader::ClassFixed(field_mask(), sizeof(LineReader));
235 }
236
237 static constexpr uint32_t field_mask() {
238 return LineReader::field_mask() | maskbit(offsetof(BufLineReader, s_));
239 }
240
241 DISALLOW_COPY_AND_ASSIGN(BufLineReader)
242};
243
244extern LineReader* gStdin;
245
246inline LineReader* Stdin() {
247 if (gStdin == nullptr) {
248 gStdin = reinterpret_cast<LineReader*>(Alloc<CFile>(stdin));
249 }
250 return gStdin;
251}
252
253LineReader* open(BigStr* path);
254
255// Abstract File we can only write to.
256// TODO: can we get rid of DCHECK() and reinterpret_cast?
257class Writer : public File {
258 public:
259 Writer() : File() {
260 }
261 BigStr* readline() override {
262 CHECK(false); // should not happen
263 }
264
265 static constexpr ObjHeader obj_header() {
266 return ObjHeader::ClassFixed(field_mask(), sizeof(Writer));
267 }
268
269 static constexpr uint32_t field_mask() {
270 return kZeroMask;
271 }
272};
273
274class MutableStr;
275
276class BufWriter : public Writer {
277 public:
278 BufWriter() : Writer(), str_(nullptr), len_(0) {
279 }
280 void write(BigStr* s) override;
281 void write_spaces(int n);
282 void clear() { // Reuse this instance
283 str_ = nullptr;
284 len_ = 0;
285 is_valid_ = true;
286 }
287 void close() override {
288 }
289 void flush() override {
290 }
291 bool isatty() override {
292 return false;
293 }
294 BigStr* getvalue(); // part of cStringIO API
295
296 //
297 // Low Level API for C++ usage only
298 //
299
300 // Convenient API that avoids BigStr*
301 void WriteConst(const char* c_string);
302
303 // Potentially resizes the buffer.
304 void EnsureMoreSpace(int n);
305 // After EnsureMoreSpace(42), you can write 42 more bytes safely.
306 //
307 // Note that if you call EnsureMoreSpace(42), write 5 byte, and then
308 // EnsureMoreSpace(42) again, the amount of additional space reserved is 47.
309
310 // (Similar to vector::reserve(n), but it takes an integer to ADD to the
311 // capacity.)
312
313 uint8_t* LengthPointer(); // start + length
314 uint8_t* CapacityPointer(); // start + capacity
315 void SetLengthFrom(uint8_t* length_ptr);
316
317 int Length() {
318 return len_;
319 }
320
321 // Rewind to earlier position, future writes start there
322 void Truncate(int length);
323
324 static constexpr ObjHeader obj_header() {
325 return ObjHeader::ClassFixed(field_mask(), sizeof(BufWriter));
326 }
327
328 static constexpr unsigned field_mask() {
329 // maskvit_v() because BufWriter has virtual methods
330 return Writer::field_mask() | maskbit(offsetof(BufWriter, str_));
331 }
332
333 private:
334 void WriteRaw(char* s, int n);
335
336 MutableStr* str_; // getvalue() turns this directly into Str*, no copying
337 int len_; // how many bytes have been written so far
338 bool is_valid_ = true; // It becomes invalid after getvalue() is called
339};
340
341extern Writer* gStdout;
342
343inline Writer* Stdout() {
344 if (gStdout == nullptr) {
345 gStdout = reinterpret_cast<Writer*>(Alloc<CFile>(stdout));
346 gHeap.RootGlobalVar(gStdout);
347 }
348 return gStdout;
349}
350
351extern Writer* gStderr;
352
353inline Writer* Stderr() {
354 if (gStderr == nullptr) {
355 gStderr = reinterpret_cast<Writer*>(Alloc<CFile>(stderr));
356 gHeap.RootGlobalVar(gStderr);
357 }
358 return gStderr;
359}
360
361class UniqueObjects {
362 // Can't be expressed in typed Python because we don't have uint64_t for
363 // addresses
364
365 public:
366 UniqueObjects() {
367 }
368 void Add(void* obj) {
369 }
370 int Get(void* obj) {
371 return -1;
372 }
373
374 static constexpr ObjHeader obj_header() {
375 return ObjHeader::ClassFixed(field_mask(), sizeof(UniqueObjects));
376 }
377
378 // SPECIAL CASE? We should never have a unique reference to an object? So
379 // don't bother tracing
380 static constexpr uint32_t field_mask() {
381 return kZeroMask;
382 }
383
384 private:
385 // address -> small integer ID
386 Dict<void*, int> addresses_;
387};
388
389} // namespace mylib
390
391#endif // MYCPP_GC_MYLIB_H