mycpp

Coverage Report

Created: 2024-07-09 17:10

/home/uke/oil/mycpp/gc_mylib.h
Line
Count
Source (jump to first uncovered line)
1
// gc_mylib.h - corresponds to mycpp/mylib.py
2
3
#ifndef MYCPP_GC_MYLIB_H
4
#define MYCPP_GC_MYLIB_H
5
6
#include <limits.h>  // CHAR_BIT
7
8
#include "mycpp/gc_alloc.h"  // gHeap
9
#include "mycpp/gc_dict.h"   // for dict_erase()
10
#include "mycpp/gc_mops.h"
11
#include "mycpp/gc_tuple.h"
12
13
template <class K, class V>
14
class Dict;
15
16
// https://stackoverflow.com/questions/3919995/determining-sprintf-buffer-size-whats-the-standard/11092994#11092994
17
// Notes:
18
// - Python 2.7's intobject.c has an erroneous +6
19
// - This is 13, but len('-2147483648') is 11, which means we only need 12?
20
// - This formula is valid for octal(), because 2^(3 bits) = 8
21
22
const int kIntBufSize = CHAR_BIT * sizeof(int) / 3 + 3;
23
24
namespace mylib {
25
26
void InitCppOnly();
27
28
// Wrappers around our C++ APIs
29
30
1
inline void MaybeCollect() {
31
1
  gHeap.MaybeCollect();
32
1
}
33
34
void print_stderr(BigStr* s);
35
36
0
inline int ByteAt(BigStr* s, int i) {
37
0
  DCHECK(0 <= i);
38
0
  DCHECK(i <= len(s));
39
0
40
0
  return static_cast<unsigned char>(s->data_[i]);
41
0
}
42
43
0
inline int ByteEquals(int byte, BigStr* ch) {
44
0
  DCHECK(0 <= byte);
45
0
  DCHECK(byte < 256);
46
0
47
0
  DCHECK(len(ch) == 1);
48
0
49
0
  return byte == static_cast<unsigned char>(ch->data_[0]);
50
0
}
51
52
0
inline int ByteInSet(int byte, BigStr* byte_set) {
53
0
  DCHECK(0 <= byte);
54
0
  DCHECK(byte < 256);
55
0
56
0
  int n = len(byte_set);
57
0
  for (int i = 0; i < n; ++i) {
58
0
    int b = static_cast<unsigned char>(byte_set->data_[i]);
59
0
    if (byte == b) {
60
0
      return true;
61
0
    }
62
0
  }
63
0
  return false;
64
0
}
65
66
BigStr* JoinBytes(List<int>* byte_list);
67
68
void BigIntSort(List<mops::BigInt>* keys);
69
70
// const int kStdout = 1;
71
// const int kStderr = 2;
72
73
// void writeln(BigStr* s, int fd = kStdout);
74
75
Tuple2<BigStr*, BigStr*> split_once(BigStr* s, BigStr* delim);
76
77
template <typename K, typename V>
78
140
void dict_erase(Dict<K, V>* haystack, K needle) {
79
140
  DCHECK(haystack->obj_header().heap_tag != HeapTag::Global);
80
81
0
  int pos = haystack->hash_and_probe(needle);
82
140
  if (pos == kTooSmall) {
83
0
    return;
84
0
  }
85
140
  DCHECK(pos >= 0);
86
0
  int kv_index = haystack->index_->items_[pos];
87
140
  if (kv_index < 0) {
88
1
    return;
89
1
  }
90
91
139
  int last_kv_index = haystack->len_ - 1;
92
139
  DCHECK(kv_index <= last_kv_index);
93
94
  // Swap the target entry with the most recently inserted one before removing
95
  // it. This has two benefits.
96
  //   (1) It keeps the entry arrays compact. All valid entries occupy a
97
  //       contiguous region in memory.
98
  //   (2) It prevents holes in the entry arrays. This makes iterating over
99
  //       entries (e.g. in keys() or DictIter()) trivial and doesn't require
100
  //       any extra validity state (like a bitset of unusable slots). This is
101
//       important because keys and values won't always be pointers, so we
102
  //       can't rely on NULL checks for validity. We also can't wrap the slab
103
  //       entry types in some other type without modifying the garbage
104
  //       collector to trace through unmanaged types (or paying the extra
105
  //       allocations for the outer type).
106
139
  if (kv_index != last_kv_index) {
107
71
    K last_key = haystack->keys_->items_[last_kv_index];
108
71
    V last_val = haystack->values_->items_[last_kv_index];
109
71
    int last_pos = haystack->hash_and_probe(last_key);
110
71
    DCHECK(last_pos != kNotFound);
111
0
    haystack->keys_->items_[kv_index] = last_key;
112
71
    haystack->values_->items_[kv_index] = last_val;
113
71
    haystack->index_->items_[last_pos] = kv_index;
114
71
  }
115
116
  // Zero out for GC.  These could be nullptr or 0
117
0
  haystack->keys_->items_[last_kv_index] = 0;
118
139
  haystack->values_->items_[last_kv_index] = 0;
119
139
  haystack->index_->items_[pos] = kDeletedEntry;
120
139
  haystack->len_--;
121
139
  DCHECK(haystack->len_ < haystack->capacity_);
122
139
}
_ZN5mylib10dict_eraseIP6BigStriEEvP4DictIT_T0_ES4_
Line
Count
Source
78
1
void dict_erase(Dict<K, V>* haystack, K needle) {
79
1
  DCHECK(haystack->obj_header().heap_tag != HeapTag::Global);
80
81
0
  int pos = haystack->hash_and_probe(needle);
82
1
  if (pos == kTooSmall) {
83
0
    return;
84
0
  }
85
1
  DCHECK(pos >= 0);
86
0
  int kv_index = haystack->index_->items_[pos];
87
1
  if (kv_index < 0) {
88
0
    return;
89
0
  }
90
91
1
  int last_kv_index = haystack->len_ - 1;
92
1
  DCHECK(kv_index <= last_kv_index);
93
94
  // Swap the target entry with the most recently inserted one before removing
95
  // it. This has two benefits.
96
  //   (1) It keeps the entry arrays compact. All valid entries occupy a
97
  //       contiguous region in memory.
98
  //   (2) It prevents holes in the entry arrays. This makes iterating over
99
  //       entries (e.g. in keys() or DictIter()) trivial and doesn't require
100
  //       any extra validity state (like a bitset of unusable slots). This is
101
//       important because keys and values won't always be pointers, so we
102
  //       can't rely on NULL checks for validity. We also can't wrap the slab
103
  //       entry types in some other type without modifying the garbage
104
  //       collector to trace through unmanaged types (or paying the extra
105
  //       allocations for the outer type).
106
1
  if (kv_index != last_kv_index) {
107
0
    K last_key = haystack->keys_->items_[last_kv_index];
108
0
    V last_val = haystack->values_->items_[last_kv_index];
109
0
    int last_pos = haystack->hash_and_probe(last_key);
110
0
    DCHECK(last_pos != kNotFound);
111
0
    haystack->keys_->items_[kv_index] = last_key;
112
0
    haystack->values_->items_[kv_index] = last_val;
113
0
    haystack->index_->items_[last_pos] = kv_index;
114
0
  }
115
116
  // Zero out for GC.  These could be nullptr or 0
117
0
  haystack->keys_->items_[last_kv_index] = 0;
118
1
  haystack->values_->items_[last_kv_index] = 0;
119
1
  haystack->index_->items_[pos] = kDeletedEntry;
120
1
  haystack->len_--;
121
1
  DCHECK(haystack->len_ < haystack->capacity_);
122
1
}
_ZN5mylib10dict_eraseIP6BigStrS2_EEvP4DictIT_T0_ES4_
Line
Count
Source
78
1
void dict_erase(Dict<K, V>* haystack, K needle) {
79
1
  DCHECK(haystack->obj_header().heap_tag != HeapTag::Global);
80
81
0
  int pos = haystack->hash_and_probe(needle);
82
1
  if (pos == kTooSmall) {
83
0
    return;
84
0
  }
85
1
  DCHECK(pos >= 0);
86
0
  int kv_index = haystack->index_->items_[pos];
87
1
  if (kv_index < 0) {
88
0
    return;
89
0
  }
90
91
1
  int last_kv_index = haystack->len_ - 1;
92
1
  DCHECK(kv_index <= last_kv_index);
93
94
  // Swap the target entry with the most recently inserted one before removing
95
  // it. This has two benefits.
96
  //   (1) It keeps the entry arrays compact. All valid entries occupy a
97
  //       contiguous region in memory.
98
  //   (2) It prevents holes in the entry arrays. This makes iterating over
99
  //       entries (e.g. in keys() or DictIter()) trivial and doesn't require
100
  //       any extra validity state (like a bitset of unusable slots). This is
101
//       important because keys and values won't always be pointers, so we
102
  //       can't rely on NULL checks for validity. We also can't wrap the slab
103
  //       entry types in some other type without modifying the garbage
104
  //       collector to trace through unmanaged types (or paying the extra
105
  //       allocations for the outer type).
106
1
  if (kv_index != last_kv_index) {
107
0
    K last_key = haystack->keys_->items_[last_kv_index];
108
0
    V last_val = haystack->values_->items_[last_kv_index];
109
0
    int last_pos = haystack->hash_and_probe(last_key);
110
0
    DCHECK(last_pos != kNotFound);
111
0
    haystack->keys_->items_[kv_index] = last_key;
112
0
    haystack->values_->items_[kv_index] = last_val;
113
0
    haystack->index_->items_[last_pos] = kv_index;
114
0
  }
115
116
  // Zero out for GC.  These could be nullptr or 0
117
0
  haystack->keys_->items_[last_kv_index] = 0;
118
1
  haystack->values_->items_[last_kv_index] = 0;
119
1
  haystack->index_->items_[pos] = kDeletedEntry;
120
1
  haystack->len_--;
121
1
  DCHECK(haystack->len_ < haystack->capacity_);
122
1
}
_ZN5mylib10dict_eraseIiiEEvP4DictIT_T0_ES2_
Line
Count
Source
78
137
void dict_erase(Dict<K, V>* haystack, K needle) {
79
137
  DCHECK(haystack->obj_header().heap_tag != HeapTag::Global);
80
81
0
  int pos = haystack->hash_and_probe(needle);
82
137
  if (pos == kTooSmall) {
83
0
    return;
84
0
  }
85
137
  DCHECK(pos >= 0);
86
0
  int kv_index = haystack->index_->items_[pos];
87
137
  if (kv_index < 0) {
88
0
    return;
89
0
  }
90
91
137
  int last_kv_index = haystack->len_ - 1;
92
137
  DCHECK(kv_index <= last_kv_index);
93
94
  // Swap the target entry with the most recently inserted one before removing
95
  // it. This has two benefits.
96
  //   (1) It keeps the entry arrays compact. All valid entries occupy a
97
  //       contiguous region in memory.
98
  //   (2) It prevents holes in the entry arrays. This makes iterating over
99
  //       entries (e.g. in keys() or DictIter()) trivial and doesn't require
100
  //       any extra validity state (like a bitset of unusable slots). This is
101
//       important because keys and values won't always be pointers, so we
102
  //       can't rely on NULL checks for validity. We also can't wrap the slab
103
  //       entry types in some other type without modifying the garbage
104
  //       collector to trace through unmanaged types (or paying the extra
105
  //       allocations for the outer type).
106
137
  if (kv_index != last_kv_index) {
107
71
    K last_key = haystack->keys_->items_[last_kv_index];
108
71
    V last_val = haystack->values_->items_[last_kv_index];
109
71
    int last_pos = haystack->hash_and_probe(last_key);
110
71
    DCHECK(last_pos != kNotFound);
111
0
    haystack->keys_->items_[kv_index] = last_key;
112
71
    haystack->values_->items_[kv_index] = last_val;
113
71
    haystack->index_->items_[last_pos] = kv_index;
114
71
  }
115
116
  // Zero out for GC.  These could be nullptr or 0
117
0
  haystack->keys_->items_[last_kv_index] = 0;
118
137
  haystack->values_->items_[last_kv_index] = 0;
119
137
  haystack->index_->items_[pos] = kDeletedEntry;
120
137
  haystack->len_--;
121
137
  DCHECK(haystack->len_ < haystack->capacity_);
122
137
}
_ZN5mylib10dict_eraseIiP6BigStrEEvP4DictIT_T0_ES4_
Line
Count
Source
78
1
void dict_erase(Dict<K, V>* haystack, K needle) {
79
1
  DCHECK(haystack->obj_header().heap_tag != HeapTag::Global);
80
81
0
  int pos = haystack->hash_and_probe(needle);
82
1
  if (pos == kTooSmall) {
83
0
    return;
84
0
  }
85
1
  DCHECK(pos >= 0);
86
0
  int kv_index = haystack->index_->items_[pos];
87
1
  if (kv_index < 0) {
88
1
    return;
89
1
  }
90
91
0
  int last_kv_index = haystack->len_ - 1;
92
0
  DCHECK(kv_index <= last_kv_index);
93
94
  // Swap the target entry with the most recently inserted one before removing
95
  // it. This has two benefits.
96
  //   (1) It keeps the entry arrays compact. All valid entries occupy a
97
  //       contiguous region in memory.
98
  //   (2) It prevents holes in the entry arrays. This makes iterating over
99
  //       entries (e.g. in keys() or DictIter()) trivial and doesn't require
100
  //       any extra validity state (like a bitset of unusable slots). This is
101
//       important because keys and values won't always be pointers, so we
102
  //       can't rely on NULL checks for validity. We also can't wrap the slab
103
  //       entry types in some other type without modifying the garbage
104
  //       collector to trace through unmanaged types (or paying the extra
105
  //       allocations for the outer type).
106
0
  if (kv_index != last_kv_index) {
107
0
    K last_key = haystack->keys_->items_[last_kv_index];
108
0
    V last_val = haystack->values_->items_[last_kv_index];
109
0
    int last_pos = haystack->hash_and_probe(last_key);
110
0
    DCHECK(last_pos != kNotFound);
111
0
    haystack->keys_->items_[kv_index] = last_key;
112
0
    haystack->values_->items_[kv_index] = last_val;
113
0
    haystack->index_->items_[last_pos] = kv_index;
114
0
  }
115
116
  // Zero out for GC.  These could be nullptr or 0
117
0
  haystack->keys_->items_[last_kv_index] = 0;
118
0
  haystack->values_->items_[last_kv_index] = 0;
119
0
  haystack->index_->items_[pos] = kDeletedEntry;
120
0
  haystack->len_--;
121
0
  DCHECK(haystack->len_ < haystack->capacity_);
122
0
}
123
124
2
inline BigStr* hex_lower(int i) {
125
  // Note: Could also use OverAllocatedStr, but most strings are small?
126
2
  char buf[kIntBufSize];
127
2
  int len = snprintf(buf, kIntBufSize, "%x", i);
128
2
  return ::StrFromC(buf, len);
129
2
}
130
131
// Abstract type: Union of LineReader and Writer
132
class File {
133
 public:
134
14
  File() {
135
14
  }
136
  // Writer
137
  virtual void write(BigStr* s) = 0;
138
  virtual void flush() = 0;
139
140
  // Reader
141
  virtual BigStr* readline() = 0;
142
143
  // Both
144
  virtual bool isatty() = 0;
145
  virtual void close() = 0;
146
147
0
  static constexpr ObjHeader obj_header() {
148
0
    return ObjHeader::ClassFixed(field_mask(), sizeof(File));
149
0
  }
150
151
7
  static constexpr uint32_t field_mask() {
152
7
    return kZeroMask;
153
7
  }
154
};
155
156
// Wrap a FILE* for read and write
157
class CFile : public File {
158
 public:
159
7
  explicit CFile(FILE* f) : File(), f_(f) {
160
7
  }
161
  // Writer
162
  void write(BigStr* s) override;
163
  void flush() override;
164
165
  // Reader
166
  BigStr* readline() override;
167
168
  // Both
169
  bool isatty() override;
170
  void close() override;
171
172
7
  static constexpr ObjHeader obj_header() {
173
7
    return ObjHeader::ClassFixed(field_mask(), sizeof(CFile));
174
7
  }
175
176
7
  static constexpr uint32_t field_mask() {
177
    // not mutating field_mask because FILE* isn't a GC object
178
7
    return File::field_mask();
179
7
  }
180
181
 private:
182
  FILE* f_;
183
184
  DISALLOW_COPY_AND_ASSIGN(CFile)
185
};
186
187
// Abstract File we can only read from.
188
// TODO: can we get rid of DCHECK() and reinterpret_cast?
189
class LineReader : public File {
190
 public:
191
3
  LineReader() : File() {
192
3
  }
193
0
  void write(BigStr* s) override {
194
0
    CHECK(false);  // should not happen
195
0
  }
196
0
  void flush() override {
197
0
    CHECK(false);  // should not happen
198
0
  }
199
200
0
  static constexpr ObjHeader obj_header() {
201
0
    return ObjHeader::ClassFixed(field_mask(), sizeof(LineReader));
202
0
  }
203
204
3
  static constexpr uint32_t field_mask() {
205
3
    return kZeroMask;
206
3
  }
207
};
208
209
class BufLineReader : public LineReader {
210
 public:
211
3
  explicit BufLineReader(BigStr* s) : LineReader(), s_(s), pos_(0) {
212
3
  }
213
  virtual BigStr* readline();
214
1
  virtual bool isatty() {
215
1
    return false;
216
1
  }
217
1
  virtual void close() {
218
1
  }
219
220
  BigStr* s_;
221
  int pos_;
222
223
3
  static constexpr ObjHeader obj_header() {
224
3
    return ObjHeader::ClassFixed(field_mask(), sizeof(LineReader));
225
3
  }
226
227
3
  static constexpr uint32_t field_mask() {
228
3
    return LineReader::field_mask() | maskbit(offsetof(BufLineReader, s_));
229
3
  }
230
231
  DISALLOW_COPY_AND_ASSIGN(BufLineReader)
232
};
233
234
extern LineReader* gStdin;
235
236
1
inline LineReader* Stdin() {
237
1
  if (gStdin == nullptr) {
238
1
    gStdin = reinterpret_cast<LineReader*>(Alloc<CFile>(stdin));
239
1
  }
240
1
  return gStdin;
241
1
}
242
243
LineReader* open(BigStr* path);
244
245
// Abstract File we can only write to.
246
// TODO: can we get rid of DCHECK() and reinterpret_cast?
247
class Writer : public File {
248
 public:
249
4
  Writer() : File() {
250
4
  }
251
0
  BigStr* readline() override {
252
0
    CHECK(false);  // should not happen
253
0
  }
254
255
0
  static constexpr ObjHeader obj_header() {
256
0
    return ObjHeader::ClassFixed(field_mask(), sizeof(Writer));
257
0
  }
258
259
4
  static constexpr uint32_t field_mask() {
260
4
    return kZeroMask;
261
4
  }
262
};
263
264
class MutableStr;
265
266
class BufWriter : public Writer {
267
 public:
268
4
  BufWriter() : Writer(), str_(nullptr), len_(0) {
269
4
  }
270
  void write(BigStr* s) override;
271
  void write_spaces(int n);
272
1
  void clear() {  // Reuse this instance
273
1
    str_ = nullptr;
274
1
    len_ = 0;
275
1
    is_valid_ = true;
276
1
  }
277
0
  void close() override {
278
0
  }
279
1
  void flush() override {
280
1
  }
281
1
  bool isatty() override {
282
1
    return false;
283
1
  }
284
  BigStr* getvalue();  // part of cStringIO API
285
286
  //
287
  // Low Level API for C++ usage only
288
  //
289
290
  // Convenient API that avoids BigStr*
291
  void WriteConst(const char* c_string);
292
293
  // Potentially resizes the buffer.
294
  void EnsureMoreSpace(int n);
295
  // After EnsureMoreSpace(42), you can write 42 more bytes safely.
296
  //
297
  // Note that if you call EnsureMoreSpace(42), write 5 byte, and then
298
  // EnsureMoreSpace(42) again, the amount of additional space reserved is 47.
299
300
  // (Similar to vector::reserve(n), but it takes an integer to ADD to the
301
  // capacity.)
302
303
  uint8_t* LengthPointer();    // start + length
304
  uint8_t* CapacityPointer();  // start + capacity
305
  void SetLengthFrom(uint8_t* length_ptr);
306
307
0
  int Length() {
308
0
    return len_;
309
0
  }
310
311
  // Rewind to earlier position, future writes start there
312
  void Truncate(int length);
313
314
4
  static constexpr ObjHeader obj_header() {
315
4
    return ObjHeader::ClassFixed(field_mask(), sizeof(BufWriter));
316
4
  }
317
318
4
  static constexpr unsigned field_mask() {
319
    // maskbit() because BufWriter has virtual methods
320
4
    return Writer::field_mask() | maskbit(offsetof(BufWriter, str_));
321
4
  }
322
323
 private:
324
  void WriteRaw(char* s, int n);
325
326
  MutableStr* str_;  // getvalue() turns this directly into Str*, no copying
327
  int len_;          // how many bytes have been written so far
328
  bool is_valid_ = true;  // It becomes invalid after getvalue() is called
329
};
330
331
extern Writer* gStdout;
332
333
1
inline Writer* Stdout() {
334
1
  if (gStdout == nullptr) {
335
1
    gStdout = reinterpret_cast<Writer*>(Alloc<CFile>(stdout));
336
1
    gHeap.RootGlobalVar(gStdout);
337
1
  }
338
1
  return gStdout;
339
1
}
340
341
extern Writer* gStderr;
342
343
1
inline Writer* Stderr() {
344
1
  if (gStderr == nullptr) {
345
1
    gStderr = reinterpret_cast<Writer*>(Alloc<CFile>(stderr));
346
1
    gHeap.RootGlobalVar(gStderr);
347
1
  }
348
1
  return gStderr;
349
1
}
350
351
class UniqueObjects {
352
  // Can't be expressed in typed Python because we don't have uint64_t for
353
  // addresses
354
355
 public:
356
0
  UniqueObjects() {
357
0
  }
358
0
  void Add(void* obj) {
359
0
  }
360
0
  int Get(void* obj) {
361
0
    return -1;
362
0
  }
363
364
0
  static constexpr ObjHeader obj_header() {
365
0
    return ObjHeader::ClassFixed(field_mask(), sizeof(UniqueObjects));
366
0
  }
367
368
  // SPECIAL CASE? We should never have a unique reference to an object?  So
369
  // don't bother tracing
370
0
  static constexpr uint32_t field_mask() {
371
0
    return kZeroMask;
372
0
  }
373
374
 private:
375
  // address -> small integer ID
376
  Dict<void*, int> addresses_;
377
};
378
379
}  // namespace mylib
380
381
#endif  // MYCPP_GC_MYLIB_H