OILS / mycpp / gc_list.h View on Github | oilshell.org

514 lines, 282 significant
1#ifndef MYCPP_GC_LIST_H
2#define MYCPP_GC_LIST_H
3
4#include <string.h> // memcpy
5
6#include <algorithm> // sort() is templated
7
8#include "mycpp/common.h" // DCHECK
9#include "mycpp/comparators.h"
10#include "mycpp/gc_alloc.h" // Alloc
11#include "mycpp/gc_builtins.h" // ValueError
12#include "mycpp/gc_slab.h"
13
14// GlobalList is layout-compatible with List (unit tests assert this), and it
15// can be a true C global (incurs zero startup time)
16
17template <typename T, int N>
18class GlobalList {
19 public:
20 int len_;
21 int capacity_;
22 GlobalSlab<T, N>* slab_;
23};
24
25#define GLOBAL_LIST(name, T, N, array) \
26 GcGlobal<GlobalSlab<T, N>> _slab_##name = {ObjHeader::Global(TypeTag::Slab), \
27 {.items_ = array}}; \
28 GcGlobal<GlobalList<T, N>> _list_##name = { \
29 ObjHeader::Global(TypeTag::List), \
30 {.len_ = N, .capacity_ = N, .slab_ = &_slab_##name.obj}}; \
31 List<T>* name = reinterpret_cast<List<T>*>(&_list_##name.obj);
32
33template <typename T>
34class List {
35 public:
36 List() : len_(0), capacity_(0), slab_(nullptr) {
37 }
38
39 // Implements L[i]
40 T at(int i);
41
42 // returns index of the element
43 int index(T element);
44
45 // Implements L[i] = item
46 void set(int i, T item);
47
48 // L[begin:]
49 List* slice(int begin);
50
51 // L[begin:end]
52 List* slice(int begin, int end);
53
54 // Should we have a separate API that doesn't return it?
55 // https://stackoverflow.com/questions/12600330/pop-back-return-value
56 T pop();
57
58 // Used in osh/word_parse.py to remove from front
59 T pop(int i);
60
61 // Remove the first occourence of x from the list.
62 void remove(T x);
63
64 void clear();
65
66 // Used in osh/string_ops.py
67 void reverse();
68
69 // Templated function
70 void sort();
71
72 // Ensure that there's space for at LEAST this many items
73 void reserve(int num_desired);
74
75 // Append a single element to this list.
76 void append(T item);
77
78 // Extend this list with multiple elements.
79 void extend(List<T>* other);
80
81 static constexpr ObjHeader obj_header() {
82 return ObjHeader::ClassFixed(field_mask(), sizeof(List<T>));
83 }
84
85 int len_; // number of entries
86 int capacity_; // max entries before resizing
87
88 // The container may be resized, so this field isn't in-line.
89 Slab<T>* slab_;
90
91 // A list has one Slab pointer which we need to follow.
92 static constexpr uint32_t field_mask() {
93 return maskbit(offsetof(List, slab_));
94 }
95
96 DISALLOW_COPY_AND_ASSIGN(List)
97
98 static_assert(sizeof(ObjHeader) % sizeof(T) == 0,
99 "ObjHeader size should be multiple of item size");
100 static constexpr int kHeaderFudge = sizeof(ObjHeader) / sizeof(T);
101
102#if 0
103 // 24-byte pool comes from very common List header, and Token
104 static constexpr int kPoolBytes1 = 24 - sizeof(ObjHeader);
105 static_assert(kPoolBytes1 % sizeof(T) == 0,
106 "An integral number of items should fit in first pool");
107 static constexpr int kNumItems1 = kPoolBytes1 / sizeof(T);
108#endif
109
110 // Matches mark_sweep_heap.h
111 static constexpr int kPoolBytes2 = 48 - sizeof(ObjHeader);
112 static_assert(kPoolBytes2 % sizeof(T) == 0,
113 "An integral number of items should fit in second pool");
114 static constexpr int kNumItems2 = kPoolBytes2 / sizeof(T);
115
116#if 0
117 static constexpr int kMinBytes2 = 128 - sizeof(ObjHeader);
118 static_assert(kMinBytes2 % sizeof(T) == 0,
119 "An integral number of items should fit");
120 static constexpr int kMinItems2 = kMinBytes2 / sizeof(T);
121#endif
122
123 // Given the number of items desired, return the number items we should
124 // reserve room for, according to our growth policy.
125 int HowManyItems(int num_desired) {
126 // Using the 24-byte pool leads to too much GC of tiny slab objects! So
127 // just use the larger 48 byte pool.
128#if 0
129 if (num_desired <= kNumItems1) { // use full cell in pool 1
130 return kNumItems1;
131 }
132#endif
133 if (num_desired <= kNumItems2) { // use full cell in pool 2
134 return kNumItems2;
135 }
136#if 0
137 if (num_desired <= kMinItems2) { // 48 -> 128, not 48 -> 64
138 return kMinItems2;
139 }
140#endif
141
142 // Make sure the total allocation is a power of 2. TODO: consider using
143 // slightly less than power of 2, to account for malloc() headers, and
144 // reduce fragmentation.
145 // Example:
146 // - ask for 11 integers
147 // - round up 11+2 == 13 up to 16 items
148 // - return 14 items
149 // - 14 integers is 56 bytes, plus 8 byte GC header => 64 byte alloc.
150 return RoundUp(num_desired + kHeaderFudge) - kHeaderFudge;
151 }
152};
153
154// "Constructors" as free functions since we can't allocate within a
155// constructor. Allocation may cause garbage collection, which interferes with
156// placement new.
157
158// This is not really necessary, only syntactic sugar.
159template <typename T>
160List<T>* NewList() {
161 return Alloc<List<T>>();
162}
163
164// Literal ['foo', 'bar']
165// This seems to allow better template argument type deduction than a
166// constructor.
167template <typename T>
168List<T>* NewList(std::initializer_list<T> init) {
169 auto self = Alloc<List<T>>();
170
171 int n = init.size();
172 self->reserve(n);
173
174 int i = 0;
175 for (auto item : init) {
176 self->set(i, item);
177 ++i;
178 }
179 self->len_ = n;
180 return self;
181}
182
183// ['foo'] * 3
184template <typename T>
185List<T>* NewList(T item, int times) {
186 auto self = Alloc<List<T>>();
187
188 self->reserve(times);
189 self->len_ = times;
190 for (int i = 0; i < times; ++i) {
191 self->set(i, item);
192 }
193 return self;
194}
195
196template <typename T>
197void List<T>::append(T item) {
198 reserve(len_ + 1);
199 slab_->items_[len_] = item;
200 ++len_;
201}
202
203template <typename T>
204int len(const List<T>* L) {
205 return L->len_;
206}
207
208template <typename T>
209List<T>* list_repeat(T item, int times);
210
211template <typename T>
212inline bool list_contains(List<T>* haystack, T needle);
213
214template <typename K, typename V>
215class Dict; // forward decl
216
217template <typename V>
218List<BigStr*>* sorted(Dict<BigStr*, V>* d);
219
220template <typename T>
221List<T>* sorted(List<T>* l);
222
223// L[begin:]
224template <typename T>
225List<T>* List<T>::slice(int begin) {
226 return slice(begin, len_);
227}
228
229// L[begin:end]
230template <typename T>
231List<T>* List<T>::slice(int begin, int end) {
232 SLICE_ADJUST(begin, end, len_);
233
234 DCHECK(0 <= begin && begin <= len_);
235 DCHECK(0 <= end && end <= len_);
236
237 int new_len = end - begin;
238 DCHECK(0 <= new_len && new_len <= len_);
239
240 List<T>* result = NewList<T>();
241 result->reserve(new_len);
242
243 // Faster than append() in a loop
244 memcpy(result->slab_->items_, slab_->items_ + begin, new_len * sizeof(T));
245 result->len_ = new_len;
246
247 return result;
248}
249
250// Ensure that there's space for a number of items
251template <typename T>
252void List<T>::reserve(int num_desired) {
253 // log("reserve capacity = %d, n = %d", capacity_, n);
254
255 // Don't do anything if there's already enough space.
256 if (capacity_ >= num_desired) {
257 return;
258 }
259
260 // Slabs should be a total of 2^N bytes. kCapacityAdjust is the number of
261 // items that the 8 byte header takes up: 1 for List<T*>, and 2 for
262 // List<int>.
263 //
264 // Example: the user reserves space for 3 integers. The minimum number of
265 // items would be 5, which is rounded up to 8. Subtract 2 again, giving 6,
266 // which leads to 8 + 6*4 = 32 byte Slab.
267
268 capacity_ = HowManyItems(num_desired);
269 auto new_slab = NewSlab<T>(capacity_);
270
271 if (len_ > 0) {
272 // log("Copying %d bytes", len_ * sizeof(T));
273 memcpy(new_slab->items_, slab_->items_, len_ * sizeof(T));
274 }
275 slab_ = new_slab;
276}
277
278// Implements L[i] = item
279template <typename T>
280void List<T>::set(int i, T item) {
281 if (i < 0) {
282 i = len_ + i;
283 }
284
285 DCHECK(i >= 0);
286 DCHECK(i < capacity_);
287
288 slab_->items_[i] = item;
289}
290
291// Implements L[i]
292template <typename T>
293T List<T>::at(int i) {
294 if (i < 0) {
295 int j = len_ + i;
296 if (j >= len_ || j < 0) {
297 throw Alloc<IndexError>();
298 }
299 return slab_->items_[j];
300 }
301
302 if (i >= len_ || i < 0) {
303 throw Alloc<IndexError>();
304 }
305 return slab_->items_[i];
306}
307
308// L.index(i) -- Python method
309template <typename T>
310int List<T>::index(T value) {
311 int element_count = len(this);
312 for (int i = 0; i < element_count; i++) {
313 if (are_equal(slab_->items_[i], value)) {
314 return i;
315 }
316 }
317 throw Alloc<ValueError>();
318}
319
320// Should we have a separate API that doesn't return it?
321// https://stackoverflow.com/questions/12600330/pop-back-return-value
322template <typename T>
323T List<T>::pop() {
324 if (len_ == 0) {
325 throw Alloc<IndexError>();
326 }
327 len_--;
328 T result = slab_->items_[len_];
329 slab_->items_[len_] = 0; // zero for GC scan
330 return result;
331}
332
333// Used in osh/word_parse.py to remove from front
334template <typename T>
335T List<T>::pop(int i) {
336 if (len_ < i) {
337 throw Alloc<IndexError>();
338 }
339
340 T result = at(i);
341 len_--;
342
343 // Shift everything by one
344 memmove(slab_->items_ + i, slab_->items_ + (i + 1), len_ * sizeof(T));
345
346 /*
347 for (int j = 0; j < len_; j++) {
348 slab_->items_[j] = slab_->items_[j+1];
349 }
350 */
351
352 slab_->items_[len_] = 0; // zero for GC scan
353 return result;
354}
355
356template <typename T>
357void List<T>::remove(T x) {
358 int idx = this->index(x);
359 this->pop(idx); // unused
360}
361
362template <typename T>
363void List<T>::clear() {
364 if (slab_) {
365 memset(slab_->items_, 0, len_ * sizeof(T)); // zero for GC scan
366 }
367 len_ = 0;
368}
369
370// Used in osh/string_ops.py
371template <typename T>
372void List<T>::reverse() {
373 for (int i = 0; i < len_ / 2; ++i) {
374 // log("swapping %d and %d", i, n-i);
375 T tmp = slab_->items_[i];
376 int j = len_ - 1 - i;
377 slab_->items_[i] = slab_->items_[j];
378 slab_->items_[j] = tmp;
379 }
380}
381
382// Extend this list with multiple elements.
383template <typename T>
384void List<T>::extend(List<T>* other) {
385 int n = other->len_;
386 int new_len = len_ + n;
387 reserve(new_len);
388
389 for (int i = 0; i < n; ++i) {
390 set(len_ + i, other->slab_->items_[i]);
391 }
392 len_ = new_len;
393}
394
395inline bool _cmp(BigStr* a, BigStr* b) {
396 return mylib::str_cmp(a, b) < 0;
397}
398
399template <typename T>
400void List<T>::sort() {
401 std::sort(slab_->items_, slab_->items_ + len_, _cmp);
402}
403
404// TODO: mycpp can just generate the constructor instead?
405// e.g. [None] * 3
406template <typename T>
407List<T>* list_repeat(T item, int times) {
408 return NewList<T>(item, times);
409}
410
411// e.g. 'a' in ['a', 'b', 'c']
412template <typename T>
413inline bool list_contains(List<T>* haystack, T needle) {
414 int n = len(haystack);
415 for (int i = 0; i < n; ++i) {
416 if (are_equal(haystack->at(i), needle)) {
417 return true;
418 }
419 }
420 return false;
421}
422
423template <typename V>
424List<BigStr*>* sorted(Dict<BigStr*, V>* d) {
425 auto keys = d->keys();
426 keys->sort();
427 return keys;
428}
429
430template <typename T>
431List<T>* sorted(List<T>* l) {
432 auto ret = list(l);
433 ret->sort();
434 return ret;
435}
436
437// list(L) copies the list
438template <typename T>
439List<T>* list(List<T>* other) {
440 auto result = NewList<T>();
441 result->extend(other);
442 return result;
443}
444
445template <class T>
446class ListIter {
447 public:
448 explicit ListIter(List<T>* L) : L_(L), i_(0) {
449 // Cheney only: L_ could be moved during iteration.
450 // gHeap.PushRoot(reinterpret_cast<RawObject**>(&L_));
451 }
452
453 ~ListIter() {
454 // gHeap.PopRoot();
455 }
456 void Next() {
457 i_++;
458 }
459 bool Done() {
460 // "unsigned size_t was a mistake"
461 return i_ >= static_cast<int>(L_->len_);
462 }
463 T Value() {
464 return L_->slab_->items_[i_];
465 }
466 T iterNext() {
467 if (Done()) {
468 throw Alloc<StopIteration>();
469 }
470 T ret = L_->slab_->items_[i_];
471 Next();
472 return ret;
473 }
474
475 // only for use with generators
476 List<T>* GetList() {
477 return L_;
478 }
479
480 private:
481 List<T>* L_;
482 int i_;
483};
484
485// list(it) returns the iterator's backing list
486template <typename T>
487List<T>* list(ListIter<T> it) {
488 return list(it.GetList());
489}
490
491// TODO: Does using pointers rather than indices make this more efficient?
492template <class T>
493class ReverseListIter {
494 public:
495 explicit ReverseListIter(List<T>* L) : L_(L), i_(L_->len_ - 1) {
496 }
497 void Next() {
498 i_--;
499 }
500 bool Done() {
501 return i_ < 0;
502 }
503 T Value() {
504 return L_->slab_->items_[i_];
505 }
506
507 private:
508 List<T>* L_;
509 int i_;
510};
511
512int max(List<int>* elems);
513
514#endif // MYCPP_GC_LIST_H