OILS / mycpp / gc_list.h View on Github | oilshell.org

524 lines, 288 significant
1#ifndef MYCPP_GC_LIST_H
2#define MYCPP_GC_LIST_H
3
4#include <string.h> // memcpy
5
6#include <algorithm> // sort() is templated
7
8#include "mycpp/common.h" // DCHECK
9#include "mycpp/comparators.h"
10#include "mycpp/gc_alloc.h" // Alloc
11#include "mycpp/gc_builtins.h" // ValueError
12#include "mycpp/gc_mops.h" // BigInt
13#include "mycpp/gc_slab.h"
14
15// GlobalList is layout-compatible with List (unit tests assert this), and it
16// can be a true C global (incurs zero startup time)
17
18template <typename T, int N>
19class GlobalList {
20 public:
21 int len_;
22 int capacity_;
23 GlobalSlab<T, N>* slab_;
24};
25
26#define GLOBAL_LIST(name, T, N, array) \
27 GcGlobal<GlobalSlab<T, N>> _slab_##name = {ObjHeader::Global(TypeTag::Slab), \
28 {.items_ = array}}; \
29 GcGlobal<GlobalList<T, N>> _list_##name = { \
30 ObjHeader::Global(TypeTag::List), \
31 {.len_ = N, .capacity_ = N, .slab_ = &_slab_##name.obj}}; \
32 List<T>* name = reinterpret_cast<List<T>*>(&_list_##name.obj);
33
34template <typename T>
35class List {
36 public:
37 List() : len_(0), capacity_(0), slab_(nullptr) {
38 }
39
40 // Implements L[i]
41 T at(int i);
42
43 // returns index of the element
44 int index(T element);
45
46 // Implements L[i] = item
47 void set(int i, T item);
48
49 // L[begin:]
50 List* slice(int begin);
51
52 // L[begin:end]
53 List* slice(int begin, int end);
54
55 // Should we have a separate API that doesn't return it?
56 // https://stackoverflow.com/questions/12600330/pop-back-return-value
57 T pop();
58
59 // Used in osh/word_parse.py to remove from front
60 T pop(int i);
61
62 // Remove the first occourence of x from the list.
63 void remove(T x);
64
65 void clear();
66
67 // Used in osh/string_ops.py
68 void reverse();
69
70 // Templated function
71 void sort();
72
73 // Ensure that there's space for at LEAST this many items
74 void reserve(int num_desired);
75
76 // Append a single element to this list.
77 void append(T item);
78
79 // Extend this list with multiple elements.
80 void extend(List<T>* other);
81
82 static constexpr ObjHeader obj_header() {
83 return ObjHeader::ClassFixed(field_mask(), sizeof(List<T>));
84 }
85
86 int len_; // number of entries
87 int capacity_; // max entries before resizing
88
89 // The container may be resized, so this field isn't in-line.
90 Slab<T>* slab_;
91
92 // A list has one Slab pointer which we need to follow.
93 static constexpr uint32_t field_mask() {
94 return maskbit(offsetof(List, slab_));
95 }
96
97 DISALLOW_COPY_AND_ASSIGN(List)
98
99 static_assert(sizeof(ObjHeader) % sizeof(T) == 0,
100 "ObjHeader size should be multiple of item size");
101 static constexpr int kHeaderFudge = sizeof(ObjHeader) / sizeof(T);
102
103#if 0
104 // 24-byte pool comes from very common List header, and Token
105 static constexpr int kPoolBytes1 = 24 - sizeof(ObjHeader);
106 static_assert(kPoolBytes1 % sizeof(T) == 0,
107 "An integral number of items should fit in first pool");
108 static constexpr int kNumItems1 = kPoolBytes1 / sizeof(T);
109#endif
110
111 // Matches mark_sweep_heap.h
112 static constexpr int kPoolBytes2 = 48 - sizeof(ObjHeader);
113 static_assert(kPoolBytes2 % sizeof(T) == 0,
114 "An integral number of items should fit in second pool");
115 static constexpr int kNumItems2 = kPoolBytes2 / sizeof(T);
116
117#if 0
118 static constexpr int kMinBytes2 = 128 - sizeof(ObjHeader);
119 static_assert(kMinBytes2 % sizeof(T) == 0,
120 "An integral number of items should fit");
121 static constexpr int kMinItems2 = kMinBytes2 / sizeof(T);
122#endif
123
124 // Given the number of items desired, return the number items we should
125 // reserve room for, according to our growth policy.
126 int HowManyItems(int num_desired) {
127 // Using the 24-byte pool leads to too much GC of tiny slab objects! So
128 // just use the larger 48 byte pool.
129#if 0
130 if (num_desired <= kNumItems1) { // use full cell in pool 1
131 return kNumItems1;
132 }
133#endif
134 if (num_desired <= kNumItems2) { // use full cell in pool 2
135 return kNumItems2;
136 }
137#if 0
138 if (num_desired <= kMinItems2) { // 48 -> 128, not 48 -> 64
139 return kMinItems2;
140 }
141#endif
142
143 // Make sure the total allocation is a power of 2. TODO: consider using
144 // slightly less than power of 2, to account for malloc() headers, and
145 // reduce fragmentation.
146 // Example:
147 // - ask for 11 integers
148 // - round up 11+2 == 13 up to 16 items
149 // - return 14 items
150 // - 14 integers is 56 bytes, plus 8 byte GC header => 64 byte alloc.
151 return RoundUp(num_desired + kHeaderFudge) - kHeaderFudge;
152 }
153};
154
155// "Constructors" as free functions since we can't allocate within a
156// constructor. Allocation may cause garbage collection, which interferes with
157// placement new.
158
159// This is not really necessary, only syntactic sugar.
160template <typename T>
161List<T>* NewList() {
162 return Alloc<List<T>>();
163}
164
165// Literal ['foo', 'bar']
166// This seems to allow better template argument type deduction than a
167// constructor.
168template <typename T>
169List<T>* NewList(std::initializer_list<T> init) {
170 auto self = Alloc<List<T>>();
171
172 int n = init.size();
173 self->reserve(n);
174
175 int i = 0;
176 for (auto item : init) {
177 self->set(i, item);
178 ++i;
179 }
180 self->len_ = n;
181 return self;
182}
183
184// ['foo'] * 3
185template <typename T>
186List<T>* NewList(T item, int times) {
187 auto self = Alloc<List<T>>();
188
189 self->reserve(times);
190 self->len_ = times;
191 for (int i = 0; i < times; ++i) {
192 self->set(i, item);
193 }
194 return self;
195}
196
197template <typename T>
198void List<T>::append(T item) {
199 reserve(len_ + 1);
200 slab_->items_[len_] = item;
201 ++len_;
202}
203
204template <typename T>
205int len(const List<T>* L) {
206 return L->len_;
207}
208
209template <typename T>
210List<T>* list_repeat(T item, int times);
211
212template <typename T>
213inline bool list_contains(List<T>* haystack, T needle);
214
215template <typename K, typename V>
216class Dict; // forward decl
217
218template <typename V>
219List<BigStr*>* sorted(Dict<BigStr*, V>* d);
220
221template <typename T>
222List<T>* sorted(List<T>* l);
223
224// L[begin:]
225template <typename T>
226List<T>* List<T>::slice(int begin) {
227 return slice(begin, len_);
228}
229
230// L[begin:end]
231template <typename T>
232List<T>* List<T>::slice(int begin, int end) {
233 SLICE_ADJUST(begin, end, len_);
234
235 DCHECK(0 <= begin && begin <= len_);
236 DCHECK(0 <= end && end <= len_);
237
238 int new_len = end - begin;
239 DCHECK(0 <= new_len && new_len <= len_);
240
241 List<T>* result = NewList<T>();
242 result->reserve(new_len);
243
244 // Faster than append() in a loop
245 memcpy(result->slab_->items_, slab_->items_ + begin, new_len * sizeof(T));
246 result->len_ = new_len;
247
248 return result;
249}
250
251// Ensure that there's space for a number of items
252template <typename T>
253void List<T>::reserve(int num_desired) {
254 // log("reserve capacity = %d, n = %d", capacity_, n);
255
256 // Don't do anything if there's already enough space.
257 if (capacity_ >= num_desired) {
258 return;
259 }
260
261 // Slabs should be a total of 2^N bytes. kCapacityAdjust is the number of
262 // items that the 8 byte header takes up: 1 for List<T*>, and 2 for
263 // List<int>.
264 //
265 // Example: the user reserves space for 3 integers. The minimum number of
266 // items would be 5, which is rounded up to 8. Subtract 2 again, giving 6,
267 // which leads to 8 + 6*4 = 32 byte Slab.
268
269 capacity_ = HowManyItems(num_desired);
270 auto new_slab = NewSlab<T>(capacity_);
271
272 if (len_ > 0) {
273 // log("Copying %d bytes", len_ * sizeof(T));
274 memcpy(new_slab->items_, slab_->items_, len_ * sizeof(T));
275 }
276 slab_ = new_slab;
277}
278
279// Implements L[i] = item
280template <typename T>
281void List<T>::set(int i, T item) {
282 if (i < 0) {
283 i = len_ + i;
284 }
285
286 DCHECK(i >= 0);
287 DCHECK(i < capacity_);
288
289 slab_->items_[i] = item;
290}
291
292// Implements L[i]
293template <typename T>
294T List<T>::at(int i) {
295 if (i < 0) {
296 int j = len_ + i;
297 if (j >= len_ || j < 0) {
298 throw Alloc<IndexError>();
299 }
300 return slab_->items_[j];
301 }
302
303 if (i >= len_ || i < 0) {
304 throw Alloc<IndexError>();
305 }
306 return slab_->items_[i];
307}
308
309// L.index(i) -- Python method
310template <typename T>
311int List<T>::index(T value) {
312 int element_count = len(this);
313 for (int i = 0; i < element_count; i++) {
314 if (items_equal(slab_->items_[i], value)) {
315 return i;
316 }
317 }
318 throw Alloc<ValueError>();
319}
320
321// Should we have a separate API that doesn't return it?
322// https://stackoverflow.com/questions/12600330/pop-back-return-value
323template <typename T>
324T List<T>::pop() {
325 if (len_ == 0) {
326 throw Alloc<IndexError>();
327 }
328 len_--;
329 T result = slab_->items_[len_];
330 slab_->items_[len_] = 0; // zero for GC scan
331 return result;
332}
333
334// Used in osh/word_parse.py to remove from front
335template <typename T>
336T List<T>::pop(int i) {
337 if (len_ < i) {
338 throw Alloc<IndexError>();
339 }
340
341 T result = at(i);
342 len_--;
343
344 // Shift everything by one
345 memmove(slab_->items_ + i, slab_->items_ + (i + 1), len_ * sizeof(T));
346
347 /*
348 for (int j = 0; j < len_; j++) {
349 slab_->items_[j] = slab_->items_[j+1];
350 }
351 */
352
353 slab_->items_[len_] = 0; // zero for GC scan
354 return result;
355}
356
357template <typename T>
358void List<T>::remove(T x) {
359 int idx = this->index(x);
360 this->pop(idx); // unused
361}
362
363template <typename T>
364void List<T>::clear() {
365 if (slab_) {
366 memset(slab_->items_, 0, len_ * sizeof(T)); // zero for GC scan
367 }
368 len_ = 0;
369}
370
371// Used in osh/string_ops.py
372template <typename T>
373void List<T>::reverse() {
374 for (int i = 0; i < len_ / 2; ++i) {
375 // log("swapping %d and %d", i, n-i);
376 T tmp = slab_->items_[i];
377 int j = len_ - 1 - i;
378 slab_->items_[i] = slab_->items_[j];
379 slab_->items_[j] = tmp;
380 }
381}
382
383// Extend this list with multiple elements.
384template <typename T>
385void List<T>::extend(List<T>* other) {
386 int n = other->len_;
387 int new_len = len_ + n;
388 reserve(new_len);
389
390 for (int i = 0; i < n; ++i) {
391 set(len_ + i, other->slab_->items_[i]);
392 }
393 len_ = new_len;
394}
395
396inline bool CompareBigStr(BigStr* a, BigStr* b) {
397 return mylib::str_cmp(a, b) < 0;
398}
399
400template <>
401inline void List<BigStr*>::sort() {
402 std::sort(slab_->items_, slab_->items_ + len_, CompareBigStr);
403}
404
405inline bool CompareBigInt(mops::BigInt a, mops::BigInt b) {
406 return a < b;
407}
408
409template <>
410inline void List<mops::BigInt>::sort() {
411 std::sort(slab_->items_, slab_->items_ + len_, CompareBigInt);
412}
413
414// TODO: mycpp can just generate the constructor instead?
415// e.g. [None] * 3
416template <typename T>
417List<T>* list_repeat(T item, int times) {
418 return NewList<T>(item, times);
419}
420
421// e.g. 'a' in ['a', 'b', 'c']
422template <typename T>
423inline bool list_contains(List<T>* haystack, T needle) {
424 int n = len(haystack);
425 for (int i = 0; i < n; ++i) {
426 if (items_equal(haystack->at(i), needle)) {
427 return true;
428 }
429 }
430 return false;
431}
432
433template <typename V>
434List<BigStr*>* sorted(Dict<BigStr*, V>* d) {
435 auto keys = d->keys();
436 keys->sort();
437 return keys;
438}
439
440template <typename T>
441List<T>* sorted(List<T>* l) {
442 auto ret = list(l);
443 ret->sort();
444 return ret;
445}
446
447// list(L) copies the list
448template <typename T>
449List<T>* list(List<T>* other) {
450 auto result = NewList<T>();
451 result->extend(other);
452 return result;
453}
454
455template <class T>
456class ListIter {
457 public:
458 explicit ListIter(List<T>* L) : L_(L), i_(0) {
459 // Cheney only: L_ could be moved during iteration.
460 // gHeap.PushRoot(reinterpret_cast<RawObject**>(&L_));
461 }
462
463 ~ListIter() {
464 // gHeap.PopRoot();
465 }
466 void Next() {
467 i_++;
468 }
469 bool Done() {
470 // "unsigned size_t was a mistake"
471 return i_ >= static_cast<int>(L_->len_);
472 }
473 T Value() {
474 return L_->slab_->items_[i_];
475 }
476 T iterNext() {
477 if (Done()) {
478 throw Alloc<StopIteration>();
479 }
480 T ret = L_->slab_->items_[i_];
481 Next();
482 return ret;
483 }
484
485 // only for use with generators
486 List<T>* GetList() {
487 return L_;
488 }
489
490 private:
491 List<T>* L_;
492 int i_;
493};
494
495// list(it) returns the iterator's backing list
496template <typename T>
497List<T>* list(ListIter<T> it) {
498 return list(it.GetList());
499}
500
501// TODO: Does using pointers rather than indices make this more efficient?
502template <class T>
503class ReverseListIter {
504 public:
505 explicit ReverseListIter(List<T>* L) : L_(L), i_(L_->len_ - 1) {
506 }
507 void Next() {
508 i_--;
509 }
510 bool Done() {
511 return i_ < 0;
512 }
513 T Value() {
514 return L_->slab_->items_[i_];
515 }
516
517 private:
518 List<T>* L_;
519 int i_;
520};
521
522int max(List<int>* elems);
523
524#endif // MYCPP_GC_LIST_H