examples

Coverage Report

Created: 2024-07-16 02:49

/home/uke/oil/mycpp/gc_dict.h
Line
Count
Source (jump to first uncovered line)
1
// Hash table based on CPython's "compact dict":
2
//
3
//   https://mail.python.org/pipermail/python-dev/2012-December/123028.html
4
//   https://code.activestate.com/recipes/578375/
5
//
6
// Main differences:
7
// - It's type-specialized in C++ -- Dict<K, V>.
8
// - It's integrated with our mark and sweep GC, using Slab<int>, Slab<K>, and
9
//   Slab<V>
10
// - We use linear probing, not the pseudo-random number generator
11
12
#ifndef MYCPP_GC_DICT_H
13
#define MYCPP_GC_DICT_H
14
15
#include "mycpp/comparators.h"
16
#include "mycpp/gc_builtins.h"
17
#include "mycpp/gc_list.h"
18
#include "mycpp/hash.h"
19
20
// Non-negative entries in index_ are array indices into keys_ and values_.
21
// There are two special negative entries:
22
23
// index_ value to say this Dict item was deleted (a tombstone).
24
const int kDeletedEntry = -1;
25
26
// index_ value to say this Dict entry is free.
27
const int kEmptyEntry = -2;
28
29
// Return value for find_kv_index(), not stored in index_.
30
const int kNotFound = -3;
31
32
// Return value for hash_and_probe(), not stored in index_.
33
const int kTooSmall = -4;
34
35
// Helper for keys() and values()
36
template <typename T>
37
List<T>* ListFromDictSlab(Slab<T>* slab, int n) {
38
  List<T>* result = Alloc<List<T>>();
39
  result->reserve(n);
40
41
  for (int i = 0; i < n; ++i) {
42
    result->append(slab->items_[i]);
43
  }
44
  return result;
45
}
46
47
// GlobalDict is layout-compatible with Dict (unit tests assert this), and it
48
// can be a true C global (incurs zero startup time)
49
50
template <typename K, typename V, int N>
51
class GlobalDict {
52
 public:
53
  int len_;
54
  int capacity_;
55
  int index_len_;
56
  GlobalSlab<int, N>* index_;
57
  GlobalSlab<K, N>* keys_;
58
  GlobalSlab<V, N>* values_;
59
};
60
61
#define GLOBAL_DICT(name, K, V, N, keys, vals)                                 \
62
  GcGlobal<GlobalSlab<K, N>> _keys_##name = {ObjHeader::Global(TypeTag::Slab), \
63
                                             {.items_ = keys}};                \
64
  GcGlobal<GlobalSlab<V, N>> _vals_##name = {ObjHeader::Global(TypeTag::Slab), \
65
                                             {.items_ = vals}};                \
66
  GcGlobal<GlobalDict<K, V, N>> _dict_##name = {                               \
67
      ObjHeader::Global(TypeTag::Dict),                                        \
68
      {.len_ = N,                                                              \
69
       .capacity_ = N,                                                         \
70
       .index_len_ = 0,                                                        \
71
       .index_ = nullptr,                                                      \
72
       .keys_ = &_keys_##name.obj,                                             \
73
       .values_ = &_vals_##name.obj},                                          \
74
  };                                                                           \
75
  Dict<K, V>* name = reinterpret_cast<Dict<K, V>*>(&_dict_##name.obj);
76
77
template <class K, class V>
78
class Dict {
79
 public:
80
  Dict()
81
      : len_(0),
82
        capacity_(0),
83
        index_len_(0),
84
        index_(nullptr),
85
        keys_(nullptr),
86
14
        values_(nullptr) {
87
14
  }
_ZN4DictIP6BigStriEC2Ev
Line
Count
Source
86
3
        values_(nullptr) {
87
3
  }
_ZN4DictIibEC2Ev
Line
Count
Source
86
9
        values_(nullptr) {
87
9
  }
Unexecuted instantiation: _ZN4DictIiiEC2Ev
_ZN4DictIP6BigStrS1_EC2Ev
Line
Count
Source
86
2
        values_(nullptr) {
87
2
  }
88
89
  Dict(std::initializer_list<K> keys, std::initializer_list<V> values)
90
      : len_(0),
91
        capacity_(0),
92
        index_len_(0),
93
        index_(nullptr),
94
        keys_(nullptr),
95
1
        values_(nullptr) {
96
1
    DCHECK(keys.size() == values.size());
97
0
    auto v = values.begin();  // This simulates a "zip" loop
98
2
    for (auto key : keys) {
99
      // note: calls reserve(), and may allocate
100
2
      this->set(key, *v);
101
2
      ++v;
102
2
    }
103
1
  }
104
105
  // Reserve enough space for at LEAST this many key-value pairs.
106
  void reserve(int num_desired);
107
108
  // d[key] in Python: raises KeyError if not found
109
  V at(K key) const;
110
111
  // d.get(key) in Python. (Can't use this if V isn't a pointer!)
112
  V get(K key) const;
113
114
  // Get a key, but return a default if not found.
115
  // expr_parse.py uses this with OTHER_BALANCE
116
  V get(K key, V default_val) const;
117
118
  // Implements d[k] = v.  May resize the dictionary.
119
  void set(K key, V val);
120
121
  void update(List<Tuple2<K, V>*>* pairs);
122
  void update(Dict<K, V>* other);
123
124
  List<K>* keys() const;
125
126
  List<V>* values() const;
127
128
  void clear();
129
130
  // Helper used by find_kv_index(), set(), mylib::dict_erase() in
131
  // gc_mylib.h
132
  // Returns either:
133
  // - the slot for an existing key, or an empty slot for a new key
134
  // - kTooSmall if the table is full
135
  int hash_and_probe(K key) const;
136
137
  // Helper used by at(), get(), dict_contains()
138
  // Given a key, returns either:
139
  // - an index into keys_ and values_
140
  // - kNotFound
141
  int find_kv_index(K key) const;
142
143
15
  static constexpr ObjHeader obj_header() {
144
15
    return ObjHeader::ClassFixed(field_mask(), sizeof(Dict));
145
15
  }
_ZN4DictIP6BigStriE10obj_headerEv
Line
Count
Source
143
4
  static constexpr ObjHeader obj_header() {
144
4
    return ObjHeader::ClassFixed(field_mask(), sizeof(Dict));
145
4
  }
_ZN4DictIibE10obj_headerEv
Line
Count
Source
143
9
  static constexpr ObjHeader obj_header() {
144
9
    return ObjHeader::ClassFixed(field_mask(), sizeof(Dict));
145
9
  }
Unexecuted instantiation: _ZN4DictIiiE10obj_headerEv
_ZN4DictIP6BigStrS1_E10obj_headerEv
Line
Count
Source
143
2
  static constexpr ObjHeader obj_header() {
144
2
    return ObjHeader::ClassFixed(field_mask(), sizeof(Dict));
145
2
  }
146
147
  int len_;        // number of entries (keys and values, almost dense)
148
  int capacity_;   // number of k/v slots
149
  int index_len_;  // number of index slots
150
151
  // These 3 slabs are resized at the same time.
152
  Slab<int>* index_;  // kEmptyEntry, kDeletedEntry, or a valid index into
153
                      // keys_ and values_
154
  Slab<K>* keys_;     // Dict<K, int>
155
  Slab<V>* values_;   // Dict<int, V>
156
157
  // A dict has 3 pointers the GC needs to follow.
158
15
  static constexpr uint32_t field_mask() {
159
15
    return maskbit(offsetof(Dict, index_)) | maskbit(offsetof(Dict, keys_)) |
160
15
           maskbit(offsetof(Dict, values_));
161
15
  }
_ZN4DictIP6BigStriE10field_maskEv
Line
Count
Source
158
4
  static constexpr uint32_t field_mask() {
159
4
    return maskbit(offsetof(Dict, index_)) | maskbit(offsetof(Dict, keys_)) |
160
4
           maskbit(offsetof(Dict, values_));
161
4
  }
_ZN4DictIibE10field_maskEv
Line
Count
Source
158
9
  static constexpr uint32_t field_mask() {
159
9
    return maskbit(offsetof(Dict, index_)) | maskbit(offsetof(Dict, keys_)) |
160
9
           maskbit(offsetof(Dict, values_));
161
9
  }
Unexecuted instantiation: _ZN4DictIiiE10field_maskEv
_ZN4DictIP6BigStrS1_E10field_maskEv
Line
Count
Source
158
2
  static constexpr uint32_t field_mask() {
159
2
    return maskbit(offsetof(Dict, index_)) | maskbit(offsetof(Dict, keys_)) |
160
2
           maskbit(offsetof(Dict, values_));
161
2
  }
162
163
  DISALLOW_COPY_AND_ASSIGN(Dict);
164
165
  // kItemSize is max of K and V size.  That is, on 64-bit machines, the RARE
166
  // Dict<int, int> is smaller than other dicts
167
  static constexpr int kItemSize = sizeof(K) > sizeof(V) ? sizeof(K)
168
                                                         : sizeof(V);
169
170
  // Matches mark_sweep_heap.h
171
  static constexpr int kPoolBytes2 = 48 - sizeof(ObjHeader);
172
  static_assert(kPoolBytes2 % kItemSize == 0,
173
                "An integral number of items should fit in second pool");
174
  static constexpr int kNumItems2 = kPoolBytes2 / kItemSize;
175
176
  static const int kHeaderFudge = sizeof(ObjHeader) / kItemSize;
177
  static_assert(sizeof(ObjHeader) % kItemSize == 0,
178
                "Slab header size should be multiple of key size");
179
180
#if 0
181
  static constexpr int kMinBytes2 = 128 - sizeof(ObjHeader);
182
  static_assert(kMinBytes2 % kItemSize == 0,
183
                "An integral number of items should fit");
184
  static constexpr int kMinItems2 = kMinBytes2 / kItemSize;
185
#endif
186
187
14
  int HowManyPairs(int num_desired) {
188
    // See gc_list.h for comments on nearly identical logic
189
190
14
    if (num_desired <= kNumItems2) {  // use full cell in pool 2
191
14
      return kNumItems2;
192
14
    }
193
#if 0
194
    if (num_desired <= kMinItems2) {  // 48 -> 128, not 48 -> 64
195
      return kMinItems2;
196
    }
197
#endif
198
0
    return RoundUp(num_desired + kHeaderFudge) - kHeaderFudge;
199
14
  }
_ZN4DictIP6BigStriE12HowManyPairsEi
Line
Count
Source
187
4
  int HowManyPairs(int num_desired) {
188
    // See gc_list.h for comments on nearly identical logic
189
190
4
    if (num_desired <= kNumItems2) {  // use full cell in pool 2
191
4
      return kNumItems2;
192
4
    }
193
#if 0
194
    if (num_desired <= kMinItems2) {  // 48 -> 128, not 48 -> 64
195
      return kMinItems2;
196
    }
197
#endif
198
0
    return RoundUp(num_desired + kHeaderFudge) - kHeaderFudge;
199
4
  }
_ZN4DictIibE12HowManyPairsEi
Line
Count
Source
187
9
  int HowManyPairs(int num_desired) {
188
    // See gc_list.h for comments on nearly identical logic
189
190
9
    if (num_desired <= kNumItems2) {  // use full cell in pool 2
191
9
      return kNumItems2;
192
9
    }
193
#if 0
194
    if (num_desired <= kMinItems2) {  // 48 -> 128, not 48 -> 64
195
      return kMinItems2;
196
    }
197
#endif
198
0
    return RoundUp(num_desired + kHeaderFudge) - kHeaderFudge;
199
9
  }
_ZN4DictIP6BigStrS1_E12HowManyPairsEi
Line
Count
Source
187
1
  int HowManyPairs(int num_desired) {
188
    // See gc_list.h for comments on nearly identical logic
189
190
1
    if (num_desired <= kNumItems2) {  // use full cell in pool 2
191
1
      return kNumItems2;
192
1
    }
193
#if 0
194
    if (num_desired <= kMinItems2) {  // 48 -> 128, not 48 -> 64
195
      return kMinItems2;
196
    }
197
#endif
198
0
    return RoundUp(num_desired + kHeaderFudge) - kHeaderFudge;
199
1
  }
200
};
201
202
template <typename K, typename V>
203
38
inline bool dict_contains(const Dict<K, V>* haystack, K needle) {
204
38
  return haystack->find_kv_index(needle) != kNotFound;
205
38
}
_Z13dict_containsIP6BigStriEbPK4DictIT_T0_ES3_
Line
Count
Source
203
1
inline bool dict_contains(const Dict<K, V>* haystack, K needle) {
204
1
  return haystack->find_kv_index(needle) != kNotFound;
205
1
}
_Z13dict_containsIibEbPK4DictIT_T0_ES1_
Line
Count
Source
203
37
inline bool dict_contains(const Dict<K, V>* haystack, K needle) {
204
37
  return haystack->find_kv_index(needle) != kNotFound;
205
37
}
206
207
template <typename K, typename V>
208
14
void Dict<K, V>::reserve(int num_desired) {
209
14
  if (capacity_ >= num_desired) {
210
0
    return;  // Don't do anything if there's already enough space.
211
0
  }
212
213
14
  int old_len = len_;
214
14
  Slab<K>* old_k = keys_;
215
14
  Slab<V>* old_v = values_;
216
217
  // Calculate the number of keys and values we should have
218
14
  capacity_ = HowManyPairs(num_desired);
219
220
  // 1) Ensure index len a power of 2, to avoid expensive modulus % operation
221
  // 2) Introduce hash table load factor.   Use capacity_+1 to simulate ceil()
222
  // div, not floor() div.
223
14
  index_len_ = RoundUp((capacity_ + 1) * 5 / 4);
224
14
  DCHECK(index_len_ > capacity_);
225
226
0
  index_ = NewSlab<int>(index_len_);
227
198
  for (int i = 0; i < index_len_; ++i) {
228
184
    index_->items_[i] = kEmptyEntry;
229
184
  }
230
231
  // These are DENSE, while index_ is sparse.
232
14
  keys_ = NewSlab<K>(capacity_);
233
14
  values_ = NewSlab<V>(capacity_);
234
235
14
  if (old_k != nullptr) {  // rehash if there were any entries
236
    // log("REHASH num_desired %d", num_desired);
237
0
    len_ = 0;
238
0
    for (int i = 0; i < old_len; ++i) {
239
0
      set(old_k->items_[i], old_v->items_[i]);
240
0
    }
241
0
  }
242
14
}
_ZN4DictIP6BigStriE7reserveEi
Line
Count
Source
208
4
void Dict<K, V>::reserve(int num_desired) {
209
4
  if (capacity_ >= num_desired) {
210
0
    return;  // Don't do anything if there's already enough space.
211
0
  }
212
213
4
  int old_len = len_;
214
4
  Slab<K>* old_k = keys_;
215
4
  Slab<V>* old_v = values_;
216
217
  // Calculate the number of keys and values we should have
218
4
  capacity_ = HowManyPairs(num_desired);
219
220
  // 1) Ensure index len a power of 2, to avoid expensive modulus % operation
221
  // 2) Introduce hash table load factor.   Use capacity_+1 to simulate ceil()
222
  // div, not floor() div.
223
4
  index_len_ = RoundUp((capacity_ + 1) * 5 / 4);
224
4
  DCHECK(index_len_ > capacity_);
225
226
0
  index_ = NewSlab<int>(index_len_);
227
36
  for (int i = 0; i < index_len_; ++i) {
228
32
    index_->items_[i] = kEmptyEntry;
229
32
  }
230
231
  // These are DENSE, while index_ is sparse.
232
4
  keys_ = NewSlab<K>(capacity_);
233
4
  values_ = NewSlab<V>(capacity_);
234
235
4
  if (old_k != nullptr) {  // rehash if there were any entries
236
    // log("REHASH num_desired %d", num_desired);
237
0
    len_ = 0;
238
0
    for (int i = 0; i < old_len; ++i) {
239
0
      set(old_k->items_[i], old_v->items_[i]);
240
0
    }
241
0
  }
242
4
}
_ZN4DictIibE7reserveEi
Line
Count
Source
208
9
void Dict<K, V>::reserve(int num_desired) {
209
9
  if (capacity_ >= num_desired) {
210
0
    return;  // Don't do anything if there's already enough space.
211
0
  }
212
213
9
  int old_len = len_;
214
9
  Slab<K>* old_k = keys_;
215
9
  Slab<V>* old_v = values_;
216
217
  // Calculate the number of keys and values we should have
218
9
  capacity_ = HowManyPairs(num_desired);
219
220
  // 1) Ensure index len a power of 2, to avoid expensive modulus % operation
221
  // 2) Introduce hash table load factor.   Use capacity_+1 to simulate ceil()
222
  // div, not floor() div.
223
9
  index_len_ = RoundUp((capacity_ + 1) * 5 / 4);
224
9
  DCHECK(index_len_ > capacity_);
225
226
0
  index_ = NewSlab<int>(index_len_);
227
153
  for (int i = 0; i < index_len_; ++i) {
228
144
    index_->items_[i] = kEmptyEntry;
229
144
  }
230
231
  // These are DENSE, while index_ is sparse.
232
9
  keys_ = NewSlab<K>(capacity_);
233
9
  values_ = NewSlab<V>(capacity_);
234
235
9
  if (old_k != nullptr) {  // rehash if there were any entries
236
    // log("REHASH num_desired %d", num_desired);
237
0
    len_ = 0;
238
0
    for (int i = 0; i < old_len; ++i) {
239
0
      set(old_k->items_[i], old_v->items_[i]);
240
0
    }
241
0
  }
242
9
}
_ZN4DictIP6BigStrS1_E7reserveEi
Line
Count
Source
208
1
void Dict<K, V>::reserve(int num_desired) {
209
1
  if (capacity_ >= num_desired) {
210
0
    return;  // Don't do anything if there's already enough space.
211
0
  }
212
213
1
  int old_len = len_;
214
1
  Slab<K>* old_k = keys_;
215
1
  Slab<V>* old_v = values_;
216
217
  // Calculate the number of keys and values we should have
218
1
  capacity_ = HowManyPairs(num_desired);
219
220
  // 1) Ensure index len a power of 2, to avoid expensive modulus % operation
221
  // 2) Introduce hash table load factor.   Use capacity_+1 to simulate ceil()
222
  // div, not floor() div.
223
1
  index_len_ = RoundUp((capacity_ + 1) * 5 / 4);
224
1
  DCHECK(index_len_ > capacity_);
225
226
0
  index_ = NewSlab<int>(index_len_);
227
9
  for (int i = 0; i < index_len_; ++i) {
228
8
    index_->items_[i] = kEmptyEntry;
229
8
  }
230
231
  // These are DENSE, while index_ is sparse.
232
1
  keys_ = NewSlab<K>(capacity_);
233
1
  values_ = NewSlab<V>(capacity_);
234
235
1
  if (old_k != nullptr) {  // rehash if there were any entries
236
    // log("REHASH num_desired %d", num_desired);
237
0
    len_ = 0;
238
0
    for (int i = 0; i < old_len; ++i) {
239
0
      set(old_k->items_[i], old_v->items_[i]);
240
0
    }
241
0
  }
242
1
}
243
244
template <typename K, typename V>
245
8
V Dict<K, V>::at(K key) const {
246
8
  int kv_index = find_kv_index(key);
247
8
  if (kv_index == kNotFound) {
248
0
    throw Alloc<KeyError>();
249
8
  } else {
250
8
    return values_->items_[kv_index];
251
8
  }
252
8
}
_ZNK4DictIP6BigStriE2atES1_
Line
Count
Source
245
5
V Dict<K, V>::at(K key) const {
246
5
  int kv_index = find_kv_index(key);
247
5
  if (kv_index == kNotFound) {
248
0
    throw Alloc<KeyError>();
249
5
  } else {
250
5
    return values_->items_[kv_index];
251
5
  }
252
5
}
_ZNK4DictIiP6BigStrE2atEi
Line
Count
Source
245
1
V Dict<K, V>::at(K key) const {
246
1
  int kv_index = find_kv_index(key);
247
1
  if (kv_index == kNotFound) {
248
0
    throw Alloc<KeyError>();
249
1
  } else {
250
1
    return values_->items_[kv_index];
251
1
  }
252
1
}
_ZNK4DictIP6BigStrS1_E2atES1_
Line
Count
Source
245
2
V Dict<K, V>::at(K key) const {
246
2
  int kv_index = find_kv_index(key);
247
2
  if (kv_index == kNotFound) {
248
0
    throw Alloc<KeyError>();
249
2
  } else {
250
2
    return values_->items_[kv_index];
251
2
  }
252
2
}
253
254
template <typename K, typename V>
255
V Dict<K, V>::get(K key) const {
256
  int kv_index = find_kv_index(key);
257
  if (kv_index == kNotFound) {
258
    return nullptr;
259
  } else {
260
    return values_->items_[kv_index];
261
  }
262
}
263
264
template <typename K, typename V>
265
V Dict<K, V>::get(K key, V default_val) const {
266
  int kv_index = find_kv_index(key);
267
  if (kv_index == kNotFound) {
268
    return default_val;
269
  } else {
270
    return values_->items_[kv_index];
271
  }
272
}
273
274
template <typename K, typename V>
275
List<K>* Dict<K, V>::keys() const {
276
  return ListFromDictSlab<K>(keys_, len_);
277
}
278
279
template <typename K, typename V>
280
List<V>* Dict<K, V>::values() const {
281
  return ListFromDictSlab<V>(values_, len_);
282
}
283
284
template <typename K, typename V>
285
void Dict<K, V>::clear() {
286
  // Maintain invariant
287
  for (int i = 0; i < index_len_; ++i) {
288
    index_->items_[i] = kEmptyEntry;
289
  }
290
291
  if (keys_) {
292
    memset(keys_->items_, 0, len_ * sizeof(K));  // zero for GC scan
293
  }
294
  if (values_) {
295
    memset(values_->items_, 0, len_ * sizeof(V));  // zero for GC scan
296
  }
297
  len_ = 0;
298
}
299
300
// TODO:
301
// - Special case to intern BigStr* when it's hashed?  How?
302
//   - Should we have wrappers like:
303
//   - V GetAndIntern<V>(D, &string_key)
304
//   - SetAndIntern<V>(D, &string_key, value)
305
//   This will enable duplicate copies of the string to be garbage collected
306
template <typename K, typename V>
307
96
int Dict<K, V>::hash_and_probe(K key) const {
308
96
  if (capacity_ == 0) {
309
14
    return kTooSmall;
310
14
  }
311
312
  // Hash the key onto a slot in the index. If the first slot is occupied,
313
  // probe until an empty one is found.
314
82
  unsigned h = hash_key(key);
315
  // faster % using & -- assuming index_len_ is power of 2
316
82
  int init_bucket = h & (index_len_ - 1);
317
318
  // If we see a tombstone along the probing path, stash it.
319
82
  int open_slot = -1;
320
321
98
  for (int i = 0; i < index_len_; ++i) {
322
    // Start at init_bucket and wrap araound
323
324
    // faster % using & -- assuming index_len_ is power of 2
325
98
    int slot = (i + init_bucket) & (index_len_ - 1);
326
327
98
    int kv_index = index_->items_[slot];
328
98
    DCHECK(kv_index < len_);
329
    // Optimistically this is the common case once the table has been populated.
330
98
    if (kv_index >= 0) {
331
23
      unsigned h2 = hash_key(keys_->items_[kv_index]);
332
23
      if (h == h2 && keys_equal(keys_->items_[kv_index], key)) {
333
7
        return slot;
334
7
      }
335
23
    }
336
337
91
    if (kv_index == kEmptyEntry) {
338
75
      if (open_slot != -1) {
339
0
        slot = open_slot;
340
0
      }
341
      // If there isn't room in the entry arrays, tell the caller to resize.
342
75
      return len_ < capacity_ ? slot : kTooSmall;
343
75
    }
344
345
    // Tombstone or collided keys unequal. Keep scanning.
346
16
    DCHECK(kv_index >= 0 || kv_index == kDeletedEntry);
347
16
    if (kv_index == kDeletedEntry && open_slot == -1) {
348
      // NOTE: We only record the open slot here. We DON'T return it. If we're
349
      // looking for a key that was writen before this tombstone was written to
350
      // the index we should continue probing until we get to that key. If we
351
      // get to an empty index slot or the end of the index then we know we are
352
      // dealing with a new key and can safely replace the tombstone without
353
      // disrupting any existing keys.
354
0
      open_slot = slot;
355
0
    }
356
16
  }
357
358
0
  if (open_slot != -1) {
359
0
    return len_ < capacity_ ? open_slot : kTooSmall;
360
0
  }
361
362
0
  return kTooSmall;
363
0
}
_ZNK4DictIP6BigStriE14hash_and_probeES1_
Line
Count
Source
307
19
int Dict<K, V>::hash_and_probe(K key) const {
308
19
  if (capacity_ == 0) {
309
4
    return kTooSmall;
310
4
  }
311
312
  // Hash the key onto a slot in the index. If the first slot is occupied,
313
  // probe until an empty one is found.
314
15
  unsigned h = hash_key(key);
315
  // faster % using & -- assuming index_len_ is power of 2
316
15
  int init_bucket = h & (index_len_ - 1);
317
318
  // If we see a tombstone along the probing path, stash it.
319
15
  int open_slot = -1;
320
321
21
  for (int i = 0; i < index_len_; ++i) {
322
    // Start at init_bucket and wrap araound
323
324
    // faster % using & -- assuming index_len_ is power of 2
325
21
    int slot = (i + init_bucket) & (index_len_ - 1);
326
327
21
    int kv_index = index_->items_[slot];
328
21
    DCHECK(kv_index < len_);
329
    // Optimistically this is the common case once the table has been populated.
330
21
    if (kv_index >= 0) {
331
12
      unsigned h2 = hash_key(keys_->items_[kv_index]);
332
12
      if (h == h2 && keys_equal(keys_->items_[kv_index], key)) {
333
6
        return slot;
334
6
      }
335
12
    }
336
337
15
    if (kv_index == kEmptyEntry) {
338
9
      if (open_slot != -1) {
339
0
        slot = open_slot;
340
0
      }
341
      // If there isn't room in the entry arrays, tell the caller to resize.
342
9
      return len_ < capacity_ ? slot : kTooSmall;
343
9
    }
344
345
    // Tombstone or collided keys unequal. Keep scanning.
346
6
    DCHECK(kv_index >= 0 || kv_index == kDeletedEntry);
347
6
    if (kv_index == kDeletedEntry && open_slot == -1) {
348
      // NOTE: We only record the open slot here. We DON'T return it. If we're
349
      // looking for a key that was writen before this tombstone was written to
350
      // the index we should continue probing until we get to that key. If we
351
      // get to an empty index slot or the end of the index then we know we are
352
      // dealing with a new key and can safely replace the tombstone without
353
      // disrupting any existing keys.
354
0
      open_slot = slot;
355
0
    }
356
6
  }
357
358
0
  if (open_slot != -1) {
359
0
    return len_ < capacity_ ? open_slot : kTooSmall;
360
0
  }
361
362
0
  return kTooSmall;
363
0
}
Unexecuted instantiation: _ZNK4DictIiP6BigStrE14hash_and_probeEi
_ZNK4DictIP6BigStrS1_E14hash_and_probeES1_
Line
Count
Source
307
3
int Dict<K, V>::hash_and_probe(K key) const {
308
3
  if (capacity_ == 0) {
309
1
    return kTooSmall;
310
1
  }
311
312
  // Hash the key onto a slot in the index. If the first slot is occupied,
313
  // probe until an empty one is found.
314
2
  unsigned h = hash_key(key);
315
  // faster % using & -- assuming index_len_ is power of 2
316
2
  int init_bucket = h & (index_len_ - 1);
317
318
  // If we see a tombstone along the probing path, stash it.
319
2
  int open_slot = -1;
320
321
2
  for (int i = 0; i < index_len_; ++i) {
322
    // Start at init_bucket and wrap araound
323
324
    // faster % using & -- assuming index_len_ is power of 2
325
2
    int slot = (i + init_bucket) & (index_len_ - 1);
326
327
2
    int kv_index = index_->items_[slot];
328
2
    DCHECK(kv_index < len_);
329
    // Optimistically this is the common case once the table has been populated.
330
2
    if (kv_index >= 0) {
331
1
      unsigned h2 = hash_key(keys_->items_[kv_index]);
332
1
      if (h == h2 && keys_equal(keys_->items_[kv_index], key)) {
333
1
        return slot;
334
1
      }
335
1
    }
336
337
1
    if (kv_index == kEmptyEntry) {
338
1
      if (open_slot != -1) {
339
0
        slot = open_slot;
340
0
      }
341
      // If there isn't room in the entry arrays, tell the caller to resize.
342
1
      return len_ < capacity_ ? slot : kTooSmall;
343
1
    }
344
345
    // Tombstone or collided keys unequal. Keep scanning.
346
0
    DCHECK(kv_index >= 0 || kv_index == kDeletedEntry);
347
0
    if (kv_index == kDeletedEntry && open_slot == -1) {
348
      // NOTE: We only record the open slot here. We DON'T return it. If we're
349
      // looking for a key that was writen before this tombstone was written to
350
      // the index we should continue probing until we get to that key. If we
351
      // get to an empty index slot or the end of the index then we know we are
352
      // dealing with a new key and can safely replace the tombstone without
353
      // disrupting any existing keys.
354
0
      open_slot = slot;
355
0
    }
356
0
  }
357
358
0
  if (open_slot != -1) {
359
0
    return len_ < capacity_ ? open_slot : kTooSmall;
360
0
  }
361
362
0
  return kTooSmall;
363
0
}
_ZNK4DictIibE14hash_and_probeEi
Line
Count
Source
307
74
int Dict<K, V>::hash_and_probe(K key) const {
308
74
  if (capacity_ == 0) {
309
9
    return kTooSmall;
310
9
  }
311
312
  // Hash the key onto a slot in the index. If the first slot is occupied,
313
  // probe until an empty one is found.
314
65
  unsigned h = hash_key(key);
315
  // faster % using & -- assuming index_len_ is power of 2
316
65
  int init_bucket = h & (index_len_ - 1);
317
318
  // If we see a tombstone along the probing path, stash it.
319
65
  int open_slot = -1;
320
321
75
  for (int i = 0; i < index_len_; ++i) {
322
    // Start at init_bucket and wrap araound
323
324
    // faster % using & -- assuming index_len_ is power of 2
325
75
    int slot = (i + init_bucket) & (index_len_ - 1);
326
327
75
    int kv_index = index_->items_[slot];
328
75
    DCHECK(kv_index < len_);
329
    // Optimistically this is the common case once the table has been populated.
330
75
    if (kv_index >= 0) {
331
10
      unsigned h2 = hash_key(keys_->items_[kv_index]);
332
10
      if (h == h2 && keys_equal(keys_->items_[kv_index], key)) {
333
0
        return slot;
334
0
      }
335
10
    }
336
337
75
    if (kv_index == kEmptyEntry) {
338
65
      if (open_slot != -1) {
339
0
        slot = open_slot;
340
0
      }
341
      // If there isn't room in the entry arrays, tell the caller to resize.
342
65
      return len_ < capacity_ ? slot : kTooSmall;
343
65
    }
344
345
    // Tombstone or collided keys unequal. Keep scanning.
346
10
    DCHECK(kv_index >= 0 || kv_index == kDeletedEntry);
347
10
    if (kv_index == kDeletedEntry && open_slot == -1) {
348
      // NOTE: We only record the open slot here. We DON'T return it. If we're
349
      // looking for a key that was writen before this tombstone was written to
350
      // the index we should continue probing until we get to that key. If we
351
      // get to an empty index slot or the end of the index then we know we are
352
      // dealing with a new key and can safely replace the tombstone without
353
      // disrupting any existing keys.
354
0
      open_slot = slot;
355
0
    }
356
10
  }
357
358
0
  if (open_slot != -1) {
359
0
    return len_ < capacity_ ? open_slot : kTooSmall;
360
0
  }
361
362
0
  return kTooSmall;
363
0
}
364
365
template <typename K, typename V>
366
46
int Dict<K, V>::find_kv_index(K key) const {
367
46
  if (index_ != nullptr) {  // Common case
368
34
    int pos = hash_and_probe(key);
369
34
    if (pos == kTooSmall) {
370
0
      return kNotFound;
371
0
    }
372
34
    DCHECK(pos >= 0);
373
0
    int kv_index = index_->items_[pos];
374
34
    if (kv_index < 0) {
375
28
      return kNotFound;
376
28
    }
377
6
    return kv_index;
378
34
  }
379
380
  // Linear search on GlobalDict instances.
381
  // TODO: Should we populate and compare their hash values?
382
12
  for (int i = 0; i < len_; ++i) {
383
3
    if (keys_equal(keys_->items_[i], key)) {
384
3
      return i;
385
3
    }
386
3
  }
387
388
9
  return kNotFound;
389
12
}
_ZNK4DictIP6BigStriE13find_kv_indexES1_
Line
Count
Source
366
6
int Dict<K, V>::find_kv_index(K key) const {
367
6
  if (index_ != nullptr) {  // Common case
368
5
    int pos = hash_and_probe(key);
369
5
    if (pos == kTooSmall) {
370
0
      return kNotFound;
371
0
    }
372
5
    DCHECK(pos >= 0);
373
0
    int kv_index = index_->items_[pos];
374
5
    if (kv_index < 0) {
375
0
      return kNotFound;
376
0
    }
377
5
    return kv_index;
378
5
  }
379
380
  // Linear search on GlobalDict instances.
381
  // TODO: Should we populate and compare their hash values?
382
1
  for (int i = 0; i < len_; ++i) {
383
1
    if (keys_equal(keys_->items_[i], key)) {
384
1
      return i;
385
1
    }
386
1
  }
387
388
0
  return kNotFound;
389
1
}
_ZNK4DictIiP6BigStrE13find_kv_indexEi
Line
Count
Source
366
1
int Dict<K, V>::find_kv_index(K key) const {
367
1
  if (index_ != nullptr) {  // Common case
368
0
    int pos = hash_and_probe(key);
369
0
    if (pos == kTooSmall) {
370
0
      return kNotFound;
371
0
    }
372
0
    DCHECK(pos >= 0);
373
0
    int kv_index = index_->items_[pos];
374
0
    if (kv_index < 0) {
375
0
      return kNotFound;
376
0
    }
377
0
    return kv_index;
378
0
  }
379
380
  // Linear search on GlobalDict instances.
381
  // TODO: Should we populate and compare their hash values?
382
1
  for (int i = 0; i < len_; ++i) {
383
1
    if (keys_equal(keys_->items_[i], key)) {
384
1
      return i;
385
1
    }
386
1
  }
387
388
0
  return kNotFound;
389
1
}
_ZNK4DictIP6BigStrS1_E13find_kv_indexES1_
Line
Count
Source
366
2
int Dict<K, V>::find_kv_index(K key) const {
367
2
  if (index_ != nullptr) {  // Common case
368
1
    int pos = hash_and_probe(key);
369
1
    if (pos == kTooSmall) {
370
0
      return kNotFound;
371
0
    }
372
1
    DCHECK(pos >= 0);
373
0
    int kv_index = index_->items_[pos];
374
1
    if (kv_index < 0) {
375
0
      return kNotFound;
376
0
    }
377
1
    return kv_index;
378
1
  }
379
380
  // Linear search on GlobalDict instances.
381
  // TODO: Should we populate and compare their hash values?
382
1
  for (int i = 0; i < len_; ++i) {
383
1
    if (keys_equal(keys_->items_[i], key)) {
384
1
      return i;
385
1
    }
386
1
  }
387
388
0
  return kNotFound;
389
1
}
_ZNK4DictIibE13find_kv_indexEi
Line
Count
Source
366
37
int Dict<K, V>::find_kv_index(K key) const {
367
37
  if (index_ != nullptr) {  // Common case
368
28
    int pos = hash_and_probe(key);
369
28
    if (pos == kTooSmall) {
370
0
      return kNotFound;
371
0
    }
372
28
    DCHECK(pos >= 0);
373
0
    int kv_index = index_->items_[pos];
374
28
    if (kv_index < 0) {
375
28
      return kNotFound;
376
28
    }
377
0
    return kv_index;
378
28
  }
379
380
  // Linear search on GlobalDict instances.
381
  // TODO: Should we populate and compare their hash values?
382
9
  for (int i = 0; i < len_; ++i) {
383
0
    if (keys_equal(keys_->items_[i], key)) {
384
0
      return i;
385
0
    }
386
0
  }
387
388
9
  return kNotFound;
389
9
}
390
391
template <typename K, typename V>
392
48
void Dict<K, V>::set(K key, V val) {
393
48
  DCHECK(obj_header().heap_tag != HeapTag::Global);
394
0
  int pos = hash_and_probe(key);
395
48
  if (pos == kTooSmall) {
396
14
    reserve(len_ + 1);
397
14
    pos = hash_and_probe(key);
398
14
  }
399
48
  DCHECK(pos >= 0);
400
401
0
  int kv_index = index_->items_[pos];
402
48
  DCHECK(kv_index < len_);
403
48
  if (kv_index < 0) {
404
    // Write new entries to the end of the k/v arrays. This allows us to recall
405
    // insertion order until the first deletion.
406
47
    keys_->items_[len_] = key;
407
47
    values_->items_[len_] = val;
408
47
    index_->items_[pos] = len_;
409
47
    len_++;
410
47
    DCHECK(len_ <= capacity_);
411
47
  } else {
412
1
    values_->items_[kv_index] = val;
413
1
  }
414
48
}
_ZN4DictIP6BigStriE3setES1_i
Line
Count
Source
392
10
void Dict<K, V>::set(K key, V val) {
393
10
  DCHECK(obj_header().heap_tag != HeapTag::Global);
394
0
  int pos = hash_and_probe(key);
395
10
  if (pos == kTooSmall) {
396
4
    reserve(len_ + 1);
397
4
    pos = hash_and_probe(key);
398
4
  }
399
10
  DCHECK(pos >= 0);
400
401
0
  int kv_index = index_->items_[pos];
402
10
  DCHECK(kv_index < len_);
403
10
  if (kv_index < 0) {
404
    // Write new entries to the end of the k/v arrays. This allows us to recall
405
    // insertion order until the first deletion.
406
9
    keys_->items_[len_] = key;
407
9
    values_->items_[len_] = val;
408
9
    index_->items_[pos] = len_;
409
9
    len_++;
410
9
    DCHECK(len_ <= capacity_);
411
9
  } else {
412
1
    values_->items_[kv_index] = val;
413
1
  }
414
10
}
_ZN4DictIibE3setEib
Line
Count
Source
392
37
void Dict<K, V>::set(K key, V val) {
393
37
  DCHECK(obj_header().heap_tag != HeapTag::Global);
394
0
  int pos = hash_and_probe(key);
395
37
  if (pos == kTooSmall) {
396
9
    reserve(len_ + 1);
397
9
    pos = hash_and_probe(key);
398
9
  }
399
37
  DCHECK(pos >= 0);
400
401
0
  int kv_index = index_->items_[pos];
402
37
  DCHECK(kv_index < len_);
403
37
  if (kv_index < 0) {
404
    // Write new entries to the end of the k/v arrays. This allows us to recall
405
    // insertion order until the first deletion.
406
37
    keys_->items_[len_] = key;
407
37
    values_->items_[len_] = val;
408
37
    index_->items_[pos] = len_;
409
37
    len_++;
410
37
    DCHECK(len_ <= capacity_);
411
37
  } else {
412
0
    values_->items_[kv_index] = val;
413
0
  }
414
37
}
_ZN4DictIP6BigStrS1_E3setES1_S1_
Line
Count
Source
392
1
void Dict<K, V>::set(K key, V val) {
393
1
  DCHECK(obj_header().heap_tag != HeapTag::Global);
394
0
  int pos = hash_and_probe(key);
395
1
  if (pos == kTooSmall) {
396
1
    reserve(len_ + 1);
397
1
    pos = hash_and_probe(key);
398
1
  }
399
1
  DCHECK(pos >= 0);
400
401
0
  int kv_index = index_->items_[pos];
402
1
  DCHECK(kv_index < len_);
403
1
  if (kv_index < 0) {
404
    // Write new entries to the end of the k/v arrays. This allows us to recall
405
    // insertion order until the first deletion.
406
1
    keys_->items_[len_] = key;
407
1
    values_->items_[len_] = val;
408
1
    index_->items_[pos] = len_;
409
1
    len_++;
410
1
    DCHECK(len_ <= capacity_);
411
1
  } else {
412
0
    values_->items_[kv_index] = val;
413
0
  }
414
1
}
415
416
template <typename K, typename V>
417
5
inline int len(const Dict<K, V>* d) {
418
5
  return d->len_;
419
5
}
_Z3lenIP6BigStrS1_EiPK4DictIT_T0_E
Line
Count
Source
417
3
inline int len(const Dict<K, V>* d) {
418
3
  return d->len_;
419
3
}
_Z3lenIP6BigStriEiPK4DictIT_T0_E
Line
Count
Source
417
1
inline int len(const Dict<K, V>* d) {
418
1
  return d->len_;
419
1
}
_Z3lenIiP6BigStrEiPK4DictIT_T0_E
Line
Count
Source
417
1
inline int len(const Dict<K, V>* d) {
418
1
  return d->len_;
419
1
}
420
421
template <class K, class V>
422
class DictIter {
423
 public:
424
3
  explicit DictIter(Dict<K, V>* D) : D_(D), pos_(0) {
425
3
  }
_ZN8DictIterIP6BigStriEC2EP4DictIS1_iE
Line
Count
Source
424
3
  explicit DictIter(Dict<K, V>* D) : D_(D), pos_(0) {
425
3
  }
Unexecuted instantiation: _ZN8DictIterIP6BigStrPN10value_asdl7value_tEEC2EP4DictIS1_S4_E
Unexecuted instantiation: _ZN8DictIterIP6BigStrPN12runtime_asdl7HayNodeEEC2EP4DictIS1_S4_E
Unexecuted instantiation: _ZN8DictIterIlP6BigStrEC2EP4DictIlS1_E
Unexecuted instantiation: _ZN8DictIterIP6BigStrS1_EC2EP4DictIS1_S1_E
Unexecuted instantiation: _ZN8DictIterIP6BigStrPN12runtime_asdl4CellEEC2EP4DictIS1_S4_E
426
9
  void Next() {
427
9
    pos_++;
428
9
  }
_ZN8DictIterIP6BigStriE4NextEv
Line
Count
Source
426
9
  void Next() {
427
9
    pos_++;
428
9
  }
Unexecuted instantiation: _ZN8DictIterIP6BigStrPN10value_asdl7value_tEE4NextEv
Unexecuted instantiation: _ZN8DictIterIP6BigStrPN12runtime_asdl7HayNodeEE4NextEv
Unexecuted instantiation: _ZN8DictIterIlP6BigStrE4NextEv
Unexecuted instantiation: _ZN8DictIterIP6BigStrS1_E4NextEv
Unexecuted instantiation: _ZN8DictIterIP6BigStrPN12runtime_asdl4CellEE4NextEv
429
12
  bool Done() {
430
12
    return pos_ == D_->len_;
431
12
  }
_ZN8DictIterIP6BigStriE4DoneEv
Line
Count
Source
429
12
  bool Done() {
430
12
    return pos_ == D_->len_;
431
12
  }
Unexecuted instantiation: _ZN8DictIterIP6BigStrPN10value_asdl7value_tEE4DoneEv
Unexecuted instantiation: _ZN8DictIterIP6BigStrPN12runtime_asdl7HayNodeEE4DoneEv
Unexecuted instantiation: _ZN8DictIterIlP6BigStrE4DoneEv
Unexecuted instantiation: _ZN8DictIterIP6BigStrS1_E4DoneEv
Unexecuted instantiation: _ZN8DictIterIP6BigStrPN12runtime_asdl4CellEE4DoneEv
432
9
  K Key() {
433
9
    return D_->keys_->items_[pos_];
434
9
  }
_ZN8DictIterIP6BigStriE3KeyEv
Line
Count
Source
432
9
  K Key() {
433
9
    return D_->keys_->items_[pos_];
434
9
  }
Unexecuted instantiation: _ZN8DictIterIP6BigStrPN10value_asdl7value_tEE3KeyEv
Unexecuted instantiation: _ZN8DictIterIP6BigStrPN12runtime_asdl7HayNodeEE3KeyEv
Unexecuted instantiation: _ZN8DictIterIlP6BigStrE3KeyEv
Unexecuted instantiation: _ZN8DictIterIP6BigStrS1_E3KeyEv
Unexecuted instantiation: _ZN8DictIterIP6BigStrPN12runtime_asdl4CellEE3KeyEv
435
6
  V Value() {
436
6
    return D_->values_->items_[pos_];
437
6
  }
_ZN8DictIterIP6BigStriE5ValueEv
Line
Count
Source
435
6
  V Value() {
436
6
    return D_->values_->items_[pos_];
437
6
  }
Unexecuted instantiation: _ZN8DictIterIP6BigStrPN10value_asdl7value_tEE5ValueEv
Unexecuted instantiation: _ZN8DictIterIP6BigStrPN12runtime_asdl7HayNodeEE5ValueEv
Unexecuted instantiation: _ZN8DictIterIlP6BigStrE5ValueEv
Unexecuted instantiation: _ZN8DictIterIP6BigStrS1_E5ValueEv
Unexecuted instantiation: _ZN8DictIterIP6BigStrPN12runtime_asdl4CellEE5ValueEv
438
439
 private:
440
  const Dict<K, V>* D_;
441
  int pos_;
442
};
443
444
// dict(mylist) converts a list of (k, v) tuples into a dict
445
template <typename K, typename V>
446
Dict<K, V>* dict(List<Tuple2<K, V>*>* l) {
447
  auto ret = Alloc<Dict<K, V>>();
448
  ret->reserve(len(l));
449
  for (ListIter<Tuple2<K, V>*> it(l); !it.Done(); it.Next()) {
450
    ret->set(it.Value()->at0(), it.Value()->at1());
451
  }
452
  return ret;
453
}
454
455
template <class K, class V>
456
void Dict<K, V>::update(List<Tuple2<K, V>*>* pairs) {
457
  reserve(len_ + len(pairs));
458
  for (ListIter<Tuple2<K, V>*> it(pairs); !it.Done(); it.Next()) {
459
    set(it.Value()->at0(), it.Value()->at1());
460
  }
461
}
462
463
template <class K, class V>
464
void Dict<K, V>::update(Dict<K, V>* other) {
465
  reserve(len_ + len(other));
466
  for (DictIter<K, V> it(other); !it.Done(); it.Next()) {
467
    set(it.Key(), it.Value());
468
  }
469
}
470
471
#endif  // MYCPP_GC_DICT_H