OILS / vendor / souffle / datastructure / LambdaBTree.h View on Github | oilshell.org

620 lines, 276 significant
1/*
2 * Souffle - A Datalog Compiler
3 * Copyright (c) 2018, Souffle Developers
4 * Licensed under the Universal Permissive License v 1.0 as shown at:
5 * - https://opensource.org/licenses/UPL
6 * - <souffle root>/licenses/SOUFFLE-UPL.txt
7 */
8
9/************************************************************************
10 *
11 * @file LambdaBTree.h
12 *
13 * An implementation of a generic B-tree data structure including
14 * interfaces for utilizing instances as set or multiset containers.
15 * Allows the user to provide a function to execute on successful insert
16 * Be careful using this, it currently expects a pair as the key.
17 *
18 ***********************************************************************/
19
20#pragma once
21
22#include "souffle/datastructure/BTree.h"
23#include "souffle/utility/ContainerUtil.h"
24#include "souffle/utility/ParallelUtil.h"
25#include <atomic>
26#include <cassert>
27#include <typeinfo>
28#include <vector>
29
30namespace souffle {
31
32namespace detail {
33/**
34 * The actual implementation of a b-tree data structure.
35 *
36 * @tparam Key .. the element type to be stored in this tree
37 * @tparam Comparator .. a class defining an order on the stored elements
38 * @tparam Allocator .. utilized for allocating memory for required nodes
39 * @tparam blockSize .. determines the number of bytes/block utilized by leaf nodes
40 * @tparam SearchStrategy .. enables switching between linear, binary or any other search strategy
41 * @tparam isSet .. true = set, false = multiset
42 * @tparam Functor .. a std::function that is called on successful (new) insert
43 */
44template <typename Key, typename Comparator,
45 typename Allocator, // is ignored so far - TODO: add support
46 unsigned blockSize, typename SearchStrategy, bool isSet, typename Functor,
47 typename WeakComparator = Comparator, typename Updater = detail::updater<Key>>
48class LambdaBTree : public btree<Key, Comparator, Allocator, blockSize, SearchStrategy, isSet, WeakComparator,
49 Updater> {
50public:
51 using parenttype =
52 btree<Key, Comparator, Allocator, blockSize, SearchStrategy, isSet, WeakComparator, Updater>;
53
54 LambdaBTree(const Comparator& comp = Comparator(), const WeakComparator& weak_comp = WeakComparator())
55 : parenttype(comp, weak_comp) {}
56
57 /**
58 * Inserts the given key into this tree.
59 */
60 typename Functor::result_type insert(Key& k, const Functor& f) {
61 typename parenttype::operation_hints hints;
62 return insert(k, hints, f);
63 }
64
65 // rewriting this because of david's changes
66 typename Functor::result_type insert(
67 Key& k, typename parenttype::operation_hints& hints, const Functor& f) {
68#ifdef IS_PARALLEL
69
70 // special handling for inserting first element
71 while (this->root == nullptr) {
72 // try obtaining root-lock
73 if (!this->root_lock.try_start_write()) {
74 // somebody else was faster => re-check
75 continue;
76 }
77
78 // check loop condition again
79 if (this->root != nullptr) {
80 // somebody else was faster => normal insert
81 this->root_lock.abort_write();
82 break;
83 }
84
85 // create new node
86 this->leftmost = new typename parenttype::leaf_node();
87 this->leftmost->numElements = 1;
88 // call the functor as we've successfully inserted
89 typename Functor::result_type res = f(k);
90
91 this->leftmost->keys[0] = k;
92 this->root = this->leftmost;
93
94 // operation complete => we can release the root lock
95 this->root_lock.end_write();
96
97 hints.last_insert.access(this->leftmost);
98
99 return res;
100 }
101
102 // insert using iterative implementation
103
104 typename parenttype::node* cur = nullptr;
105
106 // test last insert hints
107 typename parenttype::lock_type::Lease cur_lease;
108
109 auto checkHint = [&](typename parenttype::node* last_insert) {
110 // ignore null pointer
111 if (!last_insert) return false;
112 // get a read lease on indicated node
113 auto hint_lease = last_insert->lock.start_read();
114 // check whether it covers the key
115 if (!this->weak_covers(last_insert, k)) return false;
116 // and if there was no concurrent modification
117 if (!last_insert->lock.validate(hint_lease)) return false;
118 // use hinted location
119 cur = last_insert;
120 // and keep lease
121 cur_lease = hint_lease;
122 // we found a hit
123 return true;
124 };
125
126 if (hints.last_insert.any(checkHint)) {
127 // register this as a hit
128 this->hint_stats.inserts.addHit();
129 } else {
130 // register this as a miss
131 this->hint_stats.inserts.addMiss();
132 }
133
134 // if there is no valid hint ..
135 if (!cur) {
136 do {
137 // get root - access lock
138 auto root_lease = this->root_lock.start_read();
139
140 // start with root
141 cur = this->root;
142
143 // get lease of the next node to be accessed
144 cur_lease = cur->lock.start_read();
145
146 // check validity of root pointer
147 if (this->root_lock.end_read(root_lease)) {
148 break;
149 }
150
151 } while (true);
152 }
153
154 while (true) {
155 // handle inner nodes
156 if (cur->inner) {
157 auto a = &(cur->keys[0]);
158 auto b = &(cur->keys[cur->numElements]);
159
160 auto pos = this->search.lower_bound(k, a, b, this->weak_comp);
161 auto idx = pos - a;
162
163 // early exit for sets
164 if (isSet && pos != b && this->weak_equal(*pos, k)) {
165 // validate results
166 if (!cur->lock.validate(cur_lease)) {
167 // start over again
168 return insert(k, hints, f);
169 }
170
171 // update provenance information
172 if (typeid(Comparator) != typeid(WeakComparator) && this->less(k, *pos)) {
173 if (!cur->lock.try_upgrade_to_write(cur_lease)) {
174 // start again
175 return insert(k, hints, f);
176 }
177 this->update(*pos, k);
178
179 // get result before releasing lock
180 auto res = (*pos).second;
181
182 cur->lock.end_write();
183 return res;
184 }
185
186 // get the result before releasing lock
187 auto res = (*pos).second;
188
189 // check validity
190 if (!cur->lock.validate(cur_lease)) {
191 // start over again
192 return insert(k, hints, f);
193 }
194
195 // we found the element => return the result
196 return res;
197 }
198
199 // get next pointer
200 auto next = cur->getChild(idx);
201
202 // get lease on next level
203 auto next_lease = next->lock.start_read();
204
205 // check whether there was a write
206 if (!cur->lock.end_read(cur_lease)) {
207 // start over
208 return insert(k, hints, f);
209 }
210
211 // go to next
212 cur = next;
213
214 // move on lease
215 cur_lease = next_lease;
216
217 continue;
218 }
219
220 // the rest is for leaf nodes
221 assert(!cur->inner);
222
223 // -- insert node in leaf node --
224
225 auto a = &(cur->keys[0]);
226 auto b = &(cur->keys[cur->numElements]);
227
228 auto pos = this->search.upper_bound(k, a, b, this->weak_comp);
229 auto idx = pos - a;
230
231 // early exit for sets
232 if (isSet && pos != a && this->weak_equal(*(pos - 1), k)) {
233 // validate result
234 if (!cur->lock.validate(cur_lease)) {
235 // start over again
236 return insert(k, hints, f);
237 }
238
239 // TODO (pnappa): remove provenance from LambdaBTree - no use for it
240 // update provenance information
241 if (typeid(Comparator) != typeid(WeakComparator) && this->less(k, *(pos - 1))) {
242 if (!cur->lock.try_upgrade_to_write(cur_lease)) {
243 // start again
244 return insert(k, hints, f);
245 }
246 this->update(*(pos - 1), k);
247
248 // retrieve result before releasing lock
249 auto res = (*(pos - 1)).second;
250
251 cur->lock.end_write();
252 return res;
253 }
254
255 // read result (atomic) -- just as a proof of concept, this is actually not valid!!
256 std::atomic<typename Functor::result_type>& loc =
257 *reinterpret_cast<std::atomic<typename Functor::result_type>*>(&(*(pos - 1)).second);
258 auto res = loc.load(std::memory_order_relaxed);
259
260 // check validity
261 if (!cur->lock.validate(cur_lease)) {
262 // start over again
263 return insert(k, hints, f);
264 }
265
266 // we found the element => done
267 return res;
268 }
269
270 // upgrade to write-permission
271 if (!cur->lock.try_upgrade_to_write(cur_lease)) {
272 // something has changed => restart
273 hints.last_insert.access(cur);
274 return insert(k, hints, f);
275 }
276
277 if (cur->numElements >= parenttype::node::maxKeys) {
278 // -- lock parents --
279 auto priv = cur;
280 auto parent = priv->parent;
281 std::vector<typename parenttype::node*> parents;
282 do {
283 if (parent) {
284 parent->lock.start_write();
285 while (true) {
286 // check whether parent is correct
287 if (parent == priv->parent) {
288 break;
289 }
290 // switch parent
291 parent->lock.abort_write();
292 parent = priv->parent;
293 parent->lock.start_write();
294 }
295 } else {
296 // lock root lock => since cur is root
297 this->root_lock.start_write();
298 }
299
300 // record locked node
301 parents.push_back(parent);
302
303 // stop at "sphere of influence"
304 if (!parent || !parent->isFull()) {
305 break;
306 }
307
308 // go one step higher
309 priv = parent;
310 parent = parent->parent;
311
312 } while (true);
313
314 // split this node
315 auto old_root = this->root;
316 idx -= cur->rebalance_or_split(const_cast<typename parenttype::node**>(&this->root),
317 this->root_lock, static_cast<int>(idx), parents);
318
319 // release parent lock
320 for (auto it = parents.rbegin(); it != parents.rend(); ++it) {
321 auto parent = *it;
322
323 // release this lock
324 if (parent) {
325 parent->lock.end_write();
326 } else {
327 if (old_root != this->root) {
328 this->root_lock.end_write();
329 } else {
330 this->root_lock.abort_write();
331 }
332 }
333 }
334
335 // insert element in right fragment
336 if (((typename parenttype::size_type)idx) > cur->numElements) {
337 // release current lock
338 cur->lock.end_write();
339
340 // insert in sibling
341 return insert(k, hints, f);
342 }
343 }
344
345 // ok - no split necessary
346 assert(cur->numElements < parenttype::node::maxKeys && "Split required!");
347
348 // move keys
349 for (int j = static_cast<int>(cur->numElements); j > idx; --j) {
350 cur->keys[j] = cur->keys[j - 1];
351 }
352
353 // insert new element
354 typename Functor::result_type res = f(k);
355 cur->keys[idx] = k;
356 cur->numElements++;
357
358 // release lock on current node
359 cur->lock.end_write();
360
361 // remember last insertion position
362 hints.last_insert.access(cur);
363 return res;
364 }
365
366#else
367 // special handling for inserting first element
368 if (this->empty()) {
369 // create new node
370 this->leftmost = new typename parenttype::leaf_node();
371 this->leftmost->numElements = 1;
372 // call the functor as we've successfully inserted
373 typename Functor::result_type res = f(k);
374 this->leftmost->keys[0] = k;
375 this->root = this->leftmost;
376
377 hints.last_insert.access(this->leftmost);
378
379 return res;
380 }
381
382 // insert using iterative implementation
383 typename parenttype::node* cur = this->root;
384
385 auto checkHints = [&](typename parenttype::node* last_insert) {
386 if (!last_insert) return false;
387 if (!this->weak_covers(last_insert, k)) return false;
388 cur = last_insert;
389 return true;
390 };
391
392 // test last insert
393 if (hints.last_insert.any(checkHints)) {
394 this->hint_stats.inserts.addHit();
395 } else {
396 this->hint_stats.inserts.addMiss();
397 }
398
399 while (true) {
400 // handle inner nodes
401 if (cur->inner) {
402 auto a = &(cur->keys[0]);
403 auto b = &(cur->keys[cur->numElements]);
404
405 auto pos = this->search.lower_bound(k, a, b, this->weak_comp);
406 auto idx = pos - a;
407
408 // early exit for sets
409 if (isSet && pos != b && this->weak_equal(*pos, k)) {
410 // update provenance information
411 if (typeid(Comparator) != typeid(WeakComparator) && this->less(k, *pos)) {
412 this->update(*pos, k);
413 return (*pos).second;
414 }
415
416 return (*pos).second;
417 }
418
419 cur = cur->getChild(idx);
420 continue;
421 }
422
423 // the rest is for leaf nodes
424 assert(!cur->inner);
425
426 // -- insert node in leaf node --
427
428 auto a = &(cur->keys[0]);
429 auto b = &(cur->keys[cur->numElements]);
430
431 auto pos = this->search.upper_bound(k, a, b, this->weak_comp);
432 auto idx = pos - a;
433
434 // early exit for sets
435 if (isSet && pos != a && this->weak_equal(*(pos - 1), k)) {
436 // update provenance information
437 if (typeid(Comparator) != typeid(WeakComparator) && this->less(k, *(pos - 1))) {
438 this->update(*(pos - 1), k);
439 return (*(pos - 1)).second;
440 }
441
442 return (*(pos - 1)).second;
443 }
444
445 if (cur->numElements >= parenttype::node::maxKeys) {
446 // split this node
447 idx -= cur->rebalance_or_split(const_cast<typename parenttype::node**>(&this->root),
448 this->root_lock, static_cast<int>(idx));
449
450 // insert element in right fragment
451 if (((typename parenttype::size_type)idx) > cur->numElements) {
452 idx -= cur->numElements + 1;
453 cur = cur->parent->getChild(cur->position + 1);
454 }
455 }
456
457 // ok - no split necessary
458 assert(cur->numElements < parenttype::node::maxKeys && "Split required!");
459
460 // move keys
461 for (int j = cur->numElements; j > idx; --j) {
462 cur->keys[j] = cur->keys[j - 1];
463 }
464
465 // call the functor as we've successfully inserted
466 typename Functor::result_type res = f(k);
467 // insert new element
468 cur->keys[idx] = k;
469 cur->numElements++;
470
471 // remember last insertion position
472 hints.last_insert.access(cur);
473 return res;
474 }
475#endif
476 }
477
478 /**
479 * Inserts the given range of elements into this tree.
480 */
481 template <typename Iter>
482 void insert(const Iter& a, const Iter& b) {
483 // TODO: improve this beyond a naive insert
484 typename parenttype::operation_hints hints;
485 // a naive insert so far .. seems to work fine
486 for (auto it = a; it != b; ++it) {
487 // use insert with hint
488 insert(*it, hints);
489 }
490 }
491
492 /**
493 * Swaps the content of this tree with the given tree. This
494 * is a much more efficient operation than creating a copy and
495 * realizing the swap utilizing assignment operations.
496 */
497 void swap(LambdaBTree& other) {
498 // swap the content
499 std::swap(this->root, other.root);
500 std::swap(this->leftmost, other.leftmost);
501 }
502
503 // Implementation of the assignment operation for trees.
504 LambdaBTree& operator=(const LambdaBTree& other) {
505 // check identity
506 if (this == &other) {
507 return *this;
508 }
509
510 // create a deep-copy of the content of the other tree
511 // shortcut for empty sets
512 if (other.empty()) {
513 return *this;
514 }
515
516 // clone content (deep copy)
517 this->root = other.root->clone();
518
519 // update leftmost reference
520 auto tmp = this->root;
521 while (!tmp->isLeaf()) {
522 tmp = tmp->getChild(0);
523 }
524 this->leftmost = static_cast<typename parenttype::leaf_node*>(tmp);
525
526 // done
527 return *this;
528 }
529
530 // Implementation of an equality operation for trees.
531 bool operator==(const LambdaBTree& other) const {
532 // check identity
533 if (this == &other) {
534 return true;
535 }
536
537 // check size
538 if (this->size() != other.size()) {
539 return false;
540 }
541 if (this->size() < other.size()) {
542 return other == *this;
543 }
544
545 // check content
546 for (const auto& key : other) {
547 if (!contains(key)) {
548 return false;
549 }
550 }
551 return true;
552 }
553
554 // Implementation of an inequality operation for trees.
555 bool operator!=(const LambdaBTree& other) const {
556 return !(*this == other);
557 }
558};
559
560} // end namespace detail
561
562/**
563 * A b-tree based set implementation.
564 *
565 * @tparam Key .. the element type to be stored in this set
566 * @tparam Functor .. a std::function that is invoked on successful insert
567 * @tparam Comparator .. a class defining an order on the stored elements
568 * @tparam Allocator .. utilized for allocating memory for required nodes
569 * @tparam blockSize .. determines the number of bytes/block utilized by leaf nodes
570 * @tparam SearchStrategy .. enables switching between linear, binary or any other search strategy
571 */
572template <typename Key, typename Functor, typename Comparator = detail::comparator<Key>,
573 typename Allocator = std::allocator<Key>, // is ignored so far
574 unsigned blockSize = 256, typename SearchStrategy = typename detail::default_strategy<Key>::type>
575class LambdaBTreeSet
576 : public detail::LambdaBTree<Key, Comparator, Allocator, blockSize, SearchStrategy, true, Functor> {
577 using super = detail::LambdaBTree<Key, Comparator, Allocator, blockSize, SearchStrategy, true, Functor>;
578
579 friend class detail::LambdaBTree<Key, Comparator, Allocator, blockSize, SearchStrategy, true, Functor>;
580
581public:
582 /**
583 * A default constructor creating an empty set.
584 */
585 LambdaBTreeSet(const Comparator& comp = Comparator()) : super(comp) {}
586
587 /**
588 * A constructor creating a set based on the given range.
589 */
590 template <typename Iter>
591 LambdaBTreeSet(const Iter& a, const Iter& b) {
592 this->insert(a, b);
593 }
594
595 // A copy constructor.
596 LambdaBTreeSet(const LambdaBTreeSet& other) : super(other) {}
597
598 // A move constructor.
599 LambdaBTreeSet(LambdaBTreeSet&& other) : super(std::move(other)) {}
600
601private:
602 // A constructor required by the bulk-load facility.
603 template <typename s, typename n, typename l>
604 LambdaBTreeSet(s size, n* root, l* leftmost) : super::parenttype(size, root, leftmost) {}
605
606public:
607 // Support for the assignment operator.
608 LambdaBTreeSet& operator=(const LambdaBTreeSet& other) {
609 super::operator=(other);
610 return *this;
611 }
612
613 // Support for the bulk-load operator.
614 template <typename Iter>
615 static LambdaBTreeSet load(const Iter& a, const Iter& b) {
616 return super::template load<LambdaBTreeSet>(a, b);
617 }
618};
619
620} // end of namespace souffle