1 | /*
|
2 | * Souffle - A Datalog Compiler
|
3 | * Copyright (c) 2018, Souffle Developers
|
4 | * Licensed under the Universal Permissive License v 1.0 as shown at:
|
5 | * - https://opensource.org/licenses/UPL
|
6 | * - <souffle root>/licenses/SOUFFLE-UPL.txt
|
7 | */
|
8 |
|
9 | /************************************************************************
|
10 | *
|
11 | * @file LambdaBTree.h
|
12 | *
|
13 | * An implementation of a generic B-tree data structure including
|
14 | * interfaces for utilizing instances as set or multiset containers.
|
15 | * Allows the user to provide a function to execute on successful insert
|
16 | * Be careful using this, it currently expects a pair as the key.
|
17 | *
|
18 | ***********************************************************************/
|
19 |
|
20 | #pragma once
|
21 |
|
22 | #include "souffle/datastructure/BTree.h"
|
23 | #include "souffle/utility/ContainerUtil.h"
|
24 | #include "souffle/utility/ParallelUtil.h"
|
25 | #include <atomic>
|
26 | #include <cassert>
|
27 | #include <typeinfo>
|
28 | #include <vector>
|
29 |
|
30 | namespace souffle {
|
31 |
|
32 | namespace detail {
|
33 | /**
|
34 | * The actual implementation of a b-tree data structure.
|
35 | *
|
36 | * @tparam Key .. the element type to be stored in this tree
|
37 | * @tparam Comparator .. a class defining an order on the stored elements
|
38 | * @tparam Allocator .. utilized for allocating memory for required nodes
|
39 | * @tparam blockSize .. determines the number of bytes/block utilized by leaf nodes
|
40 | * @tparam SearchStrategy .. enables switching between linear, binary or any other search strategy
|
41 | * @tparam isSet .. true = set, false = multiset
|
42 | * @tparam Functor .. a std::function that is called on successful (new) insert
|
43 | */
|
44 | template <typename Key, typename Comparator,
|
45 | typename Allocator, // is ignored so far - TODO: add support
|
46 | unsigned blockSize, typename SearchStrategy, bool isSet, typename Functor,
|
47 | typename WeakComparator = Comparator, typename Updater = detail::updater<Key>>
|
48 | class LambdaBTree : public btree<Key, Comparator, Allocator, blockSize, SearchStrategy, isSet, WeakComparator,
|
49 | Updater> {
|
50 | public:
|
51 | using parenttype =
|
52 | btree<Key, Comparator, Allocator, blockSize, SearchStrategy, isSet, WeakComparator, Updater>;
|
53 |
|
54 | LambdaBTree(const Comparator& comp = Comparator(), const WeakComparator& weak_comp = WeakComparator())
|
55 | : parenttype(comp, weak_comp) {}
|
56 |
|
57 | /**
|
58 | * Inserts the given key into this tree.
|
59 | */
|
60 | typename Functor::result_type insert(Key& k, const Functor& f) {
|
61 | typename parenttype::operation_hints hints;
|
62 | return insert(k, hints, f);
|
63 | }
|
64 |
|
65 | // rewriting this because of david's changes
|
66 | typename Functor::result_type insert(
|
67 | Key& k, typename parenttype::operation_hints& hints, const Functor& f) {
|
68 | #ifdef IS_PARALLEL
|
69 |
|
70 | // special handling for inserting first element
|
71 | while (this->root == nullptr) {
|
72 | // try obtaining root-lock
|
73 | if (!this->root_lock.try_start_write()) {
|
74 | // somebody else was faster => re-check
|
75 | continue;
|
76 | }
|
77 |
|
78 | // check loop condition again
|
79 | if (this->root != nullptr) {
|
80 | // somebody else was faster => normal insert
|
81 | this->root_lock.abort_write();
|
82 | break;
|
83 | }
|
84 |
|
85 | // create new node
|
86 | this->leftmost = new typename parenttype::leaf_node();
|
87 | this->leftmost->numElements = 1;
|
88 | // call the functor as we've successfully inserted
|
89 | typename Functor::result_type res = f(k);
|
90 |
|
91 | this->leftmost->keys[0] = k;
|
92 | this->root = this->leftmost;
|
93 |
|
94 | // operation complete => we can release the root lock
|
95 | this->root_lock.end_write();
|
96 |
|
97 | hints.last_insert.access(this->leftmost);
|
98 |
|
99 | return res;
|
100 | }
|
101 |
|
102 | // insert using iterative implementation
|
103 |
|
104 | typename parenttype::node* cur = nullptr;
|
105 |
|
106 | // test last insert hints
|
107 | typename parenttype::lock_type::Lease cur_lease;
|
108 |
|
109 | auto checkHint = [&](typename parenttype::node* last_insert) {
|
110 | // ignore null pointer
|
111 | if (!last_insert) return false;
|
112 | // get a read lease on indicated node
|
113 | auto hint_lease = last_insert->lock.start_read();
|
114 | // check whether it covers the key
|
115 | if (!this->weak_covers(last_insert, k)) return false;
|
116 | // and if there was no concurrent modification
|
117 | if (!last_insert->lock.validate(hint_lease)) return false;
|
118 | // use hinted location
|
119 | cur = last_insert;
|
120 | // and keep lease
|
121 | cur_lease = hint_lease;
|
122 | // we found a hit
|
123 | return true;
|
124 | };
|
125 |
|
126 | if (hints.last_insert.any(checkHint)) {
|
127 | // register this as a hit
|
128 | this->hint_stats.inserts.addHit();
|
129 | } else {
|
130 | // register this as a miss
|
131 | this->hint_stats.inserts.addMiss();
|
132 | }
|
133 |
|
134 | // if there is no valid hint ..
|
135 | if (!cur) {
|
136 | do {
|
137 | // get root - access lock
|
138 | auto root_lease = this->root_lock.start_read();
|
139 |
|
140 | // start with root
|
141 | cur = this->root;
|
142 |
|
143 | // get lease of the next node to be accessed
|
144 | cur_lease = cur->lock.start_read();
|
145 |
|
146 | // check validity of root pointer
|
147 | if (this->root_lock.end_read(root_lease)) {
|
148 | break;
|
149 | }
|
150 |
|
151 | } while (true);
|
152 | }
|
153 |
|
154 | while (true) {
|
155 | // handle inner nodes
|
156 | if (cur->inner) {
|
157 | auto a = &(cur->keys[0]);
|
158 | auto b = &(cur->keys[cur->numElements]);
|
159 |
|
160 | auto pos = this->search.lower_bound(k, a, b, this->weak_comp);
|
161 | auto idx = pos - a;
|
162 |
|
163 | // early exit for sets
|
164 | if (isSet && pos != b && this->weak_equal(*pos, k)) {
|
165 | // validate results
|
166 | if (!cur->lock.validate(cur_lease)) {
|
167 | // start over again
|
168 | return insert(k, hints, f);
|
169 | }
|
170 |
|
171 | // update provenance information
|
172 | if (typeid(Comparator) != typeid(WeakComparator) && this->less(k, *pos)) {
|
173 | if (!cur->lock.try_upgrade_to_write(cur_lease)) {
|
174 | // start again
|
175 | return insert(k, hints, f);
|
176 | }
|
177 | this->update(*pos, k);
|
178 |
|
179 | // get result before releasing lock
|
180 | auto res = (*pos).second;
|
181 |
|
182 | cur->lock.end_write();
|
183 | return res;
|
184 | }
|
185 |
|
186 | // get the result before releasing lock
|
187 | auto res = (*pos).second;
|
188 |
|
189 | // check validity
|
190 | if (!cur->lock.validate(cur_lease)) {
|
191 | // start over again
|
192 | return insert(k, hints, f);
|
193 | }
|
194 |
|
195 | // we found the element => return the result
|
196 | return res;
|
197 | }
|
198 |
|
199 | // get next pointer
|
200 | auto next = cur->getChild(idx);
|
201 |
|
202 | // get lease on next level
|
203 | auto next_lease = next->lock.start_read();
|
204 |
|
205 | // check whether there was a write
|
206 | if (!cur->lock.end_read(cur_lease)) {
|
207 | // start over
|
208 | return insert(k, hints, f);
|
209 | }
|
210 |
|
211 | // go to next
|
212 | cur = next;
|
213 |
|
214 | // move on lease
|
215 | cur_lease = next_lease;
|
216 |
|
217 | continue;
|
218 | }
|
219 |
|
220 | // the rest is for leaf nodes
|
221 | assert(!cur->inner);
|
222 |
|
223 | // -- insert node in leaf node --
|
224 |
|
225 | auto a = &(cur->keys[0]);
|
226 | auto b = &(cur->keys[cur->numElements]);
|
227 |
|
228 | auto pos = this->search.upper_bound(k, a, b, this->weak_comp);
|
229 | auto idx = pos - a;
|
230 |
|
231 | // early exit for sets
|
232 | if (isSet && pos != a && this->weak_equal(*(pos - 1), k)) {
|
233 | // validate result
|
234 | if (!cur->lock.validate(cur_lease)) {
|
235 | // start over again
|
236 | return insert(k, hints, f);
|
237 | }
|
238 |
|
239 | // TODO (pnappa): remove provenance from LambdaBTree - no use for it
|
240 | // update provenance information
|
241 | if (typeid(Comparator) != typeid(WeakComparator) && this->less(k, *(pos - 1))) {
|
242 | if (!cur->lock.try_upgrade_to_write(cur_lease)) {
|
243 | // start again
|
244 | return insert(k, hints, f);
|
245 | }
|
246 | this->update(*(pos - 1), k);
|
247 |
|
248 | // retrieve result before releasing lock
|
249 | auto res = (*(pos - 1)).second;
|
250 |
|
251 | cur->lock.end_write();
|
252 | return res;
|
253 | }
|
254 |
|
255 | // read result (atomic) -- just as a proof of concept, this is actually not valid!!
|
256 | std::atomic<typename Functor::result_type>& loc =
|
257 | *reinterpret_cast<std::atomic<typename Functor::result_type>*>(&(*(pos - 1)).second);
|
258 | auto res = loc.load(std::memory_order_relaxed);
|
259 |
|
260 | // check validity
|
261 | if (!cur->lock.validate(cur_lease)) {
|
262 | // start over again
|
263 | return insert(k, hints, f);
|
264 | }
|
265 |
|
266 | // we found the element => done
|
267 | return res;
|
268 | }
|
269 |
|
270 | // upgrade to write-permission
|
271 | if (!cur->lock.try_upgrade_to_write(cur_lease)) {
|
272 | // something has changed => restart
|
273 | hints.last_insert.access(cur);
|
274 | return insert(k, hints, f);
|
275 | }
|
276 |
|
277 | if (cur->numElements >= parenttype::node::maxKeys) {
|
278 | // -- lock parents --
|
279 | auto priv = cur;
|
280 | auto parent = priv->parent;
|
281 | std::vector<typename parenttype::node*> parents;
|
282 | do {
|
283 | if (parent) {
|
284 | parent->lock.start_write();
|
285 | while (true) {
|
286 | // check whether parent is correct
|
287 | if (parent == priv->parent) {
|
288 | break;
|
289 | }
|
290 | // switch parent
|
291 | parent->lock.abort_write();
|
292 | parent = priv->parent;
|
293 | parent->lock.start_write();
|
294 | }
|
295 | } else {
|
296 | // lock root lock => since cur is root
|
297 | this->root_lock.start_write();
|
298 | }
|
299 |
|
300 | // record locked node
|
301 | parents.push_back(parent);
|
302 |
|
303 | // stop at "sphere of influence"
|
304 | if (!parent || !parent->isFull()) {
|
305 | break;
|
306 | }
|
307 |
|
308 | // go one step higher
|
309 | priv = parent;
|
310 | parent = parent->parent;
|
311 |
|
312 | } while (true);
|
313 |
|
314 | // split this node
|
315 | auto old_root = this->root;
|
316 | idx -= cur->rebalance_or_split(const_cast<typename parenttype::node**>(&this->root),
|
317 | this->root_lock, static_cast<int>(idx), parents);
|
318 |
|
319 | // release parent lock
|
320 | for (auto it = parents.rbegin(); it != parents.rend(); ++it) {
|
321 | auto parent = *it;
|
322 |
|
323 | // release this lock
|
324 | if (parent) {
|
325 | parent->lock.end_write();
|
326 | } else {
|
327 | if (old_root != this->root) {
|
328 | this->root_lock.end_write();
|
329 | } else {
|
330 | this->root_lock.abort_write();
|
331 | }
|
332 | }
|
333 | }
|
334 |
|
335 | // insert element in right fragment
|
336 | if (((typename parenttype::size_type)idx) > cur->numElements) {
|
337 | // release current lock
|
338 | cur->lock.end_write();
|
339 |
|
340 | // insert in sibling
|
341 | return insert(k, hints, f);
|
342 | }
|
343 | }
|
344 |
|
345 | // ok - no split necessary
|
346 | assert(cur->numElements < parenttype::node::maxKeys && "Split required!");
|
347 |
|
348 | // move keys
|
349 | for (int j = static_cast<int>(cur->numElements); j > idx; --j) {
|
350 | cur->keys[j] = cur->keys[j - 1];
|
351 | }
|
352 |
|
353 | // insert new element
|
354 | typename Functor::result_type res = f(k);
|
355 | cur->keys[idx] = k;
|
356 | cur->numElements++;
|
357 |
|
358 | // release lock on current node
|
359 | cur->lock.end_write();
|
360 |
|
361 | // remember last insertion position
|
362 | hints.last_insert.access(cur);
|
363 | return res;
|
364 | }
|
365 |
|
366 | #else
|
367 | // special handling for inserting first element
|
368 | if (this->empty()) {
|
369 | // create new node
|
370 | this->leftmost = new typename parenttype::leaf_node();
|
371 | this->leftmost->numElements = 1;
|
372 | // call the functor as we've successfully inserted
|
373 | typename Functor::result_type res = f(k);
|
374 | this->leftmost->keys[0] = k;
|
375 | this->root = this->leftmost;
|
376 |
|
377 | hints.last_insert.access(this->leftmost);
|
378 |
|
379 | return res;
|
380 | }
|
381 |
|
382 | // insert using iterative implementation
|
383 | typename parenttype::node* cur = this->root;
|
384 |
|
385 | auto checkHints = [&](typename parenttype::node* last_insert) {
|
386 | if (!last_insert) return false;
|
387 | if (!this->weak_covers(last_insert, k)) return false;
|
388 | cur = last_insert;
|
389 | return true;
|
390 | };
|
391 |
|
392 | // test last insert
|
393 | if (hints.last_insert.any(checkHints)) {
|
394 | this->hint_stats.inserts.addHit();
|
395 | } else {
|
396 | this->hint_stats.inserts.addMiss();
|
397 | }
|
398 |
|
399 | while (true) {
|
400 | // handle inner nodes
|
401 | if (cur->inner) {
|
402 | auto a = &(cur->keys[0]);
|
403 | auto b = &(cur->keys[cur->numElements]);
|
404 |
|
405 | auto pos = this->search.lower_bound(k, a, b, this->weak_comp);
|
406 | auto idx = pos - a;
|
407 |
|
408 | // early exit for sets
|
409 | if (isSet && pos != b && this->weak_equal(*pos, k)) {
|
410 | // update provenance information
|
411 | if (typeid(Comparator) != typeid(WeakComparator) && this->less(k, *pos)) {
|
412 | this->update(*pos, k);
|
413 | return (*pos).second;
|
414 | }
|
415 |
|
416 | return (*pos).second;
|
417 | }
|
418 |
|
419 | cur = cur->getChild(idx);
|
420 | continue;
|
421 | }
|
422 |
|
423 | // the rest is for leaf nodes
|
424 | assert(!cur->inner);
|
425 |
|
426 | // -- insert node in leaf node --
|
427 |
|
428 | auto a = &(cur->keys[0]);
|
429 | auto b = &(cur->keys[cur->numElements]);
|
430 |
|
431 | auto pos = this->search.upper_bound(k, a, b, this->weak_comp);
|
432 | auto idx = pos - a;
|
433 |
|
434 | // early exit for sets
|
435 | if (isSet && pos != a && this->weak_equal(*(pos - 1), k)) {
|
436 | // update provenance information
|
437 | if (typeid(Comparator) != typeid(WeakComparator) && this->less(k, *(pos - 1))) {
|
438 | this->update(*(pos - 1), k);
|
439 | return (*(pos - 1)).second;
|
440 | }
|
441 |
|
442 | return (*(pos - 1)).second;
|
443 | }
|
444 |
|
445 | if (cur->numElements >= parenttype::node::maxKeys) {
|
446 | // split this node
|
447 | idx -= cur->rebalance_or_split(const_cast<typename parenttype::node**>(&this->root),
|
448 | this->root_lock, static_cast<int>(idx));
|
449 |
|
450 | // insert element in right fragment
|
451 | if (((typename parenttype::size_type)idx) > cur->numElements) {
|
452 | idx -= cur->numElements + 1;
|
453 | cur = cur->parent->getChild(cur->position + 1);
|
454 | }
|
455 | }
|
456 |
|
457 | // ok - no split necessary
|
458 | assert(cur->numElements < parenttype::node::maxKeys && "Split required!");
|
459 |
|
460 | // move keys
|
461 | for (int j = cur->numElements; j > idx; --j) {
|
462 | cur->keys[j] = cur->keys[j - 1];
|
463 | }
|
464 |
|
465 | // call the functor as we've successfully inserted
|
466 | typename Functor::result_type res = f(k);
|
467 | // insert new element
|
468 | cur->keys[idx] = k;
|
469 | cur->numElements++;
|
470 |
|
471 | // remember last insertion position
|
472 | hints.last_insert.access(cur);
|
473 | return res;
|
474 | }
|
475 | #endif
|
476 | }
|
477 |
|
478 | /**
|
479 | * Inserts the given range of elements into this tree.
|
480 | */
|
481 | template <typename Iter>
|
482 | void insert(const Iter& a, const Iter& b) {
|
483 | // TODO: improve this beyond a naive insert
|
484 | typename parenttype::operation_hints hints;
|
485 | // a naive insert so far .. seems to work fine
|
486 | for (auto it = a; it != b; ++it) {
|
487 | // use insert with hint
|
488 | insert(*it, hints);
|
489 | }
|
490 | }
|
491 |
|
492 | /**
|
493 | * Swaps the content of this tree with the given tree. This
|
494 | * is a much more efficient operation than creating a copy and
|
495 | * realizing the swap utilizing assignment operations.
|
496 | */
|
497 | void swap(LambdaBTree& other) {
|
498 | // swap the content
|
499 | std::swap(this->root, other.root);
|
500 | std::swap(this->leftmost, other.leftmost);
|
501 | }
|
502 |
|
503 | // Implementation of the assignment operation for trees.
|
504 | LambdaBTree& operator=(const LambdaBTree& other) {
|
505 | // check identity
|
506 | if (this == &other) {
|
507 | return *this;
|
508 | }
|
509 |
|
510 | // create a deep-copy of the content of the other tree
|
511 | // shortcut for empty sets
|
512 | if (other.empty()) {
|
513 | return *this;
|
514 | }
|
515 |
|
516 | // clone content (deep copy)
|
517 | this->root = other.root->clone();
|
518 |
|
519 | // update leftmost reference
|
520 | auto tmp = this->root;
|
521 | while (!tmp->isLeaf()) {
|
522 | tmp = tmp->getChild(0);
|
523 | }
|
524 | this->leftmost = static_cast<typename parenttype::leaf_node*>(tmp);
|
525 |
|
526 | // done
|
527 | return *this;
|
528 | }
|
529 |
|
530 | // Implementation of an equality operation for trees.
|
531 | bool operator==(const LambdaBTree& other) const {
|
532 | // check identity
|
533 | if (this == &other) {
|
534 | return true;
|
535 | }
|
536 |
|
537 | // check size
|
538 | if (this->size() != other.size()) {
|
539 | return false;
|
540 | }
|
541 | if (this->size() < other.size()) {
|
542 | return other == *this;
|
543 | }
|
544 |
|
545 | // check content
|
546 | for (const auto& key : other) {
|
547 | if (!contains(key)) {
|
548 | return false;
|
549 | }
|
550 | }
|
551 | return true;
|
552 | }
|
553 |
|
554 | // Implementation of an inequality operation for trees.
|
555 | bool operator!=(const LambdaBTree& other) const {
|
556 | return !(*this == other);
|
557 | }
|
558 | };
|
559 |
|
560 | } // end namespace detail
|
561 |
|
562 | /**
|
563 | * A b-tree based set implementation.
|
564 | *
|
565 | * @tparam Key .. the element type to be stored in this set
|
566 | * @tparam Functor .. a std::function that is invoked on successful insert
|
567 | * @tparam Comparator .. a class defining an order on the stored elements
|
568 | * @tparam Allocator .. utilized for allocating memory for required nodes
|
569 | * @tparam blockSize .. determines the number of bytes/block utilized by leaf nodes
|
570 | * @tparam SearchStrategy .. enables switching between linear, binary or any other search strategy
|
571 | */
|
572 | template <typename Key, typename Functor, typename Comparator = detail::comparator<Key>,
|
573 | typename Allocator = std::allocator<Key>, // is ignored so far
|
574 | unsigned blockSize = 256, typename SearchStrategy = typename detail::default_strategy<Key>::type>
|
575 | class LambdaBTreeSet
|
576 | : public detail::LambdaBTree<Key, Comparator, Allocator, blockSize, SearchStrategy, true, Functor> {
|
577 | using super = detail::LambdaBTree<Key, Comparator, Allocator, blockSize, SearchStrategy, true, Functor>;
|
578 |
|
579 | friend class detail::LambdaBTree<Key, Comparator, Allocator, blockSize, SearchStrategy, true, Functor>;
|
580 |
|
581 | public:
|
582 | /**
|
583 | * A default constructor creating an empty set.
|
584 | */
|
585 | LambdaBTreeSet(const Comparator& comp = Comparator()) : super(comp) {}
|
586 |
|
587 | /**
|
588 | * A constructor creating a set based on the given range.
|
589 | */
|
590 | template <typename Iter>
|
591 | LambdaBTreeSet(const Iter& a, const Iter& b) {
|
592 | this->insert(a, b);
|
593 | }
|
594 |
|
595 | // A copy constructor.
|
596 | LambdaBTreeSet(const LambdaBTreeSet& other) : super(other) {}
|
597 |
|
598 | // A move constructor.
|
599 | LambdaBTreeSet(LambdaBTreeSet&& other) : super(std::move(other)) {}
|
600 |
|
601 | private:
|
602 | // A constructor required by the bulk-load facility.
|
603 | template <typename s, typename n, typename l>
|
604 | LambdaBTreeSet(s size, n* root, l* leftmost) : super::parenttype(size, root, leftmost) {}
|
605 |
|
606 | public:
|
607 | // Support for the assignment operator.
|
608 | LambdaBTreeSet& operator=(const LambdaBTreeSet& other) {
|
609 | super::operator=(other);
|
610 | return *this;
|
611 | }
|
612 |
|
613 | // Support for the bulk-load operator.
|
614 | template <typename Iter>
|
615 | static LambdaBTreeSet load(const Iter& a, const Iter& b) {
|
616 | return super::template load<LambdaBTreeSet>(a, b);
|
617 | }
|
618 | };
|
619 |
|
620 | } // end of namespace souffle
|