| 1 | /*
|
| 2 | * Souffle - A Datalog Compiler
|
| 3 | * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved
|
| 4 | * Licensed under the Universal Permissive License v 1.0 as shown at:
|
| 5 | * - https://opensource.org/licenses/UPL
|
| 6 | * - <souffle root>/licenses/SOUFFLE-UPL.txt
|
| 7 | */
|
| 8 |
|
| 9 | /************************************************************************
|
| 10 | *
|
| 11 | * @file BTree.h
|
| 12 | *
|
| 13 | * An implementation of a generic B-tree data structure including
|
| 14 | * interfaces for utilizing instances as set or multiset containers.
|
| 15 | *
|
| 16 | ***********************************************************************/
|
| 17 |
|
| 18 | #pragma once
|
| 19 |
|
| 20 | #include "souffle/datastructure/BTreeUtil.h"
|
| 21 | #include "souffle/utility/CacheUtil.h"
|
| 22 | #include "souffle/utility/ContainerUtil.h"
|
| 23 | #include "souffle/utility/MiscUtil.h"
|
| 24 | #include "souffle/utility/ParallelUtil.h"
|
| 25 | #include <algorithm>
|
| 26 | #include <cassert>
|
| 27 | #include <cstddef>
|
| 28 | #include <cstdint>
|
| 29 | #include <iostream>
|
| 30 | #include <iterator>
|
| 31 | #include <string>
|
| 32 | #include <tuple>
|
| 33 | #include <type_traits>
|
| 34 | #include <typeinfo>
|
| 35 | #include <vector>
|
| 36 |
|
| 37 | namespace souffle {
|
| 38 |
|
| 39 | namespace detail {
|
| 40 |
|
| 41 | /**
|
| 42 | * The actual implementation of a b-tree data structure.
|
| 43 | *
|
| 44 | * @tparam Key .. the element type to be stored in this tree
|
| 45 | * @tparam Comparator .. a class defining an order on the stored elements
|
| 46 | * @tparam Allocator .. utilized for allocating memory for required nodes
|
| 47 | * @tparam blockSize .. determines the number of bytes/block utilized by leaf nodes
|
| 48 | * @tparam SearchStrategy .. enables switching between linear, binary or any other search strategy
|
| 49 | * @tparam isSet .. true = set, false = multiset
|
| 50 | */
|
| 51 | template <typename Key, typename Comparator,
|
| 52 | typename Allocator, // is ignored so far - TODO: add support
|
| 53 | unsigned blockSize, typename SearchStrategy, bool isSet, typename WeakComparator = Comparator,
|
| 54 | typename Updater = detail::updater<Key>>
|
| 55 | class btree {
|
| 56 | public:
|
| 57 | class iterator;
|
| 58 | using const_iterator = iterator;
|
| 59 |
|
| 60 | using key_type = Key;
|
| 61 | using element_type = Key;
|
| 62 | using chunk = range<iterator>;
|
| 63 |
|
| 64 | protected:
|
| 65 | /* ------------- static utilities ----------------- */
|
| 66 |
|
| 67 | const static SearchStrategy search;
|
| 68 |
|
| 69 | /* ---------- comparison utilities ---------------- */
|
| 70 |
|
| 71 | mutable Comparator comp;
|
| 72 |
|
| 73 | bool less(const Key& a, const Key& b) const {
|
| 74 | return comp.less(a, b);
|
| 75 | }
|
| 76 |
|
| 77 | bool equal(const Key& a, const Key& b) const {
|
| 78 | return comp.equal(a, b);
|
| 79 | }
|
| 80 |
|
| 81 | mutable WeakComparator weak_comp;
|
| 82 |
|
| 83 | bool weak_less(const Key& a, const Key& b) const {
|
| 84 | return weak_comp.less(a, b);
|
| 85 | }
|
| 86 |
|
| 87 | bool weak_equal(const Key& a, const Key& b) const {
|
| 88 | return weak_comp.equal(a, b);
|
| 89 | }
|
| 90 |
|
| 91 | /* -------------- updater utilities ------------- */
|
| 92 |
|
| 93 | mutable Updater upd;
|
| 94 | bool update(Key& old_k, const Key& new_k) {
|
| 95 | return upd.update(old_k, new_k);
|
| 96 | }
|
| 97 |
|
| 98 | /* -------------- the node type ----------------- */
|
| 99 |
|
| 100 | using size_type = std::size_t;
|
| 101 | using field_index_type = uint8_t;
|
| 102 | using lock_type = OptimisticReadWriteLock;
|
| 103 |
|
| 104 | struct node;
|
| 105 |
|
| 106 | /**
|
| 107 | * The base type of all node types containing essential
|
| 108 | * book-keeping information.
|
| 109 | */
|
| 110 | struct base {
|
| 111 | #ifdef IS_PARALLEL
|
| 112 |
|
| 113 | // the parent node
|
| 114 | node* volatile parent;
|
| 115 |
|
| 116 | // a lock for synchronizing parallel operations on this node
|
| 117 | lock_type lock;
|
| 118 |
|
| 119 | // the number of keys in this node
|
| 120 | volatile size_type numElements;
|
| 121 |
|
| 122 | // the position in the parent node
|
| 123 | volatile field_index_type position;
|
| 124 | #else
|
| 125 | // the parent node
|
| 126 | node* parent;
|
| 127 |
|
| 128 | // the number of keys in this node
|
| 129 | size_type numElements;
|
| 130 |
|
| 131 | // the position in the parent node
|
| 132 | field_index_type position;
|
| 133 | #endif
|
| 134 |
|
| 135 | // a flag indicating whether this is a inner node or not
|
| 136 | const bool inner;
|
| 137 |
|
| 138 | /**
|
| 139 | * A simple constructor for nodes
|
| 140 | */
|
| 141 | base(bool inner) : parent(nullptr), numElements(0), position(0), inner(inner) {}
|
| 142 |
|
| 143 | bool isLeaf() const {
|
| 144 | return !inner;
|
| 145 | }
|
| 146 |
|
| 147 | bool isInner() const {
|
| 148 | return inner;
|
| 149 | }
|
| 150 |
|
| 151 | node* getParent() const {
|
| 152 | return parent;
|
| 153 | }
|
| 154 |
|
| 155 | field_index_type getPositionInParent() const {
|
| 156 | return position;
|
| 157 | }
|
| 158 |
|
| 159 | size_type getNumElements() const {
|
| 160 | return numElements;
|
| 161 | }
|
| 162 | };
|
| 163 |
|
| 164 | struct inner_node;
|
| 165 |
|
| 166 | /**
|
| 167 | * The actual, generic node implementation covering the operations
|
| 168 | * for both, inner and leaf nodes.
|
| 169 | */
|
| 170 | struct node : public base {
|
| 171 | /**
|
| 172 | * The number of keys/node desired by the user.
|
| 173 | */
|
| 174 | static constexpr std::size_t desiredNumKeys =
|
| 175 | ((blockSize > sizeof(base)) ? blockSize - sizeof(base) : 0) / sizeof(Key);
|
| 176 |
|
| 177 | /**
|
| 178 | * The actual number of keys/node corrected by functional requirements.
|
| 179 | */
|
| 180 | static constexpr std::size_t maxKeys = (desiredNumKeys > 3) ? desiredNumKeys : 3;
|
| 181 |
|
| 182 | // the keys stored in this node
|
| 183 | Key keys[maxKeys];
|
| 184 |
|
| 185 | // a simple constructor
|
| 186 | node(bool inner) : base(inner) {}
|
| 187 |
|
| 188 | /**
|
| 189 | * A deep-copy operation creating a clone of this node.
|
| 190 | */
|
| 191 | node* clone() const {
|
| 192 | // create a clone of this node
|
| 193 | node* res = (this->isInner()) ? static_cast<node*>(new inner_node())
|
| 194 | : static_cast<node*>(new leaf_node());
|
| 195 |
|
| 196 | // copy basic fields
|
| 197 | res->position = this->position;
|
| 198 | res->numElements = this->numElements;
|
| 199 |
|
| 200 | for (size_type i = 0; i < this->numElements; ++i) {
|
| 201 | res->keys[i] = this->keys[i];
|
| 202 | }
|
| 203 |
|
| 204 | // if this is a leaf we are done
|
| 205 | if (this->isLeaf()) {
|
| 206 | return res;
|
| 207 | }
|
| 208 |
|
| 209 | // copy child nodes recursively
|
| 210 | auto* ires = (inner_node*)res;
|
| 211 | for (size_type i = 0; i <= this->numElements; ++i) {
|
| 212 | ires->children[i] = this->getChild(i)->clone();
|
| 213 | ires->children[i]->parent = res;
|
| 214 | }
|
| 215 |
|
| 216 | // that's it
|
| 217 | return res;
|
| 218 | }
|
| 219 |
|
| 220 | /**
|
| 221 | * A utility function providing a reference to this node as
|
| 222 | * an inner node.
|
| 223 | */
|
| 224 | inner_node& asInnerNode() {
|
| 225 | assert(this->inner && "Invalid cast!");
|
| 226 | return *static_cast<inner_node*>(this);
|
| 227 | }
|
| 228 |
|
| 229 | /**
|
| 230 | * A utility function providing a reference to this node as
|
| 231 | * a const inner node.
|
| 232 | */
|
| 233 | const inner_node& asInnerNode() const {
|
| 234 | assert(this->inner && "Invalid cast!");
|
| 235 | return *static_cast<const inner_node*>(this);
|
| 236 | }
|
| 237 |
|
| 238 | /**
|
| 239 | * Computes the number of nested levels of the tree rooted
|
| 240 | * by this node.
|
| 241 | */
|
| 242 | size_type getDepth() const {
|
| 243 | if (this->isLeaf()) {
|
| 244 | return 1;
|
| 245 | }
|
| 246 | return getChild(0)->getDepth() + 1;
|
| 247 | }
|
| 248 |
|
| 249 | /**
|
| 250 | * Counts the number of nodes contained in the sub-tree rooted
|
| 251 | * by this node.
|
| 252 | */
|
| 253 | size_type countNodes() const {
|
| 254 | if (this->isLeaf()) {
|
| 255 | return 1;
|
| 256 | }
|
| 257 | size_type sum = 1;
|
| 258 | for (unsigned i = 0; i <= this->numElements; ++i) {
|
| 259 | sum += getChild(i)->countNodes();
|
| 260 | }
|
| 261 | return sum;
|
| 262 | }
|
| 263 |
|
| 264 | /**
|
| 265 | * Counts the number of entries contained in the sub-tree rooted
|
| 266 | * by this node.
|
| 267 | */
|
| 268 | size_type countEntries() const {
|
| 269 | if (this->isLeaf()) {
|
| 270 | return this->numElements;
|
| 271 | }
|
| 272 | size_type sum = this->numElements;
|
| 273 | for (unsigned i = 0; i <= this->numElements; ++i) {
|
| 274 | sum += getChild(i)->countEntries();
|
| 275 | }
|
| 276 | return sum;
|
| 277 | }
|
| 278 |
|
| 279 | /**
|
| 280 | * Determines the amount of memory used by the sub-tree rooted
|
| 281 | * by this node.
|
| 282 | */
|
| 283 | size_type getMemoryUsage() const {
|
| 284 | if (this->isLeaf()) {
|
| 285 | return sizeof(leaf_node);
|
| 286 | }
|
| 287 | size_type res = sizeof(inner_node);
|
| 288 | for (unsigned i = 0; i <= this->numElements; ++i) {
|
| 289 | res += getChild(i)->getMemoryUsage();
|
| 290 | }
|
| 291 | return res;
|
| 292 | }
|
| 293 |
|
| 294 | /**
|
| 295 | * Obtains a pointer to the array of child-pointers
|
| 296 | * of this node -- if it is an inner node.
|
| 297 | */
|
| 298 | node** getChildren() {
|
| 299 | return asInnerNode().children;
|
| 300 | }
|
| 301 |
|
| 302 | /**
|
| 303 | * Obtains a pointer to the array of const child-pointers
|
| 304 | * of this node -- if it is an inner node.
|
| 305 | */
|
| 306 | node* const* getChildren() const {
|
| 307 | return asInnerNode().children;
|
| 308 | }
|
| 309 |
|
| 310 | /**
|
| 311 | * Obtains a reference to the child of the given index.
|
| 312 | */
|
| 313 | node* getChild(size_type s) const {
|
| 314 | return asInnerNode().children[s];
|
| 315 | }
|
| 316 |
|
| 317 | /**
|
| 318 | * Checks whether this node is empty -- can happen due to biased insertion.
|
| 319 | */
|
| 320 | bool isEmpty() const {
|
| 321 | return this->numElements == 0;
|
| 322 | }
|
| 323 |
|
| 324 | /**
|
| 325 | * Checks whether this node is full.
|
| 326 | */
|
| 327 | bool isFull() const {
|
| 328 | return this->numElements == maxKeys;
|
| 329 | }
|
| 330 |
|
| 331 | /**
|
| 332 | * Obtains the point at which full nodes should be split.
|
| 333 | * Conventional b-trees always split in half. However, in cases
|
| 334 | * where in-order insertions are frequent, a split assigning
|
| 335 | * larger portions to the right fragment provide higher performance
|
| 336 | * and a better node-filling rate.
|
| 337 | */
|
| 338 | int getSplitPoint(int /*unused*/) {
|
| 339 | return static_cast<int>(std::min(3 * maxKeys / 4, maxKeys - 2));
|
| 340 | }
|
| 341 |
|
| 342 | /**
|
| 343 | * Splits this node.
|
| 344 | *
|
| 345 | * @param root .. a pointer to the root-pointer of the enclosing b-tree
|
| 346 | * (might have to be updated if the root-node needs to be split)
|
| 347 | * @param idx .. the position of the insert causing the split
|
| 348 | */
|
| 349 | #ifdef IS_PARALLEL
|
| 350 | void split(node** root, lock_type& root_lock, int idx, std::vector<node*>& locked_nodes) {
|
| 351 | assert(this->lock.is_write_locked());
|
| 352 | assert(!this->parent || this->parent->lock.is_write_locked());
|
| 353 | assert((this->parent != nullptr) || root_lock.is_write_locked());
|
| 354 | assert(this->isLeaf() || souffle::contains(locked_nodes, this));
|
| 355 | assert(!this->parent || souffle::contains(locked_nodes, const_cast<node*>(this->parent)));
|
| 356 | #else
|
| 357 | void split(node** root, lock_type& root_lock, int idx) {
|
| 358 | #endif
|
| 359 | assert(this->numElements == maxKeys);
|
| 360 |
|
| 361 | // get middle element
|
| 362 | int split_point = getSplitPoint(idx);
|
| 363 |
|
| 364 | // create a new sibling node
|
| 365 | node* sibling = (this->inner) ? static_cast<node*>(new inner_node())
|
| 366 | : static_cast<node*>(new leaf_node());
|
| 367 |
|
| 368 | #ifdef IS_PARALLEL
|
| 369 | // lock sibling
|
| 370 | sibling->lock.start_write();
|
| 371 | locked_nodes.push_back(sibling);
|
| 372 | #endif
|
| 373 |
|
| 374 | // move data over to the new node
|
| 375 | for (unsigned i = split_point + 1, j = 0; i < maxKeys; ++i, ++j) {
|
| 376 | sibling->keys[j] = keys[i];
|
| 377 | }
|
| 378 |
|
| 379 | // move child pointers
|
| 380 | if (this->inner) {
|
| 381 | // move pointers to sibling
|
| 382 | auto* other = static_cast<inner_node*>(sibling);
|
| 383 | for (unsigned i = split_point + 1, j = 0; i <= maxKeys; ++i, ++j) {
|
| 384 | other->children[j] = getChildren()[i];
|
| 385 | other->children[j]->parent = other;
|
| 386 | other->children[j]->position = static_cast<field_index_type>(j);
|
| 387 | }
|
| 388 | }
|
| 389 |
|
| 390 | // update number of elements
|
| 391 | this->numElements = split_point;
|
| 392 | sibling->numElements = maxKeys - split_point - 1;
|
| 393 |
|
| 394 | // update parent
|
| 395 | #ifdef IS_PARALLEL
|
| 396 | grow_parent(root, root_lock, sibling, locked_nodes);
|
| 397 | #else
|
| 398 | grow_parent(root, root_lock, sibling);
|
| 399 | #endif
|
| 400 | }
|
| 401 |
|
| 402 | /**
|
| 403 | * Moves keys from this node to one of its siblings or splits
|
| 404 | * this node to make some space for the insertion of an element at
|
| 405 | * position idx.
|
| 406 | *
|
| 407 | * Returns the number of elements moved to the left side, 0 in case
|
| 408 | * of a split. The number of moved elements will be <= the given idx.
|
| 409 | *
|
| 410 | * @param root .. the root node of the b-tree being part of
|
| 411 | * @param idx .. the position of the insert triggering this operation
|
| 412 | */
|
| 413 | // TODO: remove root_lock ... no longer needed
|
| 414 | #ifdef IS_PARALLEL
|
| 415 | int rebalance_or_split(node** root, lock_type& root_lock, int idx, std::vector<node*>& locked_nodes) {
|
| 416 | assert(this->lock.is_write_locked());
|
| 417 | assert(!this->parent || this->parent->lock.is_write_locked());
|
| 418 | assert((this->parent != nullptr) || root_lock.is_write_locked());
|
| 419 | assert(this->isLeaf() || souffle::contains(locked_nodes, this));
|
| 420 | assert(!this->parent || souffle::contains(locked_nodes, const_cast<node*>(this->parent)));
|
| 421 | #else
|
| 422 | int rebalance_or_split(node** root, lock_type& root_lock, int idx) {
|
| 423 | #endif
|
| 424 |
|
| 425 | // this node is full ... and needs some space
|
| 426 | assert(this->numElements == maxKeys);
|
| 427 |
|
| 428 | // get snap-shot of parent
|
| 429 | auto parent = this->parent;
|
| 430 | auto pos = this->position;
|
| 431 |
|
| 432 | // Option A) re-balance data
|
| 433 | if (parent && pos > 0) {
|
| 434 | node* left = parent->getChild(pos - 1);
|
| 435 |
|
| 436 | #ifdef IS_PARALLEL
|
| 437 | // lock access to left sibling
|
| 438 | if (!left->lock.try_start_write()) {
|
| 439 | // left node is currently updated => skip balancing and split
|
| 440 | split(root, root_lock, idx, locked_nodes);
|
| 441 | return 0;
|
| 442 | }
|
| 443 | #endif
|
| 444 |
|
| 445 | // compute number of elements to be movable to left
|
| 446 | // space available in left vs. insertion index
|
| 447 | size_type num = static_cast<size_type>(
|
| 448 | std::min<int>(static_cast<int>(maxKeys - left->numElements), idx));
|
| 449 |
|
| 450 | // if there are elements to move ..
|
| 451 | if (num > 0) {
|
| 452 | Key* splitter = &(parent->keys[this->position - 1]);
|
| 453 |
|
| 454 | // .. move keys to left node
|
| 455 | left->keys[left->numElements] = *splitter;
|
| 456 | for (size_type i = 0; i < num - 1; ++i) {
|
| 457 | left->keys[left->numElements + 1 + i] = keys[i];
|
| 458 | }
|
| 459 | *splitter = keys[num - 1];
|
| 460 |
|
| 461 | // shift keys in this node to the left
|
| 462 | for (size_type i = 0; i < this->numElements - num; ++i) {
|
| 463 | keys[i] = keys[i + num];
|
| 464 | }
|
| 465 |
|
| 466 | // .. and children if necessary
|
| 467 | if (this->isInner()) {
|
| 468 | auto* ileft = static_cast<inner_node*>(left);
|
| 469 | auto* iright = static_cast<inner_node*>(this);
|
| 470 |
|
| 471 | // move children
|
| 472 | for (field_index_type i = 0; i < num; ++i) {
|
| 473 | ileft->children[left->numElements + i + 1] = iright->children[i];
|
| 474 | }
|
| 475 |
|
| 476 | // update moved children
|
| 477 | for (size_type i = 0; i < num; ++i) {
|
| 478 | iright->children[i]->parent = ileft;
|
| 479 | iright->children[i]->position =
|
| 480 | static_cast<field_index_type>(left->numElements + i) + 1;
|
| 481 | }
|
| 482 |
|
| 483 | // shift child-pointer to the left
|
| 484 | for (size_type i = 0; i < this->numElements - num + 1; ++i) {
|
| 485 | iright->children[i] = iright->children[i + num];
|
| 486 | }
|
| 487 |
|
| 488 | // update position of children
|
| 489 | for (size_type i = 0; i < this->numElements - num + 1; ++i) {
|
| 490 | iright->children[i]->position = static_cast<field_index_type>(i);
|
| 491 | }
|
| 492 | }
|
| 493 |
|
| 494 | // update node sizes
|
| 495 | left->numElements += num;
|
| 496 | this->numElements -= num;
|
| 497 |
|
| 498 | #ifdef IS_PARALLEL
|
| 499 | left->lock.end_write();
|
| 500 | #endif
|
| 501 |
|
| 502 | // done
|
| 503 | return static_cast<int>(num);
|
| 504 | }
|
| 505 |
|
| 506 | #ifdef IS_PARALLEL
|
| 507 | left->lock.abort_write();
|
| 508 | #endif
|
| 509 | }
|
| 510 |
|
| 511 | // Option B) split node
|
| 512 | #ifdef IS_PARALLEL
|
| 513 | split(root, root_lock, idx, locked_nodes);
|
| 514 | #else
|
| 515 | split(root, root_lock, idx);
|
| 516 | #endif
|
| 517 | return 0; // = no re-balancing
|
| 518 | }
|
| 519 |
|
| 520 | private:
|
| 521 | /**
|
| 522 | * Inserts a new sibling into the parent of this node utilizing
|
| 523 | * the last key of this node as a separation key. (for internal
|
| 524 | * use only)
|
| 525 | *
|
| 526 | * @param root .. a pointer to the root-pointer of the containing tree
|
| 527 | * @param sibling .. the new right-sibling to be add to the parent node
|
| 528 | */
|
| 529 | #ifdef IS_PARALLEL
|
| 530 | void grow_parent(node** root, lock_type& root_lock, node* sibling, std::vector<node*>& locked_nodes) {
|
| 531 | assert(this->lock.is_write_locked());
|
| 532 | assert(!this->parent || this->parent->lock.is_write_locked());
|
| 533 | assert((this->parent != nullptr) || root_lock.is_write_locked());
|
| 534 | assert(this->isLeaf() || souffle::contains(locked_nodes, this));
|
| 535 | assert(!this->parent || souffle::contains(locked_nodes, const_cast<node*>(this->parent)));
|
| 536 | #else
|
| 537 | void grow_parent(node** root, lock_type& root_lock, node* sibling) {
|
| 538 | #endif
|
| 539 |
|
| 540 | if (this->parent == nullptr) {
|
| 541 | assert(*root == this);
|
| 542 |
|
| 543 | // create a new root node
|
| 544 | auto* new_root = new inner_node();
|
| 545 | new_root->numElements = 1;
|
| 546 | new_root->keys[0] = keys[this->numElements];
|
| 547 |
|
| 548 | new_root->children[0] = this;
|
| 549 | new_root->children[1] = sibling;
|
| 550 |
|
| 551 | // link this and the sibling node to new root
|
| 552 | this->parent = new_root;
|
| 553 | sibling->parent = new_root;
|
| 554 | sibling->position = 1;
|
| 555 |
|
| 556 | // switch root node
|
| 557 | *root = new_root;
|
| 558 |
|
| 559 | } else {
|
| 560 | // insert new element in parent element
|
| 561 | auto parent = this->parent;
|
| 562 | auto pos = this->position;
|
| 563 |
|
| 564 | #ifdef IS_PARALLEL
|
| 565 | parent->insert_inner(
|
| 566 | root, root_lock, pos, this, keys[this->numElements], sibling, locked_nodes);
|
| 567 | #else
|
| 568 | parent->insert_inner(root, root_lock, pos, this, keys[this->numElements], sibling);
|
| 569 | #endif
|
| 570 | }
|
| 571 | }
|
| 572 |
|
| 573 | /**
|
| 574 | * Inserts a new element into an inner node (for internal use only).
|
| 575 | *
|
| 576 | * @param root .. a pointer to the root-pointer of the containing tree
|
| 577 | * @param pos .. the position to insert the new key
|
| 578 | * @param key .. the key to insert
|
| 579 | * @param newNode .. the new right-child of the inserted key
|
| 580 | */
|
| 581 | #ifdef IS_PARALLEL
|
| 582 | void insert_inner(node** root, lock_type& root_lock, unsigned pos, node* predecessor, const Key& key,
|
| 583 | node* newNode, std::vector<node*>& locked_nodes) {
|
| 584 | assert(this->lock.is_write_locked());
|
| 585 | assert(souffle::contains(locked_nodes, this));
|
| 586 | #else
|
| 587 | void insert_inner(node** root, lock_type& root_lock, unsigned pos, node* predecessor, const Key& key,
|
| 588 | node* newNode) {
|
| 589 | #endif
|
| 590 |
|
| 591 | // check capacity
|
| 592 | if (this->numElements >= maxKeys) {
|
| 593 | #ifdef IS_PARALLEL
|
| 594 | assert(!this->parent || this->parent->lock.is_write_locked());
|
| 595 | assert((this->parent) || root_lock.is_write_locked());
|
| 596 | assert(!this->parent || souffle::contains(locked_nodes, const_cast<node*>(this->parent)));
|
| 597 | #endif
|
| 598 |
|
| 599 | // split this node
|
| 600 | #ifdef IS_PARALLEL
|
| 601 | pos -= rebalance_or_split(root, root_lock, pos, locked_nodes);
|
| 602 | #else
|
| 603 | pos -= rebalance_or_split(root, root_lock, pos);
|
| 604 | #endif
|
| 605 |
|
| 606 | // complete insertion within new sibling if necessary
|
| 607 | if (pos > this->numElements) {
|
| 608 | // correct position
|
| 609 | pos = pos - static_cast<unsigned int>(this->numElements) - 1;
|
| 610 |
|
| 611 | // get new sibling
|
| 612 | auto other = this->parent->getChild(this->position + 1);
|
| 613 |
|
| 614 | #ifdef IS_PARALLEL
|
| 615 | // make sure other side is write locked
|
| 616 | assert(other->lock.is_write_locked());
|
| 617 | assert(souffle::contains(locked_nodes, other));
|
| 618 |
|
| 619 | // search for new position (since other may have been altered in the meanwhile)
|
| 620 | size_type i = 0;
|
| 621 | for (; i <= other->numElements; ++i) {
|
| 622 | if (other->getChild(i) == predecessor) {
|
| 623 | break;
|
| 624 | }
|
| 625 | }
|
| 626 |
|
| 627 | pos = (i > static_cast<unsigned>(other->numElements)) ? 0 : static_cast<unsigned>(i);
|
| 628 | other->insert_inner(root, root_lock, pos, predecessor, key, newNode, locked_nodes);
|
| 629 | #else
|
| 630 | other->insert_inner(root, root_lock, pos, predecessor, key, newNode);
|
| 631 | #endif
|
| 632 | return;
|
| 633 | }
|
| 634 | }
|
| 635 |
|
| 636 | // move bigger keys one forward
|
| 637 | for (int i = static_cast<int>(this->numElements) - 1; i >= (int)pos; --i) {
|
| 638 | keys[i + 1] = keys[i];
|
| 639 | getChildren()[i + 2] = getChildren()[i + 1];
|
| 640 | ++getChildren()[i + 2]->position;
|
| 641 | }
|
| 642 |
|
| 643 | // ensure proper position
|
| 644 | assert(getChild(pos) == predecessor);
|
| 645 |
|
| 646 | // insert new element
|
| 647 | keys[pos] = key;
|
| 648 | getChildren()[pos + 1] = newNode;
|
| 649 | newNode->parent = this;
|
| 650 | newNode->position = static_cast<field_index_type>(pos) + 1;
|
| 651 | ++this->numElements;
|
| 652 | }
|
| 653 |
|
| 654 | public:
|
| 655 | /**
|
| 656 | * Prints a textual representation of this tree to the given output stream.
|
| 657 | * This feature is mainly intended for debugging and tuning purposes.
|
| 658 | *
|
| 659 | * @see btree::printTree
|
| 660 | */
|
| 661 | void printTree(std::ostream& out, const std::string& prefix) const {
|
| 662 | // print the header
|
| 663 | out << prefix << "@" << this << "[" << ((int)(this->position)) << "] - "
|
| 664 | << (this->inner ? "i" : "") << "node : " << this->numElements << "/" << maxKeys << " [";
|
| 665 |
|
| 666 | // print the keys
|
| 667 | for (unsigned i = 0; i < this->numElements; i++) {
|
| 668 | out << keys[i];
|
| 669 | if (i != this->numElements - 1) {
|
| 670 | out << ",";
|
| 671 | }
|
| 672 | }
|
| 673 | out << "]";
|
| 674 |
|
| 675 | // print references to children
|
| 676 | if (this->inner) {
|
| 677 | out << " - [";
|
| 678 | for (unsigned i = 0; i <= this->numElements; i++) {
|
| 679 | out << getChildren()[i];
|
| 680 | if (i != this->numElements) {
|
| 681 | out << ",";
|
| 682 | }
|
| 683 | }
|
| 684 | out << "]";
|
| 685 | }
|
| 686 |
|
| 687 | #ifdef IS_PARALLEL
|
| 688 | // print the lock state
|
| 689 | if (this->lock.is_write_locked()) {
|
| 690 | std::cout << " locked";
|
| 691 | }
|
| 692 | #endif
|
| 693 |
|
| 694 | out << "\n";
|
| 695 |
|
| 696 | // print the children recursively
|
| 697 | if (this->inner) {
|
| 698 | for (unsigned i = 0; i < this->numElements + 1; ++i) {
|
| 699 | static_cast<const inner_node*>(this)->children[i]->printTree(out, prefix + " ");
|
| 700 | }
|
| 701 | }
|
| 702 | }
|
| 703 |
|
| 704 | /**
|
| 705 | * A function decomposing the sub-tree rooted by this node into approximately equally
|
| 706 | * sized chunks. To minimize computational overhead, no strict load balance nor limit
|
| 707 | * on the number of actual chunks is given.
|
| 708 | *
|
| 709 | * @see btree::getChunks()
|
| 710 | *
|
| 711 | * @param res .. the list of chunks to be extended
|
| 712 | * @param num .. the number of chunks to be produced
|
| 713 | * @param begin .. the iterator to start the first chunk with
|
| 714 | * @param end .. the iterator to end the last chunk with
|
| 715 | * @return the handed in list of chunks extended by generated chunks
|
| 716 | */
|
| 717 | std::vector<chunk>& collectChunks(
|
| 718 | std::vector<chunk>& res, size_type num, const iterator& begin, const iterator& end) const {
|
| 719 | assert(num > 0);
|
| 720 |
|
| 721 | // special case: this node is empty
|
| 722 | if (isEmpty()) {
|
| 723 | if (begin != end) {
|
| 724 | res.push_back(chunk(begin, end));
|
| 725 | }
|
| 726 | return res;
|
| 727 | }
|
| 728 |
|
| 729 | // special case: a single chunk is requested
|
| 730 | if (num == 1) {
|
| 731 | res.push_back(chunk(begin, end));
|
| 732 | return res;
|
| 733 | }
|
| 734 |
|
| 735 | // cut-off
|
| 736 | if (this->isLeaf() || num < (this->numElements + 1)) {
|
| 737 | auto step = this->numElements / num;
|
| 738 | if (step == 0) {
|
| 739 | step = 1;
|
| 740 | }
|
| 741 |
|
| 742 | size_type i = 0;
|
| 743 |
|
| 744 | // the first chunk starts at the begin
|
| 745 | res.push_back(chunk(begin, iterator(this, static_cast<field_index_type>(step) - 1)));
|
| 746 |
|
| 747 | // split up the main part
|
| 748 | for (i = step - 1; i < this->numElements - step; i += step) {
|
| 749 | res.push_back(chunk(iterator(this, static_cast<field_index_type>(i)),
|
| 750 | iterator(this, static_cast<field_index_type>(i + step))));
|
| 751 | }
|
| 752 |
|
| 753 | // the last chunk runs to the end
|
| 754 | res.push_back(chunk(iterator(this, static_cast<field_index_type>(i)), end));
|
| 755 |
|
| 756 | // done
|
| 757 | return res;
|
| 758 | }
|
| 759 |
|
| 760 | // else: collect chunks of sub-set elements
|
| 761 |
|
| 762 | auto part = num / (this->numElements + 1);
|
| 763 | assert(part > 0);
|
| 764 | getChild(0)->collectChunks(res, part, begin, iterator(this, 0));
|
| 765 | for (size_type i = 1; i < this->numElements; i++) {
|
| 766 | getChild(i)->collectChunks(res, part, iterator(this, static_cast<field_index_type>(i - 1)),
|
| 767 | iterator(this, static_cast<field_index_type>(i)));
|
| 768 | }
|
| 769 | getChild(this->numElements)
|
| 770 | ->collectChunks(res, num - (part * this->numElements),
|
| 771 | iterator(this, static_cast<field_index_type>(this->numElements) - 1), end);
|
| 772 |
|
| 773 | // done
|
| 774 | return res;
|
| 775 | }
|
| 776 |
|
| 777 | /**
|
| 778 | * A function to verify the consistency of this node.
|
| 779 | *
|
| 780 | * @param root ... a reference to the root of the enclosing tree.
|
| 781 | * @return true if valid, false otherwise
|
| 782 | */
|
| 783 | template <typename Comp>
|
| 784 | bool check(Comp& comp, const node* root) const {
|
| 785 | bool valid = true;
|
| 786 |
|
| 787 | // check fill-state
|
| 788 | if (this->numElements > maxKeys) {
|
| 789 | std::cout << "Node with " << this->numElements << "/" << maxKeys << " encountered!\n";
|
| 790 | valid = false;
|
| 791 | }
|
| 792 |
|
| 793 | // check root state
|
| 794 | if (root == this) {
|
| 795 | if (this->parent != nullptr) {
|
| 796 | std::cout << "Root not properly linked!\n";
|
| 797 | valid = false;
|
| 798 | }
|
| 799 | } else {
|
| 800 | // check parent relation
|
| 801 | if (!this->parent) {
|
| 802 | std::cout << "Invalid null-parent!\n";
|
| 803 | valid = false;
|
| 804 | } else {
|
| 805 | if (this->parent->getChildren()[this->position] != this) {
|
| 806 | std::cout << "Parent reference invalid!\n";
|
| 807 | std::cout << " Node: " << this << "\n";
|
| 808 | std::cout << " Parent: " << this->parent << "\n";
|
| 809 | std::cout << " Position: " << ((int)this->position) << "\n";
|
| 810 | valid = false;
|
| 811 | }
|
| 812 |
|
| 813 | // check parent key
|
| 814 | if (valid && this->position != 0 &&
|
| 815 | !(comp(this->parent->keys[this->position - 1], keys[0]) < ((isSet) ? 0 : 1))) {
|
| 816 | std::cout << "Left parent key not lower bound!\n";
|
| 817 | std::cout << " Node: " << this << "\n";
|
| 818 | std::cout << " Parent: " << this->parent << "\n";
|
| 819 | std::cout << " Position: " << ((int)this->position) << "\n";
|
| 820 | std::cout << " Key: " << (this->parent->keys[this->position]) << "\n";
|
| 821 | std::cout << " Lower: " << (keys[0]) << "\n";
|
| 822 | valid = false;
|
| 823 | }
|
| 824 |
|
| 825 | // check parent key
|
| 826 | if (valid && this->position != this->parent->numElements &&
|
| 827 | !(comp(keys[this->numElements - 1], this->parent->keys[this->position]) <
|
| 828 | ((isSet) ? 0 : 1))) {
|
| 829 | std::cout << "Right parent key not lower bound!\n";
|
| 830 | std::cout << " Node: " << this << "\n";
|
| 831 | std::cout << " Parent: " << this->parent << "\n";
|
| 832 | std::cout << " Position: " << ((int)this->position) << "\n";
|
| 833 | std::cout << " Key: " << (this->parent->keys[this->position]) << "\n";
|
| 834 | std::cout << " Upper: " << (keys[0]) << "\n";
|
| 835 | valid = false;
|
| 836 | }
|
| 837 | }
|
| 838 | }
|
| 839 |
|
| 840 | // check element order
|
| 841 | if (this->numElements > 0) {
|
| 842 | for (unsigned i = 0; i < this->numElements - 1; i++) {
|
| 843 | if (valid && !(comp(keys[i], keys[i + 1]) < ((isSet) ? 0 : 1))) {
|
| 844 | std::cout << "Element order invalid!\n";
|
| 845 | std::cout << " @" << this << " key " << i << " is " << keys[i] << " vs "
|
| 846 | << keys[i + 1] << "\n";
|
| 847 | valid = false;
|
| 848 | }
|
| 849 | }
|
| 850 | }
|
| 851 |
|
| 852 | // check state of sub-nodes
|
| 853 | if (this->inner) {
|
| 854 | for (unsigned i = 0; i <= this->numElements; i++) {
|
| 855 | valid &= getChildren()[i]->check(comp, root);
|
| 856 | }
|
| 857 | }
|
| 858 |
|
| 859 | return valid;
|
| 860 | }
|
| 861 | }; // struct node
|
| 862 |
|
| 863 | /**
|
| 864 | * The data type representing inner nodes of the b-tree. It extends
|
| 865 | * the generic implementation of a node by the storage locations
|
| 866 | * of child pointers.
|
| 867 | */
|
| 868 | struct inner_node : public node {
|
| 869 | // references to child nodes owned by this node
|
| 870 | node* children[node::maxKeys + 1];
|
| 871 |
|
| 872 | // a simple default constructor initializing member fields
|
| 873 | inner_node() : node(true) {}
|
| 874 |
|
| 875 | // clear up child nodes recursively
|
| 876 | ~inner_node() {
|
| 877 | for (unsigned i = 0; i <= this->numElements; ++i) {
|
| 878 | if (children[i] != nullptr) {
|
| 879 | if (children[i]->isLeaf()) {
|
| 880 | delete static_cast<leaf_node*>(children[i]);
|
| 881 | } else {
|
| 882 | delete static_cast<inner_node*>(children[i]);
|
| 883 | }
|
| 884 | }
|
| 885 | }
|
| 886 | }
|
| 887 | };
|
| 888 |
|
| 889 | /**
|
| 890 | * The data type representing leaf nodes of the b-tree. It does not
|
| 891 | * add any capabilities to the generic node type.
|
| 892 | */
|
| 893 | struct leaf_node : public node {
|
| 894 | // a simple default constructor initializing member fields
|
| 895 | leaf_node() : node(false) {}
|
| 896 | };
|
| 897 |
|
| 898 | // ------------------- iterators ------------------------
|
| 899 |
|
| 900 | public:
|
| 901 | /**
|
| 902 | * The iterator type to be utilized for scanning through btree instances.
|
| 903 | */
|
| 904 | class iterator {
|
| 905 | // a pointer to the node currently referred to
|
| 906 | node const* cur;
|
| 907 |
|
| 908 | // the index of the element currently addressed within the referenced node
|
| 909 | field_index_type pos = 0;
|
| 910 |
|
| 911 | public:
|
| 912 | using iterator_category = std::forward_iterator_tag;
|
| 913 | using value_type = Key;
|
| 914 | using difference_type = ptrdiff_t;
|
| 915 | using pointer = value_type*;
|
| 916 | using reference = value_type&;
|
| 917 |
|
| 918 | // default constructor -- creating an end-iterator
|
| 919 | iterator() : cur(nullptr) {}
|
| 920 |
|
| 921 | // creates an iterator referencing a specific element within a given node
|
| 922 | iterator(node const* cur, field_index_type pos) : cur(cur), pos(pos) {}
|
| 923 |
|
| 924 | // a copy constructor
|
| 925 | iterator(const iterator& other) : cur(other.cur), pos(other.pos) {}
|
| 926 |
|
| 927 | // an assignment operator
|
| 928 | iterator& operator=(const iterator& other) {
|
| 929 | cur = other.cur;
|
| 930 | pos = other.pos;
|
| 931 | return *this;
|
| 932 | }
|
| 933 |
|
| 934 | // the equality operator as required by the iterator concept
|
| 935 | bool operator==(const iterator& other) const {
|
| 936 | return cur == other.cur && pos == other.pos;
|
| 937 | }
|
| 938 |
|
| 939 | // the not-equality operator as required by the iterator concept
|
| 940 | bool operator!=(const iterator& other) const {
|
| 941 | return !(*this == other);
|
| 942 | }
|
| 943 |
|
| 944 | // the deref operator as required by the iterator concept
|
| 945 | const Key& operator*() const {
|
| 946 | return cur->keys[pos];
|
| 947 | }
|
| 948 |
|
| 949 | // the increment operator as required by the iterator concept
|
| 950 | iterator& operator++() {
|
| 951 | // the quick mode -- if in a leaf and there are elements left
|
| 952 | if (cur->isLeaf() && ++pos < cur->getNumElements()) {
|
| 953 | return *this;
|
| 954 | }
|
| 955 |
|
| 956 | // otherwise it is a bit more tricky
|
| 957 |
|
| 958 | // A) currently in an inner node => go to the left-most child
|
| 959 | if (cur->isInner()) {
|
| 960 | cur = cur->getChildren()[pos + 1];
|
| 961 | while (!cur->isLeaf()) {
|
| 962 | cur = cur->getChildren()[0];
|
| 963 | }
|
| 964 | pos = 0;
|
| 965 |
|
| 966 | // nodes may be empty due to biased insertion
|
| 967 | if (!cur->isEmpty()) {
|
| 968 | return *this;
|
| 969 | }
|
| 970 | }
|
| 971 |
|
| 972 | // B) we are at the right-most element of a leaf => go to next inner node
|
| 973 | assert(cur->isLeaf());
|
| 974 | assert(pos == cur->getNumElements());
|
| 975 |
|
| 976 | while (cur != nullptr && pos == cur->getNumElements()) {
|
| 977 | pos = cur->getPositionInParent();
|
| 978 | cur = cur->getParent();
|
| 979 | }
|
| 980 | return *this;
|
| 981 | }
|
| 982 |
|
| 983 | // prints a textual representation of this iterator to the given stream (mainly for debugging)
|
| 984 | void print(std::ostream& out = std::cout) const {
|
| 985 | out << cur << "[" << (int)pos << "]";
|
| 986 | }
|
| 987 | };
|
| 988 |
|
| 989 | /**
|
| 990 | * A collection of operation hints speeding up some of the involved operations
|
| 991 | * by exploiting temporal locality.
|
| 992 | */
|
| 993 | template <unsigned size = 1>
|
| 994 | struct btree_operation_hints {
|
| 995 | using node_cache = LRUCache<node*, size>;
|
| 996 |
|
| 997 | // the node where the last insertion terminated
|
| 998 | node_cache last_insert;
|
| 999 |
|
| 1000 | // the node where the last find-operation terminated
|
| 1001 | node_cache last_find_end;
|
| 1002 |
|
| 1003 | // the node where the last lower-bound operation terminated
|
| 1004 | node_cache last_lower_bound_end;
|
| 1005 |
|
| 1006 | // the node where the last upper-bound operation terminated
|
| 1007 | node_cache last_upper_bound_end;
|
| 1008 |
|
| 1009 | // default constructor
|
| 1010 | btree_operation_hints() = default;
|
| 1011 |
|
| 1012 | // resets all hints (to be triggered e.g. when deleting nodes)
|
| 1013 | void clear() {
|
| 1014 | last_insert.clear(nullptr);
|
| 1015 | last_find_end.clear(nullptr);
|
| 1016 | last_lower_bound_end.clear(nullptr);
|
| 1017 | last_upper_bound_end.clear(nullptr);
|
| 1018 | }
|
| 1019 | };
|
| 1020 |
|
| 1021 | using operation_hints = btree_operation_hints<1>;
|
| 1022 |
|
protected:
#ifdef IS_PARALLEL
    // a pointer to the root node of this tree (nullptr while the tree is empty)
    // NOTE(review): `volatile` by itself provides neither atomicity nor ordering;
    // root updates appear to be coordinated through root_lock -- confirm
    node* volatile root;

    // a lock to synchronize update operations on the root pointer
    lock_type root_lock;
#else
    // a pointer to the root node of this tree (nullptr while the tree is empty)
    node* root;

    // required to not duplicate too much code: the sequential insert still
    // hands this lock to rebalance_or_split
    lock_type root_lock;
#endif

    // a pointer to the left-most node of this tree (initial node for iteration)
    leaf_node* leftmost;
|
| 1040 |
|
/* -------------- operator hint statistics ----------------- */

// an aggregation of statistical values of the hint utilization;
// one hit/miss counter per hinted operation
struct hint_statistics {
    // the counter for insertion operations
    CacheAccessCounter inserts;

    // the counter for contains operations (updated by find)
    CacheAccessCounter contains;

    // the counter for lower_bound operations
    CacheAccessCounter lower_bound;

    // the counter for upper_bound operations
    CacheAccessCounter upper_bound;
};

// the hint statistic of this b-tree instance; mutable so that const
// lookups (find, lower_bound, upper_bound) can record hits and misses
mutable hint_statistics hint_stats;
|
| 1060 |
|
public:
    // the maximum number of keys stored per node (mirrors node::maxKeys)
    static constexpr std::size_t max_keys_per_node = node::maxKeys;
|
| 1064 |
|
| 1065 | // -- ctors / dtors --
|
| 1066 |
|
| 1067 | // the default constructor creating an empty tree
|
| 1068 | btree(Comparator comp = Comparator(), WeakComparator weak_comp = WeakComparator())
|
| 1069 | : comp(std::move(comp)), weak_comp(std::move(weak_comp)), root(nullptr), leftmost(nullptr) {}
|
| 1070 |
|
| 1071 | // a constructor creating a tree from the given iterator range
|
| 1072 | template <typename Iter>
|
| 1073 | btree(const Iter& a, const Iter& b) : root(nullptr), leftmost(nullptr) {
|
| 1074 | insert(a, b);
|
| 1075 | }
|
| 1076 |
|
| 1077 | // a move constructor
|
| 1078 | btree(btree&& other)
|
| 1079 | : comp(other.comp), weak_comp(other.weak_comp), root(other.root), leftmost(other.leftmost) {
|
| 1080 | other.root = nullptr;
|
| 1081 | other.leftmost = nullptr;
|
| 1082 | }
|
| 1083 |
|
| 1084 | // a copy constructor
|
| 1085 | btree(const btree& set) : comp(set.comp), weak_comp(set.weak_comp), root(nullptr), leftmost(nullptr) {
|
| 1086 | // use assignment operator for a deep copy
|
| 1087 | *this = set;
|
| 1088 | }
|
| 1089 |
|
| 1090 | protected:
|
| 1091 | /**
|
| 1092 | * An internal constructor enabling the specific creation of a tree
|
| 1093 | * based on internal parameters.
|
| 1094 | */
|
| 1095 | btree(size_type /* size */, node* root, leaf_node* leftmost) : root(root), leftmost(leftmost) {}
|
| 1096 |
|
| 1097 | public:
|
| 1098 | // the destructor freeing all contained nodes
|
| 1099 | ~btree() {
|
| 1100 | clear();
|
| 1101 | }
|
| 1102 |
|
| 1103 | // -- mutators and observers --
|
| 1104 |
|
| 1105 | // emptiness check
|
| 1106 | bool empty() const {
|
| 1107 | return root == nullptr;
|
| 1108 | }
|
| 1109 |
|
| 1110 | // determines the number of elements in this tree
|
| 1111 | size_type size() const {
|
| 1112 | return (root) ? root->countEntries() : 0;
|
| 1113 | }
|
| 1114 |
|
| 1115 | /**
|
| 1116 | * Inserts the given key into this tree.
|
| 1117 | */
|
| 1118 | bool insert(const Key& k) {
|
| 1119 | operation_hints hints;
|
| 1120 | return insert(k, hints);
|
| 1121 | }
|
| 1122 |
|
/**
 * Inserts the given key into this tree.
 *
 * Two implementations are selected at compile time: with IS_PARALLEL an
 * optimistic, lease-validating variant that restarts (by calling itself)
 * whenever a concurrent modification is detected; otherwise a plain
 * sequential variant.
 *
 * @param k .. the key to be inserted
 * @param hints .. operation hints; `last_insert` is consulted for a starting
 *          node and updated with the node that received the key
 * @return true if the key was added; for sets holding a weakly-equal
 *          element, the result of update() when Comparator and
 *          WeakComparator differ, and false otherwise
 */
bool insert(const Key& k, operation_hints& hints) {
#ifdef IS_PARALLEL

    // special handling for inserting first element
    while (root == nullptr) {
        // try obtaining root-lock
        if (!root_lock.try_start_write()) {
            // somebody else was faster => re-check
            continue;
        }

        // check loop condition again
        if (root != nullptr) {
            // somebody else was faster => normal insert
            root_lock.end_write();
            break;
        }

        // create new node
        leftmost = new leaf_node();
        leftmost->numElements = 1;
        leftmost->keys[0] = k;
        root = leftmost;

        // operation complete => we can release the root lock
        root_lock.end_write();

        hints.last_insert.access(leftmost);

        return true;
    }

    // insert using iterative implementation

    // the node currently inspected; stays nullptr until a hint or the root is chosen
    node* cur = nullptr;

    // test last insert hints
    // the read lease held on `cur`
    lock_type::Lease cur_lease;

    auto checkHint = [&](node* last_insert) {
        // ignore null pointer
        if (!last_insert) return false;
        // get a read lease on indicated node
        auto hint_lease = last_insert->lock.start_read();
        // check whether it covers the key
        if (!weak_covers(last_insert, k)) return false;
        // and if there was no concurrent modification
        if (!last_insert->lock.validate(hint_lease)) return false;
        // use hinted location
        cur = last_insert;
        // and keep lease
        cur_lease = hint_lease;
        // we found a hit
        return true;
    };

    if (hints.last_insert.any(checkHint)) {
        // register this as a hit
        hint_stats.inserts.addHit();
    } else {
        // register this as a miss
        hint_stats.inserts.addMiss();
    }

    // if there is no valid hint ..
    if (!cur) {
        // repeat until the root pointer was read without a concurrent root update
        do {
            // get root - access lock
            auto root_lease = root_lock.start_read();

            // start with root
            cur = root;

            // get lease of the next node to be accessed
            cur_lease = cur->lock.start_read();

            // check validity of root pointer
            if (root_lock.end_read(root_lease)) {
                break;
            }

        } while (true);
    }

    while (true) {
        // handle inner nodes
        if (cur->inner) {
            auto a = &(cur->keys[0]);
            auto b = &(cur->keys[cur->numElements]);

            auto pos = search.lower_bound(k, a, b, weak_comp);
            auto idx = pos - a;

            // early exit for sets
            if (isSet && pos != b && weak_equal(*pos, k)) {
                // validate results
                if (!cur->lock.validate(cur_lease)) {
                    // start over again
                    return insert(k, hints);
                }

                // update provenance information
                if (typeid(Comparator) != typeid(WeakComparator)) {
                    if (!cur->lock.try_upgrade_to_write(cur_lease)) {
                        // start again
                        return insert(k, hints);
                    }
                    bool updated = update(*pos, k);
                    cur->lock.end_write();
                    return updated;
                }

                // we found the element => no check of lock necessary
                return false;
            }

            // get next pointer
            auto next = cur->getChild(idx);

            // get lease on next level
            auto next_lease = next->lock.start_read();

            // check whether there was a write
            if (!cur->lock.end_read(cur_lease)) {
                // start over
                return insert(k, hints);
            }

            // go to next
            cur = next;

            // move on lease
            cur_lease = next_lease;

            continue;
        }

        // the rest is for leaf nodes
        assert(!cur->inner);

        // -- insert node in leaf node --

        auto a = &(cur->keys[0]);
        auto b = &(cur->keys[cur->numElements]);

        auto pos = search.upper_bound(k, a, b, weak_comp);
        auto idx = pos - a;

        // early exit for sets
        if (isSet && pos != a && weak_equal(*(pos - 1), k)) {
            // validate result
            if (!cur->lock.validate(cur_lease)) {
                // start over again
                return insert(k, hints);
            }

            // update provenance information
            if (typeid(Comparator) != typeid(WeakComparator)) {
                if (!cur->lock.try_upgrade_to_write(cur_lease)) {
                    // start again
                    return insert(k, hints);
                }
                bool updated = update(*(pos - 1), k);
                cur->lock.end_write();
                return updated;
            }

            // we found the element => done
            return false;
        }

        // upgrade to write-permission
        if (!cur->lock.try_upgrade_to_write(cur_lease)) {
            // something has changed => restart
            // (the leaf is recorded as hint so that the retry can start from it)
            hints.last_insert.access(cur);
            return insert(k, hints);
        }

        if (cur->numElements >= node::maxKeys) {
            // -- lock parents --
            // walk upwards, write-locking every ancestor that may be modified by the split
            auto priv = cur;
            auto parent = priv->parent;
            std::vector<node*> parents;
            do {
                if (parent) {
                    parent->lock.start_write();
                    while (true) {
                        // check whether parent is correct
                        if (parent == priv->parent) {
                            break;
                        }
                        // switch parent
                        parent->lock.abort_write();
                        parent = priv->parent;
                        parent->lock.start_write();
                    }
                } else {
                    // lock root lock => since cur is root
                    root_lock.start_write();
                }

                // record locked node (a nullptr entry stands for the root lock)
                parents.push_back(parent);

                // stop at "sphere of influence"
                if (!parent || !parent->isFull()) {
                    break;
                }

                // go one step higher
                priv = parent;
                parent = parent->parent;

            } while (true);

            // split this node
            // (presumably the returned value is the amount by which the insertion
            // index moved to the left -- confirm against rebalance_or_split)
            auto old_root = root;
            idx -= cur->rebalance_or_split(
                    const_cast<node**>(&root), root_lock, static_cast<int>(idx), parents);

            // release parent lock
            for (auto it = parents.rbegin(); it != parents.rend(); ++it) {
                auto parent = *it;

                // release this lock
                if (parent) {
                    parent->lock.end_write();
                } else {
                    // end the write only if the root pointer actually changed
                    if (old_root != root) {
                        root_lock.end_write();
                    } else {
                        root_lock.abort_write();
                    }
                }
            }

            // insert element in right fragment
            if (((size_type)idx) > cur->numElements) {
                // release current lock
                cur->lock.end_write();

                // insert in sibling
                return insert(k, hints);
            }
        }

        // ok - no split necessary
        assert(cur->numElements < node::maxKeys && "Split required!");

        // move keys
        for (int j = static_cast<int>(cur->numElements); j > static_cast<int>(idx); --j) {
            cur->keys[j] = cur->keys[j - 1];
        }

        // insert new element
        cur->keys[idx] = k;
        cur->numElements++;

        // release lock on current node
        cur->lock.end_write();

        // remember last insertion position
        hints.last_insert.access(cur);
        return true;
    }

#else
    // special handling for inserting first element
    if (empty()) {
        // create new node
        leftmost = new leaf_node();
        leftmost->numElements = 1;
        leftmost->keys[0] = k;
        root = leftmost;

        hints.last_insert.access(leftmost);

        return true;
    }

    // insert using iterative implementation
    node* cur = root;

    // start from a hinted node instead of the root if it weakly covers the key
    auto checkHints = [&](node* last_insert) {
        if (!last_insert) return false;
        if (!weak_covers(last_insert, k)) return false;
        cur = last_insert;
        return true;
    };

    // test last insert
    if (hints.last_insert.any(checkHints)) {
        hint_stats.inserts.addHit();
    } else {
        hint_stats.inserts.addMiss();
    }

    while (true) {
        // handle inner nodes
        if (cur->inner) {
            auto a = &(cur->keys[0]);
            auto b = &(cur->keys[cur->numElements]);

            auto pos = search.lower_bound(k, a, b, weak_comp);
            auto idx = pos - a;

            // early exit for sets
            if (isSet && pos != b && weak_equal(*pos, k)) {
                // update provenance information
                if (typeid(Comparator) != typeid(WeakComparator)) {
                    return update(*pos, k);
                }

                return false;
            }

            cur = cur->getChild(idx);
            continue;
        }

        // the rest is for leaf nodes
        assert(!cur->inner);

        // -- insert node in leaf node --

        auto a = &(cur->keys[0]);
        auto b = &(cur->keys[cur->numElements]);

        auto pos = search.upper_bound(k, a, b, weak_comp);
        auto idx = pos - a;

        // early exit for sets
        if (isSet && pos != a && weak_equal(*(pos - 1), k)) {
            // update provenance information
            if (typeid(Comparator) != typeid(WeakComparator)) {
                return update(*(pos - 1), k);
            }

            return false;
        }

        if (cur->numElements >= node::maxKeys) {
            // split this node
            // (presumably the returned value is the amount by which the insertion
            // index moved to the left -- confirm against rebalance_or_split)
            idx -= cur->rebalance_or_split(&root, root_lock, static_cast<int>(idx));

            // insert element in right fragment
            if (((size_type)idx) > cur->numElements) {
                // re-base the index and continue in the right neighbour of cur
                idx -= cur->numElements + 1;
                cur = cur->parent->getChild(cur->position + 1);
            }
        }

        // ok - no split necessary
        assert(cur->numElements < node::maxKeys && "Split required!");

        // move keys
        for (int j = static_cast<int>(cur->numElements); j > idx; --j) {
            cur->keys[j] = cur->keys[j - 1];
        }

        // insert new element
        cur->keys[idx] = k;
        cur->numElements++;

        // remember last insertion position
        hints.last_insert.access(cur);

        return true;
    }
#endif
}
|
| 1497 |
|
| 1498 | /**
|
| 1499 | * Inserts the given range of elements into this tree.
|
| 1500 | */
|
| 1501 | template <typename Iter>
|
| 1502 | void insert(const Iter& a, const Iter& b) {
|
| 1503 | // TODO: improve this beyond a naive insert
|
| 1504 | operation_hints hints;
|
| 1505 | // a naive insert so far .. seems to work fine
|
| 1506 | for (auto it = a; it != b; ++it) {
|
| 1507 | // use insert with hint
|
| 1508 | insert(*it, hints);
|
| 1509 | }
|
| 1510 | }
|
| 1511 |
|
| 1512 | // Obtains an iterator referencing the first element of the tree.
|
| 1513 | iterator begin() const {
|
| 1514 | return iterator(leftmost, 0);
|
| 1515 | }
|
| 1516 |
|
| 1517 | // Obtains an iterator referencing the position after the last element of the tree.
|
| 1518 | iterator end() const {
|
| 1519 | return iterator();
|
| 1520 | }
|
| 1521 |
|
| 1522 | /**
|
| 1523 | * Partitions the full range of this set into up to a given number of chunks.
|
| 1524 | * The chunks will cover approximately the same number of elements. Also, the
|
| 1525 | * number of chunks will only approximate the desired number of chunks.
|
| 1526 | *
|
| 1527 | * @param num .. the number of chunks requested
|
| 1528 | * @return a list of chunks partitioning this tree
|
| 1529 | */
|
| 1530 | std::vector<chunk> partition(size_type num) const {
|
| 1531 | return getChunks(num);
|
| 1532 | }
|
| 1533 |
|
| 1534 | std::vector<chunk> getChunks(size_type num) const {
|
| 1535 | std::vector<chunk> res;
|
| 1536 | if (empty()) {
|
| 1537 | return res;
|
| 1538 | }
|
| 1539 | return root->collectChunks(res, num, begin(), end());
|
| 1540 | }
|
| 1541 |
|
| 1542 | /**
|
| 1543 | * Determines whether the given element is a member of this tree.
|
| 1544 | */
|
| 1545 | bool contains(const Key& k) const {
|
| 1546 | operation_hints hints;
|
| 1547 | return contains(k, hints);
|
| 1548 | }
|
| 1549 |
|
| 1550 | /**
|
| 1551 | * Determines whether the given element is a member of this tree.
|
| 1552 | */
|
| 1553 | bool contains(const Key& k, operation_hints& hints) const {
|
| 1554 | return find(k, hints) != end();
|
| 1555 | }
|
| 1556 |
|
| 1557 | /**
|
| 1558 | * Locates the given key within this tree and returns an iterator
|
| 1559 | * referencing its position. If not found, an end-iterator will be returned.
|
| 1560 | */
|
| 1561 | iterator find(const Key& k) const {
|
| 1562 | operation_hints hints;
|
| 1563 | return find(k, hints);
|
| 1564 | }
|
| 1565 |
|
| 1566 | /**
|
| 1567 | * Locates the given key within this tree and returns an iterator
|
| 1568 | * referencing its position. If not found, an end-iterator will be returned.
|
| 1569 | */
|
| 1570 | iterator find(const Key& k, operation_hints& hints) const {
|
| 1571 | if (empty()) {
|
| 1572 | return end();
|
| 1573 | }
|
| 1574 |
|
| 1575 | node* cur = root;
|
| 1576 |
|
| 1577 | auto checkHints = [&](node* last_find_end) {
|
| 1578 | if (!last_find_end) return false;
|
| 1579 | if (!covers(last_find_end, k)) return false;
|
| 1580 | cur = last_find_end;
|
| 1581 | return true;
|
| 1582 | };
|
| 1583 |
|
| 1584 | // test last location searched (temporal locality)
|
| 1585 | if (hints.last_find_end.any(checkHints)) {
|
| 1586 | // register it as a hit
|
| 1587 | hint_stats.contains.addHit();
|
| 1588 | } else {
|
| 1589 | // register it as a miss
|
| 1590 | hint_stats.contains.addMiss();
|
| 1591 | }
|
| 1592 |
|
| 1593 | // an iterative implementation (since 2/7 faster than recursive)
|
| 1594 |
|
| 1595 | while (true) {
|
| 1596 | auto a = &(cur->keys[0]);
|
| 1597 | auto b = &(cur->keys[cur->numElements]);
|
| 1598 |
|
| 1599 | auto pos = search(k, a, b, comp);
|
| 1600 |
|
| 1601 | if (pos < b && equal(*pos, k)) {
|
| 1602 | hints.last_find_end.access(cur);
|
| 1603 | return iterator(cur, static_cast<field_index_type>(pos - a));
|
| 1604 | }
|
| 1605 |
|
| 1606 | if (!cur->inner) {
|
| 1607 | hints.last_find_end.access(cur);
|
| 1608 | return end();
|
| 1609 | }
|
| 1610 |
|
| 1611 | // continue search in child node
|
| 1612 | cur = cur->getChild(pos - a);
|
| 1613 | }
|
| 1614 | }
|
| 1615 |
|
| 1616 | /**
|
| 1617 | * Obtains a lower boundary for the given key -- hence an iterator referencing
|
| 1618 | * the smallest value that is not less the given key. If there is no such element,
|
| 1619 | * an end-iterator will be returned.
|
| 1620 | */
|
| 1621 | iterator lower_bound(const Key& k) const {
|
| 1622 | operation_hints hints;
|
| 1623 | return lower_bound(k, hints);
|
| 1624 | }
|
| 1625 |
|
| 1626 | /**
|
| 1627 | * Obtains a lower boundary for the given key -- hence an iterator referencing
|
| 1628 | * the smallest value that is not less the given key. If there is no such element,
|
| 1629 | * an end-iterator will be returned.
|
| 1630 | */
|
| 1631 | iterator lower_bound(const Key& k, operation_hints& hints) const {
|
| 1632 | if (empty()) {
|
| 1633 | return end();
|
| 1634 | }
|
| 1635 |
|
| 1636 | node* cur = root;
|
| 1637 |
|
| 1638 | auto checkHints = [&](node* last_lower_bound_end) {
|
| 1639 | if (!last_lower_bound_end) return false;
|
| 1640 | if (!covers(last_lower_bound_end, k)) return false;
|
| 1641 | cur = last_lower_bound_end;
|
| 1642 | return true;
|
| 1643 | };
|
| 1644 |
|
| 1645 | // test last searched node
|
| 1646 | if (hints.last_lower_bound_end.any(checkHints)) {
|
| 1647 | hint_stats.lower_bound.addHit();
|
| 1648 | } else {
|
| 1649 | hint_stats.lower_bound.addMiss();
|
| 1650 | }
|
| 1651 |
|
| 1652 | iterator res = end();
|
| 1653 | while (true) {
|
| 1654 | auto a = &(cur->keys[0]);
|
| 1655 | auto b = &(cur->keys[cur->numElements]);
|
| 1656 |
|
| 1657 | auto pos = search.lower_bound(k, a, b, comp);
|
| 1658 | auto idx = static_cast<field_index_type>(pos - a);
|
| 1659 |
|
| 1660 | if (!cur->inner) {
|
| 1661 | hints.last_lower_bound_end.access(cur);
|
| 1662 | return (pos != b) ? iterator(cur, idx) : res;
|
| 1663 | }
|
| 1664 |
|
| 1665 | if (isSet && pos != b && equal(*pos, k)) {
|
| 1666 | return iterator(cur, idx);
|
| 1667 | }
|
| 1668 |
|
| 1669 | if (pos != b) {
|
| 1670 | res = iterator(cur, idx);
|
| 1671 | }
|
| 1672 |
|
| 1673 | cur = cur->getChild(idx);
|
| 1674 | }
|
| 1675 | }
|
| 1676 |
|
| 1677 | /**
|
| 1678 | * Obtains an upper boundary for the given key -- hence an iterator referencing
|
| 1679 | * the first element that the given key is less than the referenced value. If
|
| 1680 | * there is no such element, an end-iterator will be returned.
|
| 1681 | */
|
| 1682 | iterator upper_bound(const Key& k) const {
|
| 1683 | operation_hints hints;
|
| 1684 | return upper_bound(k, hints);
|
| 1685 | }
|
| 1686 |
|
| 1687 | /**
|
| 1688 | * Obtains an upper boundary for the given key -- hence an iterator referencing
|
| 1689 | * the first element that the given key is less than the referenced value. If
|
| 1690 | * there is no such element, an end-iterator will be returned.
|
| 1691 | */
|
| 1692 | iterator upper_bound(const Key& k, operation_hints& hints) const {
|
| 1693 | if (empty()) {
|
| 1694 | return end();
|
| 1695 | }
|
| 1696 |
|
| 1697 | node* cur = root;
|
| 1698 |
|
| 1699 | auto checkHints = [&](node* last_upper_bound_end) {
|
| 1700 | if (!last_upper_bound_end) return false;
|
| 1701 | if (!coversUpperBound(last_upper_bound_end, k)) return false;
|
| 1702 | cur = last_upper_bound_end;
|
| 1703 | return true;
|
| 1704 | };
|
| 1705 |
|
| 1706 | // test last search node
|
| 1707 | if (hints.last_upper_bound_end.any(checkHints)) {
|
| 1708 | hint_stats.upper_bound.addHit();
|
| 1709 | } else {
|
| 1710 | hint_stats.upper_bound.addMiss();
|
| 1711 | }
|
| 1712 |
|
| 1713 | iterator res = end();
|
| 1714 | while (true) {
|
| 1715 | auto a = &(cur->keys[0]);
|
| 1716 | auto b = &(cur->keys[cur->numElements]);
|
| 1717 |
|
| 1718 | auto pos = search.upper_bound(k, a, b, comp);
|
| 1719 | auto idx = static_cast<field_index_type>(pos - a);
|
| 1720 |
|
| 1721 | if (!cur->inner) {
|
| 1722 | hints.last_upper_bound_end.access(cur);
|
| 1723 | return (pos != b) ? iterator(cur, idx) : res;
|
| 1724 | }
|
| 1725 |
|
| 1726 | if (pos != b) {
|
| 1727 | res = iterator(cur, idx);
|
| 1728 | }
|
| 1729 |
|
| 1730 | cur = cur->getChild(idx);
|
| 1731 | }
|
| 1732 | }
|
| 1733 |
|
| 1734 | /**
|
| 1735 | * Clears this tree.
|
| 1736 | */
|
| 1737 | void clear() {
|
| 1738 | if (root != nullptr) {
|
| 1739 | if (root->isLeaf()) {
|
| 1740 | delete static_cast<leaf_node*>(root);
|
| 1741 | } else {
|
| 1742 | delete static_cast<inner_node*>(root);
|
| 1743 | }
|
| 1744 | }
|
| 1745 | root = nullptr;
|
| 1746 | leftmost = nullptr;
|
| 1747 | }
|
| 1748 |
|
| 1749 | /**
|
| 1750 | * Swaps the content of this tree with the given tree. This
|
| 1751 | * is a much more efficient operation than creating a copy and
|
| 1752 | * realizing the swap utilizing assignment operations.
|
| 1753 | */
|
| 1754 | void swap(btree& other) {
|
| 1755 | // swap the content
|
| 1756 | std::swap(root, other.root);
|
| 1757 | std::swap(leftmost, other.leftmost);
|
| 1758 | }
|
| 1759 |
|
| 1760 | // Implementation of the assignment operation for trees.
|
| 1761 | btree& operator=(const btree& other) {
|
| 1762 | // check identity
|
| 1763 | if (this == &other) {
|
| 1764 | return *this;
|
| 1765 | }
|
| 1766 |
|
| 1767 | // create a deep-copy of the content of the other tree
|
| 1768 | // shortcut for empty sets
|
| 1769 | if (other.empty()) {
|
| 1770 | return *this;
|
| 1771 | }
|
| 1772 |
|
| 1773 | // clone content (deep copy)
|
| 1774 | root = other.root->clone();
|
| 1775 |
|
| 1776 | // update leftmost reference
|
| 1777 | auto tmp = root;
|
| 1778 | while (!tmp->isLeaf()) {
|
| 1779 | tmp = tmp->getChild(0);
|
| 1780 | }
|
| 1781 | leftmost = static_cast<leaf_node*>(tmp);
|
| 1782 |
|
| 1783 | // done
|
| 1784 | return *this;
|
| 1785 | }
|
| 1786 |
|
| 1787 | // Implementation of an equality operation for trees.
|
| 1788 | bool operator==(const btree& other) const {
|
| 1789 | // check identity
|
| 1790 | if (this == &other) {
|
| 1791 | return true;
|
| 1792 | }
|
| 1793 |
|
| 1794 | // check size
|
| 1795 | if (size() != other.size()) {
|
| 1796 | return false;
|
| 1797 | }
|
| 1798 | if (size() < other.size()) {
|
| 1799 | return other == *this;
|
| 1800 | }
|
| 1801 |
|
| 1802 | // check content
|
| 1803 | for (const auto& key : other) {
|
| 1804 | if (!contains(key)) {
|
| 1805 | return false;
|
| 1806 | }
|
| 1807 | }
|
| 1808 | return true;
|
| 1809 | }
|
| 1810 |
|
| 1811 | // Implementation of an inequality operation for trees.
|
| 1812 | bool operator!=(const btree& other) const {
|
| 1813 | return !(*this == other);
|
| 1814 | }
|
| 1815 |
|
| 1816 | // -- for debugging --
|
| 1817 |
|
| 1818 | // Determines the number of levels contained in this tree.
|
| 1819 | size_type getDepth() const {
|
| 1820 | return (empty()) ? 0 : root->getDepth();
|
| 1821 | }
|
| 1822 |
|
| 1823 | // Determines the number of nodes contained in this tree.
|
| 1824 | size_type getNumNodes() const {
|
| 1825 | return (empty()) ? 0 : root->countNodes();
|
| 1826 | }
|
| 1827 |
|
| 1828 | // Determines the amount of memory used by this data structure
|
| 1829 | size_type getMemoryUsage() const {
|
| 1830 | return sizeof(*this) + (empty() ? 0 : root->getMemoryUsage());
|
| 1831 | }
|
| 1832 |
|
| 1833 | /*
|
| 1834 | * Prints a textual representation of this tree to the given
|
| 1835 | * output stream (mostly for debugging and tuning).
|
| 1836 | */
|
| 1837 | void printTree(std::ostream& out = std::cout) const {
|
| 1838 | out << "B-Tree with " << size() << " elements:\n";
|
| 1839 | if (empty()) {
|
| 1840 | out << " - empty - \n";
|
| 1841 | } else {
|
| 1842 | root->printTree(out, "");
|
| 1843 | }
|
| 1844 | }
|
| 1845 |
|
| 1846 | /**
|
| 1847 | * Prints a textual summary of statistical properties of this
|
| 1848 | * tree to the given output stream (for debugging and tuning).
|
| 1849 | */
|
| 1850 | void printStats(std::ostream& out = std::cout) const {
|
| 1851 | auto nodes = getNumNodes();
|
| 1852 | out << " ---------------------------------\n";
|
| 1853 | out << " Elements: " << size() << "\n";
|
| 1854 | out << " Depth: " << (empty() ? 0 : root->getDepth()) << "\n";
|
| 1855 | out << " Nodes: " << nodes << "\n";
|
| 1856 | out << " ---------------------------------\n";
|
| 1857 | out << " Size of inner node: " << sizeof(inner_node) << "\n";
|
| 1858 | out << " Size of leaf node: " << sizeof(leaf_node) << "\n";
|
| 1859 | out << " Size of Key: " << sizeof(Key) << "\n";
|
| 1860 | out << " max keys / node: " << node::maxKeys << "\n";
|
| 1861 | out << " avg keys / node: " << (size() / (double)nodes) << "\n";
|
| 1862 | out << " avg filling rate: " << ((size() / (double)nodes) / node::maxKeys) << "\n";
|
| 1863 | out << " ---------------------------------\n";
|
| 1864 | out << " insert-hint (hits/misses/total): " << hint_stats.inserts.getHits() << "/"
|
| 1865 | << hint_stats.inserts.getMisses() << "/" << hint_stats.inserts.getAccesses() << "\n";
|
| 1866 | out << " contains-hint(hits/misses/total):" << hint_stats.contains.getHits() << "/"
|
| 1867 | << hint_stats.contains.getMisses() << "/" << hint_stats.contains.getAccesses() << "\n";
|
| 1868 | out << " lower-bound-hint (hits/misses/total):" << hint_stats.lower_bound.getHits() << "/"
|
| 1869 | << hint_stats.lower_bound.getMisses() << "/" << hint_stats.lower_bound.getAccesses() << "\n";
|
| 1870 | out << " upper-bound-hint (hits/misses/total):" << hint_stats.upper_bound.getHits() << "/"
|
| 1871 | << hint_stats.upper_bound.getMisses() << "/" << hint_stats.upper_bound.getAccesses() << "\n";
|
| 1872 | out << " ---------------------------------\n";
|
| 1873 | }
|
| 1874 |
|
| 1875 | /**
|
| 1876 | * Checks the consistency of this tree.
|
| 1877 | */
|
| 1878 | bool check() {
|
| 1879 | auto ok = empty() || root->check(comp, root);
|
| 1880 | if (!ok) {
|
| 1881 | printTree();
|
| 1882 | }
|
| 1883 | return ok;
|
| 1884 | }
|
| 1885 |
|
| 1886 | /**
|
| 1887 | * A static member enabling the bulk-load of ordered data into an empty
|
| 1888 | * tree. This function is much more efficient in creating a index over
|
| 1889 | * an ordered set of elements than an iterative insertion of values.
|
| 1890 | *
|
| 1891 | * @tparam Iter .. the type of iterator specifying the range
|
| 1892 | * it must be a random-access iterator
|
| 1893 | */
|
| 1894 | template <typename R, typename Iter>
|
| 1895 | static typename std::enable_if<std::is_same<typename std::iterator_traits<Iter>::iterator_category,
|
| 1896 | std::random_access_iterator_tag>::value,
|
| 1897 | R>::type
|
| 1898 | load(const Iter& a, const Iter& b) {
|
| 1899 | // quick exit - empty range
|
| 1900 | if (a == b) {
|
| 1901 | return R();
|
| 1902 | }
|
| 1903 |
|
| 1904 | // resolve tree recursively
|
| 1905 | auto root = buildSubTree(a, b - 1);
|
| 1906 |
|
| 1907 | // find leftmost node
|
| 1908 | node* leftmost = root;
|
| 1909 | while (!leftmost->isLeaf()) {
|
| 1910 | leftmost = leftmost->getChild(0);
|
| 1911 | }
|
| 1912 |
|
| 1913 | // build result
|
| 1914 | return R(b - a, root, static_cast<leaf_node*>(leftmost));
|
| 1915 | }
|
| 1916 |
|
| 1917 | protected:
|
| 1918 | /**
|
| 1919 | * Determines whether the range covered by the given node is also
|
| 1920 | * covering the given key value.
|
| 1921 | */
|
| 1922 | bool covers(const node* node, const Key& k) const {
|
| 1923 | if (isSet) {
|
| 1924 | // in sets we can include the ends as covered elements
|
| 1925 | return !node->isEmpty() && !less(k, node->keys[0]) && !less(node->keys[node->numElements - 1], k);
|
| 1926 | }
|
| 1927 | // in multi-sets the ends may not be completely covered
|
| 1928 | return !node->isEmpty() && less(node->keys[0], k) && less(k, node->keys[node->numElements - 1]);
|
| 1929 | }
|
| 1930 |
|
| 1931 | /**
|
| 1932 | * Determines whether the range covered by the given node is also
|
| 1933 | * covering the given key value.
|
| 1934 | */
|
| 1935 | bool weak_covers(const node* node, const Key& k) const {
|
| 1936 | if (isSet) {
|
| 1937 | // in sets we can include the ends as covered elements
|
| 1938 | return !node->isEmpty() && !weak_less(k, node->keys[0]) &&
|
| 1939 | !weak_less(node->keys[node->numElements - 1], k);
|
| 1940 | }
|
| 1941 | // in multi-sets the ends may not be completely covered
|
| 1942 | return !node->isEmpty() && weak_less(node->keys[0], k) &&
|
| 1943 | weak_less(k, node->keys[node->numElements - 1]);
|
| 1944 | }
|
| 1945 |
|
| 1946 | private:
|
| 1947 | /**
|
| 1948 | * Determines whether the range covered by this node covers
|
| 1949 | * the upper bound of the given key.
|
| 1950 | */
|
| 1951 | bool coversUpperBound(const node* node, const Key& k) const {
|
| 1952 | // ignore edges
|
| 1953 | return !node->isEmpty() && !less(k, node->keys[0]) && less(k, node->keys[node->numElements - 1]);
|
| 1954 | }
|
| 1955 |
|
| 1956 | // Utility function for the load operation above.
|
| 1957 | template <typename Iter>
|
| 1958 | static node* buildSubTree(const Iter& a, const Iter& b) {
|
| 1959 | const int N = node::maxKeys;
|
| 1960 |
|
| 1961 | // divide range in N+1 sub-ranges
|
| 1962 | int64_t length = (b - a) + 1;
|
| 1963 |
|
| 1964 | // terminal case: length is less then maxKeys
|
| 1965 | if (length <= N) {
|
| 1966 | // create a leaf node
|
| 1967 | node* res = new leaf_node();
|
| 1968 | res->numElements = length;
|
| 1969 |
|
| 1970 | for (int i = 0; i < length; ++i) {
|
| 1971 | res->keys[i] = a[i];
|
| 1972 | }
|
| 1973 |
|
| 1974 | return res;
|
| 1975 | }
|
| 1976 |
|
| 1977 | // recursive case - compute step size
|
| 1978 | int numKeys = N;
|
| 1979 | int64_t step = ((length - numKeys) / (numKeys + 1));
|
| 1980 |
|
| 1981 | while (numKeys > 1 && (step < N / 2)) {
|
| 1982 | numKeys--;
|
| 1983 | step = ((length - numKeys) / (numKeys + 1));
|
| 1984 | }
|
| 1985 |
|
| 1986 | // create inner node
|
| 1987 | node* res = new inner_node();
|
| 1988 | res->numElements = numKeys;
|
| 1989 |
|
| 1990 | Iter c = a;
|
| 1991 | for (int i = 0; i < numKeys; i++) {
|
| 1992 | // get dividing key
|
| 1993 | res->keys[i] = c[step];
|
| 1994 |
|
| 1995 | // get sub-tree
|
| 1996 | auto child = buildSubTree(c, c + (step - 1));
|
| 1997 | child->parent = res;
|
| 1998 | child->position = i;
|
| 1999 | res->getChildren()[i] = child;
|
| 2000 |
|
| 2001 | c = c + (step + 1);
|
| 2002 | }
|
| 2003 |
|
| 2004 | // and the remaining part
|
| 2005 | auto child = buildSubTree(c, b);
|
| 2006 | child->parent = res;
|
| 2007 | child->position = numKeys;
|
| 2008 | res->getChildren()[numKeys] = child;
|
| 2009 |
|
| 2010 | // done
|
| 2011 | return res;
|
| 2012 | }
|
| 2013 | }; // end of class btree
|
| 2014 |
|
| 2015 | // Instantiation of static member search.
|
| 2016 | template <typename Key, typename Comparator, typename Allocator, unsigned blockSize, typename SearchStrategy,
|
| 2017 | bool isSet, typename WeakComparator, typename Updater>
|
| 2018 | const SearchStrategy
|
| 2019 | btree<Key, Comparator, Allocator, blockSize, SearchStrategy, isSet, WeakComparator, Updater>::search;
|
| 2020 |
|
| 2021 | } // end namespace detail
|
| 2022 |
|
| 2023 | /**
|
| 2024 | * A b-tree based set implementation.
|
| 2025 | *
|
| 2026 | * @tparam Key .. the element type to be stored in this set
|
| 2027 | * @tparam Comparator .. a class defining an order on the stored elements
|
| 2028 | * @tparam Allocator .. utilized for allocating memory for required nodes
|
| 2029 | * @tparam blockSize .. determines the number of bytes/block utilized by leaf nodes
|
| 2030 | * @tparam SearchStrategy .. enables switching between linear, binary or any other search strategy
|
| 2031 | */
|
| 2032 | template <typename Key, typename Comparator = detail::comparator<Key>,
|
| 2033 | typename Allocator = std::allocator<Key>, // is ignored so far
|
| 2034 | unsigned blockSize = 256,
|
| 2035 | typename SearchStrategy = typename souffle::detail::default_strategy<Key>::type,
|
| 2036 | typename WeakComparator = Comparator, typename Updater = souffle::detail::updater<Key>>
|
| 2037 | class btree_set : public souffle::detail::btree<Key, Comparator, Allocator, blockSize, SearchStrategy, true,
|
| 2038 | WeakComparator, Updater> {
|
| 2039 | using super = souffle::detail::btree<Key, Comparator, Allocator, blockSize, SearchStrategy, true,
|
| 2040 | WeakComparator, Updater>;
|
| 2041 |
|
| 2042 | friend class souffle::detail::btree<Key, Comparator, Allocator, blockSize, SearchStrategy, true,
|
| 2043 | WeakComparator, Updater>;
|
| 2044 |
|
| 2045 | public:
|
| 2046 | /**
|
| 2047 | * A default constructor creating an empty set.
|
| 2048 | */
|
| 2049 | btree_set(const Comparator& comp = Comparator(), const WeakComparator& weak_comp = WeakComparator())
|
| 2050 | : super(comp, weak_comp) {}
|
| 2051 |
|
| 2052 | /**
|
| 2053 | * A constructor creating a set based on the given range.
|
| 2054 | */
|
| 2055 | template <typename Iter>
|
| 2056 | btree_set(const Iter& a, const Iter& b) {
|
| 2057 | this->insert(a, b);
|
| 2058 | }
|
| 2059 |
|
| 2060 | // A copy constructor.
|
| 2061 | btree_set(const btree_set& other) : super(other) {}
|
| 2062 |
|
| 2063 | // A move constructor.
|
| 2064 | btree_set(btree_set&& other) : super(std::move(other)) {}
|
| 2065 |
|
| 2066 | private:
|
| 2067 | // A constructor required by the bulk-load facility.
|
| 2068 | template <typename s, typename n, typename l>
|
| 2069 | btree_set(s size, n* root, l* leftmost) : super(size, root, leftmost) {}
|
| 2070 |
|
| 2071 | public:
|
| 2072 | // Support for the assignment operator.
|
| 2073 | btree_set& operator=(const btree_set& other) {
|
| 2074 | super::operator=(other);
|
| 2075 | return *this;
|
| 2076 | }
|
| 2077 |
|
| 2078 | // Support for the bulk-load operator.
|
| 2079 | template <typename Iter>
|
| 2080 | static btree_set load(const Iter& a, const Iter& b) {
|
| 2081 | return super::template load<btree_set>(a, b);
|
| 2082 | }
|
| 2083 | };
|
| 2084 |
|
| 2085 | /**
|
| 2086 | * A b-tree based multi-set implementation.
|
| 2087 | *
|
| 2088 | * @tparam Key .. the element type to be stored in this set
|
| 2089 | * @tparam Comparator .. a class defining an order on the stored elements
|
| 2090 | * @tparam Allocator .. utilized for allocating memory for required nodes
|
| 2091 | * @tparam blockSize .. determines the number of bytes/block utilized by leaf nodes
|
| 2092 | * @tparam SearchStrategy .. enables switching between linear, binary or any other search strategy
|
| 2093 | */
|
| 2094 | template <typename Key, typename Comparator = detail::comparator<Key>,
|
| 2095 | typename Allocator = std::allocator<Key>, // is ignored so far
|
| 2096 | unsigned blockSize = 256,
|
| 2097 | typename SearchStrategy = typename souffle::detail::default_strategy<Key>::type,
|
| 2098 | typename WeakComparator = Comparator, typename Updater = souffle::detail::updater<Key>>
|
| 2099 | class btree_multiset : public souffle::detail::btree<Key, Comparator, Allocator, blockSize, SearchStrategy,
|
| 2100 | false, WeakComparator, Updater> {
|
| 2101 | using super = souffle::detail::btree<Key, Comparator, Allocator, blockSize, SearchStrategy, false,
|
| 2102 | WeakComparator, Updater>;
|
| 2103 |
|
| 2104 | friend class souffle::detail::btree<Key, Comparator, Allocator, blockSize, SearchStrategy, false,
|
| 2105 | WeakComparator, Updater>;
|
| 2106 |
|
| 2107 | public:
|
| 2108 | /**
|
| 2109 | * A default constructor creating an empty set.
|
| 2110 | */
|
| 2111 | btree_multiset(const Comparator& comp = Comparator(), const WeakComparator& weak_comp = WeakComparator())
|
| 2112 | : super(comp, weak_comp) {}
|
| 2113 |
|
| 2114 | /**
|
| 2115 | * A constructor creating a set based on the given range.
|
| 2116 | */
|
| 2117 | template <typename Iter>
|
| 2118 | btree_multiset(const Iter& a, const Iter& b) {
|
| 2119 | this->insert(a, b);
|
| 2120 | }
|
| 2121 |
|
| 2122 | // A copy constructor.
|
| 2123 | btree_multiset(const btree_multiset& other) : super(other) {}
|
| 2124 |
|
| 2125 | // A move constructor.
|
| 2126 | btree_multiset(btree_multiset&& other) : super(std::move(other)) {}
|
| 2127 |
|
| 2128 | private:
|
| 2129 | // A constructor required by the bulk-load facility.
|
| 2130 | template <typename s, typename n, typename l>
|
| 2131 | btree_multiset(s size, n* root, l* leftmost) : super(size, root, leftmost) {}
|
| 2132 |
|
| 2133 | public:
|
| 2134 | // Support for the assignment operator.
|
| 2135 | btree_multiset& operator=(const btree_multiset& other) {
|
| 2136 | super::operator=(other);
|
| 2137 | return *this;
|
| 2138 | }
|
| 2139 |
|
| 2140 | // Support for the bulk-load operator.
|
| 2141 | template <typename Iter>
|
| 2142 | static btree_multiset load(const Iter& a, const Iter& b) {
|
| 2143 | return super::template load<btree_multiset>(a, b);
|
| 2144 | }
|
| 2145 | };
|
| 2146 |
|
| 2147 | } // end of namespace souffle
|