/* Copyright (C) 2006 David Garcia
 * You may use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of this software subject to the following conditions:
 * THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
 * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#if !defined BPLUSTREE_HPP_227824
#define BPLUSTREE_HPP_227824

// This is required for glibc to declare posix_memalign()
// (a POSIX function, not part of namespace std)
#if !defined(_XOPEN_SOURCE) || (_XOPEN_SOURCE < 600)
#define _XOPEN_SOURCE 600
#endif
#include <stdlib.h>
#include <assert.h>
#include <cstddef>  // std::size_t, std::ptrdiff_t
#include <cstdlib>  // std::free
#include <boost/static_assert.hpp>
#include <boost/pool/object_pool.hpp>

// DEBUG
#include <iostream>
using std::cout;
using std::endl;

#if defined DEBUG_ASSERT
# warning "DEBUG_ASSERT already defined"
#else
# if defined DEBUG
#  define DEBUG_ASSERT(expr) assert(expr)
# else
#  define DEBUG_ASSERT(expr)
# endif
#endif

template <typename KEY, typename VALUE, unsigned N, unsigned M,
          unsigned INNER_NODE_PADDING= 0, unsigned LEAF_NODE_PADDING= 0,
          unsigned NODE_ALIGNMENT= 64>
class BPlusTree
{
public:
    // N must be greater than two to make the split of
    // two inner nodes sensible.
    BOOST_STATIC_ASSERT(N>2);
    // Leaf nodes must be able to hold at least one element
    BOOST_STATIC_ASSERT(M>0);

    // Builds a new empty tree.
    BPlusTree()
    : depth(0),
      root(new_leaf_node())
    {
        // DEBUG
        // cout << "sizeof(LeafNode)==" << sizeof(LeafNode) << endl;
        // cout << "sizeof(InnerNode)==" << sizeof(InnerNode) << endl;
    }

    ~BPlusTree() {
        // Empty. Memory deallocation is done automatically
        // when innerPool and leafPool are destroyed.
    }

    // Inserts a pair (key, value). If there is a previous pair with
    // the same key, the old value is overwritten with the new one.
    void insert(KEY key, VALUE value) {
        InsertionResult result;
        bool was_split;
        if( depth == 0 ) {
            // The root is a leaf node
            DEBUG_ASSERT( *reinterpret_cast<NodeType*>(root) == NODE_LEAF );
            was_split= leaf_insert(reinterpret_cast<LeafNode*>(root),
                    key, value, &result);
        } else {
            // The root is an inner node
            DEBUG_ASSERT( *reinterpret_cast<NodeType*>(root) == NODE_INNER );
            was_split= inner_insert(reinterpret_cast<InnerNode*>(root),
                    depth, key, value, &result);
        }
        if( was_split ) {
            // The old root was split in two parts, so we have to
            // create a new root pointing to them.
            depth++;
            root= new_inner_node();
            InnerNode* rootProxy= reinterpret_cast<InnerNode*>(root);
            rootProxy->num_keys= 1;
            rootProxy->keys[0]= result.key;
            rootProxy->children[0]= result.left;
            rootProxy->children[1]= result.right;
        }
    }
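
    // Note that this is the only place where the tree grows in height:
    // splits lower down are absorbed by their parent inner node, so all
    // leaves always sit at the same distance from the root.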

    // Looks for the given key. If it is not found, it returns false;
    // if it is found, it returns true and copies the associated value
    // unless the pointer is null.
    bool find(const KEY& key, VALUE* value= 0) const {
        InnerNode* inner;
        void* node= root;
        unsigned d= depth, index;
        while( d-- != 0 ) {
            inner= reinterpret_cast<InnerNode*>(node);
            DEBUG_ASSERT( inner->type == NODE_INNER );
            index= inner_position_for(key, inner->keys,
                    inner->num_keys);
            node= inner->children[index];
        }
        LeafNode* leaf= reinterpret_cast<LeafNode*>(node);
        DEBUG_ASSERT( leaf->type == NODE_LEAF );
        index= leaf_position_for(key, leaf->keys, leaf->num_keys);
        // Check the bound first: leaf_position_for() may return
        // num_keys, where keys[index] would be uninitialised.
        if( (index < leaf->num_keys) && (leaf->keys[index] == key) ) {
            if( value != 0 ) {
                *value= leaf->values[index];
            }
            return true;
        } else {
            return false;
        }
    }

    // Returns the size of an inner node.
    // It is useful when optimizing performance with cache alignment.
    unsigned sizeof_inner_node() const {
        return sizeof(InnerNode);
    }

    // Returns the size of a leaf node.
    // It is useful when optimizing performance with cache alignment.
    unsigned sizeof_leaf_node() const {
        return sizeof(LeafNode);
    }

private:
    // Used when debugging
    enum NodeType {NODE_INNER=0xDEADBEEF, NODE_LEAF=0xC0FFEE};

    // Leaf nodes store pairs of keys and values.
    struct LeafNode {
#ifdef DEBUG
        LeafNode() : type(NODE_LEAF), num_keys(0) {}
        const NodeType type;
#else
        LeafNode() : num_keys(0) {}
#endif
        unsigned num_keys;
        KEY keys[M];
        VALUE values[M];
        // Note: with the default padding of 0 this is a zero-length
        // array, a GCC extension rather than standard C++. The same
        // applies to InnerNode::_pad below.
        unsigned char _pad[LEAF_NODE_PADDING];
    };

    // Inner nodes store pointers to other nodes interleaved with keys.
    struct InnerNode {
#ifdef DEBUG
        InnerNode() : type(NODE_INNER), num_keys(0) {}
        const NodeType type;
#else
        InnerNode() : num_keys(0) {}
#endif
        unsigned num_keys;
        KEY keys[N];
        void* children[N+1];
        unsigned char _pad[INNER_NODE_PADDING];
    };
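
    // Search invariant: the subtree at children[i] holds keys strictly
    // below keys[i], and children[i+1] holds keys greater than or equal
    // to keys[i]. Leaf splits copy the separator into the first slot of
    // the right sibling, so a key equal to a separator lives on the right.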

    // Custom allocator that returns aligned blocks of memory
    template <unsigned ALIGNMENT>
    struct AlignedMemoryAllocator {
        typedef std::size_t size_type;
        typedef std::ptrdiff_t difference_type;

        static char* malloc(const size_type bytes)
        {
            void* result;
            if( posix_memalign(&result, ALIGNMENT, bytes) != 0 ) {
                result= 0;
            }
            // Alternative: result= std::malloc(bytes);
            return reinterpret_cast<char*>(result);
        }
        static void free(char* const block)
        { std::free(block); }
    };
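
    // AlignedMemoryAllocator models the UserAllocator concept that
    // Boost.Pool expects (the size_type/difference_type typedefs plus
    // static malloc/free), so it can be plugged into the
    // boost::object_pool members declared at the end of the class.
    // Note that posix_memalign() requires the alignment to be a power
    // of two and a multiple of sizeof(void*); the default of 64 fits
    // a common cache line size.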

    // Returns a pointer to a fresh leaf node.
    LeafNode* new_leaf_node() {
        LeafNode* result;
        // Alternatively: result= new LeafNode();
        result= leafPool.construct();
        //cout << "New LeafNode at " << result << endl;
        return result;
    }

    // Frees a leaf node previously allocated with new_leaf_node()
    void delete_leaf_node(LeafNode* node) {
        DEBUG_ASSERT( node->type == NODE_LEAF );
        //cout << "Deleting LeafNode at " << node << endl;
        // Alternatively: delete node;
        leafPool.destroy(node);
    }

    // Returns a pointer to a fresh inner node.
    InnerNode* new_inner_node() {
        InnerNode* result;
        // Alternatively: result= new InnerNode();
        result= innerPool.construct();
        //cout << "New InnerNode at " << result << endl;
        return result;
    }

    // Frees an inner node previously allocated with new_inner_node()
    void delete_inner_node(InnerNode* node) {
        DEBUG_ASSERT( node->type == NODE_INNER );
        //cout << "Deleting InnerNode at " << node << endl;
        // Alternatively: delete node;
        innerPool.destroy(node);
    }

    // Data type returned by the private insertion methods.
    struct InsertionResult {
        KEY key;
        void* left;
        void* right;
    };

    // Returns the position where 'key' should be inserted in a leaf node
    // that has the given keys.
    static unsigned leaf_position_for(const KEY& key, const KEY* keys,
            unsigned num_keys) {
        // Simple linear search. Faster for small values of N or M
        unsigned k= 0;
        while((k < num_keys) && (keys[k]<key)) {
            ++k;
        }
        return k;
        /*
        // Binary search. It is faster when N or M is > 100,
        // but the cost of the division renders it useless
        // for smaller values of N or M.
        XXX--- needs to be re-checked because the linear search
        has changed since the last update to the following ---XXX
        int left= -1, right= num_keys, middle;
        while( right -left > 1 ) {
            middle= (left+right)/2;
            if( keys[middle] < key) {
                left= middle;
            } else {
                right= middle;
            }
        }
        //assert( right == k );
        return unsigned(right);
        */
    }

    // Returns the position where 'key' should be inserted in an inner node
    // that has the given keys.
    static unsigned inner_position_for(const KEY& key, const KEY* keys,
            unsigned num_keys) {
        // Simple linear search. Faster for small values of N or M
        unsigned k= 0;
        while((k < num_keys) && ((keys[k]<key) || (keys[k]==key))) {
            ++k;
        }
        return k;
        // Binary search is faster when N or M is > 100,
        // but the cost of the division renders it useless
        // for smaller values of N or M.
    }
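
    // Unlike leaf_position_for(), the scan above also advances past
    // keys equal to 'key'. A key equal to a separator therefore
    // descends into the right child, matching leaf_insert(), which
    // publishes new_sibling->keys[0] (the first key of the right
    // sibling) as the separator after a split.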

    bool leaf_insert(LeafNode* node, KEY& key,
            VALUE& value, InsertionResult* result) {
        DEBUG_ASSERT( node->type == NODE_LEAF );
        assert( node->num_keys <= M );
        bool was_split= false;
        // Simple linear search
        unsigned i= leaf_position_for(key, node->keys, node->num_keys);
        if( node->num_keys == M ) {
            // The node was full. We must split it
            unsigned threshold= (M+1)/2;
            LeafNode* new_sibling= new_leaf_node();
            new_sibling->num_keys= node->num_keys -threshold;
            for(unsigned j=0; j < new_sibling->num_keys; ++j) {
                new_sibling->keys[j]= node->keys[threshold+j];
                new_sibling->values[j]= node->values[threshold+j];
            }
            node->num_keys= threshold;
            if( i < threshold ) {
                // Inserted element goes to left sibling
                leaf_insert_nonfull(node, key, value, i);
            } else {
                // Inserted element goes to right sibling
                leaf_insert_nonfull(new_sibling, key, value,
                        i-threshold);
            }
            // Notify the parent about the split
            was_split= true;
            result->key= new_sibling->keys[0];
            result->left= node;
            result->right= new_sibling;
        } else {
            // The node was not full
            leaf_insert_nonfull(node, key, value, i);
        }
        return was_split;
    }
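
    // A worked split, assuming M == 4: inserting key 4 into the full
    // leaf {1,3,5,7} computes threshold = (4+1)/2 = 2, so the left
    // leaf keeps {1,3} and the new sibling receives {5,7}. The new key
    // lands at position i == 2 >= threshold, goes to the right sibling
    // as {4,5,7}, and the separator reported upward is 4.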

    static void leaf_insert_nonfull(LeafNode* node, KEY& key, VALUE& value,
            unsigned index) {
        DEBUG_ASSERT( node->type == NODE_LEAF );
        assert( node->num_keys < M );
        assert( index <= M );
        assert( index <= node->num_keys );
        // Overwrite only when 'index' refers to an existing key equal
        // to the new one; comparing against an uninitialised slot at
        // index == num_keys could silently drop the insertion.
        if( (index < node->num_keys) && (node->keys[index] == key) ) {
            // We are inserting a duplicate value.
            // Simply overwrite the old one
            node->values[index]= value;
        } else {
            // The key we are inserting is unique
            for(unsigned i=node->num_keys; i > index; --i) {
                node->keys[i]= node->keys[i-1];
                node->values[i]= node->values[i-1];
            }
            node->num_keys++;
            node->keys[index]= key;
            node->values[index]= value;
        }
    }

    bool inner_insert(InnerNode* node, unsigned depth, KEY& key,
            VALUE& value, InsertionResult* result) {
        DEBUG_ASSERT( node->type == NODE_INNER );
        assert( depth != 0 );
        // Early split if node is full.
        // This is not the canonical algorithm for B+ trees,
        // but it is simpler and does not break the definition.
        bool was_split= false;
        if( node->num_keys == N ) {
            // Split
            unsigned threshold= (N+1)/2;
            InnerNode* new_sibling= new_inner_node();
            new_sibling->num_keys= node->num_keys -threshold;
            for(unsigned i=0; i < new_sibling->num_keys; ++i) {
                new_sibling->keys[i]= node->keys[threshold+i];
                new_sibling->children[i]= node->children[threshold+i];
            }
            new_sibling->children[new_sibling->num_keys]=
                    node->children[node->num_keys];
            node->num_keys= threshold-1;
            // Set up the return variable
            was_split= true;
            result->key= node->keys[threshold-1];
            result->left= node;
            result->right= new_sibling;
            // Now insert in the appropriate sibling
            if( key < result->key ) {
                inner_insert_nonfull(node, depth, key, value);
            } else {
                inner_insert_nonfull(new_sibling, depth, key, value);
            }
        } else {
            // No split
            inner_insert_nonfull(node, depth, key, value);
        }
        return was_split;
    }

    void inner_insert_nonfull(InnerNode* node, unsigned depth, KEY& key,
            VALUE& value) {
        DEBUG_ASSERT( node->type == NODE_INNER );
        assert( node->num_keys < N );
        assert( depth != 0 );
        // Simple linear search
        unsigned index= inner_position_for(key, node->keys,
                node->num_keys);
        InsertionResult result;
        bool was_split;
        if( depth-1 == 0 ) {
            // The children are leaf nodes
            for(unsigned kk=0; kk < node->num_keys+1; ++kk) {
                DEBUG_ASSERT( *reinterpret_cast<NodeType*>
                        (node->children[kk]) == NODE_LEAF );
            }
            was_split= leaf_insert(reinterpret_cast<LeafNode*>
                    (node->children[index]), key, value, &result);
        } else {
            // The children are inner nodes
            for(unsigned kk=0; kk < node->num_keys+1; ++kk) {
                DEBUG_ASSERT( *reinterpret_cast<NodeType*>
                        (node->children[kk]) == NODE_INNER );
            }
            InnerNode* child= reinterpret_cast<InnerNode*>
                    (node->children[index]);
            was_split= inner_insert( child, depth-1, key, value,
                    &result);
        }
        if( was_split ) {
            if( index == node->num_keys ) {
                // Insertion at the rightmost key
                node->keys[index]= result.key;
                node->children[index]= result.left;
                node->children[index+1]= result.right;
                node->num_keys++;
            } else {
                // Insertion not at the rightmost key
                node->children[node->num_keys+1]=
                        node->children[node->num_keys];
                for(unsigned i=node->num_keys; i!=index; --i) {
                    node->children[i]= node->children[i-1];
                    node->keys[i]= node->keys[i-1];
                }
                node->children[index]= result.left;
                node->children[index+1]= result.right;
                node->keys[index]= result.key;
                node->num_keys++;
            }
        } // else the current node is not affected
    }

    typedef AlignedMemoryAllocator<NODE_ALIGNMENT> AlignedAllocator;

    // Node memory allocators. IMPORTANT NOTE: they must be declared
    // before the root to make sure that they are properly initialised
    // before being used to allocate any node.
    boost::object_pool<InnerNode, AlignedAllocator> innerPool;
    boost::object_pool<LeafNode, AlignedAllocator> leafPool;
    // Depth of the tree. A tree of depth 0 only has a leaf node.
    unsigned depth;
    // Pointer to the root node. It may be a leaf or an inner node, but
    // it is never null.
    void* root;
};
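
// A minimal usage sketch (not part of the original header). Compiling a
// translation unit with BPLUSTREE_SELF_TEST defined exercises the tree;
// the macro name, the parameters N=4 and M=4, and the test values are
// illustrative assumptions only. Note that the default padding of 0
// relies on the zero-length-array GCC extension mentioned above.
#ifdef BPLUSTREE_SELF_TEST
#include <cassert>

int main() {
    // Inner nodes hold up to 4 keys, leaves up to 4 key/value pairs.
    BPlusTree<int, int, 4, 4> tree;

    // Insert enough ascending pairs to force leaf and inner splits.
    for(int i= 0; i < 1000; ++i) {
        tree.insert(i, 2*i);
    }

    // Every key should be found together with its associated value.
    int v;
    for(int i= 0; i < 1000; ++i) {
        assert( tree.find(i, &v) && v == 2*i );
    }
    // Absent keys are reported as not found.
    assert( !tree.find(-1) );

    // Inserting an existing key overwrites its value.
    tree.insert(42, 7);
    assert( tree.find(42, &v) && v == 7 );
    return 0;
}
#endif // BPLUSTREE_SELF_TEST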

#endif // !defined BPLUSTREE_HPP_227824