From d6f97767bb523641a59998061d37dc05f8893c61 Mon Sep 17 00:00:00 2001
From: Sam Stevens
Date: Sat, 7 Jul 2018 16:51:22 +0100
Subject: [PATCH] Finished implementing B+ trees

---
Review notes (ignored by git am, not part of the commit message):
 * Merge with right leaf: re-parent the child of each moved key
   (kv->rightPointer->parent = leaf) instead of overwriting
   kv->rightPointer with the leaf itself.
 * Merge into left internal node: children of the moved keys now get
   `left` as parent, the node that receives them, matching the
   merge-with-right branch which uses its destination node.
 * Merge into left internal node, removal loop: walk the saved `parent`
   and break after the first match, as the merge-with-right branch does,
   so `node` is never read after free_bplus_kv(kv).
   NOTE(review): free_bplus_kv() is not shown in this patch; presumably
   it releases the key's right child - confirm.
 * Borrow from left leaf: index the left sibling by its own keyCount
   rather than by leaf->keyCount.
 * NOTE(review): line breaks, indentation and blank context lines were
   reconstructed from a whitespace-collapsed copy of this patch, and the
   <...> names of the system #include lines in the src/main.c hunk were
   already missing. Check against the tree (or apply with whitespace
   tolerance) before use.

 src/bplus_tree.c         | 115 ++++++++++++++++++++++++++++++++++++---
 src/bplus_tree.helpers.c |  19 +++++++
 src/main.c               |   3 ++
 tests/bplus_tree_test.c  |  20 +++++--
 4 files changed, 144 insertions(+), 13 deletions(-)

diff --git a/src/bplus_tree.c b/src/bplus_tree.c
index b1b3124..9a9b93f 100644
--- a/src/bplus_tree.c
+++ b/src/bplus_tree.c
@@ -148,7 +148,7 @@ void print_bplus_node(BPlusNode *node, size_t indent) {
 }
 
 char *debug_bplus_node_str(BPlusNode *node, char *str, size_t *strSize) {
-    APPEND_STR(str, *strSize, "[");
+    APPEND_STR(str, *strSize, " [");
     if (node->leftPointer != NULL) {
         str = debug_bplus_node_str(node->leftPointer, str, strSize);
     }
@@ -161,7 +161,7 @@ char *debug_bplus_node_str(BPlusNode *node, char *str, size_t *strSize) {
         }
     }
 
-    APPEND_STR(str, *strSize, "]");
+    APPEND_STR(str, *strSize, "] ");
     return str;
 }
 
@@ -234,6 +234,9 @@ BPlusNode *bplus_tree_find_leaf(BPlusTree *tree, uint64_t key) {
 bool bplus_tree_insert(BPlusTree *tree, uint64_t newKey, void *newValue) {
     BPlusNode *node = bplus_tree_find_leaf(tree, newKey);
     assert(node != NULL && node->isInternal == false);
+#ifdef BTREE_DEBUG
+    _bplus_tree_check_parents(tree);
+#endif
 
     if (node->keyCount < node->order - 1) {
         //Can insert at this node
@@ -317,6 +320,9 @@ bool bplus_tree_insert(BPlusTree *tree, uint64_t newKey, void *newValue) {
 bool bplus_tree_delete(BPlusTree *tree, uint64_t key) {
     BPlusNode *leaf = bplus_tree_find_leaf(tree, key);
     assert(leaf != NULL && leaf->isInternal == false);
+#ifdef BTREE_DEBUG
+    _bplus_tree_check_parents(tree);
+#endif
 
     //Find and remove kv
     bool removed = false;
@@ -348,6 +354,9 @@ bool bplus_tree_delete(BPlusTree *tree, uint64_t key) {
             BPlusKV *kv = leaf->rightLeaf->keys[0];
             assert(bplus_node_remove_kv(leaf->rightLeaf, kv));
             assert(bplus_node_insert_kv(leaf, kv));
+            if (kv->rightPointer != NULL) {
+                kv->rightPointer->parent = leaf;
+            }
             //Update the right leafs parent pointer
             for (size_t i = 0; i < leaf->parent->keyCount; i++) {
                 if (leaf->parent->keys[i]->rightPointer == leaf->rightLeaf) {
@@ -366,6 +375,9 @@ bool bplus_tree_delete(BPlusTree *tree, uint64_t key) {
-            BPlusKV *kv = leaf->leftLeaf->keys[leaf->keyCount - 1];
+            BPlusKV *kv = leaf->leftLeaf->keys[leaf->leftLeaf->keyCount - 1];
             assert(bplus_node_remove_kv(leaf->leftLeaf, kv));
             assert(bplus_node_insert_kv(leaf, kv));
+            if (kv->rightPointer != NULL) {
+                kv->rightPointer->parent = leaf;
+            }
             //Update the key pointing to this leaf
             for (size_t i = 0; i < leaf->parent->keyCount; i++) {
                 if (leaf->parent->keys[i]->rightPointer == leaf) {
@@ -385,8 +397,12 @@ bool bplus_tree_delete(BPlusTree *tree, uint64_t key) {
                 && leaf->keyCount + leaf->rightLeaf->keyCount < tree->order) {
             //Move all keys from right leaf to this leaf
             while (leaf->rightLeaf->keyCount > 0) {
-                assert(bplus_node_insert_kv(leaf, leaf->rightLeaf->keys[0]));
-                assert(bplus_node_remove_kv(leaf->rightLeaf, leaf->rightLeaf->keys[0]));
+                BPlusKV *kv = leaf->rightLeaf->keys[0];
+                assert(bplus_node_insert_kv(leaf, kv));
+                if (kv->rightPointer != NULL) {
+                    kv->rightPointer->parent = leaf;
+                }
+                assert(bplus_node_remove_kv(leaf->rightLeaf, kv));
             }
             //Remove right leaf and parent pointer
             for (size_t i = 0; i < leaf->parent->keyCount; i++) {
@@ -395,7 +411,7 @@ bool bplus_tree_delete(BPlusTree *tree, uint64_t key) {
                     //Remove key
                     bplus_node_remove_kv(leaf->parent, kv);
                     //Update leaf pointers
-                    leaf->rightLeaf = leaf->rightLeaf;
+                    leaf->rightLeaf = leaf->rightLeaf->rightLeaf;
                     if (leaf->rightLeaf != NULL) {
                         leaf->rightLeaf->leftLeaf = leaf;
                     }
@@ -406,14 +422,16 @@ bool bplus_tree_delete(BPlusTree *tree, uint64_t key) {
             }
             merged = true;
         }
-        //Try left leaf
-        if (leaf->leftLeaf != NULL
+        else if (leaf->leftLeaf != NULL
                 && leaf->parent == leaf->leftLeaf->parent
                 && leaf->keyCount + leaf->leftLeaf->keyCount < tree->order) {
 
             //Move all keys into left leaf
             while (leaf->keyCount > 0) {
                 assert(bplus_node_insert_kv(leaf->leftLeaf, leaf->keys[0]));
+                if (leaf->keys[0]->rightPointer != NULL) {
+                    leaf->keys[0]->rightPointer->parent = leaf->leftLeaf;
+                }
                 assert(bplus_node_remove_kv(leaf, leaf->keys[0]));
             }
             //Remove this leaf and parent pointer (if any)
@@ -443,6 +461,31 @@ bool bplus_tree_delete(BPlusTree *tree, uint64_t key) {
 
     BPlusNode *node = leaf->parent;
     while (node != NULL) {
+        //Merge has emptied the parent, promote keys from left pointer to this node
+        if (node->keyCount == 0) {
+            BPlusNode *leftChild = node->leftPointer;
+            if (leftChild->keyCount > 0) {
+                while (leftChild->keyCount > 0) {
+                    BPlusKV *moveKV = leftChild->keys[0];
+                    bplus_node_insert_kv(node, moveKV);
+                    if (moveKV->rightPointer != NULL) {
+                        moveKV->rightPointer->parent = node;
+                    }
+                    bplus_node_remove_kv(leftChild, moveKV);
+                }
+            }
+            node->leftPointer = leftChild->leftPointer;
+            if (node->leftPointer != NULL) {
+                node->leftPointer->parent = node;
+            }
+            leftChild->leftPointer = NULL;
+            free_bplus_node(leftChild);
+            if (node == tree->root && node->leftPointer == NULL) {
+                node->isInternal = false;
+            }
+            node = node->parent;
+            continue;
+        }
 
         //Dont need to do anything if node has enough keys or is the root
         if (node->keyCount > 1 || node->parent == NULL) {
@@ -484,7 +527,7 @@ bool bplus_tree_delete(BPlusTree *tree, uint64_t key) {
             node->leftPointer->parent = node;
             kv->rightPointer = NULL;
             free_bplus_kv(kv);
-            //Update our parent pointer
+            //Update our parent pointer key
             for (size_t i = 0; i < node->parent->keyCount; i++) {
                 if (node->parent->keys[i]->rightPointer == node) {
                     node->parent->keys[i]->key = node->keys[0]->key;
@@ -495,7 +538,61 @@ bool bplus_tree_delete(BPlusTree *tree, uint64_t key) {
 
         //Couldn't borrow key, merge with a sibling
 
+        if (right != NULL && right->keyCount <= 1) {
+            //Take left pointer and add a key for it
+            BPlusKV *newKV = new_bplus_kv(right->leftPointer->keys[0]->key, NULL, right->leftPointer);
+            newKV->rightPointer->parent = node;
+            bplus_node_insert_kv(node, newKV);
+            right->leftPointer = NULL;
+            //Move keys
+            while (right->keyCount > 0) {
+                BPlusKV *moveKV = right->keys[0];
+                bplus_node_remove_kv(right, moveKV);
+                bplus_node_insert_kv(node, moveKV);
+                if (moveKV->rightPointer != NULL) {
+                    moveKV->rightPointer->parent = node;
+                }
+            }
+            //Remove right node
+            for (size_t i = 0; i < node->parent->keyCount; i++) {
+                if (node->parent->keys[i]->rightPointer == right) {
+                    BPlusKV *kv = node->parent->keys[i];
+                    bplus_node_remove_kv(node->parent, kv);
+                    free_bplus_kv(kv);
+                    break;
+                }
+            }
+            node = node->parent;
+        } else if (left != NULL && left->keyCount <= 1) {
+            //Merge us into the left node
+            //Create new key from left pointer
+            BPlusKV *newKV = new_bplus_kv(node->leftPointer->keys[0]->key, NULL, node->leftPointer);
+            newKV->rightPointer->parent = left;
+            bplus_node_insert_kv(left, newKV);
+            node->leftPointer = NULL;
+            //Move keys
+            while (node->keyCount > 0) {
+                BPlusKV *moveKV = node->keys[0];
+                bplus_node_remove_kv(node, moveKV);
+                bplus_node_insert_kv(left, moveKV);
+                if (moveKV->rightPointer != NULL) {
+                    moveKV->rightPointer->parent = left;
+                }
+            }
+            //Remove node
+            BPlusNode *parent = node->parent;
+            for (size_t i = 0; i < parent->keyCount; i++) {
+                if (parent->keys[i]->rightPointer == node) {
+                    BPlusKV *kv = parent->keys[i];
+                    bplus_node_remove_kv(parent, kv);
+                    free_bplus_kv(kv);
+                    break;
+                }
+            }
+            node = parent;
+        }
     }
+    return true;
 }
 
 BPlusKV *bplus_tree_find(BPlusTree *tree, uint64_t key) {
diff --git a/src/bplus_tree.helpers.c b/src/bplus_tree.helpers.c
index d3ed079..b6a8873 100644
--- a/src/bplus_tree.helpers.c
+++ b/src/bplus_tree.helpers.c
@@ -50,3 +50,22 @@ BPlusNode *_bplus_node_get_right_sibling(BPlusNode *node) {
         return node->rightLeaf;
     }
 }
+
+void _bplus_node_check_parents(BPlusNode *parent) {
+    if (parent->leftPointer != NULL) {
+        assert(parent->leftPointer->parent == parent);
+        _bplus_node_check_parents(parent->leftPointer);
+    }
+    for (size_t i = 0; i < parent->keyCount; i++) {
+        if (parent->keys[i]->rightPointer != NULL) {
+            assert(parent->keys[i]->rightPointer->parent == parent);
+            _bplus_node_check_parents(parent->keys[i]->rightPointer);
+        }
+    }
+}
+
+void _bplus_tree_check_parents(BPlusTree *tree) {
+    assert(tree->root != NULL);
+    assert(tree->root->parent == NULL);
+    _bplus_node_check_parents(tree->root);
+}
\ No newline at end of file
diff --git a/src/main.c b/src/main.c
index f3fdee5..20beede 100644
--- a/src/main.c
+++ b/src/main.c
@@ -3,11 +3,14 @@
 #include
 #include
 #include
+#include
 
 #include "InputBuffer.h"
 #include "scanner.h"
 #include "parser.h"
 
+#include "bplus_tree.h"
+
 void prompt() {
     printf("SDB> ");
 }
diff --git a/tests/bplus_tree_test.c b/tests/bplus_tree_test.c
index 1c5d186..4b16eaf 100644
--- a/tests/bplus_tree_test.c
+++ b/tests/bplus_tree_test.c
@@ -13,21 +13,21 @@ START_TEST(insert_keys_4)
 
     char *buffer = NULL;
 
-    ck_assert_str_eq(debug_bplus_tree_str(tree, buffer), "B+<4>[]");
+    ck_assert_str_eq(debug_bplus_tree_str(tree, buffer), "B+<4> [] ");
     bplus_tree_insert(tree, 10, NULL);
     bplus_tree_insert(tree, 20, NULL);
 
-    ck_assert_str_eq(debug_bplus_tree_str(tree, buffer), "B+<4>[{10*}{20*}]");
+    ck_assert_str_eq(debug_bplus_tree_str(tree, buffer), "B+<4> [{10*}{20*}] ");
 
     bplus_tree_insert(tree, 30, NULL);
     bplus_tree_insert(tree, 40, NULL);
 
-    ck_assert_str_eq(debug_bplus_tree_str(tree, buffer), "B+<4>[[{10*}{20*}]{30}[{30*}{40*}]]");
+    ck_assert_str_eq(debug_bplus_tree_str(tree, buffer), "B+<4> [ [{10*}{20*}] {30} [{30*}{40*}] ] ");
 
     bplus_tree_insert(tree, 35, NULL);
     bplus_tree_insert(tree, 50, NULL);
 
-    ck_assert_str_eq(debug_bplus_tree_str(tree, buffer), "B+<4>[[{10*}{20*}]{30}[{30*}{35*}]{40}[{40*}{50*}]]");
+    ck_assert_str_eq(debug_bplus_tree_str(tree, buffer), "B+<4> [ [{10*}{20*}] {30} [{30*}{35*}] {40} [{40*}{50*}] ] ");
 
     free_bplus_tree(tree);
 
@@ -63,6 +63,17 @@ START_TEST(find_keys_4)
     free_bplus_tree(tree);
 }END_TEST
 
+START_TEST(delete_key_leaf) {
+    BPlusTree *tree = new_bplus_tree(4);
+    for(uint64_t i=1;i<=16;i++) {
+        ck_assert(bplus_tree_insert(tree, i, NULL) == true);
+    }
+    for(uint64_t i=1;i<=16;i++) {
+        ck_assert(bplus_tree_delete(tree, i) == true);
+    }
+    free_bplus_tree(tree);
+}END_TEST;
+
 Suite *blus_tree_suite(void) {
     Suite *s = suite_create("B+ Tree");
 
@@ -71,6 +82,7 @@ Suite *blus_tree_suite(void) {
 
     tcase_add_test(core, insert_keys_4);
     tcase_add_test(core, find_keys_4);
+    tcase_add_test(core, delete_key_leaf);
 
     return s;
 }
\ No newline at end of file