Working on B+ tree implementation.

This commit is contained in:
2018-06-21 08:04:33 +01:00
parent 1c74b586fd
commit 777697d9ab
4 changed files with 320 additions and 3 deletions

View File

@@ -428,7 +428,7 @@ void print_column_spec_list(ColumnSpecList *list) {
for (size_t i = 0; i < list->length; i++) {
ColumnSpec *spec = list->columns[i];
if (spec->type == COLTYPE_STRING) {
printf("%s STRING(%d)", spec->identifier, spec->size);
printf("%s STRING(%ld)", spec->identifier, spec->size);
} else if (spec->type == COLTYPE_INT) {
printf("%s INTEGER", spec->identifier);
}
@@ -470,6 +470,6 @@ void print_value(Value *value) {
}
printf("'");
} else if (value->type == VALUE_NUMBER) {
printf("%lld", value->number);
printf("%ld", value->number);
}
}

251
src/bplus_tree.c Normal file
View File

@@ -0,0 +1,251 @@
//
// Created by sam on 14/06/18.
//
#include <assert.h>
#include <inttypes.h>
#include <limits.h>
#include <malloc.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "bplus_tree.h"
/**
 * Allocate a key/value entry for a B+ tree node.
 *
 * @param key         Key stored in the entry.
 * @param value       Payload pointer, stored as-is (not copied); may be NULL.
 * @param leftPointer Child node to attach. Despite the parameter name it is
 *                    stored in the entry's rightPointer field; may be NULL.
 * @return The newly allocated entry, or NULL if the allocation fails.
 */
BPlusKV *new_bplus_kv(uint64_t key, void *value, BPlusNode *leftPointer) {
    BPlusKV *kv = malloc(sizeof *kv);
    if (kv == NULL) {
        return NULL;
    }

    kv->key = key;
    kv->value = value;
    kv->rightPointer = leftPointer;
    return kv;
}
/**
 * Release an entry together with everything it references.
 *
 * The subtree hanging off rightPointer is destroyed with free_bplus_node()
 * and the payload is released with free(), so the payload must be NULL or a
 * pointer that is valid to pass to free().
 *
 * @param kv Entry to destroy; NULL is accepted and ignored.
 */
void free_bplus_kv(BPlusKV *kv) {
    if (kv == NULL) {
        return;
    }

    if (kv->rightPointer != NULL) {
        free_bplus_node(kv->rightPointer);
    }
    // free(NULL) is a no-op, so the payload needs no separate NULL check.
    free(kv->value);
    free(kv);
}
/**
 * Allocate an empty B+ tree node.
 *
 * @param id         Identifier recorded in the node.
 * @param isInternal true for an internal node, false for a leaf.
 * @param order      Number of slots allocated in the node's key array.
 * @return The new node with no keys, no parent and no child/sibling links,
 *         or NULL if an allocation fails.
 */
BPlusNode *new_bplus_node(uint64_t id, bool isInternal, size_t order) {
    BPlusNode *node = malloc(sizeof *node);
    if (node == NULL) {
        return NULL;
    }

    // Zero-initialised so every unused slot reads as NULL.
    node->keys = calloc(order, sizeof *node->keys);
    // calloc(0, ...) may legitimately return NULL, so only treat NULL as a
    // failure when slots were actually requested.
    if (node->keys == NULL && order != 0) {
        free(node);
        return NULL;
    }

    node->id = id;
    node->isInternal = isInternal;
    node->order = order;
    node->parent = NULL;
    node->keyCount = 0;
    node->leftPointer = NULL;
    node->leftLeaf = NULL;
    node->rightLeaf = NULL;
    return node;
}
/**
 * Recursively destroy a node and the whole subtree beneath it.
 *
 * Every stored entry is released with free_bplus_kv() (which in turn
 * destroys the entry's right child), then the key array, the left-most
 * child and finally the node itself.
 *
 * @param node Node to destroy; NULL is accepted and ignored.
 */
void free_bplus_node(BPlusNode *node) {
    if (node == NULL) {
        return;
    }

    for (size_t i = 0; i < node->keyCount; i++) {
        // Skip empty slots defensively rather than passing NULL down.
        if (node->keys[i] != NULL) {
            free_bplus_kv(node->keys[i]);
        }
    }
    free(node->keys);

    if (node->leftPointer != NULL) {
        free_bplus_node(node->leftPointer);
    }
    free(node);
}
/**
 * Place an entry into a node, keeping the node's keys in ascending order.
 *
 * @param node Node receiving the entry.
 * @param kv   Entry to store; the node keeps the pointer.
 * @return false when every slot of the node is already occupied
 *         (keyCount == order), true once the entry has been stored.
 */
bool bplus_node_insert_kv(BPlusNode *node, BPlusKV *kv) {
    const size_t used = node->keyCount;

    // No free slot left.
    if (used == node->order) {
        return false;
    }

    // Empty node, or the new key sorts after the current last key: append.
    if (used == 0 || kv->key > node->keys[used - 1]->key) {
        node->keys[used] = kv;
        node->keyCount = used + 1;
        return true;
    }

    // Locate the first slot whose key is strictly greater than the new key.
    size_t slot = 0;
    while (slot < used && kv->key >= node->keys[slot]->key) {
        slot++;
    }

    // Slide everything from that slot one position towards the end.
    size_t dst = node->order - 1;
    while (dst > slot) {
        node->keys[dst] = node->keys[dst - 1];
        dst--;
    }

    node->keys[slot] = kv;
    node->keyCount = used + 1;
    return true;
}
/**
 * Print a node and, recursively, its subtree to stdout.
 *
 * The header line is labelled ROOT when the node has no parent, otherwise
 * INTERNAL or LEAF according to isInternal. The left-most child is printed
 * first, then each key followed by the child to its right. Values are only
 * printed for non-internal nodes.
 *
 * @param node   Node to print; NULL is accepted and prints nothing.
 * @param indent Number of spaces to prefix each line with; children are
 *               printed with four more.
 */
void print_bplus_node(BPlusNode *node, size_t indent) {
    if (node == NULL) {
        return;
    }

    // printf's '*' width takes an int; clamp so the conversion cannot overflow.
    const int pad = indent > (size_t) INT_MAX ? INT_MAX : (int) indent;

    const char *label;
    if (node->parent == NULL) {
        label = "ROOT";
    } else if (node->isInternal) {
        label = "INTERNAL";
    } else {
        label = "LEAF";
    }

    // "%*s" with an empty string emits exactly `pad` spaces without a heap
    // buffer; PRIu64 / %zu match the uint64_t / size_t argument types.
    printf("%*s%s #%" PRIu64 " (%zu/%zu)\n", pad, "", label, node->id, node->keyCount, node->order);

    if (node->leftPointer != NULL) {
        print_bplus_node(node->leftPointer, indent + 4);
    }

    for (size_t i = 0; i < node->keyCount; i++) {
        const BPlusKV *entry = node->keys[i];

        printf("%*s Key: %" PRIu64 "\t", pad, "", entry->key);
        if (!node->isInternal) {
            if (entry->value != NULL) {
                printf("Value: %p", entry->value);
            } else {
                printf("Value: NULL");
            }
        }
        printf("\n");

        if (entry->rightPointer != NULL) {
            print_bplus_node(entry->rightPointer, indent + 4);
        }
    }
}
/**
 * Create an empty B+ tree whose root is a single empty leaf.
 *
 * @param order Number of key slots per node; minFill is derived as order / 2.
 * @return The new tree, or NULL if the tree could not be allocated.
 */
BPlusTree *new_bplus_tree(size_t order) {
    BPlusTree *tree = malloc(sizeof *tree);
    if (tree == NULL) {
        return NULL;
    }

    tree->order = order;
    tree->minFill = order / 2;
    tree->nextID = 0;
    // The root takes id 0 and starts life as a leaf.
    tree->root = new_bplus_node(tree->nextID++, false, order);
    if (tree->root == NULL) {
        // A tree without a root cannot accept inserts; report the failure.
        free(tree);
        return NULL;
    }
    return tree;
}
/**
 * Destroy a tree and every node reachable from its root.
 *
 * @param tree Tree to destroy; NULL is accepted and ignored.
 */
void free_bplus_tree(BPlusTree *tree) {
    if (tree == NULL) {
        return;
    }

    if (tree->root != NULL) {
        free_bplus_node(tree->root);
    }
    free(tree);
}
/**
 * Print a summary of the tree (order and minimum fill) followed by a dump
 * of all nodes starting at the root, or "EMPTY TREE" if there is no root.
 *
 * @param tree Tree to print.
 */
void print_bplus_tree(BPlusTree *tree) {
    printf("B+ Tree\n");
    // order and minFill are size_t, so %zu is the matching conversion.
    printf("- Order: %zu\n", tree->order);
    printf("- Min Fill: %zu\n", tree->minFill);

    if (tree->root == NULL) {
        printf(" EMPTY TREE\n");
        return;
    }
    print_bplus_node(tree->root, 2);
}
/**
 * Descend from the root to the leaf that would hold the given key.
 *
 * At each internal node the search follows leftPointer when the key is
 * smaller than the first stored key, otherwise the right child of the last
 * stored key that is less than or equal to the search key.
 *
 * @param tree Tree to search; NULL yields NULL.
 * @param key  Key to locate.
 * @return The first non-internal node reached, or NULL if the tree has no
 *         root or a NULL child pointer is followed on the way down.
 */
BPlusNode *bplus_tree_find_leaf(BPlusTree *tree, uint64_t key) {
    if (tree == NULL) {
        return NULL;
    }

    BPlusNode *node = tree->root;
    while (node != NULL && node->isInternal) {
        // An internal node can end up with no keys (a split with a very small
        // order leaves only leftPointer); there is nothing to compare against,
        // so the only way down is the left-most child.
        if (node->keyCount == 0 || key < node->keys[0]->key) {
            node = node->leftPointer;
            continue;
        }

        // Advance to the first key strictly greater than the search key; the
        // child to the right of its predecessor covers the search key.
        size_t i = 1;
        while (i < node->keyCount && key >= node->keys[i]->key) {
            i++;
        }
        node = node->keys[i - 1]->rightPointer;
    }
    return node;
}
bool bplus_tree_insert(BPlusTree *tree, uint64_t newKey, void *newValue) {
BPlusNode *node = bplus_tree_find_leaf(tree, newKey);
assert(node != NULL);
if (node->keyCount < node->order - 1) {
//Can insert at this node
BPlusKV *kv = new_bplus_kv(newKey, newValue, NULL);
bool insertKV = bplus_node_insert_kv(node, kv);
assert(insertKV == true);
} else {
//Note: as this is a leaf node, we are not updating/maintaining any pointers
//Insert and split
BPlusKV *kv = new_bplus_kv(newKey, newValue, NULL);
bool insertKV = bplus_node_insert_kv(node, kv);
assert(insertKV == true);
//Decide on new midpoint
size_t midpointIndex = node->order / 2;
//Create new node and update leaf links
BPlusNode *newNode = new_bplus_node(tree->nextID++, false, node->order);
newNode->parent = node->parent;
newNode->rightLeaf = node->rightLeaf;
newNode->leftLeaf = node;
node->rightLeaf = newNode;
//Move keys >= midpoint to a new node
size_t k = 0;
for (size_t i = midpointIndex; i < node->order; i++) {
newNode->keys[k++] = node->keys[i];
node->keys[i] = NULL;
newNode->keyCount++;
node->keyCount--;
}
//Send midpoint key to parent
BPlusKV *ascendingKV = new_bplus_kv(newNode->keys[0]->key, NULL, NULL);
ascendingKV->rightPointer = newNode;
do {
if (node->parent == NULL) {
//Split root, create a new node with the ascending kv and replace it
BPlusNode *newRoot = new_bplus_node(tree->nextID++, true, tree->order);
newRoot->leftPointer = node;
bplus_node_insert_kv(newRoot, ascendingKV);
tree->root = newRoot;
newNode->parent = tree->root;
node->parent = tree->root;
break;
} else {
node = node->parent;
bplus_node_insert_kv(node, ascendingKV);
if (node->keyCount < node->order) {
break;
} else {
//Parent needs splitting
midpointIndex = node->order / 2;
//Create new node
newNode = new_bplus_node(tree->nextID++, true, node->order);
newNode->parent = node->parent;
//Move midpoint up and move keys > midpoint to new node
k = 0;
for (size_t i = midpointIndex + 1; i < node->order; i++) {
newNode->keys[k++] = node->keys[i];
node->keys[i] = NULL;
newNode->keyCount++;
node->keyCount--;
}
ascendingKV = node->keys[midpointIndex];
node->keys[midpointIndex] = NULL;
node->keyCount--;
newNode->leftPointer = ascendingKV->rightPointer;
ascendingKV->rightPointer = newNode;
}
}
} while (node != NULL);
}
}

65
src/bplus_tree.h Normal file
View File

@@ -0,0 +1,65 @@
//
// Created by sam on 14/06/18.
//
#ifndef SDB_BPLUS_TREE_H
#define SDB_BPLUS_TREE_H
#include <inttypes.h>
#include <stddef.h>
#include <stdbool.h>
// Forward declaration: BPlusKV refers to BPlusNode before the node struct is
// defined further down.
struct BPlusNode_t;
typedef struct BPlusNode_t BPlusNode;
// One slot of a node's key array: a key, an optional payload and the child
// that sits to the right of the key.
struct BPlusKV_t {
// Key the entry is ordered by within its node.
uint64_t key;
// Payload pointer. free_bplus_kv() passes it to free(), so it must be NULL or
// something free() accepts.
void *value;
// Child followed by bplus_tree_find_leaf() for search keys >= this key (and
// below the next key in the node); NULL when there is no child.
BPlusNode *rightPointer;
};
typedef struct BPlusKV_t BPlusKV;
// Allocates an entry. Note that `leftPointer` is stored in the entry's
// rightPointer field.
BPlusKV *new_bplus_kv(uint64_t key, void *value, BPlusNode *leftPointer);
// Frees the entry, its payload and the subtree under rightPointer.
void free_bplus_kv(BPlusKV *kv);
// A node of the B+ tree; used for both internal nodes and leaves.
struct BPlusNode_t {
// Identifier given at creation (the tree hands out values from its nextID).
uint64_t id;
// true for an internal node, false for a leaf.
bool isInternal;
// Number of slots allocated in `keys`.
size_t order;
// Parent node, or NULL; print_bplus_node() labels a parentless node ROOT.
BPlusNode *parent;
// Array of `order` entry pointers; the first keyCount are in use and are kept
// in ascending key order by bplus_node_insert_kv().
BPlusKV **keys;
// Number of entries currently stored in `keys`.
size_t keyCount;
// Child followed for search keys smaller than the first stored key.
BPlusNode *leftPointer;
// Sibling links between leaves, set when a leaf is split.
BPlusNode *leftLeaf;
BPlusNode *rightLeaf;
};
// Allocates an empty node with `order` key slots and all links set to NULL.
BPlusNode *new_bplus_node(uint64_t id, bool isInternal, size_t order);
// Recursively frees the node, its entries and every child subtree.
void free_bplus_node(BPlusNode *node);
// Stores kv in key order; returns false if the node already holds `order`
// entries, true otherwise.
bool bplus_node_insert_kv(BPlusNode *node, BPlusKV *kv);
// Prints the node and its subtree to stdout, prefixed by `indent` spaces.
void print_bplus_node(BPlusNode *node, size_t indent);
// Handle for a whole B+ tree.
struct BPlusTree_t {
// Order passed to every node created for this tree.
size_t order;
// Set to order / 2 at creation; presumably the minimum fill per node --
// TODO confirm, nothing in the implementation reads it yet.
size_t minFill;
// Current root node; replaced when a split reaches the top of the tree.
BPlusNode *root;
// Next identifier to hand out to a newly created node.
uint64_t nextID;
};
typedef struct BPlusTree_t BPlusTree;
// Creates a tree whose root is a single empty leaf.
BPlusTree *new_bplus_tree(size_t order);
// Frees the tree and all of its nodes.
void free_bplus_tree(BPlusTree *tree);
// Prints the tree's order, minimum fill and all nodes to stdout.
void print_bplus_tree(BPlusTree *tree);
// Returns the leaf that would contain `key`, or NULL if the tree has no root.
BPlusNode * bplus_tree_find_leaf(BPlusTree *tree, uint64_t key);
// Inserts key/value into the matching leaf, splitting nodes when they fill up.
bool bplus_tree_insert(BPlusTree *tree, uint64_t key, void *value);
#endif //SDB_BPLUS_TREE_H