diff --git a/CMakeLists.txt b/CMakeLists.txt index 4afb572..30bd3a3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,4 +3,5 @@ project(SDB C) set(CMAKE_C_STANDARD 11) -add_executable(SDB src/main.c src/InputBuffer.c src/InputBuffer.h src/SQL.c src/SQL.h src/scanner.c src/scanner.h src/parser.c src/parser.h) +include_directories(${CMAKE_SOURCE_DIR}/lib) +add_executable(SDB src/main.c src/InputBuffer.c src/InputBuffer.h src/SQL.c src/SQL.h src/scanner.c src/scanner.h src/parser.c src/parser.h src/bplus_tree.c src/bplus_tree.h) diff --git a/src/SQL.c b/src/SQL.c index 0efda5f..2d00412 100644 --- a/src/SQL.c +++ b/src/SQL.c @@ -428,7 +428,7 @@ void print_column_spec_list(ColumnSpecList *list) { for (size_t i = 0; i < list->length; i++) { ColumnSpec *spec = list->columns[i]; if (spec->type == COLTYPE_STRING) { - printf("%s STRING(%d)", spec->identifier, spec->size); + printf("%s STRING(%ld)", spec->identifier, spec->size); } else if (spec->type == COLTYPE_INT) { printf("%s INTEGER", spec->identifier); } @@ -470,6 +470,6 @@ void print_value(Value *value) { } printf("'"); } else if (value->type == VALUE_NUMBER) { - printf("%lld", value->number); + printf("%ld", value->number); } } \ No newline at end of file diff --git a/src/bplus_tree.c b/src/bplus_tree.c new file mode 100644 index 0000000..6946d44 --- /dev/null +++ b/src/bplus_tree.c @@ -0,0 +1,251 @@ +// +// Created by sam on 14/06/18. +// + +#include +#include +#include +#include "bplus_tree.h" + +BPlusKV *new_bplus_kv(uint64_t key, void *value, BPlusNode *leftPointer) { + BPlusKV *kv = malloc(sizeof(BPlusKV)); + kv->key = key; + kv->value = value; + kv->rightPointer = leftPointer; + return kv; +} + +void free_bplus_kv(BPlusKV *kv) { + if (kv->rightPointer != NULL) { + free_bplus_node(kv->rightPointer); + } + if (kv->value != NULL) { + free(kv->value); + } + free(kv); +} + +BPlusNode *new_bplus_node(uint64_t id, bool isInternal, size_t order) { + BPlusNode *node = malloc(sizeof(BPlusNode)); + node->id = id; + node->isInternal = isInternal; + node->order = order; + node->parent = NULL; + node->keys = calloc(order, sizeof(BPlusKV *)); + node->keyCount = 0; + node->leftPointer = NULL; + node->leftLeaf = NULL; + node->rightLeaf = NULL; + return node; +} + +void free_bplus_node(BPlusNode *node) { + for (size_t i = 0; i < node->keyCount; i++) { + free_bplus_kv(node->keys[i]); + } + free(node->keys); + if (node->leftPointer != NULL) { + free_bplus_node(node->leftPointer); + } + free(node); +} + +bool bplus_node_insert_kv(BPlusNode *node, BPlusKV *kv) { + if (node->keyCount == node->order) { + return false; + } + //Insert if empty + if (node->keyCount == 0) { + node->keys[0] = kv; + node->keyCount = 1; + return true; + } + //Check if can add to end + if (kv->key > node->keys[node->keyCount - 1]->key) { + node->keys[node->keyCount++] = kv; + return true; + } + + //Find an index where we should insert + size_t i; + for (i = 0; i < node->keyCount; i++) { + if (kv->key < node->keys[i]->key) { + break; + } + } + //Make hole for new value + for (size_t k = node->order - 1; k > i; k--) { + node->keys[k] = node->keys[k - 1]; + } + node->keys[i] = kv; + node->keyCount++; + return true; +} + +void print_bplus_node(BPlusNode *node, size_t indent) { + char *indentStr = calloc(indent + 1, sizeof(char)); + memset(indentStr, ' ', sizeof(char) * indent); + + if (node->parent == NULL) { + printf("%sROOT #%ld (%ld/%ld)\n", indentStr, node->id, node->keyCount, node->order); + } else if (node->isInternal) { + printf("%sINTERNAL #%ld (%ld/%ld)\n", indentStr, node->id, node->keyCount, node->order); + } else { + printf("%sLEAF #%ld (%ld/%ld)\n", indentStr, node->id, node->keyCount, node->order); + } + + if (node->leftPointer != NULL) { + print_bplus_node(node->leftPointer, indent + 4); + } + for (size_t i = 0; i < node->keyCount; i++) { + printf("%s Key: %ld\t", indentStr, node->keys[i]->key); + if (!node->isInternal) { + if (node->keys[i]->value != NULL) { + printf("Value: %p", node->keys[i]->value); + } else { + printf("Value: NULL"); + } + } + printf("\n"); + if (node->keys[i]->rightPointer != NULL) { + print_bplus_node(node->keys[i]->rightPointer, indent + 4); + } + } + + free(indentStr); +} + +BPlusTree *new_bplus_tree(size_t order) { + BPlusTree *tree = malloc(sizeof(BPlusTree)); + tree->order = order; + tree->minFill = order / 2; + tree->nextID = 0; + tree->root = new_bplus_node(tree->nextID++, false, order); + return tree; +} + +void free_bplus_tree(BPlusTree *tree) { + if (tree->root != NULL) { + free_bplus_node(tree->root); + } + free(tree); +} + +void print_bplus_tree(BPlusTree *tree) { + printf("B+ Tree\n"); + printf("- Order: %ld\n", tree->order); + printf("- Min Fill: %ld\n", tree->minFill); + + if (tree->root != NULL) { + print_bplus_node(tree->root, 2); + } else { + printf(" EMPTY TREE\n"); + } +} + +BPlusNode *bplus_tree_find_leaf(BPlusTree *tree, uint64_t key) { + BPlusNode *node = tree->root; + + //Descend to correct leaf node + while (node != NULL) { + //Found our way to a leaf node + if (!node->isInternal) { + break; + } + //We are assuming at least one key + if (key < node->keys[0]->key) { + //Follow left pointer if less than first key + node = node->leftPointer; + continue; + } + size_t i; + for (i = 1; i < node->keyCount; i++) { + if (key < node->keys[i]->key) { + break; + } + } + node = node->keys[i - 1]->rightPointer; + } + return node; +} + +bool bplus_tree_insert(BPlusTree *tree, uint64_t newKey, void *newValue) { + BPlusNode *node = bplus_tree_find_leaf(tree, newKey); + assert(node != NULL); + + if (node->keyCount < node->order - 1) { + //Can insert at this node + BPlusKV *kv = new_bplus_kv(newKey, newValue, NULL); + bool insertKV = bplus_node_insert_kv(node, kv); + assert(insertKV == true); + } else { + //Note: as this is a leaf node, we are not updating/maintaining any pointers + //Insert and split + BPlusKV *kv = new_bplus_kv(newKey, newValue, NULL); + bool insertKV = bplus_node_insert_kv(node, kv); + assert(insertKV == true); + + //Decide on new midpoint + size_t midpointIndex = node->order / 2; + + //Create new node and update leaf links + BPlusNode *newNode = new_bplus_node(tree->nextID++, false, node->order); + newNode->parent = node->parent; + newNode->rightLeaf = node->rightLeaf; + newNode->leftLeaf = node; + node->rightLeaf = newNode; + + //Move keys >= midpoint to a new node + size_t k = 0; + for (size_t i = midpointIndex; i < node->order; i++) { + newNode->keys[k++] = node->keys[i]; + node->keys[i] = NULL; + newNode->keyCount++; + node->keyCount--; + } + + //Send midpoint key to parent + BPlusKV *ascendingKV = new_bplus_kv(newNode->keys[0]->key, NULL, NULL); + ascendingKV->rightPointer = newNode; + + do { + if (node->parent == NULL) { + //Split root, create a new node with the ascending kv and replace it + BPlusNode *newRoot = new_bplus_node(tree->nextID++, true, tree->order); + newRoot->leftPointer = node; + bplus_node_insert_kv(newRoot, ascendingKV); + tree->root = newRoot; + newNode->parent = tree->root; + node->parent = tree->root; + break; + } else { + node = node->parent; + bplus_node_insert_kv(node, ascendingKV); + if (node->keyCount < node->order) { + break; + } else { + //Parent needs splitting + midpointIndex = node->order / 2; + //Create new node + newNode = new_bplus_node(tree->nextID++, true, node->order); + newNode->parent = node->parent; + //Move midpoint up and move keys > midpoint to new node + k = 0; + for (size_t i = midpointIndex + 1; i < node->order; i++) { + newNode->keys[k++] = node->keys[i]; + node->keys[i] = NULL; + newNode->keyCount++; + node->keyCount--; + } + ascendingKV = node->keys[midpointIndex]; + node->keys[midpointIndex] = NULL; + node->keyCount--; + newNode->leftPointer = ascendingKV->rightPointer; + ascendingKV->rightPointer = newNode; + } + } + } while (node != NULL); + + } + +} \ No newline at end of file diff --git a/src/bplus_tree.h b/src/bplus_tree.h new file mode 100644 index 0000000..e84e63d --- /dev/null +++ b/src/bplus_tree.h @@ -0,0 +1,65 @@ +// +// Created by sam on 14/06/18. +// + +#ifndef SDB_BPLUS_TREE_H +#define SDB_BPLUS_TREE_H + +#include +#include +#include + +struct BPlusNode_t; +typedef struct BPlusNode_t BPlusNode; + +struct BPlusKV_t { + uint64_t key; + void *value; + BPlusNode *rightPointer; +}; +typedef struct BPlusKV_t BPlusKV; + +BPlusKV *new_bplus_kv(uint64_t key, void *value, BPlusNode *leftPointer); + +void free_bplus_kv(BPlusKV *kv); + +struct BPlusNode_t { + uint64_t id; + bool isInternal; + size_t order; + BPlusNode *parent; + BPlusKV **keys; + size_t keyCount; + BPlusNode *leftPointer; + BPlusNode *leftLeaf; + BPlusNode *rightLeaf; +}; + +BPlusNode *new_bplus_node(uint64_t id, bool isInternal, size_t order); + +void free_bplus_node(BPlusNode *node); + +bool bplus_node_insert_kv(BPlusNode *node, BPlusKV *kv); + +void print_bplus_node(BPlusNode *node, size_t indent); + +struct BPlusTree_t { + size_t order; + size_t minFill; + BPlusNode *root; + uint64_t nextID; +}; +typedef struct BPlusTree_t BPlusTree; + +BPlusTree *new_bplus_tree(size_t order); + +void free_bplus_tree(BPlusTree *tree); + +void print_bplus_tree(BPlusTree *tree); + +BPlusNode * bplus_tree_find_leaf(BPlusTree *tree, uint64_t key); + +bool bplus_tree_insert(BPlusTree *tree, uint64_t key, void *value); + + +#endif //SDB_BPLUS_TREE_H