From 566bf94225f667c3d1bb186502114dfe80d1ead0 Mon Sep 17 00:00:00 2001 From: Sam Stevens Date: Sat, 23 Jun 2018 16:21:12 +0100 Subject: [PATCH] Implemented find/find closest for B+ tree. Added test suite. Fixed some b+ tree bugs. --- .idea/inspectionProfiles/Project_Default.xml | 8 ++ .idea/misc.xml | 6 + CMakeLists.txt | 40 ++++++- cmake/FindCheck.cmake | 57 ++++++++++ lib/.gitkeep | 0 src/CMakeLists.txt | 7 ++ src/bplus_tree.c | 110 +++++++++++++++++++ src/bplus_tree.h | 13 +++ tests/CMakeLists.txt | 13 +++ tests/bplus_tree_test.c | 76 +++++++++++++ tests/bplus_tree_test.h | 12 ++ tests/check_sdb.c | 21 ++++ 12 files changed, 360 insertions(+), 3 deletions(-) create mode 100644 .idea/inspectionProfiles/Project_Default.xml create mode 100644 cmake/FindCheck.cmake create mode 100644 lib/.gitkeep create mode 100644 src/CMakeLists.txt create mode 100644 tests/CMakeLists.txt create mode 100644 tests/bplus_tree_test.c create mode 100644 tests/bplus_tree_test.h create mode 100644 tests/check_sdb.c diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 0000000..5a8f883 --- /dev/null +++ b/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,8 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml index 8822db8..ec82416 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -1,6 +1,12 @@ + + + + + + diff --git a/CMakeLists.txt b/CMakeLists.txt index 30bd3a3..a7909e8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,41 @@ cmake_minimum_required(VERSION 3.10) -project(SDB C) +set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") +project(SDB C) set(CMAKE_C_STANDARD 11) -include_directories(${CMAKE_SOURCE_DIR}/lib) -add_executable(SDB src/main.c src/InputBuffer.c src/InputBuffer.h src/SQL.c src/SQL.h src/scanner.c src/scanner.h src/parser.c src/parser.h src/bplus_tree.c src/bplus_tree.h) +#Check options +include(CheckCSourceCompiles) +include(CheckCSourceRuns) +include(CheckFunctionExists) +include(CheckIncludeFile) +include(CheckIncludeFiles) +include(CheckLibraryExists) +include(CheckSymbolExists) +include(CheckTypeSize) +set(INCLUDES "") +macro(ck_check_include_file header var) + check_include_files("${INCLUDES};${header}" ${var}) + if(${var}) + set(INCLUDES ${INCLUDES} ${header}) + endif(${var}) +endmacro(ck_check_include_file) + +ck_check_include_file("stdlib.h" HAVE_STDLIB_H) +check_type_size(intmax_t INTMAX_T) +check_type_size(uintmax_t UINTMAX_T) + +check_type_size(pid_t PID_T) +if(NOT HAVE_PID_T) + if(WIN32) + set(pid_t "int") + else(WIN32) + MESSAGE(FATAL_ERROR "pid_t doesn't exist on this platform?") + endif(WIN32) +endif(NOT HAVE_PID_T) + +add_subdirectory(src) +add_subdirectory(tests) + +enable_testing() +add_test(NAME check_SDB COMMAND check_SDB) \ No newline at end of file diff --git a/cmake/FindCheck.cmake b/cmake/FindCheck.cmake new file mode 100644 index 0000000..5ff065e --- /dev/null +++ b/cmake/FindCheck.cmake @@ -0,0 +1,57 @@ +# - Try to find the CHECK libraries +# Once done this will define +# +# CHECK_FOUND - system has check +# CHECK_INCLUDE_DIR - the check include directory +# CHECK_LIBRARIES - check library +# +# This configuration file for finding libcheck is originally from +# the opensync project. The originally was downloaded from here: +# opensync.org/browser/branches/3rd-party-cmake-modules/modules/FindCheck.cmake +# +# Copyright (c) 2007 Daniel Gollub +# Copyright (c) 2007 Bjoern Ricks +# +# Redistribution and use is allowed according to the terms of the New +# BSD license. +# For details see the accompanying COPYING-CMAKE-SCRIPTS file. + + +INCLUDE( FindPkgConfig ) + +# Take care about check.pc settings +PKG_SEARCH_MODULE( CHECK check ) + +# Look for CHECK include dir and libraries +IF( NOT CHECK_FOUND ) + IF ( CHECK_INSTALL_DIR ) + MESSAGE ( STATUS "Using override CHECK_INSTALL_DIR to find check" ) + SET ( CHECK_INCLUDE_DIR "${CHECK_INSTALL_DIR}/include" ) + SET ( CHECK_INCLUDE_DIRS "${CHECK_INCLUDE_DIR}" ) + FIND_LIBRARY( CHECK_LIBRARY NAMES check PATHS "${CHECK_INSTALL_DIR}/lib" ) + FIND_LIBRARY( COMPAT_LIBRARY NAMES compat PATHS "${CHECK_INSTALL_DIR}/lib" ) + SET ( CHECK_LIBRARIES "${CHECK_LIBRARY}" "${COMPAT_LIBRARY}" ) + ELSE ( CHECK_INSTALL_DIR ) + FIND_PATH( CHECK_INCLUDE_DIR check.h ) + FIND_LIBRARY( CHECK_LIBRARIES NAMES check ) + ENDIF ( CHECK_INSTALL_DIR ) + + IF ( CHECK_INCLUDE_DIR AND CHECK_LIBRARIES ) + SET( CHECK_FOUND 1 ) + IF ( NOT Check_FIND_QUIETLY ) + MESSAGE ( STATUS "Found CHECK: ${CHECK_LIBRARIES}" ) + ENDIF ( NOT Check_FIND_QUIETLY ) + ELSE ( CHECK_INCLUDE_DIR AND CHECK_LIBRARIES ) + IF ( Check_FIND_REQUIRED ) + MESSAGE( FATAL_ERROR "Could NOT find CHECK" ) + ELSE ( Check_FIND_REQUIRED ) + IF ( NOT Check_FIND_QUIETLY ) + MESSAGE( STATUS "Could NOT find CHECK" ) + ENDIF ( NOT Check_FIND_QUIETLY ) + ENDIF ( Check_FIND_REQUIRED ) + ENDIF ( CHECK_INCLUDE_DIR AND CHECK_LIBRARIES ) +ENDIF( NOT CHECK_FOUND ) + +# Hide advanced variables from CMake GUIs +MARK_AS_ADVANCED( CHECK_INCLUDE_DIR CHECK_LIBRARIES ) + diff --git a/lib/.gitkeep b/lib/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt new file mode 100644 index 0000000..68b6b35 --- /dev/null +++ b/src/CMakeLists.txt @@ -0,0 +1,7 @@ + +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../lib) + +add_library(SDBLib STATIC InputBuffer.c InputBuffer.h SQL.c SQL.h scanner.c scanner.h parser.c parser.h bplus_tree.c bplus_tree.h) + +add_executable(SDB main.c) +target_link_libraries(SDB SDBLib) \ No newline at end of file diff --git a/src/bplus_tree.c b/src/bplus_tree.c index 6946d44..2e97b1a 100644 --- a/src/bplus_tree.c +++ b/src/bplus_tree.c @@ -7,6 +7,14 @@ #include #include "bplus_tree.h" +#define APPEND_STR(str, size, _) { \ + if (strlen(_) + strlen(str) + 1 > (size)) { \ + (size) *= 2; \ + (str) = realloc(str, (size) * sizeof(char)); \ + } \ + strcat(str, _);\ +} + BPlusKV *new_bplus_kv(uint64_t key, void *value, BPlusNode *leftPointer) { BPlusKV *kv = malloc(sizeof(BPlusKV)); kv->key = key; @@ -93,6 +101,9 @@ void print_bplus_node(BPlusNode *node, size_t indent) { } else { printf("%sLEAF #%ld (%ld/%ld)\n", indentStr, node->id, node->keyCount, node->order); } + if (node->parent != NULL) { + printf("%s Parent #%ld\n", indentStr, node->parent->id); + } if (node->leftPointer != NULL) { print_bplus_node(node->leftPointer, indent + 4); @@ -115,6 +126,24 @@ void print_bplus_node(BPlusNode *node, size_t indent) { free(indentStr); } +char *debug_bplus_node_str(BPlusNode *node, char *str, size_t *strSize) { + APPEND_STR(str, *strSize, "["); + if (node->leftPointer != NULL) { + str = debug_bplus_node_str(node->leftPointer, str, strSize); + } + char buffer[32] = {0}; + for (size_t i = 0; i < node->keyCount; i++) { + snprintf(buffer, 32, node->isInternal ? "{%ld}" : "{%ld*}", node->keys[i]->key); + APPEND_STR(str, *strSize, buffer); + if (node->keys[i]->rightPointer != NULL) { + str = debug_bplus_node_str(node->keys[i]->rightPointer, str, strSize); + } + } + + APPEND_STR(str, *strSize, "]"); + return str; +} + BPlusTree *new_bplus_tree(size_t order) { BPlusTree *tree = malloc(sizeof(BPlusTree)); tree->order = order; @@ -143,6 +172,18 @@ void print_bplus_tree(BPlusTree *tree) { } } +char *debug_bplus_tree_str(BPlusTree *tree, char *str) { + size_t strSize = 32; + str = realloc(str, strSize * sizeof(char)); + memset(str, 0, strSize * sizeof(char)); + + snprintf(str, 32, "B+<%ld>", tree->order); + + str = debug_bplus_node_str(tree->root, str, &strSize); + + return str; +} + BPlusNode *bplus_tree_find_leaf(BPlusTree *tree, uint64_t key) { BPlusNode *node = tree->root; @@ -232,6 +273,7 @@ bool bplus_tree_insert(BPlusTree *tree, uint64_t newKey, void *newValue) { //Move midpoint up and move keys > midpoint to new node k = 0; for (size_t i = midpointIndex + 1; i < node->order; i++) { + node->keys[i]->rightPointer->parent = newNode; newNode->keys[k++] = node->keys[i]; node->keys[i] = NULL; newNode->keyCount++; @@ -241,6 +283,7 @@ bool bplus_tree_insert(BPlusTree *tree, uint64_t newKey, void *newValue) { node->keys[midpointIndex] = NULL; node->keyCount--; newNode->leftPointer = ascendingKV->rightPointer; + newNode->leftPointer->parent = newNode; ascendingKV->rightPointer = newNode; } } @@ -248,4 +291,71 @@ bool bplus_tree_insert(BPlusTree *tree, uint64_t newKey, void *newValue) { } +} + +BPlusKV *bplus_tree_find(BPlusTree *tree, uint64_t key) { + BPlusNode *leaf = bplus_tree_find_leaf(tree, key); + if (leaf == NULL) { + return NULL; + } + for (size_t i = 0; i < leaf->keyCount; i++) { + if (leaf->keys[i]->key == key) { + return leaf->keys[i]; + } + } +} + +BPlusKV *bplus_tree_find_closest(BPlusTree *tree, uint64_t key, BPlusFindComp dir) { + //Do regular find if we are only looking for key + if (dir == FIND_EQ) { + return bplus_tree_find(tree, key); + } + BPlusNode *leaf = bplus_tree_find_leaf(tree, key); + if (leaf == NULL) { + return NULL; + } + //Scan right until we find a value >= key + ssize_t i; + do { + bool done = false; + for (i = 0; i < leaf->keyCount; i++) { + if (leaf->keys[i]->key == key && (dir & FIND_EQ) == FIND_EQ) { + //Return the value if we are looking for value = key + done = true; + break; + } else if (leaf->keys[i]->key > key) { + //Return the value if we are looking for value > key + if ((dir & FIND_GT) == FIND_GT) { + return leaf->keys[i]; + } else { + done = true; + break; + } + } + } + if (done) { + break; + } + if (leaf->rightLeaf == NULL) { + break; + } + leaf = leaf->rightLeaf; + } while (leaf != NULL); + + //Didn't find an acceptable value >= key and we are not looking for value < key + if ((dir & FIND_LT) == 0) { + return NULL; + } + + //Scan left until we find a value < key + do { + for (i = leaf->keyCount - 1; i >= 0; i--) { + if (leaf->keys[i]->key < key) { + return leaf->keys[i]; + } + } + leaf = leaf->leftLeaf; + } while (leaf != NULL); + + return NULL; } \ No newline at end of file diff --git a/src/bplus_tree.h b/src/bplus_tree.h index e84e63d..4160c68 100644 --- a/src/bplus_tree.h +++ b/src/bplus_tree.h @@ -43,6 +43,8 @@ bool bplus_node_insert_kv(BPlusNode *node, BPlusKV *kv); void print_bplus_node(BPlusNode *node, size_t indent); +char * debug_bplus_node_str(BPlusNode *node, char *str, size_t *strSize); + struct BPlusTree_t { size_t order; size_t minFill; @@ -56,10 +58,21 @@ BPlusTree *new_bplus_tree(size_t order); void free_bplus_tree(BPlusTree *tree); void print_bplus_tree(BPlusTree *tree); +char *debug_bplus_tree_str(BPlusTree *tree, char *str); BPlusNode * bplus_tree_find_leaf(BPlusTree *tree, uint64_t key); bool bplus_tree_insert(BPlusTree *tree, uint64_t key, void *value); +BPlusKV * bplus_tree_find(BPlusTree *tree, uint64_t key); + +enum BPlusFindComp_t { + FIND_NONE = 0, + FIND_LT = 1, + FIND_EQ = 2, + FIND_GT = 4 +}; +typedef enum BPlusFindComp_t BPlusFindComp; +BPlusKV * bplus_tree_find_closest(BPlusTree *tree, uint64_t key, BPlusFindComp dir); #endif //SDB_BPLUS_TREE_H diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt new file mode 100644 index 0000000..e85108c --- /dev/null +++ b/tests/CMakeLists.txt @@ -0,0 +1,13 @@ +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../src) + +find_package(Check REQUIRED) +find_package (Threads REQUIRED) +include_directories(${CHECK_INCLUDE_DIRS}) +link_directories(${CHECK_LIBRARY_DIRS}) + +set(TEST_SOURCES + check_sdb.c + bplus_tree_test.c bplus_tree_test.h) + +add_executable(check_SDB ${TEST_SOURCES}) +target_link_libraries(check_SDB SDBLib ${CHECK_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT}) \ No newline at end of file diff --git a/tests/bplus_tree_test.c b/tests/bplus_tree_test.c new file mode 100644 index 0000000..1c5d186 --- /dev/null +++ b/tests/bplus_tree_test.c @@ -0,0 +1,76 @@ +// +// Created by sam on 23/06/18. +// + +#include +#include "bplus_tree_test.h" +#include "../src/bplus_tree.h" + +START_TEST(insert_keys_4) + { + + BPlusTree *tree = new_bplus_tree(4); + + char *buffer = NULL; + + ck_assert_str_eq(debug_bplus_tree_str(tree, buffer), "B+<4>[]"); + + bplus_tree_insert(tree, 10, NULL); + bplus_tree_insert(tree, 20, NULL); + + ck_assert_str_eq(debug_bplus_tree_str(tree, buffer), "B+<4>[{10*}{20*}]"); + + bplus_tree_insert(tree, 30, NULL); + bplus_tree_insert(tree, 40, NULL); + + ck_assert_str_eq(debug_bplus_tree_str(tree, buffer), "B+<4>[[{10*}{20*}]{30}[{30*}{40*}]]"); + + bplus_tree_insert(tree, 35, NULL); + bplus_tree_insert(tree, 50, NULL); + ck_assert_str_eq(debug_bplus_tree_str(tree, buffer), "B+<4>[[{10*}{20*}]{30}[{30*}{35*}]{40}[{40*}{50*}]]"); + + free_bplus_tree(tree); + + if (buffer != NULL) free(buffer); + + }END_TEST + +START_TEST(find_keys_4) + { + BPlusTree *tree = new_bplus_tree(4); + + bplus_tree_insert(tree, 10, NULL); + bplus_tree_insert(tree, 20, NULL); + bplus_tree_insert(tree, 30, NULL); + bplus_tree_insert(tree, 40, NULL); + + ck_assert(bplus_tree_find(tree, 10) != NULL); + ck_assert(bplus_tree_find(tree, 40) != NULL); + + BPlusKV *kv; + kv = bplus_tree_find_closest(tree, 15, FIND_LT | FIND_EQ); + ck_assert_ptr_ne(kv, NULL); + ck_assert_int_eq(kv->key, 10); + + kv = bplus_tree_find_closest(tree, 30, FIND_LT); + ck_assert_ptr_ne(kv, NULL); + ck_assert_int_eq(kv->key, 20); + + kv = bplus_tree_find_closest(tree, 30, FIND_GT); + ck_assert_ptr_ne(kv, NULL); + ck_assert_int_eq(kv->key, 40); + + free_bplus_tree(tree); + }END_TEST + +Suite *blus_tree_suite(void) { + Suite *s = suite_create("B+ Tree"); + + TCase *core = tcase_create("Insert"); + suite_add_tcase(s, core); + + tcase_add_test(core, insert_keys_4); + tcase_add_test(core, find_keys_4); + + return s; +} \ No newline at end of file diff --git a/tests/bplus_tree_test.h b/tests/bplus_tree_test.h new file mode 100644 index 0000000..d07f971 --- /dev/null +++ b/tests/bplus_tree_test.h @@ -0,0 +1,12 @@ +// +// Created by sam on 23/06/18. +// + +#ifndef SDB_BPLUS_TREE_TEST_H +#define SDB_BPLUS_TREE_TEST_H + +#include + +Suite * blus_tree_suite(void); + +#endif //SDB_BPLUS_TREE_TEST_H diff --git a/tests/check_sdb.c b/tests/check_sdb.c new file mode 100644 index 0000000..f2875d6 --- /dev/null +++ b/tests/check_sdb.c @@ -0,0 +1,21 @@ +// +// Created by sam on 22/06/18. +// + +#include +#include +#include "bplus_tree_test.h" + +int main(void) { + int number_failed; + Suite *s; + SRunner *sr; + + s = blus_tree_suite(); + sr = srunner_create(s); + + srunner_run_all(sr, CK_NORMAL); + number_failed = srunner_ntests_failed(sr); + srunner_free(sr); + return (number_failed == 0) ? EXIT_SUCCESS : EXIT_FAILURE; +} \ No newline at end of file