diff --git a/.gitignore b/.gitignore index 93a40ba..0dfa05a 100644 --- a/.gitignore +++ b/.gitignore @@ -78,6 +78,4 @@ CTestTestfile.cmake build -# End of https://www.gitignore.io/api/cmake,clion - -cmake-build-debug* \ No newline at end of file +# End of https://www.gitignore.io/api/cmake,clion \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 94df92e..4afb572 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,4 +3,4 @@ project(SDB C) set(CMAKE_C_STANDARD 11) -add_executable(SDB src/main.c src/InputBuffer.c src/InputBuffer.h src/SQL.c src/SQL.h src/scanner.c src/scanner.h) +add_executable(SDB src/main.c src/InputBuffer.c src/InputBuffer.h src/SQL.c src/SQL.h src/scanner.c src/scanner.h src/parser.c src/parser.h) diff --git a/Language.txt b/Language.txt index ddec3da..37feb1f 100644 --- a/Language.txt +++ b/Language.txt @@ -8,7 +8,7 @@ Drop Table X StatementList = StatementList, Statement | Statement ; Statement = SelectStmt | InsertStmt | UpdateStmt | DeleteStmt | CreateStmt | DropStmt ';' ; -SelectStmt = 'Select', FieldList, 'From', Identifier, AssignmentList ; +SelectStmt = 'Select', FieldList, 'From', Identifier, ['Where', ComparisonGroup] ; InsertStmt = 'Insert Into', Identifier, 'Set', AssignmentList ; UpdateStmt = 'Update', Identifier, 'Set', AssignmentList, 'Where', AssignmentList ; DeleteStmt = 'Delete From', Identifier, 'Where', AssignmentList ; @@ -17,7 +17,7 @@ DropStmt = 'Drop Table', Identifier ; ColumnSpecList = ColumnSpec | ColumnSpecList, ',', ColumnSpec ; ColumnSpec = Identifier, ColumnType | Identifier, ColumnType, ColumnOption ; -ColumnType = 'String(', number, ')' | 'Int' ; +ColumnType = 'String', '(', number, ')' | 'Int' | 'Integer' ; ColumnOption = 'Index' ; FieldList = Identifier | FieldList, ',', Identifier ; diff --git a/src/SQL.c b/src/SQL.c index a2fda27..8789967 100644 --- a/src/SQL.c +++ b/src/SQL.c @@ -20,17 +20,18 @@ void free_value(Value *value) { } Comparison *new_comparison() { - Comparison* comparison = malloc(sizeof(Comparison)); + Comparison *comparison = malloc(sizeof(Comparison)); comparison->identifier = NULL; comparison->value = NULL; comparison->comp = COMP_NONE; return comparison; } + void free_comparison(Comparison *comparison) { if (comparison->value != NULL) { free(comparison->value); } - if(comparison->identifier != NULL) { + if (comparison->identifier != NULL) { free(comparison->identifier); } free(comparison); @@ -42,9 +43,10 @@ ComparisonGroup *new_comparision_group() { group->length = 0; return group; } -void free_comparison_group(ComparisonGroup* group) { + +void free_comparison_group(ComparisonGroup *group) { if (group->length > 0) { - for(size_t i=0; ilength; i++) { + for (size_t i = 0; i < group->length; i++) { free(group->comparisons[i]); } free(group->comparisons); @@ -52,9 +54,10 @@ void free_comparison_group(ComparisonGroup* group) { } free(group); } + void append_comparison_group(ComparisonGroup *group, Comparison *comparison) { group->length++; - group->comparisons = realloc(group->comparisons, sizeof(Assignment) * group->length); + group->comparisons = realloc(group->comparisons, sizeof(Assignment *) * group->length); group->comparisons[group->length - 1] = comparison; } @@ -94,7 +97,7 @@ void free_assignment_list(AssignmentList *assignmentList) { void append_assignment_list(AssignmentList *list, Assignment *assignment) { list->length++; - list->assignments = realloc(list->assignments, sizeof(Assignment) * list->length); + list->assignments = realloc(list->assignments, sizeof(Assignment *) * list->length); list->assignments[list->length - 1] = assignment; } @@ -160,7 +163,7 @@ void free_column_spec_list(ColumnSpecList *list) { void append_column_spec_list(ColumnSpecList *list, ColumnSpec *spec) { list->length++; - list->columns = realloc(list->columns, sizeof(ColumnSpec) * list->length); + list->columns = realloc(list->columns, sizeof(ColumnSpec *) * list->length); list->columns[list->length - 1] = spec; } @@ -325,6 +328,6 @@ void free_statement_list(StatementList *list) { void append_statement_list(StatementList *list, Statement *statement) { list->length++; - list->statements = realloc(list->statements, sizeof(Statement) * list->length); + list->statements = realloc(list->statements, sizeof(Statement *) * list->length); list->statements[list->length - 1] = statement; } \ No newline at end of file diff --git a/src/main.c b/src/main.c index aac9ec3..bd99557 100644 --- a/src/main.c +++ b/src/main.c @@ -4,6 +4,7 @@ #include "InputBuffer.h" #include "scanner.h" +#include "parser.h" void prompt() { printf("SDB> "); @@ -24,29 +25,23 @@ void read_input(InputBuffer *buffer) { void parse_input(char *input) { Scanner *scanner = new_scanner(strdup(input)); - Scanner_Result *result = NULL; + Parser *parser = new_parser(); - while ((result = scanner_next_token(scanner, result)) != NULL) { - if (result->token == T_STRING) { - printf("Found String: '%s'\n", result->valueStr); - } else if (result->token == T_IDENTIFIER) { - printf("Found Identifier: %s\n", result->valueStr); - } else if (result->token == T_NUMBER) { - printf("Found Number %lld\n", result->valueInt); - } else { - printf("Found Token: %d\n", result->token); - } + ParserNode *node = parser_parse(parser, scanner); + + if (parser->status == PARSESTATE_ERROR) { + printf("Parse Error: %s\n", parser->errMsg); } - if (scanner->state == SCANSTATE_ERROR) { - if (scanner->errMsg != NULL) { - fprintf(stderr, "%s\n", scanner->errMsg); - } else { - fprintf(stderr, "Parse Error!\n"); - } + if (node != NULL) { + printf("%s\n", input); + parser_print_node_tree(node, 0); + free_parser_node(node); } free_scanner(scanner); + free_parser(parser); + } int main() { @@ -55,6 +50,7 @@ int main() { setbuf(stdout, 0); setbuf(stderr, 0); #endif + InputBuffer *buffer = input_buffer_new(); while (true) { diff --git a/src/parser.c b/src/parser.c new file mode 100644 index 0000000..13e0233 --- /dev/null +++ b/src/parser.c @@ -0,0 +1,608 @@ +// +// Created by Sam on 07/06/2018. +// + +#include +#include +#include +#include "parser.h" + +char *parser_status_to_str(ParseStatus status) { +#define X(t) case t: return #t; + + switch (status) { + PARSE_STATUS_LIST + default: + return "UNKNOWN"; + } + +#undef X +} + +char *parser_node_type_to_str(ParserNodeType nodeType) { +#define X(t) case t: return #t; + + switch (nodeType) { + PARSE_NODE_TYPE_LIST + default: + return "UNKNOWN"; + } + +#undef X +} + +ParserNode *new_parser_node(ParserNodeType type, ScannerToken *token) { + ParserNode *node = malloc(sizeof(ScannerToken)); + node->parent = NULL; + node->type = type; + node->phase = 0; + node->token = token; + node->children = NULL; + node->childrenLength = 0; + return node; +} + +void free_parser_node(ParserNode *node) { + if (node->token != NULL) { + free_scanner_token(node->token); + } + if (node->childrenLength > 0) { + for (size_t i = 0; i < node->childrenLength; i++) { + free_parser_node(node->children[i]); + } + free(node->children); + } + free(node); +} + +void append_parser_node(ParserNode *node, ParserNode *child) { + node->childrenLength++; + node->children = realloc(node->children, sizeof(ParserNode *) * node->childrenLength); + node->children[node->childrenLength - 1] = child; + child->parent = node; +} + +Parser *new_parser() { + Parser *result = malloc(sizeof(Parser)); + result->status = PARSESTATE_NONE; + result->errMsg = NULL; + return result; +} + +void free_parser(Parser *parser) { + if (parser->errMsg != NULL) { + free(parser->errMsg); + } + free(parser); +} + +#define NEXT_TOKEN() {\ + token = scanner_next_token(scanner, token);\ + } + +#define EXPECT(token_type) {\ + NEXT_TOKEN();\ + if (token == NULL || token->type != (token_type)) {\ + parser_set_error(parser, "Unexpected input, expected "#token_type, token);\ + }\ + } +#define DECEND_NODE(type) { \ + ParserNode* newNode = new_parser_node(type, token); \ + append_parser_node(node, newNode);\ + node = newNode;\ + token = NULL;\ + } +#define ASCEND_NODE() {\ + node = node->parent;\ + if (node == NULL) { parser_set_error(parser, "Parent node was null", NULL); }\ + } + +ParserNode *parser_parse(Parser *parser, Scanner *scanner) { + if (scanner->state != SCANSTATE_START) { + parser_set_error(parser, "Scanner not ready", NULL); + return NULL; + } + + if (parser->status != PARSESTATE_NONE) { + parser_set_error(parser, "Parser was not reset before use", NULL); + return NULL; + } + + ScannerToken *token = NULL; + ParserNode *root = new_parser_node(NODE_STATEMENT_LIST, NULL); + ParserNode *node = root; + + parser->status = PARSESTATE_RUNNING; + + while (parser->status == PARSESTATE_RUNNING) { + switch (node->type) { + case NODE_STATEMENT_LIST: { + NEXT_TOKEN(); + if (token == NULL) { + //End of input + parser->status = PARSESTATE_DONE; + break; + } + switch (token->type) { + case T_KW_SELECT: { + DECEND_NODE(NODE_SELECT_STMT); + } + break; + case T_KW_INSERT: { + DECEND_NODE(NODE_INSERT_STMT); + EXPECT(T_KW_INTO); + } + break; + case T_KW_UPDATE: { + DECEND_NODE(NODE_UPDATE_STMT); + } + break; + case T_KW_DELETE: { + DECEND_NODE(NODE_DELETE_STMT); + EXPECT(T_KW_FROM); + } + break; + case T_KW_CREATE: { + DECEND_NODE(NODE_CREATE_STMT); + EXPECT(T_KW_TABLE); + } + break; + case T_KW_DROP: { + DECEND_NODE(NODE_DROP_STMT); + EXPECT(T_KW_TABLE); + } + break; + case T_SEMICOLON: { + parser_set_error(parser, "Unexpected end of statement", token); + } + break; + default: + parser_set_error(parser, "Unexpected node type", token); + break; + } + } + break; + case NODE_SELECT_STMT: + switch (node->phase) { + case 0: {//field list + ParserNode *fieldList = new_parser_node(NODE_FIELD_LIST, NULL); + append_parser_node(node, fieldList); + while (parser->status != PARSESTATE_ERROR && node->phase == 0) { + NEXT_TOKEN(); + if (token == NULL) { + parser_set_error(parser, "Expected table name", token); + break; + } + switch (token->type) { + case T_IDENTIFIER: + append_parser_node(fieldList, new_parser_node(NODE_IDENTIFIER, token)); + token = NULL; + break; + case T_COMMA: + break; + case T_KW_FROM: + if (fieldList->childrenLength == 0) { + parser_set_error(parser, "Field list empty", token); + } + node->phase++; + break; + default: + parser_set_error(parser, "Unexpected token, expecting , or identifier", token); + break; + } + } + } + break; + case 1: {//table name + NEXT_TOKEN(); + if (token != NULL && token->type == T_IDENTIFIER) { + append_parser_node(node, new_parser_node(NODE_IDENTIFIER, token)); + token = NULL; + node->phase++; + NEXT_TOKEN(); + //Expect where or end of command + if (token == NULL || (token->type != T_KW_WHERE && token->type != T_SEMICOLON)) { + parser_set_error(parser, "Expected WHERE or ;", token); + } else { + if (token->type == T_KW_WHERE) { + ParserNode *group = new_parser_node(NODE_COMPARISON_GROUP, NULL); + append_parser_node(node, group); + node = group; + break; + } else if (token->type == T_SEMICOLON) { + node->phase = -1; + //Finished reading select + ASCEND_NODE(); + break; + } + } + } else { + parser_set_error(parser, "Expected table name", token); + } + } + break; + case 2: //after assignment list + EXPECT(T_SEMICOLON); + ASCEND_NODE(); + break; + default: + parser_set_error(parser, "Unknown phase reading select", token); + break; + } + break; + case NODE_INSERT_STMT: { + switch (node->phase) { + case 0: { //table name + NEXT_TOKEN(); + if (token == NULL || token->type != T_IDENTIFIER) { + parser_set_error(parser, "Expecting identifier", token); + break; + } + append_parser_node(node, new_parser_node(NODE_IDENTIFIER, token)); + token = NULL; + node->phase++; + EXPECT(T_KW_SET); + ParserNode *assignments = new_parser_node(NODE_ASSIGNMENT_LIST, NULL); + append_parser_node(node, assignments); + node = assignments; + + } + break; + case 1: { // end of assignment list + EXPECT(T_SEMICOLON); + ASCEND_NODE(); + } + break; + default: + parser_set_error(parser, "Unknown phase reading insert", token); + break; + } + } + break; + case NODE_UPDATE_STMT: { + switch (node->phase) { + case 0: { + NEXT_TOKEN(); + if (token == NULL || token->type != T_IDENTIFIER) { + parser_set_error(parser, "Expected identifier", token); + break; + } + append_parser_node(node, new_parser_node(NODE_IDENTIFIER, token)); + token = NULL; + node->phase++; + EXPECT(T_KW_SET); + ParserNode *assignments = new_parser_node(NODE_ASSIGNMENT_LIST, NULL); + append_parser_node(node, assignments); + node = assignments; + } + break; + case 1: { //end of command or where + NEXT_TOKEN(); + if (token == NULL || (token->type != T_SEMICOLON && token->type != T_KW_WHERE)) { + parser_set_error(parser, "Expected ; or WHERE", token); + break; + } + if (token->type == T_SEMICOLON) { + ASCEND_NODE(); + } else if (token->type == T_KW_WHERE) { + node->phase++; + ParserNode *assignments = new_parser_node(NODE_ASSIGNMENT_LIST, NULL); + append_parser_node(node, assignments); + node = assignments; + } + } + break; + case 2: { + EXPECT(T_SEMICOLON); + ASCEND_NODE(); + } + break; + default: + parser_set_error(parser, "Unknown phase reading update", token); + break; + } + } + break; + case NODE_DELETE_STMT: { + switch (node->phase) { + case 0: { + NEXT_TOKEN(); + if (token == NULL || token->type != T_IDENTIFIER) { + parser_set_error(parser, "Expected identifier", token); + break; + } + append_parser_node(node, new_parser_node(NODE_IDENTIFIER, token)); + token = NULL; + NEXT_TOKEN(); + //Where or end of command + if (token == NULL || (token->type != T_KW_WHERE && token->type != T_SEMICOLON)) { + parser_set_error(parser, "Expected ; or WHERE", token); + break; + } + if (token->type == T_KW_WHERE) { + node->phase++; + ParserNode *comparison = new_parser_node(NODE_COMPARISON_GROUP, NULL); + append_parser_node(node, comparison); + node = comparison; + } else if (token->type == T_SEMICOLON) { + ASCEND_NODE(); + break; + } + } + break; + case 1: { + EXPECT(T_SEMICOLON); + ASCEND_NODE(); + } + break; + default: + parser_set_error(parser, "Unknown phase reading delete", token); + break; + } + } + break; + case NODE_CREATE_STMT: { + switch (node->phase) { + case 0: { + NEXT_TOKEN(); + if (token == NULL || token->type != T_IDENTIFIER) { + parser_set_error(parser, "Expected identifier", token); + break; + } + append_parser_node(node, new_parser_node(NODE_IDENTIFIER, token)); + token = NULL; + EXPECT(T_PAREN_OPEN); + node->phase++; + ParserNode *columnSpecs = new_parser_node(NODE_COLUMN_SPEC_LIST, NULL); + append_parser_node(node, columnSpecs); + node = columnSpecs; + } + break; + case 1: { + EXPECT(T_PAREN_CLOSE); + EXPECT(T_SEMICOLON); + ASCEND_NODE(); + } + break; + default: + parser_set_error(parser, "Unknown phase reading create", token); + break; + } + } + case NODE_DROP_STMT: { + NEXT_TOKEN(); + if (token == NULL || token->type != T_IDENTIFIER) { + parser_set_error(parser, "Expected identifier", token); + break; + } + append_parser_node(node, new_parser_node(NODE_IDENTIFIER, token)); + token = NULL; + EXPECT(T_SEMICOLON); + ASCEND_NODE(); + } + break; + case NODE_COLUMN_SPEC_LIST: { + while (parser->status != PARSESTATE_ERROR) { + NEXT_TOKEN(); + if (token == NULL || token->type != T_IDENTIFIER) { + parser_set_error(parser, "Expected identifier", token); + break; + } + ParserNode *columnSpec = new_parser_node(NODE_COLUMN_SPEC, token); + token = NULL; + append_parser_node(node, columnSpec); + NEXT_TOKEN(); + if (token == NULL || (token->type != T_KW_STRING && token->type != T_KW_INT)) { + parser_set_error(parser, "Expected one of STRING, INT, INTEGER", token); + break; + } + if (token->type == T_KW_STRING) { + ParserNode *columnType = new_parser_node(NODE_COLUMN_TYPE, token); + token = NULL; + append_parser_node(columnSpec, columnType); + EXPECT(T_PAREN_OPEN); + NEXT_TOKEN(); + if (token == NULL || token->type != T_NUMBER) { + parser_set_error(parser, "Expected number", token); + break; + } + append_parser_node(columnType, new_parser_node(NODE_COLUMN_TYPE_SPECIFIER, token)); + token = NULL; + EXPECT(T_PAREN_CLOSE); + } else if (token->type == T_KW_INT) { + append_parser_node(columnSpec, new_parser_node(NODE_COLUMN_TYPE, token)); + token = NULL; + } + NEXT_TOKEN(); + if (token == NULL) { + ASCEND_NODE(); + break; + } + //Look for options + if (token->type == T_KW_INDEX) { + append_parser_node(node, new_parser_node(NODE_COLUMN_OPTION, token)); + token = NULL; + NEXT_TOKEN(); + if (token == NULL) { + ASCEND_NODE(); + break; + } + } + //Comma or ascend + if (token->type != T_COMMA) { + scanner_push_buffer(scanner, token); + token = NULL; + ASCEND_NODE(); + break; + } + } + } + break; + case NODE_ASSIGNMENT_LIST: { + //Read assignments + while (parser->status != PARSESTATE_ERROR) { + NEXT_TOKEN(); + if (token == NULL || token->type != T_IDENTIFIER) { + parser_set_error(parser, "Expected identifier", token); + break; + } + ParserNode *assignment = new_parser_node(NODE_ASSIGNMENT, token); + token = NULL; + append_parser_node(node, assignment); + EXPECT(T_COMP_EQ); + NEXT_TOKEN(); + if (token == NULL || (token->type != T_STRING && token->type != T_NUMBER)) { + parser_set_error(parser, "Expected value", token); + break; + } + append_parser_node(assignment, new_parser_node(NODE_VALUE, token)); + token = NULL; + //Check for comma + NEXT_TOKEN(); + if (token == NULL) { + ASCEND_NODE(); + break; + } + if (token->type != T_COMMA) { + scanner_push_buffer(scanner, token); + token = NULL; + ASCEND_NODE(); + break; + } + } + } + break; + case NODE_COMPARISON_GROUP: { + NEXT_TOKEN(); + if (token == NULL) { + parser_set_error(parser, "Expected identifier", NULL); + break; + } + switch (token->type) { + case T_IDENTIFIER: + scanner_push_buffer(scanner, token); + token = NULL; + DECEND_NODE(NODE_COMPARISON); + break; + case T_COMP_AND: + break; + default: + scanner_push_buffer(scanner, token); + token = NULL; + ASCEND_NODE(); + break; + } + } + break; + case NODE_COMPARISON: + while (parser->status != PARSESTATE_ERROR) { + NEXT_TOKEN(); + if (token == NULL) { + parser_set_error(parser, "Unexpected end of input", NULL); + break; + } + switch (node->phase) { + case 0: + if (token->type == T_IDENTIFIER) { + append_parser_node(node, new_parser_node(NODE_IDENTIFIER, token)); + token = NULL; + node->phase++; + } else { + parser_set_error(parser, "Expected identifier", token); + } + break; + case 1: + if (token->type == T_COMP_EQ || token->type == T_COMP_NEQ) { + append_parser_node(node, new_parser_node(NODE_COMPARISON, token)); + token = NULL; + node->phase++; + } else { + parser_set_error(parser, "Expected comparator", token); + } + break; + case 2: + if (token->type == T_STRING || token->type == T_NUMBER) { + append_parser_node(node, new_parser_node(NODE_VALUE, token)); + token = NULL; + node->phase = -1; + } else { + parser_set_error(parser, "Expected value", token); + } + break; + default: + parser_set_error(parser, "Unknown phase reading comparison", token); + break; + } + if (node->phase == -1) { + ASCEND_NODE(); + break; + } + } + break; + default: + parser_set_error(parser, "Parser in unknown state", token); + break; + } + } + if (token != NULL) { + free_scanner_token(token); + } + + if (scanner->state == SCANSTATE_ERROR) { + parser_set_error(parser, scanner->errMsg, NULL); + } + + if (parser->status != PARSESTATE_ERROR) { + return root; + } + + free_parser_node(root); + return NULL; +} + +#undef NEXT_TOKEN +#undef EXPECT +#undef DECEND_NODE +#undef ASCEND_NODE + +void parser_set_error(Parser *parser, char *err, ScannerToken *token) { + parser->status = PARSESTATE_ERROR; + if (token != NULL) { + char *errMsg = calloc(128, sizeof(char)); + snprintf(errMsg, 128, "[%d:%d] %s", token->lineNo, token->linePos, err); + parser->errMsg = strdup(errMsg); + free(errMsg); + } else { + parser->errMsg = strdup(err); + } +} + +void parser_print_node_tree(ParserNode *node, size_t indent) { + char *indentStr = calloc(indent + 1, sizeof(char)); + memset(indentStr, ' ', sizeof(char) * indent); + + //Type + printf("%s%s", indentStr, parser_node_type_to_str(node->type)); + + free(indentStr); + + //Token + if (node->token != NULL) { + printf(": %s", scanner_token_type_to_str(node->token->type)); + switch (node->token->type) { + case T_IDENTIFIER: + case T_STRING: + printf("<%s>", node->token->valueStr); + break; + case T_NUMBER: + printf("<%lld>", node->token->valueInt); + break; + default: + break; + } + } + printf("\n"); + for (size_t i = 0; i < node->childrenLength; i++) { + parser_print_node_tree(node->children[i], indent + 2); + } +} \ No newline at end of file diff --git a/src/parser.h b/src/parser.h new file mode 100644 index 0000000..57c9fcd --- /dev/null +++ b/src/parser.h @@ -0,0 +1,92 @@ +// +// Created by Sam on 07/06/2018. +// + +#ifndef SDB_PARSER_H +#define SDB_PARSER_H + +#include +#include +#include "scanner.h" +#include "sql.h" + +#define PARSE_STATUS_LIST \ + X(PARSESTATE_NONE) \ + X(PARSESTATE_RUNNING) \ + X(PARSESTATE_DONE) \ + X(PARSESTATE_ERROR) \ + +#define X(t) t, +enum ParseStatus_t { + PARSE_STATUS_LIST +}; +#undef X +typedef enum ParseStatus_t ParseStatus; + +char *parser_status_to_str(ParseStatus status); + +#define PARSE_NODE_TYPE_LIST \ + X(NODE_NONE) \ + X(NODE_STATEMENT_LIST) \ + X(NODE_SELECT_STMT) \ + X(NODE_INSERT_STMT) \ + X(NODE_UPDATE_STMT) \ + X(NODE_DELETE_STMT) \ + X(NODE_CREATE_STMT) \ + X(NODE_DROP_STMT) \ + X(NODE_COLUMN_SPEC_LIST) \ + X(NODE_COLUMN_SPEC) \ + X(NODE_COLUMN_TYPE) \ + X(NODE_COLUMN_TYPE_SPECIFIER) \ + X(NODE_COLUMN_OPTION) \ + X(NODE_FIELD_LIST) \ + X(NODE_ASSIGNMENT_LIST) \ + X(NODE_ASSIGNMENT) \ + X(NODE_COMPARISON_GROUP) \ + X(NODE_COMPARISON) \ + X(NODE_COMPARATOR) \ + X(NODE_IDENTIFIER) \ + X(NODE_VALUE) + +#define X(t) t, +enum ParserNodeType_t { + PARSE_NODE_TYPE_LIST +}; +typedef enum ParserNodeType_t ParserNodeType; +#undef X + +char* parser_node_type_to_str(ParserNodeType nodeType); + +struct ParserNode_t { + struct ParserNode_t *parent; + ParserNodeType type; + ssize_t phase; + ScannerToken *token; + struct ParserNode_t **children; + size_t childrenLength; +}; +typedef struct ParserNode_t ParserNode; + +ParserNode *new_parser_node(ParserNodeType type, ScannerToken *token); + +void free_parser_node(ParserNode *node); + +void append_parser_node(ParserNode *node, ParserNode *child); + +struct Parser_t { + ParseStatus status; + char *errMsg; +}; +typedef struct Parser_t Parser; + +Parser *new_parser(); + +void free_parser(Parser *parser); + +ParserNode *parser_parse(Parser *parser, Scanner *scanner); + +void parser_set_error(Parser *parser, char *err, ScannerToken *token); + +void parser_print_node_tree(ParserNode *node, size_t indent); + +#endif //SDB_PARSER_H diff --git a/src/scanner.c b/src/scanner.c index 0660a9a..e99f6dc 100644 --- a/src/scanner.c +++ b/src/scanner.c @@ -9,72 +9,129 @@ #include #include "scanner.h" -Scanner_Result *new_scanner_result() { - Scanner_Result *result = malloc(sizeof(Scanner_Result)); - clear_scanner_result(result); +char* scanner_token_type_to_str(ScannerTokenType tokenType) { +#define X(t) case t: return #t; + switch (tokenType) { + SCANNER_TOKEN_TYPE_LIST + default: + return "UNKNOWN"; + } +#undef X +} + +ScannerToken *new_scanner_token() { + ScannerToken *result = malloc(sizeof(ScannerToken)); + clear_scanner_token(result); return result; } -void clear_scanner_result(Scanner_Result *result) { - result->token = T_NONE; +void clear_scanner_token(ScannerToken *result) { + result->type = T_NONE; result->valueInt = 0; if (result->valueStr != NULL) { free(result->valueStr); } result->valueStr = NULL; + result->lineNo = 0; + result->linePos = 0; } -void free_scanner_result(Scanner_Result *result) { +void free_scanner_token(ScannerToken *result) { if (result->valueStr != NULL) { free(result); } free(result); } +char * scanner_state_to_str(ScannerState state) { +#define X(t) case t: return #t; + switch (state) { + SCANNER_STATE_LIST + default: + return "UNKNOWN"; + } +#undef X +} + Scanner *new_scanner(char *input) { Scanner *scanner = malloc(sizeof(Scanner)); - scanner->input = input; - scanner->pos = 0; + scanner->errMsg = NULL; + scanner->state = SCANSTATE_START; + scanner->buffer = NULL; + scanner->bufferIndex = 0; + scanner->bufferLength = 0; scanner->lineNo = 1; scanner->linePos = 1; - scanner->state = SCANSTATE_START; - scanner->errMsg = NULL; + scanner->input = input; + scanner->pos = 0; return scanner; } void free_scanner(Scanner *scanner) { - if (scanner->input != NULL) { - free(scanner->input); - } + reset_scanner(scanner, NULL); free(scanner); } -void reuse_scanner(Scanner *scanner, char *input) { +void reset_scanner(Scanner *scanner, char *input) { + scanner->pos = 0; + scanner->lineNo = 1; + scanner->linePos = 1; + scanner->state = SCANSTATE_START; if (scanner->input != NULL) { free(scanner->input); } scanner->input = input; if (scanner->errMsg != NULL) { - free(scanner->input); - scanner->errMsg = NULL; + free(scanner->errMsg); } - scanner->pos = 0; - scanner->lineNo = 1; - scanner->linePos = 1; - scanner->state = SCANSTATE_START; scanner->errMsg = NULL; + if (scanner->bufferIndex > 0) { + for (size_t i = 0; i < scanner->bufferIndex; i++) { + free_scanner_token(scanner->buffer[i]); + } + scanner->bufferIndex = 0; + } + if (scanner->bufferLength > 0) { + free(scanner->buffer); + scanner->buffer = NULL; + } } -Scanner_Result *scanner_next_token(Scanner *scanner, Scanner_Result *result) { +void scanner_push_buffer(Scanner *scanner, ScannerToken *token) { + if (scanner->bufferLength == 0) { + scanner->bufferLength = 1; + scanner->buffer = malloc(scanner->bufferLength * sizeof(ScannerToken*)); + } else if (scanner->bufferIndex + 1 == scanner->bufferLength) { + scanner->bufferLength *= 2; + scanner->buffer = realloc(scanner->buffer, scanner->bufferLength * sizeof(ScannerToken*)); + } + scanner->buffer[scanner->bufferIndex++] = token; +} + +ScannerToken *scanner_next_token(Scanner *scanner, ScannerToken *token) { + //Return if already done if (scanner->state == SCANSTATE_DONE || scanner->state == SCANSTATE_ERROR) { - if (result != NULL) free_scanner_result(result); + if (token != NULL) free_scanner_token(token); return NULL; } - if (result == NULL) { - result = new_scanner_result(); - } else { - clear_scanner_result(result); + + if (scanner->bufferIndex > 0) { + if (token != NULL) { + free_scanner_token(token); + } + scanner->bufferIndex--; + token = scanner->buffer[scanner->bufferIndex]; + scanner->buffer[scanner->bufferIndex] = NULL; + return token; } + + //Setup result + if (token == NULL) { + token = new_scanner_token(); + } else { + clear_scanner_token(token); + } + //Consume white space while (scanner_peek_char(scanner) != 0 && isspace(scanner_peek_char(scanner))) { if (scanner_next_char(scanner) == '\n') { @@ -82,51 +139,67 @@ Scanner_Result *scanner_next_token(Scanner *scanner, Scanner_Result *result) { scanner->linePos = 1; } } + //Check for end of string if (scanner_peek_char(scanner) == 0) { scanner->state = SCANSTATE_DONE; - free_scanner_result(result); + free_scanner_token(token); return NULL; } + //Record position of token in the input + token->lineNo = scanner->lineNo; + token->linePos = scanner->linePos; + //Keywords - if (scanner_match(scanner, "select", true)) result->token = K_SELECT; - else if (scanner_match(scanner, "from", true)) result->token = K_FROM; - else if (scanner_match(scanner, "insert", true)) result->token = K_INSERT; - else if (scanner_match(scanner, "into", true)) result->token = K_INTO; - else if (scanner_match(scanner, "set", true)) result->token = K_SET; - else if (scanner_match(scanner, "update", true)) result->token = K_UPDATE; - else if (scanner_match(scanner, "where", true)) result->token = K_WHERE; - else if (scanner_match(scanner, "delete", true)) result->token = K_DELETE; - else if (scanner_match(scanner, "create", true)) result->token = K_CREATE; - else if (scanner_match(scanner, "table", true)) result->token = K_TABLE; - else if (scanner_match(scanner, "drop", true)) result->token = K_DROP; - else if (scanner_match(scanner, "string", true)) result->token = K_STRING; - else if (scanner_match(scanner, "int", true)) result->token = K_INT; - else if (scanner_match(scanner, "index", true)) result->token = K_INDEX; - if (result->token != T_NONE) { - return result; + if (scanner_match(scanner, "select", true)) token->type = T_KW_SELECT; + else if (scanner_match(scanner, "from", true)) token->type = T_KW_FROM; + else if (scanner_match(scanner, "insert", true)) token->type = T_KW_INSERT; + else if (scanner_match(scanner, "into", true)) token->type = T_KW_INTO; + else if (scanner_match(scanner, "set", true)) token->type = T_KW_SET; + else if (scanner_match(scanner, "update", true)) token->type = T_KW_UPDATE; + else if (scanner_match(scanner, "where", true)) token->type = T_KW_WHERE; + else if (scanner_match(scanner, "delete", true)) token->type = T_KW_DELETE; + else if (scanner_match(scanner, "create", true)) token->type = T_KW_CREATE; + else if (scanner_match(scanner, "table", true)) token->type = T_KW_TABLE; + else if (scanner_match(scanner, "drop", true)) token->type = T_KW_DROP; + else if (scanner_match(scanner, "string", true)) token->type = T_KW_STRING; + else if (scanner_match(scanner, "int", true)) token->type = T_KW_INT; + else if (scanner_match(scanner, "integer", true)) token->type = T_KW_INT; + else if (scanner_match(scanner, "index", true)) token->type = T_KW_INDEX; + if (token->type != T_NONE) { + return token; } //Comparators - if (scanner_match(scanner, "=", false)) result->token = T_COMP_EQ; - if (scanner_match(scanner, "<>", false)) result->token = T_COMP_NEQ; - if (scanner_match(scanner, "and", true)) result->token = T_COMP_AND; - if (result->token != T_NONE) { - return result; + if (scanner_match(scanner, "=", false)) token->type = T_COMP_EQ; + else if (scanner_match(scanner, "<>", false)) token->type = T_COMP_NEQ; + else if (scanner_match(scanner, "and", true)) token->type = T_COMP_AND; + if (token->type != T_NONE) { + return token; } //Punctuation - if (scanner_peek_char(scanner) == ',') { - scanner_next_char(scanner); - result->token = T_COMMA; + char nextChar = scanner_peek_char(scanner); + switch (nextChar) { + case ',': + token->type = T_COMMA; + break; + case ';': + token->type = T_SEMICOLON; + break; + case '(': + token->type = T_PAREN_OPEN; + break; + case ')': + token->type = T_PAREN_CLOSE; + break; + default: + break; } - if (scanner_peek_char(scanner) == ';') { + if (token->type != T_NONE) { scanner_next_char(scanner); - result->token = T_SEMICOLON; - } - if (result->token != T_NONE) { - return result; + return token; } //Numbers @@ -141,22 +214,22 @@ Scanner_Result *scanner_next_token(Scanner *scanner, Scanner_Result *result) { if (isdigit(scanner_peek_char(scanner))) { //Exceed length of int array! scanner_set_error(scanner, "Number exceeded allowed length"); - free_scanner_result(result); + free_scanner_token(token); return NULL; } - result->token = T_NUMBER; + token->type = T_NUMBER; //convert number - result->valueInt = (uint64_t) strtol(intInput, NULL, 10); - return result; + token->valueInt = (uint64_t) strtol(intInput, NULL, 10); + return token; } //Strings if (scanner_peek_char(scanner) == '"' || scanner_peek_char(scanner) == '\'') { - if (scanner_read_string(scanner, result, scanner_next_char(scanner)) == false) { - free_scanner_result(result); + if (scanner_read_string(scanner, token, scanner_next_char(scanner)) == false) { + free_scanner_token(token); return NULL; } - return result; + return token; } //Identifiers @@ -173,19 +246,19 @@ Scanner_Result *scanner_next_token(Scanner *scanner, Scanner_Result *result) { ident = realloc(ident, sizeof(char) * ident_size); } } while ((isalnum(scanner_peek_char(scanner)) || scanner_peek_char(scanner) == '_')); - result->token = T_IDENTIFIER; - result->valueStr = strdup(ident); + token->type = T_IDENTIFIER; + token->valueStr = strdup(ident); free(ident); - return result; + return token; } //Nothing matched - scanner_set_error(scanner, "Unknown text"); - free_scanner_result(result); + scanner_set_error(scanner, "Unexpected input"); + free_scanner_token(token); return NULL; } -bool scanner_read_string(Scanner *scanner, Scanner_Result *result, char quoteType) { +bool scanner_read_string(Scanner *scanner, ScannerToken *result, char quoteType) { char next = 0; size_t length = 8; @@ -230,7 +303,7 @@ bool scanner_read_string(Scanner *scanner, Scanner_Result *result, char quoteTyp } } else if (next == quoteType) { //end of string - result->token = T_STRING; + result->type = T_STRING; result->valueStr = strdup(str); free(str); return true; @@ -295,7 +368,7 @@ void scanner_set_error(Scanner *scanner, const char *errText) { //Create error msg char *errMsg = malloc(sizeof(char) * 128); - snprintf(errMsg, 128, "Error at line %d:%d %s", scanner->lineNo, scanner->linePos, errText); + snprintf(errMsg, 128, "[%d:%d]: %s", scanner->lineNo, scanner->linePos, errText); if (scanner->errMsg != NULL) { free(scanner->errMsg); diff --git a/src/scanner.h b/src/scanner.h index a7244c0..3990ba8 100644 --- a/src/scanner.h +++ b/src/scanner.h @@ -7,61 +7,72 @@ #include #include +#include -enum Scanner_Token_t { - T_NONE, +#define SCANNER_TOKEN_TYPE_LIST \ + X(T_NONE) \ + X(T_STRING) \ + X(T_NUMBER) \ + X(T_IDENTIFIER) \ + X(T_COMMA) \ + X(T_SEMICOLON) \ + X(T_PAREN_OPEN) \ + X(T_PAREN_CLOSE) \ + X(T_COMP_EQ) \ + X(T_COMP_NEQ) \ + X(T_COMP_AND) \ + X(T_KW_SELECT) \ + X(T_KW_FROM) \ + X(T_KW_INSERT) \ + X(T_KW_INTO) \ + X(T_KW_SET) \ + X(T_KW_UPDATE) \ + X(T_KW_WHERE) \ + X(T_KW_DELETE) \ + X(T_KW_CREATE) \ + X(T_KW_TABLE) \ + X(T_KW_DROP) \ + X(T_KW_STRING) \ + X(T_KW_INT) \ + X(T_KW_INDEX) - //Values - T_STRING, - T_NUMBER, - T_IDENTIFIER, - - //Punctuation - T_COMMA, - T_SEMICOLON, - - //Comparators - T_COMP_EQ, - T_COMP_NEQ, - T_COMP_AND, - - //Keywords - K_SELECT, - K_FROM, - K_INSERT, - K_INTO, - K_SET, - K_UPDATE, - K_WHERE, - K_DELETE, - K_CREATE, - K_TABLE, - K_DROP, - K_STRING, - K_INT, - K_INDEX +#define X(t) t, +enum ScannerTokenType_t { + SCANNER_TOKEN_TYPE_LIST }; -typedef enum Scanner_Token_t Scanner_Token; +#undef X +typedef enum ScannerTokenType_t ScannerTokenType; -struct Scanner_Result_t { - Scanner_Token token; +char* scanner_token_type_to_str(ScannerTokenType tokenType); + +struct ScannerToken_t { + ScannerTokenType type; char *valueStr; uint64_t valueInt; + size_t lineNo; + size_t linePos; }; -typedef struct Scanner_Result_t Scanner_Result; +typedef struct ScannerToken_t ScannerToken; -Scanner_Result *new_scanner_result(); +ScannerToken *new_scanner_token(); -void clear_scanner_result(Scanner_Result *result); +void clear_scanner_token(ScannerToken *result); -void free_scanner_result(Scanner_Result *result); +void free_scanner_token(ScannerToken *result); -enum Scanner_State_t { - SCANSTATE_START, - SCANSTATE_ERROR, - SCANSTATE_DONE +#define SCANNER_STATE_LIST \ + X(SCANSTATE_START) \ + X(SCANSTATE_ERROR) \ + X(SCANSTATE_DONE) + +#define X(t) t, +enum ScannerState_t { + SCANNER_STATE_LIST }; -typedef enum Scanner_State_t Scanner_State; +#undef X +typedef enum ScannerState_t ScannerState; + +char * scanner_state_to_str(ScannerState state); struct Scanner_t { char *input; @@ -69,7 +80,10 @@ struct Scanner_t { size_t lineNo; size_t linePos; size_t pos; - Scanner_State state; + ScannerState state; + ScannerToken **buffer; + size_t bufferLength; + size_t bufferIndex; }; typedef struct Scanner_t Scanner; @@ -77,11 +91,13 @@ Scanner *new_scanner(char *input); void free_scanner(Scanner *scanner); -void reuse_scanner(Scanner *scanner, char *input); +void reset_scanner(Scanner *scanner, char *input); -Scanner_Result *scanner_next_token(Scanner *scanner, Scanner_Result *result); +void scanner_push_buffer(Scanner *scanner, ScannerToken *token); -bool scanner_read_string(Scanner *scanner, Scanner_Result *result, char quoteType); +ScannerToken *scanner_next_token(Scanner *scanner, ScannerToken *token); + +bool scanner_read_string(Scanner *scanner, ScannerToken *result, char quoteType); char scanner_next_char(Scanner *scanner);