Implemented parser.

This commit is contained in:
2018-06-09 16:39:20 +01:00
parent cb262733ed
commit 9e60b793c6
9 changed files with 935 additions and 149 deletions

View File

@@ -20,17 +20,18 @@ void free_value(Value *value) {
}
Comparison *new_comparison() {
Comparison* comparison = malloc(sizeof(Comparison));
Comparison *comparison = malloc(sizeof(Comparison));
comparison->identifier = NULL;
comparison->value = NULL;
comparison->comp = COMP_NONE;
return comparison;
}
void free_comparison(Comparison *comparison) {
if (comparison->value != NULL) {
free(comparison->value);
}
if(comparison->identifier != NULL) {
if (comparison->identifier != NULL) {
free(comparison->identifier);
}
free(comparison);
@@ -42,9 +43,10 @@ ComparisonGroup *new_comparision_group() {
group->length = 0;
return group;
}
void free_comparison_group(ComparisonGroup* group) {
void free_comparison_group(ComparisonGroup *group) {
if (group->length > 0) {
for(size_t i=0; i<group->length; i++) {
for (size_t i = 0; i < group->length; i++) {
free(group->comparisons[i]);
}
free(group->comparisons);
@@ -52,9 +54,10 @@ void free_comparison_group(ComparisonGroup* group) {
}
free(group);
}
void append_comparison_group(ComparisonGroup *group, Comparison *comparison) {
group->length++;
group->comparisons = realloc(group->comparisons, sizeof(Assignment) * group->length);
group->comparisons = realloc(group->comparisons, sizeof(Assignment *) * group->length);
group->comparisons[group->length - 1] = comparison;
}
@@ -94,7 +97,7 @@ void free_assignment_list(AssignmentList *assignmentList) {
void append_assignment_list(AssignmentList *list, Assignment *assignment) {
list->length++;
list->assignments = realloc(list->assignments, sizeof(Assignment) * list->length);
list->assignments = realloc(list->assignments, sizeof(Assignment *) * list->length);
list->assignments[list->length - 1] = assignment;
}
@@ -160,7 +163,7 @@ void free_column_spec_list(ColumnSpecList *list) {
void append_column_spec_list(ColumnSpecList *list, ColumnSpec *spec) {
list->length++;
list->columns = realloc(list->columns, sizeof(ColumnSpec) * list->length);
list->columns = realloc(list->columns, sizeof(ColumnSpec *) * list->length);
list->columns[list->length - 1] = spec;
}
@@ -325,6 +328,6 @@ void free_statement_list(StatementList *list) {
void append_statement_list(StatementList *list, Statement *statement) {
list->length++;
list->statements = realloc(list->statements, sizeof(Statement) * list->length);
list->statements = realloc(list->statements, sizeof(Statement *) * list->length);
list->statements[list->length - 1] = statement;
}

View File

@@ -4,6 +4,7 @@
#include "InputBuffer.h"
#include "scanner.h"
#include "parser.h"
void prompt() {
printf("SDB> ");
@@ -24,29 +25,23 @@ void read_input(InputBuffer *buffer) {
void parse_input(char *input) {
Scanner *scanner = new_scanner(strdup(input));
Scanner_Result *result = NULL;
Parser *parser = new_parser();
while ((result = scanner_next_token(scanner, result)) != NULL) {
if (result->token == T_STRING) {
printf("Found String: '%s'\n", result->valueStr);
} else if (result->token == T_IDENTIFIER) {
printf("Found Identifier: %s\n", result->valueStr);
} else if (result->token == T_NUMBER) {
printf("Found Number %lld\n", result->valueInt);
} else {
printf("Found Token: %d\n", result->token);
}
ParserNode *node = parser_parse(parser, scanner);
if (parser->status == PARSESTATE_ERROR) {
printf("Parse Error: %s\n", parser->errMsg);
}
if (scanner->state == SCANSTATE_ERROR) {
if (scanner->errMsg != NULL) {
fprintf(stderr, "%s\n", scanner->errMsg);
} else {
fprintf(stderr, "Parse Error!\n");
}
if (node != NULL) {
printf("%s\n", input);
parser_print_node_tree(node, 0);
free_parser_node(node);
}
free_scanner(scanner);
free_parser(parser);
}
int main() {
@@ -55,6 +50,7 @@ int main() {
setbuf(stdout, 0);
setbuf(stderr, 0);
#endif
InputBuffer *buffer = input_buffer_new();
while (true) {

608
src/parser.c Normal file
View File

@@ -0,0 +1,608 @@
//
// Created by Sam on 07/06/2018.
//
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include "parser.h"
/**
 * Maps a ParseStatus value to the spelling of its enumerator.
 *
 * Each entry of PARSE_STATUS_LIST is compared against `status` in turn and
 * the matching enumerator name is returned as a string literal; a value that
 * matches no entry yields "UNKNOWN".
 */
char *parser_status_to_str(ParseStatus status) {
#define X(t) if (status == t) { return #t; }
    PARSE_STATUS_LIST
#undef X
    return "UNKNOWN";
}
/**
 * Maps a ParserNodeType value to the spelling of its enumerator.
 *
 * Each entry of PARSE_NODE_TYPE_LIST is compared against `nodeType` in turn
 * and the matching enumerator name is returned as a string literal; a value
 * that matches no entry yields "UNKNOWN".
 */
char *parser_node_type_to_str(ParserNodeType nodeType) {
#define X(t) if (nodeType == t) { return #t; }
    PARSE_NODE_TYPE_LIST
#undef X
    return "UNKNOWN";
}
/**
 * Allocates a parser tree node that has no parent and no children yet.
 *
 * @param type  kind of node to create
 * @param token token to attach to the node, or NULL; free_parser_node()
 *              releases it together with the node
 * @return the new node, or NULL if the allocation fails (in which case
 *         `token` is left untouched and still belongs to the caller)
 */
ParserNode *new_parser_node(ParserNodeType type, ScannerToken *token) {
    /* Size the allocation from the node itself. The previous code used
     * sizeof(ScannerToken), which only works while that struct happens to be
     * at least as large as ParserNode. */
    ParserNode *node = malloc(sizeof *node);
    if (node == NULL) {
        return NULL;
    }
    node->parent = NULL;
    node->type = type;
    node->phase = 0;
    node->token = token;
    node->children = NULL;
    node->childrenLength = 0;
    return node;
}
/**
 * Releases a node, the token attached to it (if any) and, recursively,
 * every child node together with the child pointer array.
 *
 * @param node node to destroy; it is dereferenced unconditionally
 */
void free_parser_node(ParserNode *node) {
    ScannerToken *attached = node->token;
    if (attached != NULL) {
        free_scanner_token(attached);
    }

    size_t remaining = node->childrenLength;
    if (remaining != 0) {
        /* Walk the child array front to back, destroying each subtree. */
        ParserNode **cursor = node->children;
        while (remaining-- > 0) {
            free_parser_node(*cursor++);
        }
        free(node->children);
    }

    free(node);
}
/**
 * Appends `child` as the last child of `node` and records `node` as the
 * child's parent.
 *
 * The child array grows by one slot per call. If the array cannot be grown,
 * the existing children are left intact and `child` is NOT attached (its
 * parent pointer is not modified); the function has no way to report this,
 * so the previous behaviour of writing through a NULL pointer is replaced by
 * a silent no-op.
 *
 * @param node  parent that receives the child
 * @param child node to attach
 */
void append_parser_node(ParserNode *node, ParserNode *child) {
    size_t grownLength = node->childrenLength + 1;
    /* Keep the old pointer until realloc succeeds so it is never lost. */
    ParserNode **grown = realloc(node->children, sizeof *grown * grownLength);
    if (grown == NULL) {
        return;
    }
    grown[grownLength - 1] = child;
    node->children = grown;
    node->childrenLength = grownLength;
    child->parent = node;
}
Parser *new_parser() {
Parser *result = malloc(sizeof(Parser));
result->status = PARSESTATE_NONE;
result->errMsg = NULL;
return result;
}
/**
 * Destroys a parser along with its error message, if one was recorded.
 *
 * @param parser parser to destroy; it is dereferenced unconditionally
 */
void free_parser(Parser *parser) {
    /* free(NULL) does nothing, so an unset message needs no special case. */
    free(parser->errMsg);
    free(parser);
}
/*
 * Helper macros for parser_parse(). They expand in place and use that
 * function's local variables `parser`, `scanner`, `token` and `node`.
 * Each body is wrapped in do { ... } while (0) so that an invocation
 * followed by ';' forms exactly one statement, even inside if/else.
 */

/* Replaces `token` with the next token from the scanner; the current token
 * (possibly NULL) is passed to scanner_next_token(). */
#define NEXT_TOKEN() do {\
    token = scanner_next_token(scanner, token);\
} while (0)

/* Reads the next token and records a parse error unless it has the given
 * type. Control flow continues after the macro either way. */
#define EXPECT(token_type) do {\
    NEXT_TOKEN();\
    if (token == NULL || token->type != (token_type)) {\
        parser_set_error(parser, "Unexpected input, expected "#token_type, token);\
    }\
} while (0)

/* Creates a child node of the given type that takes over the current token,
 * attaches it to `node` and makes it the current node. */
#define DECEND_NODE(node_type) do {\
    ParserNode *newNode = new_parser_node((node_type), token);\
    append_parser_node(node, newNode);\
    node = newNode;\
    token = NULL;\
} while (0)

/* Moves the current node up to its parent; a missing parent is recorded as
 * a parse error. */
#define ASCEND_NODE() do {\
    node = node->parent;\
    if (node == NULL) { parser_set_error(parser, "Parent node was null", NULL); }\
} while (0)
/**
 * Parses the scanner's whole input into a tree of ParserNode objects.
 *
 * The parser is a loop over the "current node": the node's type selects the
 * handler and the node's `phase` counter records how far that construct has
 * been read. Handlers consume tokens, attach children, descend into a new
 * child (making it the current node) or ascend back to the parent.
 *
 * Fixes relative to the previous version:
 *  - the NODE_CREATE_STMT case no longer falls through into NODE_DROP_STMT;
 *  - UPDATE ... WHERE now descends into a NODE_COMPARISON_GROUP, matching
 *    SELECT and DELETE, instead of a second NODE_ASSIGNMENT_LIST;
 *  - the comparator inside a comparison is typed NODE_COMPARATOR rather than
 *    NODE_COMPARISON.
 *
 * @param parser  parser whose status must be PARSESTATE_NONE; its status and
 *                errMsg are updated through parser_set_error()
 * @param scanner scanner whose state must be SCANSTATE_START
 * @return root NODE_STATEMENT_LIST node on success (release it with
 *         free_parser_node()), or NULL when an error was recorded
 */
ParserNode *parser_parse(Parser *parser, Scanner *scanner) {
    if (scanner->state != SCANSTATE_START) {
        parser_set_error(parser, "Scanner not ready", NULL);
        return NULL;
    }
    if (parser->status != PARSESTATE_NONE) {
        parser_set_error(parser, "Parser was not reset before use", NULL);
        return NULL;
    }

    ScannerToken *token = NULL;
    ParserNode *root = new_parser_node(NODE_STATEMENT_LIST, NULL);
    ParserNode *node = root;
    parser->status = PARSESTATE_RUNNING;

    while (parser->status == PARSESTATE_RUNNING) {
        switch (node->type) {
            case NODE_STATEMENT_LIST: {
                NEXT_TOKEN();
                if (token == NULL) {
                    //End of input
                    parser->status = PARSESTATE_DONE;
                    break;
                }
                //The leading keyword decides which statement node to descend into
                switch (token->type) {
                    case T_KW_SELECT: {
                        DECEND_NODE(NODE_SELECT_STMT);
                    }
                        break;
                    case T_KW_INSERT: {
                        DECEND_NODE(NODE_INSERT_STMT);
                        EXPECT(T_KW_INTO);
                    }
                        break;
                    case T_KW_UPDATE: {
                        DECEND_NODE(NODE_UPDATE_STMT);
                    }
                        break;
                    case T_KW_DELETE: {
                        DECEND_NODE(NODE_DELETE_STMT);
                        EXPECT(T_KW_FROM);
                    }
                        break;
                    case T_KW_CREATE: {
                        DECEND_NODE(NODE_CREATE_STMT);
                        EXPECT(T_KW_TABLE);
                    }
                        break;
                    case T_KW_DROP: {
                        DECEND_NODE(NODE_DROP_STMT);
                        EXPECT(T_KW_TABLE);
                    }
                        break;
                    case T_SEMICOLON: {
                        parser_set_error(parser, "Unexpected end of statement", token);
                    }
                        break;
                    default:
                        parser_set_error(parser, "Unexpected node type", token);
                        break;
                }
            }
                break;

            case NODE_SELECT_STMT:
                switch (node->phase) {
                    case 0: {//field list
                        ParserNode *fieldList = new_parser_node(NODE_FIELD_LIST, NULL);
                        append_parser_node(node, fieldList);
                        //Collect identifiers until FROM moves the statement to phase 1
                        while (parser->status != PARSESTATE_ERROR && node->phase == 0) {
                            NEXT_TOKEN();
                            if (token == NULL) {
                                parser_set_error(parser, "Expected table name", token);
                                break;
                            }
                            switch (token->type) {
                                case T_IDENTIFIER:
                                    append_parser_node(fieldList, new_parser_node(NODE_IDENTIFIER, token));
                                    token = NULL;
                                    break;
                                case T_COMMA:
                                    break;
                                case T_KW_FROM:
                                    if (fieldList->childrenLength == 0) {
                                        parser_set_error(parser, "Field list empty", token);
                                    }
                                    node->phase++;
                                    break;
                                default:
                                    parser_set_error(parser, "Unexpected token, expecting , or identifier", token);
                                    break;
                            }
                        }
                    }
                        break;
                    case 1: {//table name
                        NEXT_TOKEN();
                        if (token != NULL && token->type == T_IDENTIFIER) {
                            append_parser_node(node, new_parser_node(NODE_IDENTIFIER, token));
                            token = NULL;
                            node->phase++;
                            NEXT_TOKEN();
                            //Expect where or end of command
                            if (token == NULL || (token->type != T_KW_WHERE && token->type != T_SEMICOLON)) {
                                parser_set_error(parser, "Expected WHERE or ;", token);
                            } else {
                                if (token->type == T_KW_WHERE) {
                                    ParserNode *group = new_parser_node(NODE_COMPARISON_GROUP, NULL);
                                    append_parser_node(node, group);
                                    node = group;
                                    break;
                                } else if (token->type == T_SEMICOLON) {
                                    node->phase = -1;
                                    //Finished reading select
                                    ASCEND_NODE();
                                    break;
                                }
                            }
                        } else {
                            parser_set_error(parser, "Expected table name", token);
                        }
                    }
                        break;
                    case 2: //after comparison group
                        EXPECT(T_SEMICOLON);
                        ASCEND_NODE();
                        break;
                    default:
                        parser_set_error(parser, "Unknown phase reading select", token);
                        break;
                }
                break;

            case NODE_INSERT_STMT: {
                switch (node->phase) {
                    case 0: { //table name
                        NEXT_TOKEN();
                        if (token == NULL || token->type != T_IDENTIFIER) {
                            parser_set_error(parser, "Expecting identifier", token);
                            break;
                        }
                        append_parser_node(node, new_parser_node(NODE_IDENTIFIER, token));
                        token = NULL;
                        node->phase++;
                        EXPECT(T_KW_SET);
                        ParserNode *assignments = new_parser_node(NODE_ASSIGNMENT_LIST, NULL);
                        append_parser_node(node, assignments);
                        node = assignments;
                    }
                        break;
                    case 1: { // end of assignment list
                        EXPECT(T_SEMICOLON);
                        ASCEND_NODE();
                    }
                        break;
                    default:
                        parser_set_error(parser, "Unknown phase reading insert", token);
                        break;
                }
            }
                break;

            case NODE_UPDATE_STMT: {
                switch (node->phase) {
                    case 0: {
                        NEXT_TOKEN();
                        if (token == NULL || token->type != T_IDENTIFIER) {
                            parser_set_error(parser, "Expected identifier", token);
                            break;
                        }
                        append_parser_node(node, new_parser_node(NODE_IDENTIFIER, token));
                        token = NULL;
                        node->phase++;
                        EXPECT(T_KW_SET);
                        ParserNode *assignments = new_parser_node(NODE_ASSIGNMENT_LIST, NULL);
                        append_parser_node(node, assignments);
                        node = assignments;
                    }
                        break;
                    case 1: { //end of command or where
                        NEXT_TOKEN();
                        if (token == NULL || (token->type != T_SEMICOLON && token->type != T_KW_WHERE)) {
                            parser_set_error(parser, "Expected ; or WHERE", token);
                            break;
                        }
                        if (token->type == T_SEMICOLON) {
                            ASCEND_NODE();
                        } else if (token->type == T_KW_WHERE) {
                            node->phase++;
                            //WHERE introduces conditions, so read them as a comparison
                            //group exactly like SELECT and DELETE do
                            ParserNode *comparisons = new_parser_node(NODE_COMPARISON_GROUP, NULL);
                            append_parser_node(node, comparisons);
                            node = comparisons;
                        }
                    }
                        break;
                    case 2: {
                        EXPECT(T_SEMICOLON);
                        ASCEND_NODE();
                    }
                        break;
                    default:
                        parser_set_error(parser, "Unknown phase reading update", token);
                        break;
                }
            }
                break;

            case NODE_DELETE_STMT: {
                switch (node->phase) {
                    case 0: {
                        NEXT_TOKEN();
                        if (token == NULL || token->type != T_IDENTIFIER) {
                            parser_set_error(parser, "Expected identifier", token);
                            break;
                        }
                        append_parser_node(node, new_parser_node(NODE_IDENTIFIER, token));
                        token = NULL;
                        NEXT_TOKEN();
                        //Where or end of command
                        if (token == NULL || (token->type != T_KW_WHERE && token->type != T_SEMICOLON)) {
                            parser_set_error(parser, "Expected ; or WHERE", token);
                            break;
                        }
                        if (token->type == T_KW_WHERE) {
                            node->phase++;
                            ParserNode *comparison = new_parser_node(NODE_COMPARISON_GROUP, NULL);
                            append_parser_node(node, comparison);
                            node = comparison;
                        } else if (token->type == T_SEMICOLON) {
                            ASCEND_NODE();
                            break;
                        }
                    }
                        break;
                    case 1: {
                        EXPECT(T_SEMICOLON);
                        ASCEND_NODE();
                    }
                        break;
                    default:
                        parser_set_error(parser, "Unknown phase reading delete", token);
                        break;
                }
            }
                break;

            case NODE_CREATE_STMT: {
                switch (node->phase) {
                    case 0: {
                        NEXT_TOKEN();
                        if (token == NULL || token->type != T_IDENTIFIER) {
                            parser_set_error(parser, "Expected identifier", token);
                            break;
                        }
                        append_parser_node(node, new_parser_node(NODE_IDENTIFIER, token));
                        token = NULL;
                        EXPECT(T_PAREN_OPEN);
                        node->phase++;
                        ParserNode *columnSpecs = new_parser_node(NODE_COLUMN_SPEC_LIST, NULL);
                        append_parser_node(node, columnSpecs);
                        node = columnSpecs;
                    }
                        break;
                    case 1: {
                        EXPECT(T_PAREN_CLOSE);
                        EXPECT(T_SEMICOLON);
                        ASCEND_NODE();
                    }
                        break;
                    default:
                        parser_set_error(parser, "Unknown phase reading create", token);
                        break;
                }
            }
                //Without this break control ran on into the DROP handler below
                break;

            case NODE_DROP_STMT: {
                NEXT_TOKEN();
                if (token == NULL || token->type != T_IDENTIFIER) {
                    parser_set_error(parser, "Expected identifier", token);
                    break;
                }
                append_parser_node(node, new_parser_node(NODE_IDENTIFIER, token));
                token = NULL;
                EXPECT(T_SEMICOLON);
                ASCEND_NODE();
            }
                break;

            case NODE_COLUMN_SPEC_LIST: {
                //One iteration per column: name, type, optional INDEX, then comma or done
                while (parser->status != PARSESTATE_ERROR) {
                    NEXT_TOKEN();
                    if (token == NULL || token->type != T_IDENTIFIER) {
                        parser_set_error(parser, "Expected identifier", token);
                        break;
                    }
                    ParserNode *columnSpec = new_parser_node(NODE_COLUMN_SPEC, token);
                    token = NULL;
                    append_parser_node(node, columnSpec);
                    NEXT_TOKEN();
                    if (token == NULL || (token->type != T_KW_STRING && token->type != T_KW_INT)) {
                        parser_set_error(parser, "Expected one of STRING, INT, INTEGER", token);
                        break;
                    }
                    if (token->type == T_KW_STRING) {
                        //STRING carries a length: STRING ( number )
                        ParserNode *columnType = new_parser_node(NODE_COLUMN_TYPE, token);
                        token = NULL;
                        append_parser_node(columnSpec, columnType);
                        EXPECT(T_PAREN_OPEN);
                        NEXT_TOKEN();
                        if (token == NULL || token->type != T_NUMBER) {
                            parser_set_error(parser, "Expected number", token);
                            break;
                        }
                        append_parser_node(columnType, new_parser_node(NODE_COLUMN_TYPE_SPECIFIER, token));
                        token = NULL;
                        EXPECT(T_PAREN_CLOSE);
                    } else if (token->type == T_KW_INT) {
                        append_parser_node(columnSpec, new_parser_node(NODE_COLUMN_TYPE, token));
                        token = NULL;
                    }
                    NEXT_TOKEN();
                    if (token == NULL) {
                        ASCEND_NODE();
                        break;
                    }
                    //Look for options
                    if (token->type == T_KW_INDEX) {
                        //NOTE(review): the option is attached to the list, not to
                        //columnSpec - confirm whether that is intended
                        append_parser_node(node, new_parser_node(NODE_COLUMN_OPTION, token));
                        token = NULL;
                        NEXT_TOKEN();
                        if (token == NULL) {
                            ASCEND_NODE();
                            break;
                        }
                    }
                    //Comma or ascend
                    if (token->type != T_COMMA) {
                        //Not ours: hand the token back for the parent to read
                        scanner_push_buffer(scanner, token);
                        token = NULL;
                        ASCEND_NODE();
                        break;
                    }
                }
            }
                break;

            case NODE_ASSIGNMENT_LIST: {
                //Read assignments
                while (parser->status != PARSESTATE_ERROR) {
                    NEXT_TOKEN();
                    if (token == NULL || token->type != T_IDENTIFIER) {
                        parser_set_error(parser, "Expected identifier", token);
                        break;
                    }
                    ParserNode *assignment = new_parser_node(NODE_ASSIGNMENT, token);
                    token = NULL;
                    append_parser_node(node, assignment);
                    EXPECT(T_COMP_EQ);
                    NEXT_TOKEN();
                    if (token == NULL || (token->type != T_STRING && token->type != T_NUMBER)) {
                        parser_set_error(parser, "Expected value", token);
                        break;
                    }
                    append_parser_node(assignment, new_parser_node(NODE_VALUE, token));
                    token = NULL;
                    //Check for comma
                    NEXT_TOKEN();
                    if (token == NULL) {
                        ASCEND_NODE();
                        break;
                    }
                    if (token->type != T_COMMA) {
                        //Not ours: hand the token back for the parent to read
                        scanner_push_buffer(scanner, token);
                        token = NULL;
                        ASCEND_NODE();
                        break;
                    }
                }
            }
                break;

            case NODE_COMPARISON_GROUP: {
                NEXT_TOKEN();
                if (token == NULL) {
                    parser_set_error(parser, "Expected identifier", NULL);
                    break;
                }
                switch (token->type) {
                    case T_IDENTIFIER:
                        //The comparison node re-reads the identifier itself
                        scanner_push_buffer(scanner, token);
                        token = NULL;
                        DECEND_NODE(NODE_COMPARISON);
                        break;
                    case T_COMP_AND:
                        break;
                    default:
                        scanner_push_buffer(scanner, token);
                        token = NULL;
                        ASCEND_NODE();
                        break;
                }
            }
                break;

            case NODE_COMPARISON:
                //Phases: 0 identifier, 1 comparator, 2 value, -1 finished
                while (parser->status != PARSESTATE_ERROR) {
                    NEXT_TOKEN();
                    if (token == NULL) {
                        parser_set_error(parser, "Unexpected end of input", NULL);
                        break;
                    }
                    switch (node->phase) {
                        case 0:
                            if (token->type == T_IDENTIFIER) {
                                append_parser_node(node, new_parser_node(NODE_IDENTIFIER, token));
                                token = NULL;
                                node->phase++;
                            } else {
                                parser_set_error(parser, "Expected identifier", token);
                            }
                            break;
                        case 1:
                            if (token->type == T_COMP_EQ || token->type == T_COMP_NEQ) {
                                append_parser_node(node, new_parser_node(NODE_COMPARATOR, token));
                                token = NULL;
                                node->phase++;
                            } else {
                                parser_set_error(parser, "Expected comparator", token);
                            }
                            break;
                        case 2:
                            if (token->type == T_STRING || token->type == T_NUMBER) {
                                append_parser_node(node, new_parser_node(NODE_VALUE, token));
                                token = NULL;
                                node->phase = -1;
                            } else {
                                parser_set_error(parser, "Expected value", token);
                            }
                            break;
                        default:
                            parser_set_error(parser, "Unknown phase reading comparison", token);
                            break;
                    }
                    if (node->phase == -1) {
                        ASCEND_NODE();
                        break;
                    }
                }
                break;

            default:
                parser_set_error(parser, "Parser in unknown state", token);
                break;
        }
    }

    //A token that was read but never attached to a node is still owned here
    if (token != NULL) {
        free_scanner_token(token);
    }
    //A scanner failure looks like end of input above, so surface it now
    if (scanner->state == SCANSTATE_ERROR) {
        parser_set_error(parser, scanner->errMsg, NULL);
    }
    if (parser->status != PARSESTATE_ERROR) {
        return root;
    }
    free_parser_node(root);
    return NULL;
}
#undef NEXT_TOKEN
#undef EXPECT
#undef DECEND_NODE
#undef ASCEND_NODE
/**
 * Puts the parser into PARSESTATE_ERROR and stores a copy of the message.
 *
 * When `token` is given, the message is prefixed with the token position as
 * "[line:column] ". Any message recorded earlier is released once the new
 * one has been built; if the new message cannot be allocated the earlier
 * one is kept.
 *
 * @param parser parser to update
 * @param err    message text; it is copied, the caller keeps ownership
 * @param token  token the error refers to, or NULL for no position prefix
 */
void parser_set_error(Parser *parser, char *err, ScannerToken *token) {
    /* Guard against a missing message so strdup/snprintf never see NULL. */
    const char *text = (err != NULL) ? err : "Unknown error";
    char *message = NULL;

    parser->status = PARSESTATE_ERROR;

    if (token != NULL) {
        /* lineNo/linePos are size_t; convert explicitly to match %lu. */
        unsigned long line = (unsigned long) token->lineNo;
        unsigned long column = (unsigned long) token->linePos;
        /* First pass measures, second pass formats into an exact-size buffer. */
        int needed = snprintf(NULL, 0, "[%lu:%lu] %s", line, column, text);
        if (needed >= 0) {
            message = malloc((size_t) needed + 1);
            if (message != NULL) {
                snprintf(message, (size_t) needed + 1, "[%lu:%lu] %s", line, column, text);
            }
        }
    } else {
        message = strdup(text);
    }

    if (message != NULL) {
        /* Replace, rather than leak, a message set by an earlier error. */
        free(parser->errMsg);
        parser->errMsg = message;
    }
}
/**
 * Prints a node and all of its descendants to stdout, one node per line.
 *
 * Each line holds `indent` spaces followed by the node type name. When the
 * node carries a token, ": " and the token type name follow, plus the token
 * value in angle brackets for identifiers, strings and numbers. Children are
 * printed with two more spaces of indentation.
 *
 * @param node   subtree to print
 * @param indent number of leading spaces for this node's line
 */
void parser_print_node_tree(ParserNode *node, size_t indent) {
    //Type, padded on the left through the field width instead of a heap buffer
    printf("%*s%s", (int) indent, "", parser_node_type_to_str(node->type));
    //Token
    if (node->token != NULL) {
        printf(": %s", scanner_token_type_to_str(node->token->type));
        switch (node->token->type) {
            case T_IDENTIFIER:
            case T_STRING:
                printf("<%s>", node->token->valueStr);
                break;
            case T_NUMBER:
                //valueInt is uint64_t; convert explicitly to match the specifier
                printf("<%llu>", (unsigned long long) node->token->valueInt);
                break;
            default:
                break;
        }
    }
    printf("\n");
    for (size_t i = 0; i < node->childrenLength; i++) {
        parser_print_node_tree(node->children[i], indent + 2);
    }
}

92
src/parser.h Normal file
View File

@@ -0,0 +1,92 @@
//
// Created by Sam on 07/06/2018.
//
#ifndef SDB_PARSER_H
#define SDB_PARSER_H
#include <stddef.h>
#include <stdbool.h>
#include "scanner.h"
#include "sql.h"
/*
 * X-macro list of parser states. Define X(t) before expanding the list to
 * generate one item per state.
 *
 *  PARSESTATE_NONE    - fresh parser, the only state parser_parse() accepts
 *  PARSESTATE_RUNNING - set while parser_parse() is consuming tokens
 *  PARSESTATE_DONE    - end of input reached at statement level
 *  PARSESTATE_ERROR   - set by parser_set_error()
 *
 * The last entry deliberately has no trailing backslash: a continuation
 * there would splice whatever line follows into the macro body.
 */
#define PARSE_STATUS_LIST \
    X(PARSESTATE_NONE) \
    X(PARSESTATE_RUNNING) \
    X(PARSESTATE_DONE) \
    X(PARSESTATE_ERROR)

#define X(t) t,
enum ParseStatus_t {
    PARSE_STATUS_LIST
};
#undef X
typedef enum ParseStatus_t ParseStatus;

/* Returns the enumerator name for `status`, or "UNKNOWN". */
char *parser_status_to_str(ParseStatus status);
/*
 * X-macro list of every kind of node that can appear in the parse tree.
 * Define X(t) before expanding the list to generate one item per kind.
 */
#define PARSE_NODE_TYPE_LIST        \
    X(NODE_NONE)                    \
    X(NODE_STATEMENT_LIST)          \
    X(NODE_SELECT_STMT)             \
    X(NODE_INSERT_STMT)             \
    X(NODE_UPDATE_STMT)             \
    X(NODE_DELETE_STMT)             \
    X(NODE_CREATE_STMT)             \
    X(NODE_DROP_STMT)               \
    X(NODE_COLUMN_SPEC_LIST)        \
    X(NODE_COLUMN_SPEC)             \
    X(NODE_COLUMN_TYPE)             \
    X(NODE_COLUMN_TYPE_SPECIFIER)   \
    X(NODE_COLUMN_OPTION)           \
    X(NODE_FIELD_LIST)              \
    X(NODE_ASSIGNMENT_LIST)         \
    X(NODE_ASSIGNMENT)              \
    X(NODE_COMPARISON_GROUP)        \
    X(NODE_COMPARISON)              \
    X(NODE_COMPARATOR)              \
    X(NODE_IDENTIFIER)              \
    X(NODE_VALUE)

/* Expand the list into the enumerators, in list order starting at 0. */
#define X(t) t,
typedef enum ParserNodeType_t {
    PARSE_NODE_TYPE_LIST
} ParserNodeType;
#undef X

/* Returns the enumerator name for `nodeType`, or "UNKNOWN". */
char *parser_node_type_to_str(ParserNodeType nodeType);
/*
 * One node of the parse tree built by parser_parse().
 */
struct ParserNode_t {
/* Node this one was appended to; NULL until append_parser_node() sets it. */
struct ParserNode_t *parent;
/* Kind of construct the node represents. */
ParserNodeType type;
/* Progress counter used by parser_parse() while reading this node; starts
 * at 0 and is set to -1 by some handlers once the node is complete.
 * NOTE(review): ssize_t is a POSIX type and none of the headers included
 * directly by this file is guaranteed to declare it - confirm it is
 * available on every target. */
ssize_t phase;
/* Token attached to the node, or NULL; released by free_parser_node(). */
ScannerToken *token;
/* Array of child pointers, grown by append_parser_node(); NULL when empty. */
struct ParserNode_t **children;
/* Number of entries in `children`. */
size_t childrenLength;
};
typedef struct ParserNode_t ParserNode;
/* Allocates a node of the given type with an optional token and no parent
 * or children. */
ParserNode *new_parser_node(ParserNodeType type, ScannerToken *token);
/* Releases a node, its token and, recursively, all of its children. */
void free_parser_node(ParserNode *node);
/* Appends `child` to `node`'s children and sets the child's parent. */
void append_parser_node(ParserNode *node, ParserNode *child);
/*
 * Parser state shared between parser_parse() and its caller.
 */
struct Parser_t {
/* Current state; PARSESTATE_NONE after new_parser(). */
ParseStatus status;
/* Heap-allocated error text written by parser_set_error(), or NULL;
 * released by free_parser(). */
char *errMsg;
};
typedef struct Parser_t Parser;
/* Allocates a parser with status PARSESTATE_NONE and no error message. */
Parser *new_parser();
/* Releases the parser and its error message. */
void free_parser(Parser *parser);
/* Parses the scanner's input; returns the root node of the tree, or NULL
 * when an error was recorded in `parser`. */
ParserNode *parser_parse(Parser *parser, Scanner *scanner);
/* Sets status to PARSESTATE_ERROR and stores a copy of `err`, prefixed with
 * the token's line and position when `token` is not NULL. */
void parser_set_error(Parser *parser, char *err, ScannerToken *token);
/* Prints `node` and its descendants to stdout, indenting `node` by `indent`
 * spaces and each level below it by two more. */
void parser_print_node_tree(ParserNode *node, size_t indent);
#endif //SDB_PARSER_H

View File

@@ -9,72 +9,129 @@
#include <stdio.h>
#include "scanner.h"
Scanner_Result *new_scanner_result() {
Scanner_Result *result = malloc(sizeof(Scanner_Result));
clear_scanner_result(result);
/**
 * Maps a ScannerTokenType value to the spelling of its enumerator.
 *
 * Each entry of SCANNER_TOKEN_TYPE_LIST is compared against `tokenType` in
 * turn and the matching enumerator name is returned as a string literal; a
 * value that matches no entry yields "UNKNOWN".
 */
char* scanner_token_type_to_str(ScannerTokenType tokenType) {
#define X(t) if (tokenType == t) { return #t; }
    SCANNER_TOKEN_TYPE_LIST
#undef X
    return "UNKNOWN";
}
ScannerToken *new_scanner_token() {
ScannerToken *result = malloc(sizeof(ScannerToken));
clear_scanner_token(result);
return result;
}
void clear_scanner_result(Scanner_Result *result) {
result->token = T_NONE;
void clear_scanner_token(ScannerToken *result) {
result->type = T_NONE;
result->valueInt = 0;
if (result->valueStr != NULL) {
free(result->valueStr);
}
result->valueStr = NULL;
result->lineNo = 0;
result->linePos = 0;
}
void free_scanner_result(Scanner_Result *result) {
/**
 * Releases a token together with its string value.
 *
 * The previous version freed the token itself inside the valueStr check and
 * then freed it again, which is a double free and never released the string.
 *
 * @param result token to destroy; NULL is accepted and ignored
 */
void free_scanner_token(ScannerToken *result) {
    if (result == NULL) {
        return;
    }
    free(result->valueStr);
    free(result);
}
/**
 * Maps a ScannerState value to the spelling of its enumerator.
 *
 * Each entry of SCANNER_STATE_LIST is compared against `state` in turn and
 * the matching enumerator name is returned as a string literal; a value that
 * matches no entry yields "UNKNOWN".
 */
char * scanner_state_to_str(ScannerState state) {
#define X(t) if (state == t) { return #t; }
    SCANNER_STATE_LIST
#undef X
    return "UNKNOWN";
}
Scanner *new_scanner(char *input) {
Scanner *scanner = malloc(sizeof(Scanner));
scanner->input = input;
scanner->pos = 0;
scanner->errMsg = NULL;
scanner->state = SCANSTATE_START;
scanner->buffer = NULL;
scanner->bufferIndex = 0;
scanner->bufferLength = 0;
scanner->lineNo = 1;
scanner->linePos = 1;
scanner->state = SCANSTATE_START;
scanner->errMsg = NULL;
scanner->input = input;
scanner->pos = 0;
return scanner;
}
void free_scanner(Scanner *scanner) {
if (scanner->input != NULL) {
free(scanner->input);
}
reset_scanner(scanner, NULL);
free(scanner);
}
void reuse_scanner(Scanner *scanner, char *input) {
void reset_scanner(Scanner *scanner, char *input) {
scanner->pos = 0;
scanner->lineNo = 1;
scanner->linePos = 1;
scanner->state = SCANSTATE_START;
if (scanner->input != NULL) {
free(scanner->input);
}
scanner->input = input;
if (scanner->errMsg != NULL) {
free(scanner->input);
scanner->errMsg = NULL;
free(scanner->errMsg);
}
scanner->pos = 0;
scanner->lineNo = 1;
scanner->linePos = 1;
scanner->state = SCANSTATE_START;
scanner->errMsg = NULL;
if (scanner->bufferIndex > 0) {
for (size_t i = 0; i < scanner->bufferIndex; i++) {
free_scanner_token(scanner->buffer[i]);
}
scanner->bufferIndex = 0;
}
if (scanner->bufferLength > 0) {
free(scanner->buffer);
scanner->buffer = NULL;
}
}
Scanner_Result *scanner_next_token(Scanner *scanner, Scanner_Result *result) {
/**
 * Pushes a token back onto the scanner so that the next call to
 * scanner_next_token() returns it again. Tokens come back in last-in,
 * first-out order.
 *
 * The pushback array is grown whenever the next slot would lie outside it.
 * The previous test (bufferIndex + 1 == bufferLength) missed the case of a
 * one-slot array that is already full, so a second push wrote past the end.
 * A missing array with a non-zero recorded length is also handled by
 * allocating it afresh.
 *
 * @param scanner scanner receiving the token
 * @param token   token to push back; the scanner takes ownership. If the
 *                array cannot be grown the token is released and dropped.
 */
void scanner_push_buffer(Scanner *scanner, ScannerToken *token) {
    if (scanner->buffer == NULL || scanner->bufferIndex >= scanner->bufferLength) {
        size_t capacity = scanner->bufferLength == 0 ? 1 : scanner->bufferLength;
        while (capacity <= scanner->bufferIndex) {
            capacity *= 2;
        }
        /* Keep the old pointer until realloc succeeds so it is never lost. */
        ScannerToken **grown = realloc(scanner->buffer, capacity * sizeof *grown);
        if (grown == NULL) {
            free_scanner_token(token);
            return;
        }
        scanner->buffer = grown;
        scanner->bufferLength = capacity;
    }
    scanner->buffer[scanner->bufferIndex++] = token;
}
ScannerToken *scanner_next_token(Scanner *scanner, ScannerToken *token) {
//Return if already done
if (scanner->state == SCANSTATE_DONE || scanner->state == SCANSTATE_ERROR) {
if (result != NULL) free_scanner_result(result);
if (token != NULL) free_scanner_token(token);
return NULL;
}
if (result == NULL) {
result = new_scanner_result();
} else {
clear_scanner_result(result);
if (scanner->bufferIndex > 0) {
if (token != NULL) {
free_scanner_token(token);
}
scanner->bufferIndex--;
token = scanner->buffer[scanner->bufferIndex];
scanner->buffer[scanner->bufferIndex] = NULL;
return token;
}
//Setup result
if (token == NULL) {
token = new_scanner_token();
} else {
clear_scanner_token(token);
}
//Consume white space
while (scanner_peek_char(scanner) != 0 && isspace(scanner_peek_char(scanner))) {
if (scanner_next_char(scanner) == '\n') {
@@ -82,51 +139,67 @@ Scanner_Result *scanner_next_token(Scanner *scanner, Scanner_Result *result) {
scanner->linePos = 1;
}
}
//Check for end of string
if (scanner_peek_char(scanner) == 0) {
scanner->state = SCANSTATE_DONE;
free_scanner_result(result);
free_scanner_token(token);
return NULL;
}
//Record position of token in the input
token->lineNo = scanner->lineNo;
token->linePos = scanner->linePos;
//Keywords
if (scanner_match(scanner, "select", true)) result->token = K_SELECT;
else if (scanner_match(scanner, "from", true)) result->token = K_FROM;
else if (scanner_match(scanner, "insert", true)) result->token = K_INSERT;
else if (scanner_match(scanner, "into", true)) result->token = K_INTO;
else if (scanner_match(scanner, "set", true)) result->token = K_SET;
else if (scanner_match(scanner, "update", true)) result->token = K_UPDATE;
else if (scanner_match(scanner, "where", true)) result->token = K_WHERE;
else if (scanner_match(scanner, "delete", true)) result->token = K_DELETE;
else if (scanner_match(scanner, "create", true)) result->token = K_CREATE;
else if (scanner_match(scanner, "table", true)) result->token = K_TABLE;
else if (scanner_match(scanner, "drop", true)) result->token = K_DROP;
else if (scanner_match(scanner, "string", true)) result->token = K_STRING;
else if (scanner_match(scanner, "int", true)) result->token = K_INT;
else if (scanner_match(scanner, "index", true)) result->token = K_INDEX;
if (result->token != T_NONE) {
return result;
if (scanner_match(scanner, "select", true)) token->type = T_KW_SELECT;
else if (scanner_match(scanner, "from", true)) token->type = T_KW_FROM;
else if (scanner_match(scanner, "insert", true)) token->type = T_KW_INSERT;
else if (scanner_match(scanner, "into", true)) token->type = T_KW_INTO;
else if (scanner_match(scanner, "set", true)) token->type = T_KW_SET;
else if (scanner_match(scanner, "update", true)) token->type = T_KW_UPDATE;
else if (scanner_match(scanner, "where", true)) token->type = T_KW_WHERE;
else if (scanner_match(scanner, "delete", true)) token->type = T_KW_DELETE;
else if (scanner_match(scanner, "create", true)) token->type = T_KW_CREATE;
else if (scanner_match(scanner, "table", true)) token->type = T_KW_TABLE;
else if (scanner_match(scanner, "drop", true)) token->type = T_KW_DROP;
else if (scanner_match(scanner, "string", true)) token->type = T_KW_STRING;
else if (scanner_match(scanner, "int", true)) token->type = T_KW_INT;
else if (scanner_match(scanner, "integer", true)) token->type = T_KW_INT;
else if (scanner_match(scanner, "index", true)) token->type = T_KW_INDEX;
if (token->type != T_NONE) {
return token;
}
//Comparators
if (scanner_match(scanner, "=", false)) result->token = T_COMP_EQ;
if (scanner_match(scanner, "<>", false)) result->token = T_COMP_NEQ;
if (scanner_match(scanner, "and", true)) result->token = T_COMP_AND;
if (result->token != T_NONE) {
return result;
if (scanner_match(scanner, "=", false)) token->type = T_COMP_EQ;
else if (scanner_match(scanner, "<>", false)) token->type = T_COMP_NEQ;
else if (scanner_match(scanner, "and", true)) token->type = T_COMP_AND;
if (token->type != T_NONE) {
return token;
}
//Punctuation
if (scanner_peek_char(scanner) == ',') {
scanner_next_char(scanner);
result->token = T_COMMA;
char nextChar = scanner_peek_char(scanner);
switch (nextChar) {
case ',':
token->type = T_COMMA;
break;
case ';':
token->type = T_SEMICOLON;
break;
case '(':
token->type = T_PAREN_OPEN;
break;
case ')':
token->type = T_PAREN_CLOSE;
break;
default:
break;
}
if (scanner_peek_char(scanner) == ';') {
if (token->type != T_NONE) {
scanner_next_char(scanner);
result->token = T_SEMICOLON;
}
if (result->token != T_NONE) {
return result;
return token;
}
//Numbers
@@ -141,22 +214,22 @@ Scanner_Result *scanner_next_token(Scanner *scanner, Scanner_Result *result) {
if (isdigit(scanner_peek_char(scanner))) {
//Exceed length of int array!
scanner_set_error(scanner, "Number exceeded allowed length");
free_scanner_result(result);
free_scanner_token(token);
return NULL;
}
result->token = T_NUMBER;
token->type = T_NUMBER;
//convert number
result->valueInt = (uint64_t) strtol(intInput, NULL, 10);
return result;
token->valueInt = (uint64_t) strtol(intInput, NULL, 10);
return token;
}
//Strings
if (scanner_peek_char(scanner) == '"' || scanner_peek_char(scanner) == '\'') {
if (scanner_read_string(scanner, result, scanner_next_char(scanner)) == false) {
free_scanner_result(result);
if (scanner_read_string(scanner, token, scanner_next_char(scanner)) == false) {
free_scanner_token(token);
return NULL;
}
return result;
return token;
}
//Identifiers
@@ -173,19 +246,19 @@ Scanner_Result *scanner_next_token(Scanner *scanner, Scanner_Result *result) {
ident = realloc(ident, sizeof(char) * ident_size);
}
} while ((isalnum(scanner_peek_char(scanner)) || scanner_peek_char(scanner) == '_'));
result->token = T_IDENTIFIER;
result->valueStr = strdup(ident);
token->type = T_IDENTIFIER;
token->valueStr = strdup(ident);
free(ident);
return result;
return token;
}
//Nothing matched
scanner_set_error(scanner, "Unknown text");
free_scanner_result(result);
scanner_set_error(scanner, "Unexpected input");
free_scanner_token(token);
return NULL;
}
bool scanner_read_string(Scanner *scanner, Scanner_Result *result, char quoteType) {
bool scanner_read_string(Scanner *scanner, ScannerToken *result, char quoteType) {
char next = 0;
size_t length = 8;
@@ -230,7 +303,7 @@ bool scanner_read_string(Scanner *scanner, Scanner_Result *result, char quoteTyp
}
} else if (next == quoteType) {
//end of string
result->token = T_STRING;
result->type = T_STRING;
result->valueStr = strdup(str);
free(str);
return true;
@@ -295,7 +368,7 @@ void scanner_set_error(Scanner *scanner, const char *errText) {
//Create error msg
char *errMsg = malloc(sizeof(char) * 128);
snprintf(errMsg, 128, "Error at line %d:%d %s", scanner->lineNo, scanner->linePos, errText);
snprintf(errMsg, 128, "[%d:%d]: %s", scanner->lineNo, scanner->linePos, errText);
if (scanner->errMsg != NULL) {
free(scanner->errMsg);

View File

@@ -7,61 +7,72 @@
#include <stdbool.h>
#include <stdint.h>
#include <stddef.h>
enum Scanner_Token_t {
T_NONE,
#define SCANNER_TOKEN_TYPE_LIST \
X(T_NONE) \
X(T_STRING) \
X(T_NUMBER) \
X(T_IDENTIFIER) \
X(T_COMMA) \
X(T_SEMICOLON) \
X(T_PAREN_OPEN) \
X(T_PAREN_CLOSE) \
X(T_COMP_EQ) \
X(T_COMP_NEQ) \
X(T_COMP_AND) \
X(T_KW_SELECT) \
X(T_KW_FROM) \
X(T_KW_INSERT) \
X(T_KW_INTO) \
X(T_KW_SET) \
X(T_KW_UPDATE) \
X(T_KW_WHERE) \
X(T_KW_DELETE) \
X(T_KW_CREATE) \
X(T_KW_TABLE) \
X(T_KW_DROP) \
X(T_KW_STRING) \
X(T_KW_INT) \
X(T_KW_INDEX)
//Values
T_STRING,
T_NUMBER,
T_IDENTIFIER,
//Punctuation
T_COMMA,
T_SEMICOLON,
//Comparators
T_COMP_EQ,
T_COMP_NEQ,
T_COMP_AND,
//Keywords
K_SELECT,
K_FROM,
K_INSERT,
K_INTO,
K_SET,
K_UPDATE,
K_WHERE,
K_DELETE,
K_CREATE,
K_TABLE,
K_DROP,
K_STRING,
K_INT,
K_INDEX
#define X(t) t,
enum ScannerTokenType_t {
SCANNER_TOKEN_TYPE_LIST
};
typedef enum Scanner_Token_t Scanner_Token;
#undef X
typedef enum ScannerTokenType_t ScannerTokenType;
struct Scanner_Result_t {
Scanner_Token token;
char* scanner_token_type_to_str(ScannerTokenType tokenType);
struct ScannerToken_t {
ScannerTokenType type;
char *valueStr;
uint64_t valueInt;
size_t lineNo;
size_t linePos;
};
typedef struct Scanner_Result_t Scanner_Result;
typedef struct ScannerToken_t ScannerToken;
Scanner_Result *new_scanner_result();
ScannerToken *new_scanner_token();
void clear_scanner_result(Scanner_Result *result);
void clear_scanner_token(ScannerToken *result);
void free_scanner_result(Scanner_Result *result);
void free_scanner_token(ScannerToken *result);
enum Scanner_State_t {
SCANSTATE_START,
SCANSTATE_ERROR,
SCANSTATE_DONE
#define SCANNER_STATE_LIST \
X(SCANSTATE_START) \
X(SCANSTATE_ERROR) \
X(SCANSTATE_DONE)
#define X(t) t,
enum ScannerState_t {
SCANNER_STATE_LIST
};
typedef enum Scanner_State_t Scanner_State;
#undef X
typedef enum ScannerState_t ScannerState;
char * scanner_state_to_str(ScannerState state);
struct Scanner_t {
char *input;
@@ -69,7 +80,10 @@ struct Scanner_t {
size_t lineNo;
size_t linePos;
size_t pos;
Scanner_State state;
ScannerState state;
ScannerToken **buffer;
size_t bufferLength;
size_t bufferIndex;
};
typedef struct Scanner_t Scanner;
@@ -77,11 +91,13 @@ Scanner *new_scanner(char *input);
void free_scanner(Scanner *scanner);
void reuse_scanner(Scanner *scanner, char *input);
void reset_scanner(Scanner *scanner, char *input);
Scanner_Result *scanner_next_token(Scanner *scanner, Scanner_Result *result);
void scanner_push_buffer(Scanner *scanner, ScannerToken *token);
bool scanner_read_string(Scanner *scanner, Scanner_Result *result, char quoteType);
ScannerToken *scanner_next_token(Scanner *scanner, ScannerToken *token);
bool scanner_read_string(Scanner *scanner, ScannerToken *result, char quoteType);
char scanner_next_char(Scanner *scanner);