Implemented language token scanner.

This commit is contained in:
2018-06-06 22:54:45 +01:00
parent 8335bbefe6
commit 8b7a0fba4e
4 changed files with 277 additions and 18 deletions

View File

@@ -6,10 +6,11 @@
#define SDB_SQL_H #define SDB_SQL_H
#include <stddef.h> #include <stddef.h>
#include <inttypes.h>
union Value_t { union Value_t {
char *string; char *string;
int number; uint64_t number;
}; };
typedef union Value_t Value; typedef union Value_t Value;

View File

@@ -3,12 +3,13 @@
#include <mem.h> #include <mem.h>
#include "InputBuffer.h" #include "InputBuffer.h"
#include "scanner.h"
/**
 * Print the interactive prompt.
 *
 * The prompt text has no trailing newline, so stdout is flushed explicitly to
 * make sure it is visible before the program blocks waiting for input.
 */
void prompt(void) {
    printf("SDB> ");
    fflush(stdout);
}
void readInput(InputBuffer *buffer) { void read_input(InputBuffer *buffer) {
ssize_t read = getline(&buffer->buffer, &buffer->bufferLength, stdin); ssize_t read = getline(&buffer->buffer, &buffer->bufferLength, stdin);
if (read <= 0) { if (read <= 0) {
@@ -21,17 +22,51 @@ void readInput(InputBuffer *buffer) {
buffer->buffer[read - 1] = 0; buffer->buffer[read - 1] = 0;
} }
/**
 * Tokenise one line of user input and print every token that is found.
 *
 * The scanner is given its own heap copy of `input`, so the caller's buffer is
 * never modified or freed here.
 * NOTE(review): the copy is assumed to be released by free_scanner() - confirm.
 *
 * @param input NUL-terminated text to scan.
 */
void parse_input(char *input) {
    char *copy = strdup(input);
    if (copy == NULL) {
        printf("Out of memory\n");
        return;
    }

    Scanner *scanner = new_scanner(copy);
    Scanner_Result *result = NULL;

    /* scanner_next_token() recycles `result` and frees it before returning NULL. */
    while ((result = scanner_next_token(scanner, result)) != NULL) {
        switch (result->token) {
            case T_STRING:
                printf("Found String: %s\n", result->value_str);
                break;
            case T_IDENTIFIER:
                printf("Found Identifier: %s\n", result->value_str);
                break;
            case T_NUMBER:
                /* value_int is a uint64_t; cast so the specifier matches the argument. */
                printf("Found Number %llu\n", (unsigned long long) result->value_int);
                break;
            default:
                printf("Found Token: %d\n", result->token);
                break;
        }
    }

    if (scanner->state == SCANSTATE_ERROR) {
        if (scanner->errMsg != NULL) {
            printf("%s\n", scanner->errMsg);
        } else {
            printf("Parse Error!\n");
        }
    }

    free_scanner(scanner);
}
int main() { int main() {
#if defined(_WIN32) || defined(WIN32)
setbuf(stdout, 0);
setbuf(stderr, 0);
#endif
InputBuffer *buffer = input_buffer_new(); InputBuffer *buffer = input_buffer_new();
while (true) { while (true) {
prompt(); prompt();
readInput(buffer); read_input(buffer);
if (strcmpi(buffer->buffer, ".exit") == 0) { if (strcmpi(buffer->buffer, ".exit") == 0) {
break; break;
} else { } else {
printf("Unknown Command or Query\n"); parse_input(buffer->buffer);
} }
} }

View File

@@ -6,14 +6,22 @@
#include <string.h> #include <string.h>
#include <ctype.h> #include <ctype.h>
#include <stdbool.h> #include <stdbool.h>
#include <stdio.h>
#include "scanner.h" #include "scanner.h"
Scanner_Result *new_scanner_result() { Scanner_Result *new_scanner_result() {
Scanner_Result *result = malloc(sizeof(Scanner_Result)); Scanner_Result *result = malloc(sizeof(Scanner_Result));
clear_scanner_result(result);
return result;
}
/**
 * Reset a scanner result so it can be reused for the next token.
 *
 * Frees the string owned by the result (if any) and restores the empty state:
 * token T_NONE, value_int 0, value_str NULL.
 *
 * @param result a previously initialised result; NULL is ignored. Its
 *               value_str must be NULL or a heap pointer, because it is
 *               passed to free().
 */
void clear_scanner_result(Scanner_Result *result) {
    if (result == NULL) {
        return;
    }
    free(result->value_str); /* free(NULL) is a no-op */
    result->value_str = NULL;
    result->token = T_NONE;
    result->value_int = 0;
}
void free_scanner_result(Scanner_Result *result) { void free_scanner_result(Scanner_Result *result) {
@@ -27,7 +35,10 @@ Scanner *new_scanner(char *input) {
Scanner *scanner = malloc(sizeof(Scanner)); Scanner *scanner = malloc(sizeof(Scanner));
scanner->input = input; scanner->input = input;
scanner->pos = 0; scanner->pos = 0;
scanner->lineNo = 1;
scanner->linePos = 1;
scanner->state = SCANSTATE_START; scanner->state = SCANSTATE_START;
scanner->errMsg = NULL;
return scanner; return scanner;
} }
@@ -39,33 +50,213 @@ void free_scanner(Scanner *scanner) {
} }
/**
 * Point an existing scanner at a new input string and reset its state.
 *
 * The previous input and any stored error message are freed; the scanner
 * stores `input` and frees it on the next reuse.
 *
 * @param scanner scanner to reset.
 * @param input   heap-allocated, NUL-terminated text to scan next.
 */
void reuse_scanner(Scanner *scanner, char *input) {
    /* Guard against being handed the buffer we already hold. */
    if (scanner->input != input) {
        free(scanner->input);
    }
    scanner->input = input;

    /* Release the old error message (not the new input). */
    free(scanner->errMsg);
    scanner->errMsg = NULL;

    scanner->pos = 0;
    scanner->lineNo = 1;
    scanner->linePos = 1;
    scanner->state = SCANSTATE_START;
}
/* True for characters that may appear inside an identifier. */
static bool scan_is_ident_char(char c) {
    return isalnum((unsigned char) c) || c == '_';
}

/* Try the fixed-spelling tokens (keywords, comparators, punctuation) in priority order. */
static bool scan_fixed_token(Scanner *scanner, Scanner_Result *result) {
    static const struct {
        const char *text;
        bool keyword; /* require a word boundary after the match */
        Scanner_Token token;
    } fixed[] = {
        {"select", true,  K_SELECT},
        {"from",   true,  K_FROM},
        {"insert", true,  K_INSERT},
        {"into",   true,  K_INTO},
        {"set",    true,  K_SET},
        {"update", true,  K_UPDATE},
        {"where",  true,  K_WHERE},
        {"delete", true,  K_DELETE},
        {"create", true,  K_CREATE},
        {"table",  true,  K_TABLE},
        {"drop",   true,  K_DROP},
        {"string", true,  K_STRING},
        {"int",    true,  K_INT},
        {"index",  true,  K_INDEX},
        {"=",      false, T_COMP_EQ},
        {"<>",     false, T_COMP_NEQ},
        {"and",    true,  T_COMP_AND},
        {",",      false, T_COMMA},
        {";",      false, T_SEMICOLON},
    };

    for (size_t i = 0; i < sizeof fixed / sizeof fixed[0]; i++) {
        /* Stop at the first match so exactly one token is consumed per call. */
        if (scanner_match(scanner, fixed[i].text, fixed[i].keyword)) {
            result->token = fixed[i].token;
            return true;
        }
    }
    return false;
}

/* Read a run of decimal digits into result->value_int, rejecting values that overflow uint64_t. */
static bool scan_number(Scanner *scanner, Scanner_Result *result) {
    uint64_t value = 0;

    while (isdigit((unsigned char) scanner_peek_char(scanner))) {
        uint64_t digit = (uint64_t) (scanner_next_char(scanner) - '0');
        /* value * 10 + digit would exceed UINT64_MAX */
        if (value > (UINT64_MAX - digit) / 10) {
            scanner_set_error(scanner, "Number exceeded allowed length");
            return false;
        }
        value = value * 10 + digit;
    }

    result->token = T_NUMBER;
    result->value_int = value;
    return true;
}

/* Read an identifier ([A-Za-z_][A-Za-z0-9_]*) into a freshly allocated result->value_str. */
static bool scan_identifier(Scanner *scanner, Scanner_Result *result) {
    const size_t start = scanner->pos;

    /* The caller has already verified that the first character is valid. */
    do {
        scanner_next_char(scanner);
    } while (scan_is_ident_char(scanner_peek_char(scanner)));

    const size_t length = scanner->pos - start;
    char *ident = malloc(length + 1);
    if (ident == NULL) {
        scanner_set_error(scanner, "Out of memory");
        return false;
    }
    memcpy(ident, scanner->input + start, length);
    ident[length] = 0;

    result->token = T_IDENTIFIER;
    result->value_str = ident;
    return true;
}

/**
 * Scan the next token from the scanner's input.
 *
 * @param scanner scanner to read from.
 * @param result  result object to recycle, or NULL to have one allocated.
 * @return the filled-in result, or NULL when the input is exhausted
 *         (state SCANSTATE_DONE) or a scan error occurred (state
 *         SCANSTATE_ERROR). Whenever NULL is returned the result object has
 *         already been freed, so the caller must not touch it again.
 */
Scanner_Result *scanner_next_token(Scanner *scanner, Scanner_Result *result) {
    if (scanner->state == SCANSTATE_DONE || scanner->state == SCANSTATE_ERROR) {
        if (result != NULL) {
            free_scanner_result(result);
        }
        return NULL;
    }

    if (result == NULL) {
        result = new_scanner_result();
        if (result == NULL) {
            scanner_set_error(scanner, "Out of memory");
            return NULL;
        }
    } else {
        clear_scanner_result(result);
    }

    /* Consume white space, keeping the line/column bookkeeping up to date. */
    while (isspace((unsigned char) scanner_peek_char(scanner))) {
        if (scanner_next_char(scanner) == '\n') {
            scanner->lineNo++;
            scanner->linePos = 1;
        }
    }

    const char first = scanner_peek_char(scanner);
    bool ok;

    if (first == 0) {
        /* End of input. */
        scanner->state = SCANSTATE_DONE;
        ok = false;
    } else if (scan_fixed_token(scanner, result)) {
        ok = true;
    } else if (isdigit((unsigned char) first)) {
        ok = scan_number(scanner, result);
    } else if (first == '"' || first == '\'') {
        /* The opening quote is consumed here and selects the closing quote. */
        ok = scanner_read_string(scanner, result, scanner_next_char(scanner));
    } else if (isalpha((unsigned char) first) || first == '_') {
        ok = scan_identifier(scanner, result);
    } else {
        /* Nothing matched. */
        scanner_set_error(scanner, "Unknown text");
        ok = false;
    }

    if (!ok) {
        free_scanner_result(result);
        return NULL;
    }
    return result;
}
/**
 * Read a quoted string literal; the opening quote must already be consumed.
 *
 * Recognised escapes are \n, \r, \t, \\, \' and \". Any other escape sequence
 * is dropped from the resulting string.
 *
 * @param scanner   scanner positioned just after the opening quote.
 * @param result    receives token T_STRING and a heap-allocated value_str.
 * @param quoteType the quote character that terminates the string.
 * @return true on success; false after reporting an error through
 *         scanner_set_error() (unterminated string or allocation failure).
 */
bool scanner_read_string(Scanner *scanner, Scanner_Result *result, char quoteType) {
    size_t capacity = 8;
    size_t length = 0;
    char *str = calloc(capacity, sizeof(char));
    if (str == NULL) {
        scanner_set_error(scanner, "Out of memory");
        return false;
    }

    char next;
    while ((next = scanner_next_char(scanner)) != 0) {
        bool from_escape = false;

        if (next == '\\') {
            char escaped = scanner_next_char(scanner);
            if (escaped == 0) {
                /* End of input in the middle of an escape. */
                break;
            }
            switch (escaped) {
                case 'n':
                    next = '\n';
                    break;
                case 'r':
                    next = '\r';
                    break;
                case 't':
                    next = '\t';
                    break;
                case '\\':
                case '\'':
                case '"':
                    next = escaped;
                    break;
                default:
                    /* Ignore unknown escape sequence. */
                    continue;
            }
            from_escape = true;
        } else if (next == quoteType) {
            /* End of string: hand the buffer over to the result. */
            result->token = T_STRING;
            result->value_str = str;
            return true;
        }

        /* Only a literal newline in the input starts a new source line. */
        if (next == '\n' && !from_escape) {
            scanner->lineNo++;
            scanner->linePos = 1;
        }

        /* Keep room for this character plus the terminating NUL. */
        if (length + 2 > capacity) {
            size_t grown = capacity * 2;
            char *tmp = realloc(str, grown);
            if (tmp == NULL) {
                free(str);
                scanner_set_error(scanner, "Out of memory");
                return false;
            }
            str = tmp;
            capacity = grown;
        }
        str[length++] = next;
        str[length] = 0;
    }

    /* Reached end of input without a closing quote. */
    free(str);
    scanner_set_error(scanner, "Unterminated String");
    return false;
}
/**
 * Return the character at the current position and step past it.
 *
 * At the end of the input (NUL terminator) the terminator is returned and
 * neither the position nor the column counter is advanced.
 */
char scanner_next_char(Scanner *scanner) {
    const char current = scanner->input[scanner->pos];
    if (current == 0) {
        return current;
    }
    scanner->pos += 1;
    scanner->linePos += 1;
    return current;
}
@@ -74,21 +265,42 @@ char scanner_peek_char(Scanner *scanner) {
return scanner->input[scanner->pos]; return scanner->input[scanner->pos];
} }
/**
 * Case-insensitively match `text` at the scanner's current position.
 *
 * On success the matched text is consumed (pos and linePos advance by its
 * length); on failure the scanner is left untouched.
 *
 * @param scanner scanner to match against.
 * @param text    NUL-terminated text to look for.
 * @param keyword when true, the match must be followed by a character that
 *                cannot continue an identifier (letter, digit or '_'), so
 *                "hello" does not match the start of "helloworld" or
 *                "hello_world".
 * @return true if `text` matched and was consumed.
 */
bool scanner_match(Scanner *scanner, const char *text, bool keyword) {
    const size_t length = strlen(text);
    const char *candidate = scanner->input + scanner->pos;

    for (size_t i = 0; i < length; i++) {
        /* <ctype.h> functions require values representable as unsigned char. */
        const unsigned char have = (unsigned char) candidate[i];
        const unsigned char want = (unsigned char) text[i];
        /* Checking for NUL first keeps us from reading past the end of input. */
        if (have == 0 || tolower(have) != tolower(want)) {
            return false;
        }
    }

    if (keyword) {
        const unsigned char following = (unsigned char) candidate[length];
        if (isalnum(following) || following == '_') {
            return false;
        }
    }

    /* Consume text from scanner input. */
    scanner->pos += length;
    scanner->linePos += length;
    return true;
}
/**
 * Put the scanner into the error state and record a message for the caller.
 *
 * The message has the form "Error [line:column]: text", replaces any previous
 * message and is also written to stdout. If the message buffer cannot be
 * allocated, errMsg is left NULL and only the state is updated.
 *
 * @param scanner scanner that hit the error.
 * @param errText short description of the problem.
 */
void scanner_set_error(Scanner *scanner, const char *errText) {
    enum { ERR_MSG_SIZE = 128 };

    char *errMsg = malloc(ERR_MSG_SIZE);
    if (errMsg != NULL) {
        /* lineNo/linePos are size_t; cast so the specifier matches the argument. */
        snprintf(errMsg, ERR_MSG_SIZE, "Error [%lu:%lu]: %s",
                 (unsigned long) scanner->lineNo,
                 (unsigned long) scanner->linePos,
                 errText);
    }

    free(scanner->errMsg);
    scanner->errMsg = errMsg;
    scanner->state = SCANSTATE_ERROR;

    if (errMsg != NULL) {
        /* Never use the message itself as the format string. */
        printf("%s", errMsg);
    }
}

View File

@@ -6,6 +6,7 @@
#define SDB_SCANNER_H #define SDB_SCANNER_H
#include <stdbool.h> #include <stdbool.h>
#include <stdint.h>
enum Scanner_Token_t { enum Scanner_Token_t {
T_NONE, T_NONE,
@@ -45,22 +46,28 @@ typedef enum Scanner_Token_t Scanner_Token;
struct Scanner_Result_t { struct Scanner_Result_t {
Scanner_Token token; Scanner_Token token;
char *value_str; char *value_str;
int value_int; uint64_t value_int;
}; };
typedef struct Scanner_Result_t Scanner_Result; typedef struct Scanner_Result_t Scanner_Result;
Scanner_Result *new_scanner_result(); Scanner_Result *new_scanner_result();
void clear_scanner_result(Scanner_Result *result);
void free_scanner_result(Scanner_Result *result); void free_scanner_result(Scanner_Result *result);
enum Scanner_State_t { enum Scanner_State_t {
SCANSTATE_START, SCANSTATE_START,
SCANSTATE_ERROR,
SCANSTATE_DONE SCANSTATE_DONE
}; };
typedef enum Scanner_State_t Scanner_State; typedef enum Scanner_State_t Scanner_State;
struct Scanner_t { struct Scanner_t {
char *input; char *input;
char *errMsg;
size_t lineNo;
size_t linePos;
size_t pos; size_t pos;
Scanner_State state; Scanner_State state;
}; };
@@ -74,10 +81,14 @@ void reuse_scanner(Scanner *scanner, char *input);
Scanner_Result *scanner_next_token(Scanner *scanner, Scanner_Result *result); Scanner_Result *scanner_next_token(Scanner *scanner, Scanner_Result *result);
bool scanner_read_string(Scanner *scanner, Scanner_Result *result, char quoteType);
char scanner_next_char(Scanner *scanner); char scanner_next_char(Scanner *scanner);
char scanner_peek_char(Scanner *scanner); char scanner_peek_char(Scanner *scanner);
bool scanner_match(Scanner *scanner, const char *text); bool scanner_match(Scanner *scanner, const char *text, bool keyword);
void scanner_set_error(Scanner *scanner, const char *errText);
#endif //SDB_SCANNER_H #endif //SDB_SCANNER_H