Implemented language token scanner.

This commit is contained in:
2018-06-06 22:54:45 +01:00
parent 8335bbefe6
commit 8b7a0fba4e
4 changed files with 277 additions and 18 deletions

View File

@@ -6,10 +6,11 @@
#define SDB_SQL_H
#include <stddef.h>
#include <inttypes.h>
/**
 * A single value: either a C string or an unsigned 64-bit number.
 * The union carries no tag, so the code using it has to know which
 * member is currently valid.
 */
union Value_t {
    char *string;
    uint64_t number;
};
typedef union Value_t Value;

View File

@@ -3,12 +3,13 @@
#include <mem.h>
#include "InputBuffer.h"
#include "scanner.h"
/* Writes the interactive "SDB> " prompt to stdout, without a trailing newline. */
void prompt() {
    fputs("SDB> ", stdout);
}
void readInput(InputBuffer *buffer) {
void read_input(InputBuffer *buffer) {
ssize_t read = getline(&buffer->buffer, &buffer->bufferLength, stdin);
if (read <= 0) {
@@ -21,17 +22,51 @@ void readInput(InputBuffer *buffer) {
buffer->buffer[read - 1] = 0;
}
/**
 * Tokenises one line of input and prints every token that is found.
 * If the scanner stops in the error state, its error message (or a generic
 * fallback) is printed afterwards.
 *
 * @param input NUL-terminated text to scan. It is duplicated, so the caller's
 *              buffer is left untouched.
 */
void parse_input(char *input) {
    char *copy = strdup(input);
    if (copy == NULL) {
        fprintf(stderr, "Out of memory\n");
        return;
    }

    //The scanner is handed the duplicated buffer.
    //NOTE(review): presumably free_scanner() releases it - its body is not visible here, confirm.
    Scanner *scanner = new_scanner(copy);
    Scanner_Result *result = NULL;

    //scanner_next_token() recycles the result object and returns NULL when finished
    while ((result = scanner_next_token(scanner, result)) != NULL) {
        switch (result->token) {
            case T_STRING:
                printf("Found String: %s\n", result->value_str);
                break;
            case T_IDENTIFIER:
                printf("Found Identifier: %s\n", result->value_str);
                break;
            case T_NUMBER:
                //value_int is a uint64_t, so print it as unsigned
                printf("Found Number %llu\n", (unsigned long long) result->value_int);
                break;
            default:
                printf("Found Token: %d\n", (int) result->token);
                break;
        }
    }

    if (scanner->state == SCANSTATE_ERROR) {
        if (scanner->errMsg != NULL) {
            printf("%s\n", scanner->errMsg);
        } else {
            printf("Parse Error!\n");
        }
    }

    free_scanner(scanner);
}
int main() {
#if defined(_WIN32) || defined(WIN32)
setbuf(stdout, 0);
setbuf(stderr, 0);
#endif
InputBuffer *buffer = input_buffer_new();
while (true) {
prompt();
readInput(buffer);
read_input(buffer);
if (strcmpi(buffer->buffer, ".exit") == 0) {
break;
} else {
printf("Unknown Command or Query\n");
parse_input(buffer->buffer);
}
}

View File

@@ -6,14 +6,22 @@
#include <string.h>
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include "scanner.h"
Scanner_Result *new_scanner_result() {
Scanner_Result *result = malloc(sizeof(Scanner_Result));
clear_scanner_result(result);
return result;
}
/**
 * Resets a result so it can be reused for the next token: the token becomes
 * T_NONE, the number 0, and any string held in value_str is freed.
 *
 * @param result Result to reset; value_str must be NULL or a heap pointer.
 */
void clear_scanner_result(Scanner_Result *result) {
    result->token = T_NONE;
    result->value_int = 0;
    free(result->value_str); //free(NULL) is a no-op
    result->value_str = NULL;
}
void free_scanner_result(Scanner_Result *result) {
@@ -27,7 +35,10 @@ Scanner *new_scanner(char *input) {
Scanner *scanner = malloc(sizeof(Scanner));
scanner->input = input;
scanner->pos = 0;
scanner->lineNo = 1;
scanner->linePos = 1;
scanner->state = SCANSTATE_START;
scanner->errMsg = NULL;
return scanner;
}
@@ -39,33 +50,213 @@ void free_scanner(Scanner *scanner) {
}
/**
 * Resets a scanner so it can scan a new input from the beginning.
 * The previous input buffer and any stored error message are freed.
 *
 * @param scanner Scanner to reset.
 * @param input   New heap-allocated input text; the scanner stores this pointer.
 */
void reuse_scanner(Scanner *scanner, char *input) {
    //Do not free the buffer we are about to keep using
    if (scanner->input != input) {
        free(scanner->input);
    }
    scanner->input = input;

    //Release the old error message (not the input buffer)
    free(scanner->errMsg);
    scanner->errMsg = NULL;

    scanner->pos = 0;
    scanner->lineNo = 1;
    scanner->linePos = 1;
    scanner->state = SCANSTATE_START;
}
/**
 * Scans the next token from the scanner's input.
 *
 * @param scanner Scanner holding the input text and the current position.
 * @param result  Result object from a previous call to be reused, or NULL to
 *                have a new one allocated.
 * @return The filled-in result, or NULL when the input is exhausted or an
 *         error occurred. Whenever NULL is returned, the result object has
 *         already been passed to free_scanner_result() and must not be used
 *         again; inspect scanner->state to tell SCANSTATE_DONE from
 *         SCANSTATE_ERROR.
 */
Scanner_Result *scanner_next_token(Scanner *scanner, Scanner_Result *result) {
    //Keywords and comparators, tried in this order; the first match wins so
    //exactly one token is consumed per call
    static const struct {
        const char *text;
        bool keyword;
        Scanner_Token token;
    } fixed_tokens[] = {
        {"select", true,  K_SELECT},
        {"from",   true,  K_FROM},
        {"insert", true,  K_INSERT},
        {"into",   true,  K_INTO},
        {"set",    true,  K_SET},
        {"update", true,  K_UPDATE},
        {"where",  true,  K_WHERE},
        {"delete", true,  K_DELETE},
        {"create", true,  K_CREATE},
        {"table",  true,  K_TABLE},
        {"drop",   true,  K_DROP},
        {"string", true,  K_STRING},
        {"int",    true,  K_INT},
        {"index",  true,  K_INDEX},
        {"=",      false, T_COMP_EQ},
        {"<>",     false, T_COMP_NEQ},
        {"and",    true,  T_COMP_AND},
    };

    if (scanner->state == SCANSTATE_DONE || scanner->state == SCANSTATE_ERROR) {
        if (result != NULL) {
            free_scanner_result(result);
        }
        return NULL;
    }

    if (result == NULL) {
        result = new_scanner_result();
        if (result == NULL) {
            scanner->state = SCANSTATE_ERROR;
            return NULL;
        }
    } else {
        clear_scanner_result(result);
    }

    //Consume white space, keeping the line counters up to date
    //(isspace() is false for the terminating NUL)
    while (isspace((unsigned char) scanner_peek_char(scanner))) {
        if (scanner_next_char(scanner) == '\n') {
            scanner->lineNo++;
            scanner->linePos = 1;
        }
    }

    //Check for end of string
    if (scanner_peek_char(scanner) == 0) {
        scanner->state = SCANSTATE_DONE;
        free_scanner_result(result);
        return NULL;
    }

    //Keywords and comparators
    for (size_t i = 0; i < sizeof fixed_tokens / sizeof fixed_tokens[0]; i++) {
        if (scanner_match(scanner, fixed_tokens[i].text, fixed_tokens[i].keyword)) {
            result->token = fixed_tokens[i].token;
            return result;
        }
    }

    char c = scanner_peek_char(scanner);

    //Punctuation
    if (c == ',' || c == ';') {
        scanner_next_char(scanner);
        result->token = (c == ',') ? T_COMMA : T_SEMICOLON;
        return result;
    }

    //Numbers
    if (isdigit((unsigned char) c)) {
        uint64_t value = 0;
        while (isdigit((unsigned char) scanner_peek_char(scanner))) {
            uint64_t digit = (uint64_t) (scanner_next_char(scanner) - '0');
            //Reject the number before value * 10 + digit would wrap around
            if (value > (UINT64_MAX - digit) / 10) {
                scanner_set_error(scanner, "Number exceeded allowed length");
                free_scanner_result(result);
                return NULL;
            }
            value = value * 10 + digit;
        }
        result->token = T_NUMBER;
        result->value_int = value;
        return result;
    }

    //Strings
    if (c == '"' || c == '\'') {
        //The opening quote is consumed and doubles as the expected closing quote
        if (!scanner_read_string(scanner, result, scanner_next_char(scanner))) {
            free_scanner_result(result);
            return NULL;
        }
        return result;
    }

    //Identifiers
    if (isalpha((unsigned char) c) || c == '_') {
        size_t capacity = 8;
        size_t length = 0;
        char *ident = malloc(capacity);
        if (ident == NULL) {
            scanner_set_error(scanner, "Out of memory");
            free_scanner_result(result);
            return NULL;
        }
        do {
            //Keep room for the next character plus the terminator
            if (length + 1 == capacity) {
                char *grown = realloc(ident, capacity * 2);
                if (grown == NULL) {
                    free(ident);
                    scanner_set_error(scanner, "Out of memory");
                    free_scanner_result(result);
                    return NULL;
                }
                ident = grown;
                capacity *= 2;
            }
            ident[length++] = scanner_next_char(scanner);
        } while (isalnum((unsigned char) scanner_peek_char(scanner))
                 || scanner_peek_char(scanner) == '_');
        ident[length] = 0;

        result->token = T_IDENTIFIER;
        result->value_str = ident; //the result now owns the buffer
        return result;
    }

    //Nothing matched
    scanner_set_error(scanner, "Unknown text");
    free_scanner_result(result);
    return NULL;
}
/**
 * Reads the remainder of a quoted string literal; the opening quote must
 * already have been consumed by the caller.
 *
 * Recognised escapes are \n, \r, \t, \\, \' and \"; any other escape
 * sequence is dropped from the resulting string.
 *
 * @param scanner   Scanner positioned just after the opening quote.
 * @param result    Receives token T_STRING and the heap-allocated text in
 *                  value_str on success.
 * @param quoteType The quote character that terminates the literal.
 * @return true on success; false if the input ends before the closing quote
 *         or memory runs out (the scanner is then in the error state and
 *         result is left unchanged).
 */
bool scanner_read_string(Scanner *scanner, Scanner_Result *result, char quoteType) {
    size_t capacity = 8;
    size_t length = 0;
    char *str = malloc(capacity);
    if (str == NULL) {
        scanner_set_error(scanner, "Out of memory");
        return false;
    }

    char next;
    while ((next = scanner_next_char(scanner)) != 0) {
        if (next == '\n') {
            //A real line break inside the literal
            scanner->lineNo++;
            scanner->linePos = 1;
        }

        if (next == '\\') {
            //Capture escapes
            char escaped = scanner_next_char(scanner);
            if (escaped == 0) {
                //end of input in the middle of an escape
                break;
            }
            if (escaped == '\n') {
                scanner->lineNo++;
                scanner->linePos = 1;
            }
            switch (escaped) {
                case 'n':
                    next = '\n';
                    break;
                case 'r':
                    next = '\r';
                    break;
                case 't':
                    next = '\t';
                    break;
                case '\\':
                    next = '\\';
                    break;
                case '\'':
                    next = '\'';
                    break;
                case '"':
                    next = '"';
                    break;
                default:
                    //ignore unknown escape sequence
                    continue;
            }
        } else if (next == quoteType) {
            //end of string: hand the buffer over to the result
            str[length] = 0;
            result->token = T_STRING;
            result->value_str = str;
            return true;
        }

        //Keep room for this character plus the terminator
        if (length + 1 == capacity) {
            char *grown = realloc(str, capacity * 2);
            if (grown == NULL) {
                free(str);
                scanner_set_error(scanner, "Out of memory");
                return false;
            }
            str = grown;
            capacity *= 2;
        }
        str[length++] = next;
    }

    //reached end of input without a closing quote
    free(str);
    scanner_set_error(scanner, "Unterminated String");
    return false;
}
/**
 * Returns the character at the current position and advances past it.
 * At the end of the input the terminating NUL is returned and the position
 * is left unchanged, so repeated calls keep returning 0.
 */
char scanner_next_char(Scanner *scanner) {
    char next = scanner->input[scanner->pos];
    if (next != '\0') {
        scanner->pos++;
        scanner->linePos++;
    }
    return next;
}
@@ -74,21 +265,42 @@ char scanner_peek_char(Scanner *scanner) {
return scanner->input[scanner->pos];
}
/**
 * Tries to match `text` case-insensitively at the scanner's current position.
 * On a match the text is consumed (pos and linePos advance by its length);
 * otherwise the scanner is left untouched.
 *
 * @param scanner Scanner to match against.
 * @param text    NUL-terminated text to look for.
 * @param keyword When true, the match is rejected if the text is directly
 *                followed by an identifier character (letter, digit or '_'),
 *                so "hello" does not match the start of "helloworld".
 * @return true if the text matched and was consumed.
 */
bool scanner_match(Scanner *scanner, const char *text, bool keyword) {
    size_t end = scanner->pos;
    size_t matched = 0;

    for (; text[matched] != 0; matched++, end++) {
        //ctype functions need unsigned char values
        unsigned char a = (unsigned char) scanner->input[end];
        unsigned char b = (unsigned char) text[matched];
        if (a == 0 || tolower(a) != tolower(b)) {
            return false;
        }
    }

    if (keyword) {
        unsigned char following = (unsigned char) scanner->input[end];
        if (isalnum(following) || following == '_') {
            return false;
        }
    }

    scanner->pos = end; //consume text from scanner input
    scanner->linePos += matched;
    return true;
}
/**
 * Puts the scanner into the error state and stores a message of the form
 * "Error [line:column]: text", replacing any previous message.
 * The message is also written to stdout.
 *
 * If the message cannot be allocated, errMsg is left NULL and only the state
 * is updated.
 *
 * NOTE(review): parse_input() prints errMsg as well, so the text shows up
 * twice on that path - confirm whether the print here is still wanted.
 *
 * @param scanner Scanner to flag.
 * @param errText Description of the problem.
 */
void scanner_set_error(Scanner *scanner, const char *errText) {
    enum { ERR_MSG_SIZE = 128 };

    //Create error msg
    char *errMsg = malloc(ERR_MSG_SIZE);
    if (errMsg != NULL) {
        //lineNo/linePos are size_t; cast so the format specifier matches
        snprintf(errMsg, ERR_MSG_SIZE, "Error [%lu:%lu]: %s",
                 (unsigned long) scanner->lineNo,
                 (unsigned long) scanner->linePos,
                 errText);
    }

    free(scanner->errMsg);
    scanner->errMsg = errMsg;
    scanner->state = SCANSTATE_ERROR;

    if (errMsg != NULL) {
        //Never use the message itself as a format string
        fputs(errMsg, stdout);
    }
}

View File

@@ -6,6 +6,7 @@
#define SDB_SCANNER_H
#include <stdbool.h>
#include <stdint.h>
enum Scanner_Token_t {
T_NONE,
@@ -45,22 +46,28 @@ typedef enum Scanner_Token_t Scanner_Token;
/**
 * One token produced by the scanner.
 */
struct Scanner_Result_t {
    Scanner_Token token;  //Kind of token; T_NONE after clear_scanner_result()
    char *value_str;      //Heap-allocated text for T_STRING / T_IDENTIFIER, otherwise NULL
    uint64_t value_int;   //Numeric value for T_NUMBER, otherwise 0
};
typedef struct Scanner_Result_t Scanner_Result;
//Allocates a result and resets it via clear_scanner_result().
Scanner_Result *new_scanner_result();
//Resets a result for reuse: token T_NONE, value_int 0, value_str freed and set to NULL.
void clear_scanner_result(Scanner_Result *result);
//Releases a result. NOTE(review): presumably also frees value_str - body not visible here, confirm.
void free_scanner_result(Scanner_Result *result);
/**
 * Life-cycle state of a scanner.
 */
typedef enum Scanner_State_t {
    SCANSTATE_START, //Ready to scan; set when a scanner is created or reused
    SCANSTATE_ERROR, //Scanning stopped because of an error
    SCANSTATE_DONE   //The end of the input was reached
} Scanner_State;
//State of a token scanner working through one input string.
struct Scanner_t {
//NUL-terminated text being scanned; freed by reuse_scanner() when replaced
char *input;
//Heap-allocated message set by scanner_set_error(), or NULL when no error is stored
char *errMsg;
//Current line number, starting at 1; used in error messages
size_t lineNo;
//Current position within the line, starting at 1; used in error messages
size_t linePos;
//Index into input of the next character to read
size_t pos;
//SCANSTATE_START while scanning, SCANSTATE_DONE at end of input, SCANSTATE_ERROR after an error
Scanner_State state;
};
@@ -74,10 +81,14 @@ void reuse_scanner(Scanner *scanner, char *input);
//Scans the next token, reusing `result` when it is not NULL.
//Returns NULL at the end of the input or on error.
Scanner_Result *scanner_next_token(Scanner *scanner, Scanner_Result *result);

//Reads a quoted string whose opening quote has already been consumed;
//quoteType is the closing quote to look for. Returns false on error.
bool scanner_read_string(Scanner *scanner, Scanner_Result *result, char quoteType);

//Returns the current character and advances past it (0 at end of input).
char scanner_next_char(Scanner *scanner);

//Returns the current character without consuming it.
char scanner_peek_char(Scanner *scanner);

//Case-insensitively matches and consumes `text`. With keyword set, the match
//is rejected when the text is directly followed by further word characters.
bool scanner_match(Scanner *scanner, const char *text, bool keyword);

//Switches the scanner to SCANSTATE_ERROR and stores a formatted error message.
void scanner_set_error(Scanner *scanner, const char *errText);
#endif //SDB_SCANNER_H