Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <stdio.h>
- #include <stdlib.h>
- #include <stdbool.h>
- #include <string.h>
- #include <ctype.h>
- #define INITIAL_TOKEN_ARRAY_SIZE 1024
- #define MAX_IDENTIFIER_SIZE 1024
- #define MAX_TOKENTYPE_NAME_SIZE 32
// Every kind of lexical token this scanner can produce.
// The constants are contiguous and start at 0 (relied upon by token_name's
// lookup). NOTE(review): TOKEN_STRING and TOKEN_NUMBER are declared here but
// lexer_next_token does not yet emit TOKEN_STRING — confirm intent.
typedef enum TokenType {
    // Single-character tokens.
    TOKEN_LEFT_PAREN,
    TOKEN_RIGHT_PAREN,
    TOKEN_LEFT_BRACE,
    TOKEN_RIGHT_BRACE,
    TOKEN_COMMA,
    TOKEN_DOT,
    TOKEN_MINUS,
    TOKEN_PLUS,
    TOKEN_SEMICOLON,
    TOKEN_SLASH,
    TOKEN_STAR,
    // One or two character tokens.
    TOKEN_BANG,
    TOKEN_BANG_EQUAL,
    TOKEN_EQUAL,
    TOKEN_EQUAL_EQUAL,
    TOKEN_GREATER,
    TOKEN_GREATER_EQUAL,
    TOKEN_LESS,
    TOKEN_LESS_EQUAL,
    // Literals.
    TOKEN_IDENTIFIER,
    TOKEN_STRING,
    TOKEN_NUMBER,
    // Keywords.
    TOKEN_AND,
    TOKEN_CLASS,
    TOKEN_ELSE,
    TOKEN_FALSE,
    TOKEN_FUN,
    TOKEN_FOR,
    TOKEN_IF,
    TOKEN_NIL,
    TOKEN_OR,
    TOKEN_PRINT,
    TOKEN_RETURN,
    TOKEN_SUPER,
    TOKEN_THIS,
    TOKEN_TRUE,
    TOKEN_VAR,
    TOKEN_WHILE,
    // End of input marker; always the last token appended.
    TOKEN_EOF
} TokenType;
// One scanned token. Stored by value in the Lexer's token array, so copying
// a Token copies the whole fixed-size lexeme buffer.
typedef struct Token {
    TokenType type;                      // which kind of token this is
    char lexeme[MAX_IDENTIFIER_SIZE];    // NUL-terminated source text of the token
} Token;
// --- Token API ---
Token token_create(TokenType, const char*);  // build a token by value, copying the lexeme
void token_free(Token*);                     // free a heap-allocated Token only
void token_name(TokenType, char*);           // write the enum constant's name into a caller buffer
void print_token(const Token);               // print "NAME" or "NAME: lexeme" to stdout
- Token token_create(TokenType type, const char* lexeme) {
- Token token;
- token.type = type;
- strncpy(token.lexeme, lexeme, MAX_IDENTIFIER_SIZE);
- return token;
- }
// Release a Token that was individually allocated with malloc.
// NOTE(review): tokens inside Lexer.tokens are stored by value and are freed
// as one array by lexer_free — never pass those here. This function appears
// to be unused in this file; confirm it is part of the public API.
void token_free(Token* token) {
    free(token);
}
- void token_name(TokenType type, char* buffer) {
- switch (type) {
- case TOKEN_LEFT_PAREN: strcpy(buffer, "TOKEN_LEFT_PAREN"); break;
- case TOKEN_RIGHT_PAREN: strcpy(buffer, "TOKEN_RIGHT_PAREN"); break;
- case TOKEN_LEFT_BRACE: strcpy(buffer, "TOKEN_LEFT_BRACE"); break;
- case TOKEN_RIGHT_BRACE: strcpy(buffer, "TOKEN_RIGHT_BRACE"); break;
- case TOKEN_COMMA: strcpy(buffer, "TOKEN_COMMA"); break;
- case TOKEN_DOT: strcpy(buffer, "TOKEN_DOT"); break;
- case TOKEN_MINUS: strcpy(buffer, "TOKEN_MINUS"); break;
- case TOKEN_PLUS: strcpy(buffer, "TOKEN_PLUS"); break;
- case TOKEN_SEMICOLON: strcpy(buffer, "TOKEN_SEMICOLON"); break;
- case TOKEN_SLASH: strcpy(buffer, "TOKEN_SLASH"); break;
- case TOKEN_STAR: strcpy(buffer, "TOKEN_STAR"); break;
- case TOKEN_BANG: strcpy(buffer, "TOKEN_BANG"); break;
- case TOKEN_BANG_EQUAL: strcpy(buffer, "TOKEN_BANG_EQUAL"); break;
- case TOKEN_EQUAL: strcpy(buffer, "TOKEN_EQUAL"); break;
- case TOKEN_EQUAL_EQUAL: strcpy(buffer, "TOKEN_EQUAL_EQUAL"); break;
- case TOKEN_GREATER: strcpy(buffer, "TOKEN_GREATER"); break;
- case TOKEN_GREATER_EQUAL: strcpy(buffer, "TOKEN_GREATER_EQUAL"); break;
- case TOKEN_LESS: strcpy(buffer, "TOKEN_LESS"); break;
- case TOKEN_LESS_EQUAL: strcpy(buffer, "TOKEN_LESS_EQUAL"); break;
- case TOKEN_IDENTIFIER: strcpy(buffer, "TOKEN_IDENTIFIER"); break;
- case TOKEN_STRING: strcpy(buffer, "TOKEN_STRING"); break;
- case TOKEN_NUMBER: strcpy(buffer, "TOKEN_NUMBER"); break;
- case TOKEN_AND: strcpy(buffer, "TOKEN_AND"); break;
- case TOKEN_CLASS: strcpy(buffer, "TOKEN_CLASS"); break;
- case TOKEN_ELSE: strcpy(buffer, "TOKEN_ELSE"); break;
- case TOKEN_FALSE: strcpy(buffer, "TOKEN_FALSE"); break;
- case TOKEN_FUN: strcpy(buffer, "TOKEN_FUN"); break;
- case TOKEN_FOR: strcpy(buffer, "TOKEN_FOR"); break;
- case TOKEN_IF: strcpy(buffer, "TOKEN_IF"); break;
- case TOKEN_NIL: strcpy(buffer, "TOKEN_NIL"); break;
- case TOKEN_OR: strcpy(buffer, "TOKEN_OR"); break;
- case TOKEN_PRINT: strcpy(buffer, "TOKEN_PRINT"); break;
- case TOKEN_RETURN: strcpy(buffer, "TOKEN_RETURN"); break;
- case TOKEN_SUPER: strcpy(buffer, "TOKEN_SUPER"); break;
- case TOKEN_THIS: strcpy(buffer, "TOKEN_THIS"); break;
- case TOKEN_TRUE: strcpy(buffer, "TOKEN_TRUE"); break;
- case TOKEN_VAR: strcpy(buffer, "TOKEN_VAR"); break;
- case TOKEN_WHILE: strcpy(buffer, "TOKEN_WHILE"); break;
- case TOKEN_EOF: strcpy(buffer, "TOKEN_EOF"); break;
- }
- }
- void print_token(const Token token) {
- char name[MAX_TOKENTYPE_NAME_SIZE];
- token_name(token.type, name);
- if (token.type == TOKEN_EOF) {
- printf("%s", name);
- }
- else {
- printf("%s: %s", name, token.lexeme);
- }
- }
// Scanner state plus the growable array of tokens it produces.
typedef struct Lexer {
    const char* source;     // whole source text; owned by the lexer (freed in lexer_free)
    size_t start;           // index of the first character of the token being scanned
    size_t pos;             // index of the next unconsumed character
    char current;           // most recently consumed character
    size_t token_capacity;  // allocated slots in `tokens`
    Token* tokens;          // heap array of scanned tokens, by value
    size_t token_count;     // number of slots in use
} Lexer;
// --- Lexer API ---
Lexer* lexer_init(const char*);              // allocate a lexer; takes ownership of the source string
void lexer_free(Lexer*);                     // free tokens, source text, and the lexer itself
bool lexer_is_at_end(Lexer*);                // true once all input is consumed
void lexer_advance(Lexer*);                  // consume one character into `current`
char lexer_peek(Lexer*);                     // next character without consuming it
char lexer_peek_next(Lexer*);                // character after the next one
bool lexer_match(Lexer*, char);              // consume the next char iff it equals the argument
bool lexer_advance_identifier(Lexer*);       // scan an identifier; false if it exceeds the size limit
bool lexer_append_token(Lexer*, Token);      // append a token, growing the array; false on OOM
void lexer_token_create(Lexer*, TokenType);  // build a token from source[start, pos) and append it
void lexer_next_token(Lexer* lexer);         // scan and append the next token (or skip whitespace)
- Lexer* lexer_init(const char* source) {
- Lexer* lexer = malloc(sizeof(Lexer));
- lexer->source = source;
- lexer->start = 0;
- lexer->pos = 0;
- lexer->current = '\0';
- lexer->token_capacity = INITIAL_TOKEN_ARRAY_SIZE;
- lexer->tokens = malloc(lexer->token_capacity * sizeof(Token));
- lexer->token_count = 0;
- return lexer;
- }
// Free everything the lexer owns: the source text, the token array, and the
// Lexer struct itself. The const cast is deliberate — `source` is a heap
// buffer from slurp_file that the lexer took ownership of in lexer_init.
// NOTE(review): passing a string literal or stack buffer to lexer_init would
// make this free() invalid — the ownership contract must hold.
void lexer_free(Lexer* lexer) {
    free((char*) lexer->source);
    free(lexer->tokens);
    free(lexer);
}
// True once every character of the source has been consumed.
// NOTE(review): strlen re-walks the whole source on every call, making the
// overall scan O(n^2); caching the length in the Lexer struct would fix this
// but requires a struct-layout change, so it is only flagged here.
bool lexer_is_at_end(Lexer* lexer) {
    return lexer->pos >= strlen(lexer->source);
}
- void lexer_advance(Lexer* lexer) {
- lexer->current = lexer->source[lexer->pos];
- lexer->pos++;
- }
- char lexer_peek(Lexer* lexer) {
- return lexer_is_at_end(lexer) ? EOF : lexer->source[lexer->pos];
- }
- char lexer_peek_next(Lexer* lexer) {
- return lexer->pos + 1 >= strlen(lexer->source) ? EOF : lexer->source[lexer->pos + 1];
- }
- bool lexer_match(Lexer* lexer, char c) {
- if (lexer_peek(lexer) == c) {
- lexer_advance(lexer);
- return true;
- }
- return false;
- }
- bool lexer_advance_identifier(Lexer* lexer) {
- lexer->start = (lexer->pos - 1);
- size_t size = 0;
- while (isalnum(lexer_peek(lexer)) || lexer_peek(lexer) == '_') {
- size++;
- if (size >= MAX_IDENTIFIER_SIZE) { // >= Because we need an extra space for the '\0'.
- return false;
- }
- lexer_advance(lexer);
- }
- lexer_token_create(lexer, TOKEN_IDENTIFIER);
- return true;
- }
- bool lexer_append_token(Lexer* lexer, Token token) {
- if (lexer->token_count >= lexer->token_capacity) {
- lexer->token_capacity += INITIAL_TOKEN_ARRAY_SIZE;
- Token* new_array = realloc(lexer->tokens, lexer->token_capacity * sizeof(Token));
- if (new_array == NULL) {
- return false;
- }
- lexer->tokens = new_array;
- }
- lexer->tokens[lexer->token_count] = token;
- lexer->token_count++;
- return true;
- }
- void lexer_token_create(Lexer* lexer, TokenType type) {
- const size_t lexeme_size = lexer->pos - lexer->start;
- char lexeme[lexeme_size + 1];
- for (size_t i = 0; i < lexeme_size; i++) {
- lexeme[i] = lexer->source[lexer->start + i];
- }
- lexeme[lexeme_size] = '\0';
- Token token = token_create(type, lexeme);
- bool result = lexer_append_token(lexer, token);
- if (!result) {
- fprintf(stderr, "Error adding Token to Token list.\n");
- exit(EXIT_FAILURE);
- }
- }
- void lexer_next_token(Lexer* lexer) {
- lexer->start = lexer->pos;
- lexer_advance(lexer);
- const char current = lexer->current;
- if (current == '+') lexer_token_create(lexer, TOKEN_PLUS);
- else if (current == '-') lexer_token_create(lexer, TOKEN_MINUS);
- else if (current == '*') lexer_token_create(lexer, TOKEN_STAR);
- else if (current == '.') lexer_token_create(lexer, TOKEN_DOT);
- else if (current == ',') lexer_token_create(lexer, TOKEN_COMMA);
- else if (current == ';') lexer_token_create(lexer, TOKEN_SEMICOLON);
- else if (current == '(') lexer_token_create(lexer, TOKEN_LEFT_PAREN);
- else if (current == ')') lexer_token_create(lexer, TOKEN_RIGHT_PAREN);
- else if (current == '{') lexer_token_create(lexer, TOKEN_LEFT_BRACE);
- else if (current == '}') lexer_token_create(lexer, TOKEN_RIGHT_BRACE);
- else if (current == '=') {
- if (lexer_match(lexer, '=')) lexer_token_create(lexer, TOKEN_EQUAL_EQUAL);
- else lexer_token_create(lexer, TOKEN_EQUAL);
- }
- else if (current == '!') {
- if (lexer_match(lexer, '=')) lexer_token_create(lexer, TOKEN_BANG_EQUAL);
- else lexer_token_create(lexer, TOKEN_BANG);
- }
- else if (current == '>') {
- if (lexer_match(lexer, '=')) lexer_token_create(lexer, TOKEN_GREATER_EQUAL);
- else lexer_token_create(lexer, TOKEN_GREATER);
- }
- else if (current == '<') {
- if (lexer_match(lexer, '=')) lexer_token_create(lexer, TOKEN_LESS_EQUAL);
- else lexer_token_create(lexer, TOKEN_LESS);
- }
- else if (isalpha(current)) {
- const bool result = lexer_advance_identifier(lexer);
- if (!result) {
- fprintf(stderr, "Error trying to read an identifier larger than maximum size. (%d)", MAX_IDENTIFIER_SIZE);
- exit(EXIT_FAILURE);
- }
- }
- else if (isspace(current)) {
- while (isspace(lexer_peek(lexer))) {
- lexer_advance(lexer);
- }
- }
- else {
- fprintf(stderr, "Unknown char '%c' in source file.\n", current);
- exit(EXIT_FAILURE);
- }
- }
// --- File helpers ---
size_t file_size(FILE* file);   // byte size of an open stream; rewinds it
FILE* get_file(const char*);    // fopen wrapper; NULL on failure
char* slurp_file(const char*);  // read a whole file into a malloc'd buffer
/*
 * Return the size of `file` in bytes, leaving the stream rewound to the
 * beginning. Returns 0 if the seek/tell fails (0 is also the legitimate
 * size of an empty file, so "nothing to read" handling stays correct).
 * Fix: fseek and ftell results were unchecked; ftell's -1L error value
 * silently converted to a huge size_t, which would have driven a huge
 * allocation in the caller.
 */
size_t file_size(FILE* file) {
    if (fseek(file, 0, SEEK_END) != 0) {
        return 0;
    }
    const long size = ftell(file);  // -1L on failure
    rewind(file);
    return size < 0 ? 0 : (size_t) size;
}
/*
 * Open `path` for reading. Returns the open stream, or NULL when the file
 * cannot be opened — exactly fopen's own contract, so this simply forwards.
 */
FILE* get_file(const char* path) {
    return fopen(path, "r");
}
/*
 * Read the entire file at `path` into a freshly malloc'd, NUL-terminated
 * string. Returns NULL (with a message on stderr) when the file cannot be
 * opened or memory cannot be allocated. The caller owns the returned
 * buffer and must free() it.
 * Fixes:
 *  - the buffer was NEVER NUL-terminated, yet callers run strlen on it —
 *    a heap over-read (undefined behavior); we allocate size + 1 and
 *    terminate at the number of bytes actually read;
 *  - fread's return value was ignored (short reads left garbage);
 *  - the file is opened in "rb" so the byte count from ftell matches what
 *    fread delivers on platforms with text-mode translation;
 *  - the sizing logic is inlined (with error checks) so a failed
 *    fseek/ftell can be reported instead of silently mis-sizing.
 */
char* slurp_file(const char* path) {
    FILE* file = fopen(path, "rb");
    if (file == NULL) {
        fprintf(stderr, "Couldn't open file '%s'.\n", path);
        return NULL;
    }
    long size = -1;
    if (fseek(file, 0, SEEK_END) == 0) {
        size = ftell(file);
    }
    rewind(file);
    if (size < 0) {
        fprintf(stderr, "Couldn't determine size of file '%s'.\n", path);
        fclose(file);
        return NULL;
    }
    char* buffer = malloc((size_t) size + 1);  // +1 for the terminator
    if (buffer == NULL) {
        fprintf(stderr, "Couldn't allocate memory for file source.\n");
        fclose(file);
        return NULL;
    }
    const size_t read = fread(buffer, 1, (size_t) size, file);
    buffer[read] = '\0';  // terminate at what was actually read
    fclose(file);
    return buffer;
}
- int main(int argc, char** argv) {
- const char* source = slurp_file("code.txt");
- if (source == NULL) {
- return EXIT_FAILURE;
- }
- Lexer* lexer = lexer_init(source);
- while (!lexer_is_at_end(lexer)) {
- lexer_next_token(lexer);
- }
- lexer_token_create(lexer, TOKEN_EOF);
- for (size_t i = 0; i < lexer->token_count; i++) {
- print_token(lexer->tokens[i]);
- putchar('\n');
- }
- lexer_free(lexer);
- return EXIT_SUCCESS;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement