Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- %{
- #include <string.h>
- #include <stdlib.h>
- #include <stdio.h>
- #define YY_DECL int alpha_yylex(void* yylval)
/*
 * One scanned token, kept as a node of a singly linked list.
 * insert_token() stores a private strdup() copy in every string field.
 */
typedef struct alpha_token_t {
    unsigned int numline;        /* line number recorded for the token */
    unsigned int numToken;       /* sequence number of the token */
    char *content;               /* token text */
    char *type;                  /* category label such as "KEYWORD" or "ID" */
    char *macro;                 /* symbolic name or value printed after the type */
    char *wtf;                   /* tag printed after "<-", e.g. "enumerated" */
    struct alpha_token_t *next;  /* following node, NULL for the last one */
} alpha_token_t;
- char * buffered_string = NULL;
- char * old_buffered_string = NULL;
- int nested_comment = 0;
- int * start_comments;
- int array_index = 0;
- int yytokenno = 0;
- int lineno = 1;
- int illegal_char_flag = 0;
- alpha_token_t *head = NULL;
- alpha_token_t *tail = NULL;
- void insert_token(unsigned int numline, unsigned int numToken, char* content, char* type, char* macro, char* wtf){
- alpha_token_t *new = malloc(sizeof(alpha_token_t));
- new->numline = numline;
- new->numToken = numToken;
- new->content = strdup(content);
- new->type = strdup(type);
- new->macro = strdup(macro);
- new->wtf = strdup(wtf);
- new->next = NULL;
- if(head == NULL){
- head = new;
- tail = new;
- head->next = NULL;
- return;
- }
- tail->next = new;
- tail = new;
- }
- void print_tokens(){
- alpha_token_t *tmp = head;
- while(tmp != NULL){
- if(strcmp(tmp->type,"STRING") == 0 || strcmp(tmp->type,"ID") == 0){
- printf("%d: #%d \"%s\" %s \"%s\" <-%s\n", tmp->numline,tmp->numToken,tmp->content,tmp->type,tmp->macro,tmp->wtf);
- }else{
- printf("%d: #%d \"%s\" %s %s <-%s\n", tmp->numline,tmp->numToken,tmp->content,tmp->type,tmp->macro,tmp->wtf);
- }
- tmp = tmp->next;
- }
- }
- %}
/* Scanner options: emit ./scanner.h, do not call yywrap() at end of input, keep yylineno. */
%option header-file="./scanner.h"
%option noyywrap
%option yylineno

/* Exclusive start conditions: inside a string literal, inside a block comment. */
%x string
%x blockcom

/* Keywords. */
if          "if"
else        "else"
while       "while"
for         "for"
function    "function"
return      "return"
break       "break"
continue    "continue"
and         "and"
not         "not"
or          "or"
local       "local"
true        "true"
false       "false"
nil         "nil"

/* Operators. */
ass         "="
add         "+"
sub         "-"
mul         "*"
div         "/"
mod         "%"
eq          "=="
neq         "!="
inc         "++"
dec         "--"
gr          ">"
less        "<"
greq        ">="
leq         "<="

/* Numeric constants. */
integer     [0-9]+
real        [0-9]+\.[0-9]+

/* Punctuation. */
lcurbr      "{"
rcurbr      "}"
lbr         "["
rbr         "]"
lpar        "("
rpar        ")"
semic       ";"
comma       ","
colon       ":"
dcolon      "::"
dot         "."
ddot        ".."

/* Identifiers and line comments. */
id          [a-zA-Z][a-zA-Z_0-9]*
linecom     "//".*

/* Blank, tab and carriage return only; an apostrophe is not whitespace, and at least one character must match. */
whitespace  [ \t\r]+

/* Exactly the line feed; quote characters inside a character class are literal. */
newline     \n

/* Catch-all for one otherwise unmatched character. It must stay a single character, or it out-matches real tokens such as "(" followed by an identifier. */
OTHER       .
- %%
{if}        { /* Keywords: each is recorded with its text, the type "KEYWORD" and an
                 upper-case macro name. All of them use the same line counter
                 (lineno) so the numbering is consistent across rules. */
              insert_token(lineno,++yytokenno,yytext,"KEYWORD","IF","enumerated"); }
{else}      { insert_token(lineno,++yytokenno,yytext,"KEYWORD","ELSE","enumerated"); }
{while}     { insert_token(lineno,++yytokenno,yytext,"KEYWORD","WHILE","enumerated"); }
{for}       { insert_token(lineno,++yytokenno,yytext,"KEYWORD","FOR","enumerated"); }
{function}  { insert_token(lineno,++yytokenno,yytext,"KEYWORD","FUNCTION","enumerated"); }
{return}    { insert_token(lineno,++yytokenno,yytext,"KEYWORD","RETURN","enumerated"); }
{break}     { insert_token(lineno,++yytokenno,yytext,"KEYWORD","BREAK","enumerated"); }
{continue}  { insert_token(lineno,++yytokenno,yytext,"KEYWORD","CONTINUE","enumerated"); }
{and}       { insert_token(lineno,++yytokenno,yytext,"KEYWORD","AND","enumerated"); }
{not}       { insert_token(lineno,++yytokenno,yytext,"KEYWORD","NOT","enumerated"); }
{or}        { insert_token(lineno,++yytokenno,yytext,"KEYWORD","OR","enumerated"); }
{local}     { insert_token(lineno,++yytokenno,yytext,"KEYWORD","LOCAL","enumerated"); }
{true}      { insert_token(lineno,++yytokenno,yytext,"KEYWORD","TRUE","enumerated"); }
{false}     { insert_token(lineno,++yytokenno,yytext,"KEYWORD","FALSE","enumerated"); }
{nil}       { insert_token(lineno,++yytokenno,yytext,"KEYWORD","NIL","enumerated"); }
{ass}       { /* Operators: recorded with their text, the type "OPERATOR" and a macro name. */
              insert_token(lineno, ++yytokenno, yytext, "OPERATOR", "ASSIGN", "enumerated"); }
{add}       { insert_token(lineno, ++yytokenno, yytext, "OPERATOR", "PLUS", "enumerated"); }
{sub}       { insert_token(lineno, ++yytokenno, yytext, "OPERATOR", "MINUS", "enumerated"); }
{mul}       { insert_token(lineno, ++yytokenno, yytext, "OPERATOR", "MULTIPLICATION", "enumerated"); }
{div}       { insert_token(lineno, ++yytokenno, yytext, "OPERATOR", "DIVISION", "enumerated"); }
{mod}       { insert_token(lineno, ++yytokenno, yytext, "OPERATOR", "MODULO", "enumerated"); }
{eq}        { insert_token(lineno, ++yytokenno, yytext, "OPERATOR", "EQUAL", "enumerated"); }
{neq}       { insert_token(lineno, ++yytokenno, yytext, "OPERATOR", "NOT_EQUAL", "enumerated"); }
{inc}       { insert_token(lineno, ++yytokenno, yytext, "OPERATOR", "PLUS_PLUS", "enumerated"); }
{dec}       { insert_token(lineno, ++yytokenno, yytext, "OPERATOR", "MINUS_MINUS", "enumerated"); }
{gr}        { insert_token(lineno, ++yytokenno, yytext, "OPERATOR", "GREATER_THAN", "enumerated"); }
{less}      { insert_token(lineno, ++yytokenno, yytext, "OPERATOR", "LESS_THAN", "enumerated"); }
{greq}      { insert_token(lineno, ++yytokenno, yytext, "OPERATOR", "GREATER_EQUAL", "enumerated"); }
{leq}       { insert_token(lineno, ++yytokenno, yytext, "OPERATOR", "LESS_EQUAL", "enumerated"); }
{integer}   { /* Numeric constants: the matched text serves as both content and macro.
                 insert_token() duplicates its string arguments, so yytext is
                 passed directly and no temporary copy is needed. */
              insert_token(lineno,++yytokenno,yytext,"CONST_INT",yytext,"integer"); }
{real}      { insert_token(lineno,++yytokenno,yytext,"REAL_INT",yytext,"real"); }
{lcurbr}    { /* Punctuation: recorded with its text, the type "PUNCTUATION" and a macro name. */
              insert_token(lineno, ++yytokenno, yytext, "PUNCTUATION", "LEFT_CURLY_BRUCKET", "enumerated"); }
{rcurbr}    { insert_token(lineno, ++yytokenno, yytext, "PUNCTUATION", "RIGHT_CURLY_BRUCKET", "enumerated"); }
{lbr}       { insert_token(lineno, ++yytokenno, yytext, "PUNCTUATION", "LEFT_BRUCKET", "enumerated"); }
{rbr}       { insert_token(lineno, ++yytokenno, yytext, "PUNCTUATION", "RIGHT_BRUCKET", "enumerated"); }
{lpar}      { insert_token(lineno, ++yytokenno, yytext, "PUNCTUATION", "LEFT_PARENTHESIS", "enumerated"); }
{rpar}      { insert_token(lineno, ++yytokenno, yytext, "PUNCTUATION", "RIGHT_PARENTHESIS", "enumerated"); }
{semic}     { insert_token(lineno, ++yytokenno, yytext, "PUNCTUATION", "SEMICOLON", "enumerated"); }
{comma}     { insert_token(lineno, ++yytokenno, yytext, "PUNCTUATION", "COMMA", "enumerated"); }
{colon}     { insert_token(lineno, ++yytokenno, yytext, "PUNCTUATION", "COLON", "enumerated"); }
{dcolon}    { insert_token(lineno, ++yytokenno, yytext, "PUNCTUATION", "DOUBLE_COLON", "enumerated"); }
{dot}       { insert_token(lineno, ++yytokenno, yytext, "PUNCTUATION", "DOT", "enumerated"); }
{ddot}      { insert_token(lineno, ++yytokenno, yytext, "PUNCTUATION", "DOUBLE_DOT", "enumerated"); }
{id}        { /* Identifier: the matched text is both content and macro.
                 insert_token() duplicates its string arguments, so yytext is
                 passed directly and no temporary copy is needed. */
              insert_token(lineno,++yytokenno,yytext,"ID",yytext,"char*"); }
{linecom}    { /* A line comment is recorded as a token with empty text. */
               insert_token(lineno, ++yytokenno, "", "LINE_COMMENT", "", "enumerated"); }
{whitespace} { /* skipped */ }
{newline}    { lineno++; }
\" {
    /* Opening quote: discard any leftover buffer and start a fresh literal. */
    free(buffered_string);
    buffered_string = NULL;
    illegal_char_flag = 0;
    BEGIN(string);
}
<string>[^"\\]+ {
    /* Ordinary text (newlines included) is appended to the literal as is. */
    size_t used = buffered_string != NULL ? strlen(buffered_string) : 0;
    size_t extra = strlen(yytext);
    char *grown = realloc(buffered_string, used + extra + 1);
    if (grown == NULL) {
        fprintf(stderr, "out of memory while scanning a string\n");
        exit(EXIT_FAILURE);
    }
    memcpy(grown + used, yytext, extra + 1);   /* +1 copies the terminator too */
    buffered_string = grown;
}
<string>\\[nt"\\] {
    /* Recognised escapes: \n, \t, \" and \\ each add one decoded character. */
    char decoded = yytext[1];
    if (decoded == 'n') {
        decoded = '\n';
    } else if (decoded == 't') {
        decoded = '\t';
    }
    size_t used = buffered_string != NULL ? strlen(buffered_string) : 0;
    char *grown = realloc(buffered_string, used + 2);
    if (grown == NULL) {
        fprintf(stderr, "out of memory while scanning a string\n");
        exit(EXIT_FAILURE);
    }
    grown[used] = decoded;
    grown[used + 1] = '\0';
    buffered_string = grown;
}
<string>\\(.|\n)? {
    /* Any other backslash sequence (including backslash-newline and a lone
       trailing backslash) is illegal; the literal is dropped when it closes. */
    printf(" Illegal escape character in string.\n");
    illegal_char_flag = 1;
}
<string>\" {
    /* Closing quote: emit the literal unless an illegal escape was seen.
       lineno is not advanced by the string rules, so the token is recorded
       on the line where the literal opened. */
    if (!illegal_char_flag) {
        char *text = buffered_string != NULL ? buffered_string : "";
        insert_token(lineno,++yytokenno,text,"STRING",text,"char*");
    }
    /* insert_token() keeps its own copies, so the buffer can be released. */
    free(buffered_string);
    buffered_string = NULL;
    illegal_char_flag = 0;
    /* yylineno (maintained by flex through %option yylineno) has counted the
       newlines inside the literal; bring lineno back in step with it. */
    lineno = yylineno;
    BEGIN(INITIAL);
}
<string><<EOF>> {
    /* End of input inside a literal: report it, clean up and stop scanning. */
    printf("unterminated string\n");
    free(buffered_string);
    buffered_string = NULL;
    illegal_char_flag = 0;
    BEGIN(INITIAL);
    yyterminate();
}
"/*" {
    /* Outermost block comment opens: remember the line it started on. */
    int *grown = realloc(start_comments, (size_t)(array_index + 1) * sizeof *grown);
    if (grown == NULL) {
        fprintf(stderr, "out of memory while scanning a comment\n");
        exit(EXIT_FAILURE);
    }
    start_comments = grown;
    start_comments[array_index] = lineno;
    array_index++;
    BEGIN(blockcom);
}
<blockcom>"*/" {
    /* A comment closes: its token text is "<start line> - <end line>". */
    char span[100];
    array_index--;
    snprintf(span, sizeof span, "%d - %d", start_comments[array_index], lineno);
    if(nested_comment == 0){
        /* The outermost comment ended; resume normal scanning. */
        insert_token(lineno, ++yytokenno, span,"COMMENT", "BLOCK_COMMENT", "enumerated");
        BEGIN(INITIAL);
    }else{
        nested_comment--;
        insert_token(lineno, ++yytokenno, span,"COMMENT", "NESTED_COMMENT", "enumerated");
    }
}
<blockcom>"/*" {
    /* A comment opens inside another one: push its starting line as well. */
    int *grown = realloc(start_comments, (size_t)(array_index + 1) * sizeof *grown);
    if (grown == NULL) {
        fprintf(stderr, "out of memory while scanning a comment\n");
        exit(EXIT_FAILURE);
    }
    start_comments = grown;
    start_comments[array_index] = lineno;
    array_index++;
    nested_comment++;
}
<blockcom>"\n" {
    lineno++;
}
<blockcom>^"//" {
    /* A "//" at the start of a line inside a block comment is still recorded. */
    insert_token(lineno, ++yytokenno, "","LINE_COMMENT", "", "enumerated");
}
<blockcom><<EOF>> {
    /* End of input inside a comment: report it and stop scanning. */
    printf("unterminated comments\n");
    BEGIN(INITIAL);
    yyterminate();
}
<blockcom>. {}
{OTHER} { /* Input matched by no earlier rule is reported and otherwise ignored. */
          printf("undefined character %s in line %d\n", yytext, lineno); }
- %%
- int main(int argc, char* argv[]){
- if (argc > 1) {
- if (!(yyin = fopen(argv[1], "r"))) {
- fprintf(stderr, "Cannot open file %s\n", argv[1]);
- return -1;
- }
- if (argc > 2) {
- if (!(yyout = fopen(argv[2], "w"))) {
- fprintf(stderr, "Cannot open file %s\n", argv[1]);
- return -1;
- }
- }
- }
- alpha_yylex(head);
- print_tokens();
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement