Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import std/[strutils, strformat]
- import token
# Words the lexer treats as reserved rather than as plain identifiers.
const reservedWords = @[
  "if", "else", "then", "do",
  "for", "function", "while", "end",
]
type
  Lexer* = object
    ## Lexer state machine object
    src*: string       ## Source text being scanned.
    prevPos: int       ## Cursor value saved by `scan` before each token;
                       ## recorded as the token's `start`.
    currPos: int       ## Scanning cursor into `src`; recorded as the
                       ## token's `endAt`.
    line: int          ## Incremented by `scan` for every '\n' it reads.
    tokens: seq[Token] ## Tokens collected so far; returned by `scan`.
func initLexer*(src: string): Lexer =
  ## Creates a lexer over `src` with both cursors at offset 0. The
  ## remaining fields keep their zero values.
  result.src = src
  result.prevPos = 0
  result.currPos = 0
func atEnd(lexer: Lexer, offset = 0): bool =
  ## True when the cursor, shifted by `offset`, is at or past the end of
  ## the source text.
  let probe = lexer.currPos + offset
  result = probe >= lexer.src.len
- # Helpers
func isReserved(word: string): bool =
  ## Reports whether `word` is one of the entries of `reservedWords`.
  reservedWords.contains(word)
func advance(lexer: var Lexer): char =
  ## Consumes the character under the cursor and returns it.
  ##
  ## `currPos` is the index of the next unread character (it starts at 0
  ## and `atEnd` compares it directly against `src.len`), so the character
  ## to hand out is `src[currPos]`. Reading `src[currPos + 1]` would skip
  ## the first character of the input and index past the end of the string
  ## on the last one.
  ##
  ## At end of input nothing is consumed and '\0' is returned.
  if not lexer.atEnd:
    result = lexer.src[lexer.currPos]
    inc lexer.currPos

func peek(lexer: Lexer): char =
  ## Returns the character `advance` would consume next, without consuming
  ## it. Returns '\0' at end of input.
  if not lexer.atEnd:
    result = lexer.src[lexer.currPos]
func addToken(lexer: var Lexer, tk: TokenKind) =
  ## Appends a token of kind `tk` whose `start` is the saved `prevPos` and
  ## whose `endAt` is the current cursor position.
  let token = Token(kind: tk, start: lexer.prevPos, endAt: lexer.currPos)
  lexer.tokens.add token
- # Lexer rules
proc scanIdent(lexer: var Lexer) =
  ## Scans the remainder of an identifier or keyword and emits its token.
  ##
  ## The caller has already consumed the first character. Reserved words
  ## map to their dedicated token kind; every other identifier is emitted
  ## as `tkIDENT`.
  while lexer.peek.isAlphaNumeric or lexer.peek == '_':
    discard lexer.advance()

  # The lexeme is the half-open range prevPos ..< currPos. `substr` takes a
  # first and a *last* index, so passing a length as its second argument
  # yields the wrong text for any token that does not start at offset 0.
  let ident = lexer.src[lexer.prevPos ..< lexer.currPos]

  # The `else` branch doubles as the "not a keyword" case, so plain
  # identifiers always receive tkIDENT instead of a zero-initialised kind.
  let kind =
    case ident
    of "if": tkIF
    of "else": tkELSE
    of "then": tkTHEN
    of "do": tkDO
    of "for": tkFOR
    of "function": tkFUNCTION
    of "while": tkWHILE
    of "end": tkEND
    else: tkIDENT
  lexer.addToken(kind)
proc scanString(lexer: var Lexer) =
  ## Scans a double-quoted string literal and emits a `tkSTRING` token.
  ##
  ## The caller has already consumed the opening quote. Newlines inside the
  ## literal are accepted and counted, so `line` stays accurate for
  ## anything that follows. If the input ends before a closing quote, the
  ## problem is reported and no token is emitted.
  while not lexer.atEnd and lexer.peek != '"':
    if lexer.advance() == '\n':
      inc lexer.line

  if lexer.atEnd:
    let ln = lexer.line
    echo(fmt"Unterminated string literal (at {lexer.prevPos}:{lexer.currPos}, L{ln})")
    return

  # Closing quote
  discard lexer.advance()
  lexer.addToken(tkSTRING)
proc scanInt(lexer: var Lexer) =
  ## Scans the remainder of a numeric literal and emits a `tkINT` token.
  ##
  ## The caller has already consumed the first digit. A fractional part is
  ## included only when the '.' is directly followed by a digit; otherwise
  ## the '.' is left in the input for the main loop to handle.
  while lexer.peek.isDigit:
    discard lexer.advance()

  # Look one character past the '.' without consuming anything. Calling
  # `advance` inside the condition would swallow a character as a side
  # effect of the test and inspect the wrong one.
  let hasFraction =
    lexer.peek == '.' and
    not lexer.atEnd(1) and
    lexer.src[lexer.currPos + 1].isDigit

  if hasFraction:
    discard lexer.advance() # the '.'
    while lexer.peek.isDigit:
      discard lexer.advance()

  # NOTE(review): literals with a fractional part are still tagged tkINT,
  # as before; switch to a float kind here if the token module has one.
  lexer.addToken(tkINT)
proc scan*(lexer: var Lexer): seq[Token] =
  ## Scan the source string and return a sequence of tokens.
  ##
  ## Each iteration saves the cursor in `prevPos`, consumes one character
  ## and dispatches on it. Whitespace is skipped, '\n' bumps the line
  ## counter, `--` starts a comment that runs to the end of the line, and
  ## any character with no rule is reported and skipped.

  template addEither(second: char; two, one: TokenKind) =
    # Emits `two` when the next character is `second`, consuming it so it
    # is not lexed a second time on the next iteration; otherwise emits
    # `one`.
    if lexer.peek == second:
      discard lexer.advance()
      lexer.addToken(two)
    else:
      lexer.addToken(one)

  while not lexer.atEnd:
    lexer.prevPos = lexer.currPos
    let c = lexer.advance()
    case c
    of '"': lexer.scanString()
    of '(': lexer.addToken(tkLPAREN)
    of ')': lexer.addToken(tkRPAREN)
    of '+': lexer.addToken(tkPLUS)
    of '-':
      if lexer.peek == '-':
        # Comment: drop everything up to the newline, which is left in
        # place so the '\n' branch below still counts the line.
        while not lexer.atEnd and lexer.peek != '\n':
          discard lexer.advance()
      else:
        lexer.addToken(tkMINUS)
    of '*': lexer.addToken(tkSTAR)
    of '/': lexer.addToken(tkSLASH)
    of '=': addEither('=', tkDEQUAL, tkEQUAL)
    of '<': addEither('=', tkLT_EQUAL, tkLESS)
    of '>': addEither('=', tkGT_EQUAL, tkGREAT)
    of '!': addEither('=', tkNEQUAL, tkNOT)
    of ' ', '\r', '\t':
      discard
    of '\n':
      inc lexer.line
    of '\0':
      if lexer.atEnd: break
    else:
      if c.isAlphaAscii or c == '_':
        lexer.scanIdent()
      elif c.isDigit:
        lexer.scanInt()
      else:
        echo(fmt"Unexpected character '{c}'")
  result = lexer.tokens
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement