Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# constants
# Characters the lexer treats as decimal digits when scanning numbers.
DIG = "0123456789"
# Characters the lexer treats as letters when scanning words (upper-case only).
ABC = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
- #errors
class err():
    """Base error record holding an error name and its details."""

    def __init__(self, errn, dt):
        # errn: short error name; dt: details describing this occurrence.
        self.errn, self.dt = errn, dt

    def astr(self):
        """Return the error rendered as ``<errn>:<dt>``."""
        return f"{self.errn}:{self.dt}"
class ILLEGALCHARERR(err):
    """Error used by the lexer for input it cannot turn into a token."""

    def __init__(self, dt):
        # The error name is fixed; only the details differ per occurrence.
        super().__init__(errn='ILLEGAL CHAR', dt=dt)
# tokens
# Token type tags. Each is a plain string that ends up in `token.type`.
TINT, TFLT = 'INT', 'FLOAT'                           # numeric literals
TADD, TSUB, TMUL, TDIV = 'ADD', 'SUB', 'MUL', 'DIV'   # arithmetic operators
TLPR, TRPR = 'LPR', 'RPR'                             # parentheses
TLTHN, TGTHN = 'LTHN', 'GTHN'                         # comparison operators
IF = 'IF'                                             # keyword
class token():
    """A lexical token: a type tag plus an optional value.

    ``type_`` is stored as ``type`` and ``val`` as ``val``. ``val`` defaults
    to ``None`` for tokens that carry no value.
    """

    def __init__(self, type_, val=None):
        self.type = type_
        self.val = val

    def __repr__(self):
        """Return ``TYPE:value`` when a value is present, otherwise ``TYPE``."""
        # Compare against None instead of relying on truthiness so that
        # falsy values such as 0 or 0.0 are still displayed (``INT:0``).
        if self.val is not None:
            return f'{self.type}:{self.val}'
        return f'{self.type}'
- #lexer
class lexer():
    """Turn a source string into a list of ``token`` objects.

    The lexer walks ``txt`` one character at a time. ``pos`` is the index of
    the character under the cursor and ``currentCharacter`` is that
    character, or ``None`` once the cursor has moved past the end of ``txt``.
    """

    def __init__(self, txt):
        self.txt = txt
        self.pos = -1  # the advance() call below moves the cursor to index 0
        self.currentCharacter = None
        self.advance()

    def advance(self):
        """Move the cursor one character forward and refresh ``currentCharacter``."""
        self.pos += 1
        if self.pos < len(self.txt):
            self.currentCharacter = self.txt[self.pos]
        else:
            self.currentCharacter = None

    def multiCharacterToken(self):
        """Consume a run of consecutive ``ABC`` letters and return it as a string.

        Scanning stops at the first character that is not in ``ABC`` (or at
        the end of the input), leaving the cursor on that character. If the
        cursor is not on a letter to begin with, nothing is consumed and the
        empty string is returned.
        """
        letters = []
        # Guard against None first: `None in ABC` would raise TypeError.
        while self.currentCharacter is not None and self.currentCharacter in ABC:
            letters.append(self.currentCharacter)
            self.advance()
        return ''.join(letters)

    def mtk(self):
        """Tokenize the whole input.

        Returns:
            ``(tokens, None)`` on success, where ``tokens`` is the list of
            ``token`` objects in input order, or ``([], error)`` where
            ``error`` is an ``ILLEGALCHARERR`` describing the first piece of
            input that could not be tokenized.
        """
        tokens = []
        while self.currentCharacter is not None:
            ch = self.currentCharacter
            if ch in ' \t\n':
                # Whitespace (space, tab, newline) separates tokens only.
                self.advance()
            elif ch in DIG:
                tokens.append(self.num())
            elif ch in ABC:
                word = self.multiCharacterToken()
                if word == 'IF':
                    tokens.append(token(IF))
                else:
                    # IF is the only known word; report anything else
                    # instead of silently dropping it from the token stream.
                    return [], ILLEGALCHARERR("'" + word + "'")
            else:
                kind = self._symbol_type(ch)
                self.advance()
                if kind is None:
                    return [], ILLEGALCHARERR("'" + ch + "'")
                tokens.append(token(kind))
        return tokens, None

    @staticmethod
    def _symbol_type(ch):
        """Return the token type for a one-character symbol, or None if unknown."""
        return {
            '+': TADD,
            '-': TSUB,
            '*': TMUL,
            '/': TDIV,
            '(': TLPR,
            ')': TRPR,
            '<': TLTHN,
            '>': TGTHN,
        }.get(ch)

    def num(self):
        """Consume a number starting at the cursor and return its token.

        Digits and at most one ``.`` are consumed; a second ``.`` ends the
        number and is left under the cursor. Returns a ``TINT`` token holding
        an ``int`` when no ``.`` was seen, otherwise a ``TFLT`` token holding
        a ``float``.
        """
        number_chars = DIG + '.'
        pieces = []
        seen_dot = False
        while self.currentCharacter is not None and self.currentCharacter in number_chars:
            if self.currentCharacter == '.':
                if seen_dot:
                    break
                seen_dot = True
            pieces.append(self.currentCharacter)
            self.advance()
        text = ''.join(pieces)
        if seen_dot:
            return token(TFLT, float(text))
        return token(TINT, int(text))
- #run
def run(txt):
    """Tokenize ``txt`` with a fresh ``lexer`` and return ``(tokens, error)``.

    The pair is exactly what ``lexer.mtk()`` produces for ``txt``.
    """
    tokens, error = lexer(txt).mtk()
    return tokens, error
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement