Advertisement
jumpToSubroutine

lexer.py

Sep 12th, 2020 (edited)
344
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.05 KB | None | 0 0
  1.  #constants
  2.  
  3. DIG = '0123456789'
  4. ABC = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
  5.  
  6. #errors
  7.  
  8. class err():
  9.     def __init__(self, errn, dt):
  10.         self.errn = errn
  11.         self.dt = dt
  12.  
  13.     def astr(self):
  14.         result = f"{self.errn}:{self.dt}"
  15.        
  16.         return result
  17.  
  18. class ILLEGALCHARERR(err):
  19.     def __init__(self, dt):
  20.         super().__init__('ILLEGAL CHAR', dt)
  21.  
  22. #tokens
  23.  
  24. TINT = 'INT'
  25. TFLT = 'FLOAT'
  26. TADD = 'ADD'
  27. TSUB = 'SUB'
  28. TMUL = 'MUL'
  29. TDIV = 'DIV'
  30. TLPR = 'LPR'
  31. TRPR = 'RPR'
  32. TLTHN = 'LTHN'
  33. TGTHN = 'GTHN'
  34. IF = 'IF'
  35.  
  36. class token():
  37.     def __init__(self, type_, val=None):
  38.         self.type = type_
  39.         self.val = val
  40.  
  41.     def __repr__(self):
  42.         if self.val: return f'{self.type}:{self.val}'
  43.         return f'{self.type}'
  44.  
  45. #lexer
  46.  
  47. class lexer():
  48.     def __init__(self, txt):
  49.         self.txt= txt
  50.         self.pos= -1
  51.         self.currentCharacter= None
  52.         self.advance()
  53.  
  54.     def advance(self):
  55.         self.pos +=1
  56.         self.currentCharacter = self.txt[self.pos] if self.pos < len(self.txt) else None
  57.  
  58.     #AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
  59.  
  60.     def multiCharacterToken(self):
  61.      multiCharacterToken = ''
  62.    
  63.      if self.currentCharacter in ABC:
  64.         print("IN ABC")
  65.         multiCharacterToken += self.currentCharacter
  66.         print(multiCharacterToken)
  67.         self.advance()
  68.  
  69.      else:
  70.         print("NOT IN ABC")
  71.         self.advance()
  72.         return [], ILLEGALCHARERR("'" + char + "'")
  73.  
  74.      while self.currentCharacter != None:
  75.          print(self.currentCharacter)
  76.          multiCharacterToken += self.currentCharacter
  77.          self.advance()
  78.  
  79.          if self.currentCharacter == None:
  80.              print("BREAK")
  81.              break
  82.  
  83.      print(multiCharacterToken)
  84.      return multiCharacterToken
  85.  
  86.  
  87.     def mtk(self):
  88.         tk = []
  89.  
  90.         while self.currentCharacter != None:
  91.             if self.currentCharacter in ' \n' and ' \t':
  92.                 self.advance()
  93.             elif self.currentCharacter in DIG:
  94.                 tk.append(self.num())
  95.             elif self.currentCharacter=='+':
  96.                 tk.append(token(TADD))
  97.                 self.advance()
  98.             elif self.currentCharacter=='-':
  99.                 tk.append(token(TSUB))
  100.                 self.advance()
  101.             elif self.currentCharacter=='*':
  102.                 tk.append(token(TMUL))
  103.                 self.advance()
  104.             elif self.currentCharacter=='/':
  105.                 tk.append(token(TDIV))
  106.                 self.advance()
  107.             elif self.currentCharacter=='(':
  108.                 tk.append(token(TLPR))
  109.                 self.advance()
  110.             elif self.currentCharacter==')':
  111.                 tk.append(token(TRPR))
  112.                 self.advance()
  113.             elif self.currentCharacter=='<':
  114.                 tk.append(token(TLTHN))
  115.                 self.advance()
  116.             elif self.currentCharacter=='>':
  117.                 tk.append(token(TGTHN))
  118.                 self.advance()
  119.             elif self.currentCharacter in ABC:
  120.                 multiCharacterToken = self.mtoken()
  121.                 if multiCharacterToken=='IF':
  122.                     tk.append(token(IF))
  123.             else:
  124.                 char = self.currentCharacter
  125.                 self.advance()
  126.                 return [], ILLEGALCHARERR("'" + char + "'")
  127.         return tk, None
  128.  
  129.     def num(self):
  130.         nstr = ''
  131.         dc = 0
  132.  
  133.         while self.currentCharacter != None and self.currentCharacter in DIG + '.':
  134.             if self.currentCharacter == '.':
  135.                 if dc == 1: break
  136.                 dc+=1
  137.                 nstr += '.'
  138.                 self.advance()
  139.             else:
  140.                 nstr += self.currentCharacter
  141.                 self.advance()
  142.         if dc == 0: return token(TINT, int(nstr))
  143.         else: return token(TFLT, float(nstr))
  144.        
  145. #run
  146.  
  147. def run(txt):
  148.     global lexer
  149.     mylexer = lexer(txt)
  150.     token, err = mylexer.mtk()
  151.  
  152.     return token, err
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement