Advertisement
Guest User

Untitled

a guest
Mar 29th, 2020
187
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 9.20 KB | None | 0 0
  1. #Name: Sushant Baskota. Current Date: March 28, 2020. Sources Consulted: Python docs
  2. #By submitting this work, I attest that it is my original work and that I did not violate the University of Mississippi academic policies set forth in the M book.
  3.  
  4.  
  5. #add the path to the input file here.
  6. fileName = 'C:/Users/Sushant/Desktop/sample4.in'
  7.  
  8.  
  9. #Added all the tokens to the dictionary
  10. tokens = dict(TOK_IF=1001, TOK_ELSE=1002, TOK_FOR = 1003, TOK_WHILE=1004, TOK_PRINT=1005, TOK_RETURN=1006
  11. , TOK_CONTINUE=1007
  12. , TOK_BREAK=1008
  13. , TOK_DEBUG=1009
  14. , TOK_READ=1010
  15. , TOK_LET=1011
  16. , TOK_INT=1100
  17. , TOK_FLOAT=1101
  18. , TOK_STRING=1102
  19. , TOK_SEMICOLON=2000
  20. , TOK_OPENPAREN=2001
  21. , TOK_CLOSEPAREN=2002
  22. , TOK_OPENBRACKET=2003
  23. , TOK_CLOSEBRACKET=2004
  24. , TOK_OPENBRACE=2005
  25. , TOK_CLOSEBRACE=2006
  26. , TOK_COMMA=2007
  27. , TOK_PLUS=3000
  28. , TOK_MINUS=3001
  29. , TOK_MULTIPLY=3002
  30. , TOK_DIVIDE=3003
  31. , TOK_ASSIGN=3004
  32. , TOK_EQUALTO=3005
  33. , TOK_LESSTHAN=3006
  34. , TOK_GREATERTHAN=3007
  35. , TOK_NOTEQUALTO=3008
  36. , TOK_AND=3009
  37. , TOK_OR=3010
  38. , TOK_NOT=3011
  39. , TOK_LENGTH=3012
  40. , TOK_IDENTIFIER=4000
  41. , TOK_INTLIT=4001
  42. , TOK_FLOATLIT=4002
  43. , TOK_STRINGLIT=4003
  44. , TOK_EOF=5000
  45. , TOK_UNKNOWN=6000)
  46.  
  47. #open the file
  48. file=open(fileName, 'r')
  49.  
  50. lexemmas = []
  51.  
  52. #populate an array with each character
  53. for each in file:
  54. for a in each:
  55. lexemmas.append(a)
  56.  
  57. #switcher for normal cases
  58. switcher = {
  59. ';':'TOK_SEMICOLON',
  60. '(':'TOK_OPENPAREN',
  61. ')':'TOK_CLOSEPAREN',
  62. '[':'TOK_OPENBRACKET',
  63. ']':'TOK_CLOSEBRACKET',
  64. '{':'TOK_OPENBRACE',
  65. '}':'TOK_CLOSEBRACE',
  66. ',':'TOK_COMMA',
  67. '+':'TOK_PLUS',
  68. '-':'TOK_MINUS',
  69. '*':'TOK_MULTIPLY'
  70. }
  71.  
  72. #function that prints the required output
  73. def printOutput(lexemma, token):
  74. print('lexemma: '+ '|'+ lexemma +'|, length: '+ str(len(lexemma)) + ', token: ', tokens[token])
  75.  
  76. #function that prints error message
  77. def printError(lexemma):
  78. print('\t' + 'Error: unknown token')
  79.  
  80. #function that checks if there is a termination point for keywords or identifiers
  81. def checkEnd(i):
  82. if (i==len(lexemmas)-1):
  83. return True
  84. if (lexemmas[i+1]==' 'or lexemmas[i+1]=='\n' or lexemmas[i+1]=='"' or lexemmas[i+1]=='(' or lexemmas[i+1]==')' or lexemmas[i+1]=='>' or lexemmas[i+1]=='<' or lexemmas[i+1]==':' or lexemmas[i+1]=='='):
  85. return True
  86. else:
  87. return False
  88.  
  89. #function that does all the comparisions for special cases
  # hero() walks the module-level `lexemmas` character list once and prints a
  # report line (via printOutput) for each lexeme it recognises. It takes no
  # arguments and has no return statement.
  # NOTE(review): this listing has lost its indentation, so the nesting that
  # the comments below describe is inferred from statement order -- confirm
  # against a correctly indented copy before relying on it.
  90. def hero():
  # thisstring: characters accumulated for the lexeme currently being built.
  # quote:      flipped each time a '"' character is processed.
  # skip:       set when a two-character operator already used the next character.
  # special:    set when text left over after a numeric prefix is re-examined.
  91. thisstring=''
  92. quote= False
  93. skip=False
  94. special=False
  95. for i in range(0, len(lexemmas)):
  # The previous iteration printed this character as the second half of a
  # two-character operator, so it is passed over here.
  96. if(skip):
  97. skip=False
  98. continue
  # Newline characters are dropped without being added to thisstring.
  99. if(lexemmas[i]=='\n'):
  100. continue
  # '#' is reported as TOK_UNKNOWN plus the error line, and whatever had been
  # accumulated in thisstring is discarded.
  101. if(lexemmas[i]=='#'):
  102. printOutput(lexemmas[i], 'TOK_UNKNOWN')
  103. printError(lexemmas[i])
  104. thisstring=''
  105. continue
  106.  
  # switched is refreshed only while quote is False; otherwise it keeps the
  # value from an earlier character.
  # 'false' is the sentinel for "not a single-character token in switcher".
  107. if(not quote):
  108. switched = switcher.get(lexemmas[i], 'false')
  109.  
  110. if(switched=='false'):
  111. thisstring = thisstring + lexemmas[i]
  112. special=False
  # naya requests one more pass over the checks below; it is set again
  # further down when text remains after a numeric prefix.
  113. naya=True
  114. while naya:
  115. naya=False
  # A '"' toggles quote; when quote was already True the accumulated text is
  # printed as TOK_STRINGLIT.
  116. if(lexemmas[i]=='"'):
  117. if(quote):
  118. printOutput(thisstring.strip(), 'TOK_STRINGLIT')
  119. thisstring = ''
  120. quote= not quote
  # Keywords: each one matches when the stripped buffer equals the keyword
  # and checkEnd(i) reports that the lexeme ends at this character.
  121. elif(thisstring.strip() == 'for' and checkEnd(i)):
  122. printOutput(thisstring.strip(), 'TOK_FOR')
  123. thisstring=''
  124. elif (thisstring.strip() == 'if' and checkEnd(i)):
  125. printOutput(thisstring.strip(), 'TOK_IF')
  126. thisstring=''
  127. elif (thisstring.strip() == 'else' and checkEnd(i)):
  128. printOutput(thisstring.strip(), 'TOK_ELSE')
  129. thisstring=''
  130. elif (thisstring.strip() == 'while' and checkEnd(i)):
  131. printOutput(thisstring.strip(), 'TOK_WHILE')
  132. thisstring=''
  133. elif (thisstring.strip() == 'print' and checkEnd(i)):
  134. printOutput(thisstring.strip(), 'TOK_PRINT')
  135. thisstring=''
  136. elif (thisstring.strip() == 'return' and checkEnd(i)):
  137. printOutput(thisstring.strip(), 'TOK_RETURN')
  138. thisstring=''
  139. elif (thisstring.strip() == 'continue' and checkEnd(i)):
  140. printOutput(thisstring.strip(), 'TOK_CONTINUE')
  141. thisstring=''
  142. elif (thisstring.strip() == 'break' and checkEnd(i)):
  143. printOutput(thisstring.strip(), 'TOK_BREAK')
  144. thisstring=''
  145. elif (thisstring.strip() == 'debug' and checkEnd(i)):
  146. printOutput(thisstring.strip(), 'TOK_DEBUG')
  147. thisstring=''
  148. elif (thisstring.strip() == 'read' and checkEnd(i)):
  149. printOutput(thisstring.strip(), 'TOK_READ')
  150. thisstring=''
  151. elif (thisstring.strip() == 'let' and checkEnd(i)):
  152. printOutput(thisstring.strip(), 'TOK_LET')
  153. thisstring=''
  154. elif (thisstring.strip() == 'int' and checkEnd(i)):
  155. printOutput(thisstring.strip(), 'TOK_INT')
  156. thisstring=''
  157. elif (thisstring.strip() == 'float' and checkEnd(i)):
  158. printOutput(thisstring.strip(), 'TOK_FLOAT')
  159. thisstring=''
  # NOTE(review): unlike every other keyword, 'string' is matched without a
  # checkEnd(i) test -- confirm whether that is intended.
  160. elif (thisstring.strip() == 'string'):
  161. printOutput(thisstring.strip(), 'TOK_STRING')
  162. thisstring=''
  # '<' followed by '>' is printed as the two-character TOK_NOTEQUALTO and the
  # '>' is skipped on the next iteration; otherwise it is TOK_LESSTHAN.
  # NOTE(review): checkEnd(i) is also True on the last index, in which case
  # lexemmas[i+1] below reads past the end of the list.
  163. elif (thisstring.strip() == '<' and checkEnd(i)):
  164. if(lexemmas[i+1]=='>'):
  165. printOutput(thisstring.strip()+ '>', 'TOK_NOTEQUALTO')
  166. skip=True
  167. else:
  168. printOutput(thisstring.strip(), 'TOK_LESSTHAN')
  169. thisstring=''
  170. elif (thisstring.strip() == '<>' and checkEnd(i)):
  171. printOutput(thisstring.strip(), 'TOK_NOTEQUALTO')
  172. thisstring=''
  # ':' only produces output when the next character is '=' (TOK_ASSIGN).
  # NOTE(review): lexemmas[i+1] is read with no checkEnd(i) or bounds test, so
  # a ':' as the very last character indexes past the end of the list.
  173. elif (thisstring.strip() == ':'):
  174. if(lexemmas[i+1]=='='):
  175. printOutput(thisstring.strip()+ '=', 'TOK_ASSIGN')
  176. skip=True
  177. thisstring=''
  # '=' only produces output when the next character is also '='
  # (TOK_EQUALTO); the same last-index caveat as for '<' applies.
  178. elif (thisstring.strip() == '=' and checkEnd(i)):
  179. if(lexemmas[i+1]=='='):
  180. printOutput(thisstring.strip()+ '=', 'TOK_EQUALTO')
  181. skip=True
  182. thisstring=''
  183. elif (thisstring.strip() == '>' and checkEnd(i)):
  184. printOutput(thisstring.strip(), 'TOK_GREATERTHAN')
  185. thisstring=''
  # Word operators, matched the same way as the keywords above.
  186. elif (thisstring.strip() == 'and' and checkEnd(i)):
  187. printOutput(thisstring.strip(), 'TOK_AND')
  188. thisstring=''
  189. elif (thisstring.strip() == 'or' and checkEnd(i)):
  190. printOutput(thisstring.strip(), 'TOK_OR')
  191. thisstring=''
  192. elif (thisstring.strip() == 'not' and checkEnd(i)):
  193. printOutput(thisstring.strip(), 'TOK_NOT')
  194. thisstring=''
  195. elif (thisstring.strip() == 'length' and checkEnd(i)):
  196. printOutput(thisstring.strip(), 'TOK_LENGTH')
  197. thisstring=''
  198. #checks if the given string is an identifier or a number or a combination
  199. else:
  # yo: the stripped buffer; num: its leading run of digits and dots;
  # text: whatever follows that run.
  200. yo = thisstring.strip()
  201. num=''
  202. text=''
  # Only classified outside a string literal, with a non-empty buffer, and
  # when the lexeme ends here (or this is a repeat pass flagged by special).
  203. if(not quote and (checkEnd(i) or special) and len(thisstring.strip())>0):
  204.  
  # A buffer starting with a letter is printed whole as TOK_IDENTIFIER.
  205. if(yo[0].isalpha()):
  206. printOutput(yo, 'TOK_IDENTIFIER')
  207. elif(yo[0].isdigit()):
  # NOTE(review): this inner loop reuses the name i, overwriting the outer
  # loop's position for the rest of this iteration, including any repeat
  # pass requested through naya.
  208. for i in range(len(yo)):
  209. if(yo[i].isdigit() or yo[i]=='.'):
  210. num=num+yo[i]
  211. else:
  212. break
  # A numeric prefix whose float value equals its rounded value is printed as
  # TOK_INTLIT, anything else as TOK_FLOATLIT.
  # NOTE(review): num may hold several dots (e.g. "1.2.3"), which float()
  # rejects with ValueError.
  213. if(num != ''):
  214. text=yo[len(num):]
  215. if(float(num)==round(float(num))):
  216. printOutput(num, 'TOK_INTLIT')
  217. num=''
  218. else:
  219. printOutput(num, 'TOK_FLOATLIT')
  220. num=''
  # Text left after the numeric prefix goes back into thisstring and is
  # re-checked on another pass of the while loop; otherwise the buffer is
  # cleared.
  221. if(len(text)>0):
  222. thisstring=text
  223. naya=True
  224. special=True
  225. text=''
  226. else:
  227. thisstring=''
  # NOTE(review): the first argument here is a fixed message, not input text,
  # so printOutput reports the message and its length as the lexeme.
  228. elif(thisstring==''):
  229. printOutput('lexemma: Unkown Token', 'TOK_UNKNOWN')
  # Presumably paired with the `switched=='false'` test: a character found in
  # switcher is printed at once with its token name -- confirm the nesting.
  230. else:
  231. printOutput(lexemmas[i], switched)
  232. #if the file ends with the quotes open EOF
  # quote still True here means a '"' was opened and never closed; the
  # remaining buffer is printed with TOK_EOF.
  233. if(quote):
  234. printOutput(thisstring, 'TOK_EOF')
  235. #execute the main function
  # Runs the lexer as soon as the module is executed.
  236. hero()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement