Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Name: Sushant Baskota. Current Date: March 28, 2020. Sources Consulted: Python docs
# By submitting this work, I attest that it is my original work and that I did not violate the University of Mississippi academic policies set forth in the M book.
# Location of the source text to tokenize; edit this path to point at the
# input file before running the script.
fileName = 'C:/Users/Sushant/Desktop/sample4.in'
# Numeric code for every token name, grouped by category.
tokens = {
    # keywords (1000s)
    'TOK_IF': 1001,
    'TOK_ELSE': 1002,
    'TOK_FOR': 1003,
    'TOK_WHILE': 1004,
    'TOK_PRINT': 1005,
    'TOK_RETURN': 1006,
    'TOK_CONTINUE': 1007,
    'TOK_BREAK': 1008,
    'TOK_DEBUG': 1009,
    'TOK_READ': 1010,
    'TOK_LET': 1011,
    # type names (1100s)
    'TOK_INT': 1100,
    'TOK_FLOAT': 1101,
    'TOK_STRING': 1102,
    # punctuation (2000s)
    'TOK_SEMICOLON': 2000,
    'TOK_OPENPAREN': 2001,
    'TOK_CLOSEPAREN': 2002,
    'TOK_OPENBRACKET': 2003,
    'TOK_CLOSEBRACKET': 2004,
    'TOK_OPENBRACE': 2005,
    'TOK_CLOSEBRACE': 2006,
    'TOK_COMMA': 2007,
    # operators (3000s)
    'TOK_PLUS': 3000,
    'TOK_MINUS': 3001,
    'TOK_MULTIPLY': 3002,
    'TOK_DIVIDE': 3003,
    'TOK_ASSIGN': 3004,
    'TOK_EQUALTO': 3005,
    'TOK_LESSTHAN': 3006,
    'TOK_GREATERTHAN': 3007,
    'TOK_NOTEQUALTO': 3008,
    'TOK_AND': 3009,
    'TOK_OR': 3010,
    'TOK_NOT': 3011,
    'TOK_LENGTH': 3012,
    # identifiers and literals (4000s)
    'TOK_IDENTIFIER': 4000,
    'TOK_INTLIT': 4001,
    'TOK_FLOATLIT': 4002,
    'TOK_STRINGLIT': 4003,
    # end of input / unrecognised text
    'TOK_EOF': 5000,
    'TOK_UNKNOWN': 6000,
}
# Load the input file into a flat list holding one entry per character
# (newlines included).  The ``with`` block closes the file handle as soon as
# the text has been read, even if reading raises.
with open(fileName, 'r') as file:
    lexemmas = list(file.read())
# Single-character lexemes that translate directly to a token name, with no
# look-ahead needed.  Includes '/' so that every arithmetic operator has an
# entry here.
switcher = {
    ';': 'TOK_SEMICOLON',
    '(': 'TOK_OPENPAREN',
    ')': 'TOK_CLOSEPAREN',
    '[': 'TOK_OPENBRACKET',
    ']': 'TOK_CLOSEBRACKET',
    '{': 'TOK_OPENBRACE',
    '}': 'TOK_CLOSEBRACE',
    ',': 'TOK_COMMA',
    '+': 'TOK_PLUS',
    '-': 'TOK_MINUS',
    '*': 'TOK_MULTIPLY',
    '/': 'TOK_DIVIDE',
}
#function that prints the report line for one recognised token
def printOutput(lexemma, token):
    """Print the lexeme text, its length and the numeric code of ``token``.

    ``token`` is a key of the module-level ``tokens`` table; an unknown key
    raises ``KeyError``.
    """
    # Build the textual part first; print() inserts its default single-space
    # separator between this text and the numeric code.
    report = ''.join((
        'lexemma: ',
        '|',
        lexemma,
        '|, length: ',
        str(len(lexemma)),
        ', token: ',
    ))
    code = tokens[token]
    print(report, code)
#function that prints the error line shown after an unknown token
def printError(lexemma):
    """Print the tab-indented unknown-token error message.

    ``lexemma`` is accepted for symmetry with ``printOutput`` but is not
    used in the message.
    """
    print('\t', 'Error: unknown token', sep='')
#characters that may directly follow a keyword, identifier or number
_WORD_TERMINATORS = frozenset(' \t\r\n"()<>:=;[]{},+-*/#')


#function that checks if there is a termination point for keywords or identifiers
def checkEnd(i, source=None):
    """Return True when the character after position ``i`` ends a word.

    A word ends at the end of the input, or when the next character is
    whitespace, a double quote, '#', or any punctuation/operator character
    (so ``x;`` and ``5+`` terminate just like ``x `` does).

    Parameters:
        i: index of the last character of the candidate word.
        source: optional sequence of characters to inspect; defaults to the
            module-level ``lexemmas`` list.

    Returns:
        True if position ``i`` is at (or past) the last character, or the
        following character is a terminator; False otherwise.
    """
    chars = lexemmas if source is None else source
    following = i + 1
    # Nothing after position i: the end of input always terminates a word.
    if following >= len(chars):
        return True
    return chars[following] in _WORD_TERMINATORS
#function that scans the loaded characters and prints every token found
def hero():
    """Tokenize the characters in ``lexemmas`` and print one line per token.

    The input is scanned left to right with a cursor.  At each position the
    longest matching lexeme is taken, reported through ``printOutput`` and
    skipped:

    * whitespace separates tokens and is never reported;
    * ``"..."`` is a string literal, reported with its quotes and with any
      newlines inside it removed; if the closing quote is missing, the rest
      of the input is reported as TOK_EOF and scanning stops;
    * a letter starts a word of letters, digits and underscores, reported as
      a keyword if it matches one exactly, otherwise as TOK_IDENTIFIER;
    * a digit starts a number: digits only is TOK_INTLIT, digits '.' digits
      is TOK_FLOATLIT;
    * ``<>``, ``:=`` and ``==`` are two-character operators; ``<`` and ``>``
      are one-character operators;
    * characters listed in ``switcher`` map straight to their token;
    * anything else (including '#', and a lone ':' or '=') is reported as
      TOK_UNKNOWN followed by the ``printError`` message.

    Takes no arguments and returns None; all results are printed.
    """
    keywords = {
        'for': 'TOK_FOR',
        'if': 'TOK_IF',
        'else': 'TOK_ELSE',
        'while': 'TOK_WHILE',
        'print': 'TOK_PRINT',
        'return': 'TOK_RETURN',
        'continue': 'TOK_CONTINUE',
        'break': 'TOK_BREAK',
        'debug': 'TOK_DEBUG',
        'read': 'TOK_READ',
        'let': 'TOK_LET',
        'int': 'TOK_INT',
        'float': 'TOK_FLOAT',
        'string': 'TOK_STRING',
        'and': 'TOK_AND',
        'or': 'TOK_OR',
        'not': 'TOK_NOT',
        'length': 'TOK_LENGTH',
    }
    pair_operators = {
        '<>': 'TOK_NOTEQUALTO',
        ':=': 'TOK_ASSIGN',
        '==': 'TOK_EQUALTO',
    }
    single_operators = {
        '<': 'TOK_LESSTHAN',
        '>': 'TOK_GREATERTHAN',
    }

    text = ''.join(lexemmas)
    end = len(text)
    pos = 0
    while pos < end:
        ch = text[pos]

        if ch.isspace():
            pos += 1
            continue

        if ch == '"':
            closing = text.find('"', pos + 1)
            if closing == -1:
                # Quote still open at end of input: report what was read.
                printOutput(text[pos:].replace('\n', ''), 'TOK_EOF')
                return
            literal = text[pos:closing + 1].replace('\n', '')
            printOutput(literal, 'TOK_STRINGLIT')
            pos = closing + 1
            continue

        if ch.isalpha():
            stop = pos + 1
            while stop < end and (text[stop].isalnum() or text[stop] == '_'):
                stop += 1
            word = text[pos:stop]
            # Keywords match only as whole words, so "strings" or "format"
            # stay identifiers.
            printOutput(word, keywords.get(word, 'TOK_IDENTIFIER'))
            pos = stop
            continue

        if ch.isdigit():
            stop = pos + 1
            while stop < end and text[stop].isdigit():
                stop += 1
            kind = 'TOK_INTLIT'
            # A '.' belongs to the number only when a digit follows it, so
            # the literal is always well formed ("1.2.3" -> 1.2, '.', 3).
            if stop + 1 < end and text[stop] == '.' and text[stop + 1].isdigit():
                stop += 2
                while stop < end and text[stop].isdigit():
                    stop += 1
                kind = 'TOK_FLOATLIT'
            printOutput(text[pos:stop], kind)
            pos = stop
            continue

        # Slicing never raises at the end of input; a short slice simply
        # fails the two-character lookup.
        pair = text[pos:pos + 2]
        if pair in pair_operators:
            printOutput(pair, pair_operators[pair])
            pos += 2
            continue

        kind = single_operators.get(ch) or switcher.get(ch)
        if kind is None:
            printOutput(ch, 'TOK_UNKNOWN')
            printError(ch)
        else:
            printOutput(ch, kind)
        pos += 1
# Script entry point: tokenize the loaded input and print the results.
hero()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement