#Untitled

#Name: Sushant Baskota. Current Date: March 28, 2020. Sources Consulted: Python docs
#By submitting this work, I attest that it is my original work and that I did not violate the University of Mississippi academic policies set forth in the M book.


#location of the source file to tokenize; edit this to point at your own input
fileName = "C:/Users/Sushant/Desktop/sample4.in"


#lookup table from token name to the numeric code printed for that token
tokens = {
    #keywords
    'TOK_IF': 1001,
    'TOK_ELSE': 1002,
    'TOK_FOR': 1003,
    'TOK_WHILE': 1004,
    'TOK_PRINT': 1005,
    'TOK_RETURN': 1006,
    'TOK_CONTINUE': 1007,
    'TOK_BREAK': 1008,
    'TOK_DEBUG': 1009,
    'TOK_READ': 1010,
    'TOK_LET': 1011,
    #type names
    'TOK_INT': 1100,
    'TOK_FLOAT': 1101,
    'TOK_STRING': 1102,
    #punctuation
    'TOK_SEMICOLON': 2000,
    'TOK_OPENPAREN': 2001,
    'TOK_CLOSEPAREN': 2002,
    'TOK_OPENBRACKET': 2003,
    'TOK_CLOSEBRACKET': 2004,
    'TOK_OPENBRACE': 2005,
    'TOK_CLOSEBRACE': 2006,
    'TOK_COMMA': 2007,
    #operators
    'TOK_PLUS': 3000,
    'TOK_MINUS': 3001,
    'TOK_MULTIPLY': 3002,
    'TOK_DIVIDE': 3003,
    'TOK_ASSIGN': 3004,
    'TOK_EQUALTO': 3005,
    'TOK_LESSTHAN': 3006,
    'TOK_GREATERTHAN': 3007,
    'TOK_NOTEQUALTO': 3008,
    'TOK_AND': 3009,
    'TOK_OR': 3010,
    'TOK_NOT': 3011,
    'TOK_LENGTH': 3012,
    #identifiers and literals
    'TOK_IDENTIFIER': 4000,
    'TOK_INTLIT': 4001,
    'TOK_FLOATLIT': 4002,
    'TOK_STRINGLIT': 4003,
    #end of file and unrecognised input
    'TOK_EOF': 5000,
    'TOK_UNKNOWN': 6000,
}

#read the whole input file; the with-block closes the handle even if reading fails
with open(fileName, 'r') as file:
    #one list entry per character of the file, newlines included
    lexemmas = list(file.read())

#switcher for normal cases: single characters that are always a complete token on their own
switcher = {
    ';':'TOK_SEMICOLON',
    '(':'TOK_OPENPAREN',
    ')':'TOK_CLOSEPAREN',
    '[':'TOK_OPENBRACKET',
    ']':'TOK_CLOSEBRACKET',
    '{':'TOK_OPENBRACE',
    '}':'TOK_CLOSEBRACE',
    ',':'TOK_COMMA',
    '+':'TOK_PLUS',
    '-':'TOK_MINUS',
    '*':'TOK_MULTIPLY',
    #TOK_DIVIDE is defined in the token table but had no character mapped to it
    '/':'TOK_DIVIDE',
}

#helper that reports one recognised lexeme on standard output
def printOutput(lexemma, token):
    """Print the lexeme text, its length and the numeric code of its token.

    lexemma -- the lexeme text (must be a str)
    token   -- a key of the module-level tokens table, e.g. 'TOK_IF'

    Raises KeyError when token is not a key of tokens.
    """
    description = ''.join([
        'lexemma: ',
        '|',
        lexemma,
        '|, length: ',
        str(len(lexemma)),
        ', token: ',
    ])
    #print() puts its own separating space between the two arguments
    print(description, tokens[token])

#helper that reports an unrecognised token on standard output
def printError(lexemma):
    """Print the tab-indented unknown-token message.

    lexemma -- the offending text; accepted for the callers' sake but not
               included in the message.
    """
    print('\t', 'Error: unknown token', sep='')

#characters that end a keyword, identifier or number when they come right after it:
#whitespace, the quote, the comparison/assignment characters, and every
#single-character punctuation or arithmetic token (so "x;" ends x before the ';')
_TERMINATORS = frozenset(' \t\n"()><:=;[]{},+-*/')

#function that checks if there is a termination point for keywords or identifiers
def checkEnd(i):
    """Tell whether the lexeme ending at index i of lexemmas is complete.

    i -- index into the module-level lexemmas list of the lexeme's last character

    Returns True when i is the last index (or beyond it) or when the character
    at i + 1 is one of the terminator characters; False otherwise.
    """
    if i >= len(lexemmas) - 1:
        #nothing follows, so the end of input terminates the lexeme
        return True
    return lexemmas[i + 1] in _TERMINATORS

#helper that extracts the numeric prefix of a lexeme
def _leadingNumber(text):
    """Return the longest prefix of text made of decimal digits and at most one '.'.

    Stopping at a second '.' keeps the prefix parseable by float().
    """
    seenDot = False
    end = 0
    for ch in text:
        if ch == '.' and not seenDot:
            seenDot = True
        elif not ch.isdecimal():
            break
        end += 1
    return text[:end]


#function that does all the comparisons for special cases
def hero():
    """Walk the module-level lexemmas list and print every token found.

    Single-character tokens are looked up in switcher. Everything else is
    collected character by character and classified as a string literal,
    keyword, multi-character operator, identifier or number. Tokens are
    reported through printOutput, and '#' outside a string is reported as an
    unknown token through printOutput and printError. If the input ends inside
    a string literal, the collected text is reported as TOK_EOF.

    Takes no arguments and returns None.
    """
    #spellings that map directly to a token once checkEnd confirms they are complete
    keywords = {
        'for': 'TOK_FOR',
        'if': 'TOK_IF',
        'else': 'TOK_ELSE',
        'while': 'TOK_WHILE',
        'print': 'TOK_PRINT',
        'return': 'TOK_RETURN',
        'continue': 'TOK_CONTINUE',
        'break': 'TOK_BREAK',
        'debug': 'TOK_DEBUG',
        'read': 'TOK_READ',
        'let': 'TOK_LET',
        'int': 'TOK_INT',
        'float': 'TOK_FLOAT',
        'string': 'TOK_STRING',
        'and': 'TOK_AND',
        'or': 'TOK_OR',
        'not': 'TOK_NOT',
        'length': 'TOK_LENGTH',
    }
    thisstring = ''     #text collected so far; may carry leading blanks, stripped before use
    quote = False       #True while inside a string literal that has not been closed yet
    skip = False        #True when the next character was already consumed as lookahead
    switched = 'false'  #switcher result for the latest character read outside a string
    total = len(lexemmas)

    for i in range(total):
        current = lexemmas[i]
        if skip:
            skip = False
            continue
        if current == '\n':
            continue
        #'#' is only an unknown token outside a string; inside one it is ordinary text
        if current == '#' and not quote:
            printOutput(current, 'TOK_UNKNOWN')
            printError(current)
            thisstring = ''
            continue

        #inside a string the previous value ('false') is kept so every character is collected
        if not quote:
            switched = switcher.get(current, 'false')
        if switched != 'false':
            printOutput(current, switched)
            continue

        thisstring = thisstring + current

        if current == '"':
            if quote:
                #closing quote: report the literal including both quote characters
                printOutput(thisstring.strip(), 'TOK_STRINGLIT')
                thisstring = ''
            quote = not quote
            continue

        #one character of lookahead; '' at the end of input so no index error can occur
        following = lexemmas[i + 1] if i + 1 < total else ''
        #set once a number has been split off and the rest of the lexeme is classified again
        rescanning = False
        while True:
            pending = thisstring.strip()

            if pending in keywords and checkEnd(i):
                printOutput(pending, keywords[pending])
                thisstring = ''
                break

            #the operators below are complete by themselves, whatever follows them
            if pending == '<':
                if following == '>':
                    printOutput(pending + '>', 'TOK_NOTEQUALTO')
                    skip = True
                else:
                    printOutput(pending, 'TOK_LESSTHAN')
                thisstring = ''
                break
            if pending == ':':
                if following == '=':
                    printOutput(pending + '=', 'TOK_ASSIGN')
                    skip = True
                #a ':' not followed by '=' is discarded without output
                thisstring = ''
                break
            if pending == '=':
                if following == '=':
                    printOutput(pending + '=', 'TOK_EQUALTO')
                    skip = True
                #a single '=' is discarded without output
                thisstring = ''
                break
            if pending == '>':
                printOutput(pending, 'TOK_GREATERTHAN')
                thisstring = ''
                break

            #identifier, number, or a number immediately followed by more text
            if quote or not pending or not (rescanning or checkEnd(i)):
                #not complete yet (or string content): keep collecting
                break
            leftover = ''
            if pending[0].isalpha():
                printOutput(pending, 'TOK_IDENTIFIER')
            elif pending[0].isdecimal():
                number = _leadingNumber(pending)
                leftover = pending[len(number):]
                #NOTE(review): the classification is by value, so "2.0" is reported
                #as an int literal; confirm this is the intended behaviour
                if '.' not in number or float(number).is_integer():
                    printOutput(number, 'TOK_INTLIT')
                else:
                    printOutput(number, 'TOK_FLOATLIT')
            #a lexeme starting with any other character is discarded without output
            thisstring = leftover
            if not leftover:
                break
            #classify what followed the number, e.g. the "x" of "12x"
            rescanning = True

    #if the file ends with the quotes open EOF
    if quote:
        printOutput(thisstring, 'TOK_EOF')
#execute the main function: scans the characters loaded into lexemmas and prints the tokens
hero()