Untitled

# Apply one Q_Values update for the state-action pair SA, branching on where
# Next_Coordinates lands: an ordinary cell, a cliff cell, or the goal.
# The cliff and goal branches record a summary value and break out of the
# enclosing loop; the ordinary branch advances Current_State and falls through.
if(Next_Coordinates != ConstantsHandler.Goal_State and Next_Coordinates not in ConstantsHandler.Cliff_States):
                #print(str(Next_Coordinates) + " is a valid next coordinate.")
                #print(Q_Values[SA])

                # Key for the pair that will be looked up as the bootstrap value.
                Next_SA = (Next_Coordinates, Next_Action)

                # Despite the name, this is the stored value of the specific
                # (Next_Coordinates, Next_Action) pair, not a maximum over actions.
                # NOTE(review): how Next_Action is chosen is not visible here.
                Best_Q_Value = Q_Values[Next_SA]

                # Update the number of times we have seen this state-action mapping.
                Count[SA] = Count[SA] + ConstantsHandler.Seen_Increase_Rate

                # Move Q_Values[SA] a fraction Alpha of the way toward the target
                # (Standard_Reward + Best_Q_Value).
                # NOTE(review): no discount factor multiplies Best_Q_Value -- confirm
                # that an undiscounted update is intended.
                Q_Values[SA] = Q_Values[SA] + ConstantsHandler.Alpha * ((ConstantsHandler.Standard_Reward + Best_Q_Value) - Q_Values[SA])
                #print(Q_Values[SA])
                # Advance to the new position; this branch does not leave the loop.
                Current_State = Next_Coordinates


            elif(Next_Coordinates in ConstantsHandler.Cliff_States):
                #print(str(Next_Coordinates) + " is a cliff.")
                #print(Q_Values[SA])
                # Update the number of times we have seen this state-action mapping.
                Count[SA] = Count[SA] + ConstantsHandler.Seen_Increase_Rate

                # Move Q_Values[SA] a fraction Alpha of the way toward Cliff_Reward.
                # There is no bootstrap term here: the target is the reward alone.
                Q_Values[SA] = Q_Values[SA] + ConstantsHandler.Alpha * (ConstantsHandler.Cliff_Reward - Q_Values[SA])
                #print(Q_Values[SA])

                # Record the mean over every entry currently stored in Q_Values
                # (an average of Q-values, not a sum of rewards collected), then
                # leave the enclosing loop.
                Average_Winnings = sum(Q_Values.values()) / len(Q_Values)
                Average_Winnings_List.append(Average_Winnings)
                break


            # Check if we ARE in the goal state.
            elif (Next_Coordinates == ConstantsHandler.Goal_State):
                #print(str(Next_Coordinates) + " is the goal.")
                #print(Q_Values[SA])
                # Update the number of times we have seen this state-action mapping.
                Count[SA] = Count[SA] + ConstantsHandler.Seen_Increase_Rate

                # Move Q_Values[SA] a fraction Alpha of the way toward Goal_Reward.
                # As in the cliff branch, the target is the reward alone.
                Q_Values[SA] = Q_Values[SA] + ConstantsHandler.Alpha * (ConstantsHandler.Goal_Reward - Q_Values[SA])
                #print(Q_Values[SA])

                # Record the mean over every entry currently stored in Q_Values,
                # then leave the enclosing loop.
                Average_Winnings = sum(Q_Values.values()) / len(Q_Values)
                Average_Winnings_List.append(Average_Winnings)
                break