Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- if(Next_Coordinates != ConstantsHandler.Goal_State and Next_Coordinates not in ConstantsHandler.Cliff_States):
- #print(str(Next_Coordinates) + " is a valid next coordinate.")
- #print(Q_Values[SA])
- Next_SA = (Next_Coordinates, Next_Action)
- Best_Q_Value = Q_Values[Next_SA]
- # Increase the visit count for this state-action pair.
- Count[SA] = Count[SA] + ConstantsHandler.Seen_Increase_Rate
- # Move the Q value toward the standard reward plus the Q value of the next state-action pair.
- Q_Values[SA] = Q_Values[SA] + ConstantsHandler.Alpha * ((ConstantsHandler.Standard_Reward + Best_Q_Value) - Q_Values[SA])
- #print(Q_Values[SA])
- Current_State = Next_Coordinates
- elif(Next_Coordinates in ConstantsHandler.Cliff_States):
- #print(str(Next_Coordinates) + " is a cliff.")
- #print(Q_Values[SA])
- # Increase the visit count for this state-action pair.
- Count[SA] = Count[SA] + ConstantsHandler.Seen_Increase_Rate
- # Move the Q value toward the cliff reward by step Alpha, since this move entered a cliff state.
- Q_Values[SA] = Q_Values[SA] + ConstantsHandler.Alpha * (ConstantsHandler.Cliff_Reward - Q_Values[SA])
- #print(Q_Values[SA])
- Average_Winnings = sum(Q_Values.values()) / len(Q_Values)
- Average_Winnings_List.append(Average_Winnings)
- break
- #Check if we ARE in the goal state
- elif (Next_Coordinates == ConstantsHandler.Goal_State):
- #print(str(Next_Coordinates) + " is the goal.")
- #print(Q_Values[SA])
- # Increase the visit count for this state-action pair.
- Count[SA] = Count[SA] + ConstantsHandler.Seen_Increase_Rate
- # Move the Q value toward the goal reward by step Alpha; no next-state Q value is added for the goal.
- Q_Values[SA] = Q_Values[SA] + ConstantsHandler.Alpha * (ConstantsHandler.Goal_Reward - Q_Values[SA])
- #print(Q_Values[SA])
- Average_Winnings = sum(Q_Values.values()) / len(Q_Values)
- Average_Winnings_List.append(Average_Winnings)
- break
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement