Advertisement
Guest User

Untitled

a guest
Apr 26th, 2017
69
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.30 KB | None | 0 0
  1. if(Next_Coordinates != ConstantsHandler.Goal_State and Next_Coordinates not in ConstantsHandler.Cliff_States):
  2. #print(str(Next_Coordinates) + " is a valid next coordinate.")
  3. #print(Q_Values[SA])
  4.  
  5. Next_SA = (Next_Coordinates, Next_Action)
  6.  
  7. Best_Q_Value = Q_Values[Next_SA]
  8.  
  9. #Update the amount of times we have seen this State Action mapping.
  10. Count[SA] = Count[SA] + ConstantsHandler.Seen_Increase_Rate
  11.  
  12. # Based on the expected Q value, update the Q_Value entries
  13. Q_Values[SA] = Q_Values[SA] + ConstantsHandler.Alpha * ((ConstantsHandler.Standard_Reward + Best_Q_Value) - Q_Values[SA])
  14. #print(Q_Values[SA])
  15. Current_State = Next_Coordinates
  16.  
  17.  
  18. elif(Next_Coordinates in ConstantsHandler.Cliff_States):
  19. #print(str(Next_Coordinates) + " is a cliff.")
  20. #print(Q_Values[SA])
  21. # Update the amount of times we have seen this State Action mapping.
  22. Count[SA] = Count[SA] + ConstantsHandler.Seen_Increase_Rate
  23.  
  24. # Update Q Value to the Loss Rate since we lost.
  25. Q_Values[SA] = Q_Values[SA] + ConstantsHandler.Alpha * (ConstantsHandler.Cliff_Reward - Q_Values[SA])
  26. #print(Q_Values[SA])
  27.  
  28. Average_Winnings = sum(Q_Values.values()) / len(Q_Values)
  29. Average_Winnings_List.append(Average_Winnings)
  30. break
  31.  
  32.  
  33. #Check if we ARE in the goal state
  34. elif (Next_Coordinates == ConstantsHandler.Goal_State):
  35. #print(str(Next_Coordinates) + " is the goal.")
  36. #print(Q_Values[SA])
  37. # Update the amount of times we have seen this state action mapping
  38. Count[SA] = Count[SA] + ConstantsHandler.Seen_Increase_Rate
  39.  
  40. # Update the Q Value entries based on our reward based on the current state and staying
  41. Q_Values[SA] = Q_Values[SA] + ConstantsHandler.Alpha * (ConstantsHandler.Goal_Reward - Q_Values[SA])
  42. #print(Q_Values[SA])
  43.  
  44. Average_Winnings = sum(Q_Values.values()) / len(Q_Values)
  45. Average_Winnings_List.append(Average_Winnings)
  46. break
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement