Advertisement
Guest User

Untitled

a guest
May 26th, 2016
116
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.01 KB | None | 0 0
  def __init__(self, mdp, discount=0.9, iterations=20):
      """
      Run batch value iteration on an MDP and cache the results.

      The agent takes an mdp on construction, runs the indicated
      number of iterations, and then acts according to the
      resulting policy.

      Parameters:
        mdp        -- the Markov decision process to solve
        discount   -- discount factor applied to future values
        iterations -- number of value-iteration sweeps to run

      MDP methods used here or by the helper methods:
          mdp.getStates()
          mdp.getPossibleActions(state)
          mdp.getTransitionStatesAndProbs(state, action)
          mdp.getReward(state, action, nextState)
          mdp.isTerminal(state)
      """
      self.mdp = mdp
      self.discount = discount
      self.iterations = iterations
      self.values = util.Counter()  # A Counter is a dict with default 0
      states = self.mdp.getStates()

      # Cached greedy action per state. 'north' is kept as the
      # placeholder for states that never receive a computed action
      # (i.e. states with no legal actions).
      self.actions = util.Counter()
      for state in states:
          self.actions[state] = 'north'

      for _ in range(self.iterations):
          # Batch update: every new value is computed from the values
          # of the previous sweep (self.values), never from values
          # already updated during this sweep.
          temp_values = util.Counter()
          for state in states:
              q_values = [
                  self.computeQValueFromValues(state, action)
                  for action in self.mdp.getPossibleActions(state)
              ]
              # States without legal actions keep the Counter default 0.
              # Using max() instead of a -9999 sentinel keeps states
              # whose best Q-value is very negative from being dropped.
              if q_values:
                  temp_values[state] = max(q_values)
          self.values = temp_values

      # Derive the cached policy from the FINAL values, so it is
      # greedy with respect to self.values rather than lagging one
      # sweep behind. max() returns the first maximal action, which
      # matches the original strict '<' tie-breaking.
      for state in states:
          legal_actions = self.mdp.getPossibleActions(state)
          if legal_actions:
              self.actions[state] = max(
                  legal_actions,
                  key=lambda a, s=state: self.computeQValueFromValues(s, a),
              )
  43.  
  44.  
  45.  
  46.  
  def getValue(self, state):
      """
      Return the value of the state (computed in __init__).

      The lookup goes straight to self.values, so the result for a
      state that was never assigned depends on that container's
      missing-key behavior.
      """
      return self.values[state]
  52.  
  53.  
  def computeQValueFromValues(self, state, action):
      """
      Compute the Q-value of action in state from the
      value function stored in self.values.

      The result is the expectation, over every (nextState, prob)
      pair returned by mdp.getTransitionStatesAndProbs, of the
      immediate reward plus the discounted value of nextState.
      Returns 0 when there are no transitions.
      """
      qvalue = 0
      # Fetch the successor states and their transition probabilities.
      transitions = self.mdp.getTransitionStatesAndProbs(state, action)
      for nextState, prob in transitions:
          reward = self.mdp.getReward(state, action, nextState)
          qvalue += prob * (reward + self.discount * self.getValue(nextState))
      return qvalue
  68.  
  def computeActionFromValues(self, state):
      """
      The policy is the best action in the given state
      according to the values currently stored in self.values.

      The action is recomputed from the current Q-values on every
      call rather than read from a cache, so it always reflects the
      present contents of self.values. Ties are broken in favor of
      the first maximal action in mdp.getPossibleActions(state).

      Returns None if the state is terminal or has no legal actions.
      """
      if self.mdp.isTerminal(state):
          return None

      legal_actions = self.mdp.getPossibleActions(state)
      if not legal_actions:
          return None

      return max(
          legal_actions,
          key=lambda action: self.computeQValueFromValues(state, action),
      )
  83.  
  def getPolicy(self, state):
      """
      Return the policy's action for the state by delegating to
      computeActionFromValues.
      """
      return self.computeActionFromValues(state)
  86.  
  def getAction(self, state):
      """
      Returns the policy at the state (no exploration).

      Delegates to computeActionFromValues, so the result is the same
      as getPolicy(state).
      """
      return self.computeActionFromValues(state)
  90.  
  def getQValue(self, state, action):
      """
      Return the Q-value of (state, action) by delegating to
      computeQValueFromValues.
      """
      return self.computeQValueFromValues(state, action)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement