Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def __init__(self, mdp, discount = 0.9, iterations = 20):
    """
    Build the agent and run batch value iteration on the given mdp.

    The agent takes an mdp on construction, runs the indicated number
    of iterations and afterwards acts according to the resulting policy.

    mdp: object providing getStates(), getPossibleActions(state),
        getTransitionStatesAndProbs(state, action),
        getReward(state, action, nextState) and isTerminal(state).
    discount: factor applied to the value of successor states.
    iterations: number of full sweeps over the state space.

    After construction, self.values maps each state to its estimated
    value and self.actions maps each state to the greedy action for
    those final values.
    """
    self.mdp = mdp
    self.discount = discount
    self.iterations = iterations
    self.values = util.Counter()  # A Counter is a dict with default 0
    states = self.mdp.getStates()

    # Placeholder policy; states without any legal action keep this entry.
    self.actions = util.Counter()
    for state in states:
        self.actions[state] = 'north'

    for _ in range(self.iterations):
        # Batch update: every Q-value of this sweep reads the previous
        # sweep's table (self.values); the new table is swapped in only
        # once the sweep is complete.
        next_values = util.Counter()
        for state in states:
            q_values = [
                self.computeQValueFromValues(state, action)
                for action in self.mdp.getPossibleActions(state)
            ]
            # No sentinel such as -9999: arbitrarily negative Q-values
            # are handled; states without actions keep the default 0.
            if q_values:
                next_values[state] = max(q_values)
        self.values = next_values

    # Derive the policy from the FINAL values, so it is greedy with
    # respect to what self.values holds now (also when iterations == 0).
    for state in states:
        best_q = None
        for action in self.mdp.getPossibleActions(state):
            q_value = self.computeQValueFromValues(state, action)
            # Strict comparison: the first of several tied actions wins.
            if best_q is None or q_value > best_q:
                best_q = q_value
                self.actions[state] = action
def getValue(self, state):
    """
    Look up the value estimate stored for the given state
    (the table is filled in by __init__).
    """
    value_table = self.values
    return value_table[state]
def computeQValueFromValues(self, state, action):
    """
    Return the Q-value of taking the action in the state, using the
    value function held in self.values: the probability-weighted sum,
    over all successor states, of the immediate reward plus the
    discounted value of the successor.
    """
    "*** YOUR CODE HERE ***"
    total = 0
    # Fetch the successor states and their transition probabilities.
    outcomes = self.mdp.getTransitionStatesAndProbs(state, action)
    for outcome in outcomes:
        successor, probability = outcome[0], outcome[1]
        reward = self.mdp.getReward(state, action, successor)
        discounted_future = self.discount * self.getValue(successor)
        total += probability * (reward + discounted_future)
    return total
def computeActionFromValues(self, state):
    """
    Return the best action in the given state according to the values
    currently stored in self.values.

    The action is the arg-max over the Q-values of all legal actions,
    recomputed on every call so it always reflects the current value
    table. Ties go to the first action returned by
    mdp.getPossibleActions. Returns None when the state is terminal or
    has no legal actions.
    """
    if self.mdp.isTerminal(state):
        return None

    best_action = None
    best_q = None
    for action in self.mdp.getPossibleActions(state):
        q_value = self.computeQValueFromValues(state, action)
        # Strict comparison keeps the first of several tied actions.
        if best_q is None or q_value > best_q:
            best_q = q_value
            best_action = action
    return best_action
def getPolicy(self, state):
    """Return the policy's action for the state; delegates to computeActionFromValues."""
    chosen_action = self.computeActionFromValues(state)
    return chosen_action
def getAction(self, state):
    """Return the action the policy picks at the state (no exploration)."""
    policy_action = self.computeActionFromValues(state)
    return policy_action
def getQValue(self, state, action):
    """Return the Q-value of the (state, action) pair; delegates to computeQValueFromValues."""
    q_value = self.computeQValueFromValues(state, action)
    return q_value
Advertisement
Add Comment
Please sign in to add a comment.
Advertisement