Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def computeActionFromValues(self, state):
    """
    Return the best action in ``state`` according to the values
    currently stored in self.values.

    Each legal action is scored by its expected Q-value, i.e. the
    probability-weighted sum over every possible successor of
    (transition reward + discount * value of the successor), and the
    highest-scoring action is returned. Ties are broken in favour of
    the action listed first by the MDP.

    Returns None when ``state`` is terminal or has no legal actions.

    NOTE(review): assumes the enclosing agent exposes ``self.discount``
    and that the MDP provides ``getReward(state, action, nextState)``,
    as in the standard value-iteration agent skeleton -- confirm
    against the class definition.
    """
    if self.mdp.isTerminal(state):
        return None

    actions = self.mdp.getPossibleActions(state)
    if not actions:
        # No legal move: max() on an empty sequence would raise.
        return None

    def expected_q_value(action):
        # Weigh every outcome by its probability; looking only at the
        # most likely successor mis-ranks actions in a stochastic MDP.
        return sum(
            prob * (self.mdp.getReward(state, action, next_state)
                    + self.discount * self.getValue(next_state))
            for next_state, prob
            in self.mdp.getTransitionStatesAndProbs(state, action)
        )

    return max(actions, key=expected_q_value)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement