Untitled

def computeActionFromValues(self, state):
        """
          Return the policy action for ``state`` according to the values
          currently exposed through ``self.getValue``.

          Each legal action is scored by the value of its single most
          probable successor state, and the highest-scoring action is
          returned.  Ties are broken in favour of the action (and the
          successor) listed first.

          Returns None when ``state`` is terminal or when it has no legal
          actions.

          NOTE(review): only the most likely successor is looked at; the
          remaining transition probabilities, and any rewards or discount,
          are ignored.  This can differ from the expected-value (Q-value)
          greedy policy -- confirm that this approximation is intended.
        """

        if self.mdp.isTerminal(state):
            return None

        # List of (action, value of the most probable next state) tuples.
        scoredActions = []
        for action in self.mdp.getPossibleActions(state):
            # Score the action by the successor it most likely leads to,
            # as reported by mdp.getTransitionStatesAndProbs.
            transitions = self.mdp.getTransitionStatesAndProbs(state, action)
            likeliest = max(transitions, key=lambda pair: pair[1])
            scoredActions.append((action, self.getValue(likeliest[0])))

        # A non-terminal state may still offer no legal actions; max() on
        # an empty list would raise ValueError instead of returning None.
        if not scoredActions:
            return None

        # Each entry is (action, value); return the action of the best one.
        best = max(scoredActions, key=lambda pair: pair[1])
        return best[0]