Untitled

    def getQValue(self, state, action):
        """
          Should return Q(state,action) = w * featureVector
          where * is the dotProduct operator
        """
        featureVector = self.featExtractor.getFeatures(state, action)

        QVal = None
        for feature in featureVector.keys():
            if (QVal == None):
                QVal = self.weights.get(feature) * featureVector.get(feature)
            else:
                QVal += self.weights.get(feature) * featureVector.get(feature)
        return QVal

    def update(self, state, action, nextState, reward):
        """
           Should update your weights based on transition
        """
        max = -float('inf')

        for action in self.getLegalActions(nextState):
            QVal = self.getQValue(nextState, action)
            if (QVal > max):
                max = QVal
        if max == -float('inf'):
            max = 0

        difference = reward + self.discount * max - self.getQValue(state, action)
        featureVector = self.featExtractor.getFeatures(state, action)

        for feature in featureVector.keys():
            self.weights[feature] += self.alpha * difference * featureVector.get(feature)