Advertisement
Guest User

Untitled

a guest
May 25th, 2019
85
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.21 KB | None | 0 0
  1.     def getQValue(self, state, action):
  2.         """
  3.          Should return Q(state,action) = w * featureVector
  4.          where * is the dotProduct operator
  5.        """
  6.         featureVector = self.featExtractor.getFeatures(state, action)
  7.  
  8.         QVal = None
  9.         for feature in featureVector.keys():
  10.             if (QVal == None):
  11.                 QVal = self.weights.get(feature) * featureVector.get(feature)
  12.             else:
  13.                 QVal += self.weights.get(feature) * featureVector.get(feature)
  14.         return QVal
  15.  
  16.     def update(self, state, action, nextState, reward):
  17.         """
  18.           Should update your weights based on transition
  19.        """
  20.         max = -float('inf')
  21.  
  22.         for action in self.getLegalActions(nextState):
  23.             QVal = self.getQValue(nextState, action)
  24.             if (QVal > max):
  25.                 max = QVal
  26.         if max == -float('inf'):
  27.             max = 0
  28.  
  29.         difference = reward + self.discount * max - self.getQValue(state, action)
  30.         featureVector = self.featExtractor.getFeatures(state, action)
  31.  
  32.         for feature in featureVector.keys():
  33.             self.weights[feature] += self.alpha * difference * featureVector.get(feature)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement