Advertisement
Guest User

Untitled

a guest
May 23rd, 2019
81
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.92 KB | None | 0 0
  1.  def getQValue(self, state, action):
  2.         """
  3.          Should return Q(state,action) = w * featureVector
  4.          where * is the dotProduct operator
  5.        """
  6.         qValue = 0
  7.         features = self.featExtractor.getFeatures(state, action)
  8.         for feature, featureValue in features.items():
  9.             qValue += self.getWeights()[feature] * featureValue
  10.  
  11.         return qValue
  12.  
  13.     def update(self, state, action, nextState, reward):
  14.         """
  15.           Should update your weights based on transition
  16.        """
  17.         alpha = self.alpha
  18.         gamma = self.discount
  19.         qValueMax = self.getValue(nextState)
  20.         qValue = self.getQValue(state, action)
  21.         diff = (reward + gamma * qValueMax) - qValue
  22.         features = self.featExtractor.getFeatures(state, action)
  23.         for feature, featureValue in features.items():
  24.             self.getWeights()[feature] += alpha * diff * featureValue
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement