Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def getQValue(self, state, action):
    """
    Return Q(state, action) = w * featureVector, where * is the dot product.

    The feature vector is obtained from
    self.featExtractor.getFeatures(state, action) and the weights from
    self.getWeights(). Each feature value is multiplied by the weight stored
    under the same key and the products are summed. The result is 0 when
    no features are returned.
    """
    features = self.featExtractor.getFeatures(state, action)
    # Look the weights up once instead of once per feature.
    # NOTE(review): assumes getWeights() returns the same container on every
    # call -- confirm against its definition.
    weights = self.getWeights()
    return sum(weights[feature] * value for feature, value in features.items())
def update(self, state, action, nextState, reward):
    """
    Update the weights based on the transition
    (state, action, nextState, reward).

    Computes
        diff = (reward + self.discount * self.getValue(nextState))
               - self.getQValue(state, action)
    and then adds self.alpha * diff * featureValue to the weight of every
    feature returned by self.featExtractor.getFeatures(state, action).
    Returns nothing; the weights container is modified in place.
    """
    # Both estimates are taken before any weight is modified, so the
    # difference is based on the pre-update weights.
    nextValue = self.getValue(nextState)
    currentQ = self.getQValue(state, action)
    diff = (reward + self.discount * nextValue) - currentQ

    # alpha * diff is the same for every feature; compute it once.
    step = self.alpha * diff

    features = self.featExtractor.getFeatures(state, action)
    # NOTE(review): assumes getWeights() returns the live weight container
    # (not a copy), so in-place updates persist -- confirm.
    weights = self.getWeights()
    for feature, value in features.items():
        weights[feature] += step * value
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement