Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def getQValue(self, state, action):
    """
    Return Q(state, action) = w * featureVector, where * is the dot product.

    The feature vector comes from self.featExtractor.getFeatures(state, action)
    and is iterated as a mapping of feature -> value. Each value is multiplied
    by the matching entry of self.weights and the products are summed.

    A feature with no stored weight contributes 0 instead of raising, and an
    empty feature vector yields 0 rather than None, so callers can always
    compare and combine the result numerically.
    """
    featureVector = self.featExtractor.getFeatures(state, action)
    # .get(feature, 0) avoids multiplying by None for not-yet-learned features.
    return sum(
        self.weights.get(feature, 0) * value
        for feature, value in featureVector.items()
    )
def update(self, state, action, nextState, reward):
    """
    Update the weights based on the observed transition.

    Computes
        difference = reward + discount * max_a' Q(nextState, a') - Q(state, action)
    and then, for every feature f_i of (state, action),
        w_i += alpha * difference * f_i

    When nextState has no legal actions, the max term is taken to be 0.
    Returns None; the only effect is the in-place change to self.weights.
    """
    # Use a distinct loop name: reusing `action` here would overwrite the
    # action that was actually taken and corrupt the update below.
    nextQValues = [
        self.getQValue(nextState, nextAction)
        for nextAction in self.getLegalActions(nextState)
    ]
    bestNextQValue = max(nextQValues) if nextQValues else 0

    # Q(state, action) is evaluated before any weight is modified.
    difference = (
        reward
        + self.discount * bestNextQValue
        - self.getQValue(state, action)
    )

    featureVector = self.featExtractor.getFeatures(state, action)
    for feature, value in featureVector.items():
        # Read with a default so a never-seen feature starts from weight 0.
        self.weights[feature] = (
            self.weights.get(feature, 0) + self.alpha * difference * value
        )
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement