Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
class ApproximateQAgent(PacmanQAgent):
    """
    Q-learning agent that approximates Q-values linearly.

    Instead of storing one value per (state, action) pair, the agent
    keeps one weight per feature and evaluates
    Q(state, action) = weights . features(state, action).
    Only getQValue and update are overridden; the remaining behaviour
    comes from PacmanQAgent.
    """

    def __init__(self, extractor='IdentityExtractor', **args):
        """
        Build the feature extractor and start with an empty weight table.

        extractor: name of the feature-extractor class; it is resolved
                   with util.lookup against this module's globals and
                   instantiated with no arguments.
        **args:    forwarded unchanged to PacmanQAgent.__init__.
        """
        extractor_class = util.lookup(extractor, globals())
        self.featExtractor = extractor_class()
        PacmanQAgent.__init__(self, **args)
        self.weights = util.Counter()

    def getWeights(self):
        """Return the weight table (the live object, not a copy)."""
        return self.weights

    def getQValue(self, state, action):
        """
        Return Q(state, action) as the dot product of the weight table
        and the feature vector extracted for (state, action).
        """
        feature_vector = self.featExtractor.getFeatures(state, action)
        return sum(
            self.weights[name] * amount
            for name, amount in feature_vector.items()
        )

    def update(self, state, action, nextState, reward):
        """
        Adjust the weights after observing the transition
        (state, action) -> nextState with the given reward.

        Each feature's weight moves by alpha * difference * feature value,
        where difference = (reward + discount * max_a' Q(nextState, a'))
        - Q(state, action).
        """
        current_q = self.getQValue(state, action)

        successor_qs = [
            self.getQValue(nextState, candidate)
            for candidate in self.getLegalActions(nextState)
        ]
        # With no legal actions in nextState the future value is 0.0.
        best_successor_q = max(successor_qs) if successor_qs else 0.0

        td_error = (reward + self.discount * best_successor_q) - current_q

        feature_vector = self.featExtractor.getFeatures(state, action)
        for name, amount in feature_vector.items():
            self.weights[name] += self.alpha * td_error * amount

    def final(self, state):
        """Run the superclass end-of-game handling; called after each game."""
        PacmanQAgent.final(self, state)

        # Training has just finished when the episode counter reaches
        # numTraining; this is the place to print the weights for debugging.
        if self.episodesSoFar == self.numTraining:
            pass
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement