Advertisement
Guest User

Untitled

a guest
May 26th, 2018
172
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 7.90 KB | None | 0 0
  1. # qlearningAgents.py
  2. # ------------------
  3. # Licensing Information:  You are free to use or extend these projects for
  4. # educational purposes provided that (1) you do not distribute or publish
  5. # solutions, (2) you retain this notice, and (3) you provide clear
  6. # attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
  7. #
  8. # Attribution Information: The Pacman AI projects were developed at UC Berkeley.
  9. # The core projects and autograders were primarily created by John DeNero
  10. # (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
  11. # Student side autograding was added by Brad Miller, Nick Hay, and
  12. # Pieter Abbeel (pabbeel@cs.berkeley.edu).
  13.  
  14.  
  15. from game import *
  16. from learningAgents import ReinforcementAgent
  17. from featureExtractors import *
  18.  
  19. import random,util,math
  20.  
  21. class QLearningAgent(ReinforcementAgent):
  22.     """
  23.      Q-Learning Agent
  24.  
  25.      Functions you should fill in:
  26.        - computeValueFromQValues
  27.        - computeActionFromQValues
  28.        - getQValue
  29.        - getAction
  30.        - update
  31.  
  32.      Instance variables you have access to
  33.        - self.epsilon (exploration prob)
  34.        - self.alpha (learning rate)
  35.        - self.discount (discount rate)
  36.  
  37.      Functions you should use
  38.        - self.getLegalActions(state)
  39.          which returns legal actions for a state
  40.    """
  41.     def __init__(self, **args):
  42.         "You can initialize Q-values here..."
  43.         ReinforcementAgent.__init__(self, **args)
  44.  
  45.         "*** YOUR CODE HERE ***"
  46.         self.qValues = util.Counter()
  47.  
  48.     def getQValue(self, state, action):
  49.         """
  50.          Returns Q(state,action)
  51.          Should return 0.0 if we have never seen a state
  52.          or the Q node value otherwise
  53.        """
  54.         "*** YOUR CODE HERE ***"
  55.         return self.qValues[(state,action)]
  56.         util.raiseNotDefined()
  57.  
  58.  
  59.     def computeValueFromQValues(self, state):
  60.         """
  61.          Returns max_action Q(state,action)
  62.          where the max is over legal actions.  Note that if
  63.          there are no legal actions, which is the case at the
  64.          terminal state, you should return a value of 0.0.
  65.        """
  66.         "*** YOUR CODE HERE ***"
  67.         actions = self.getLegalActions(state)
  68.         if len(actions) == 0:
  69.             return 0.0
  70.         temp = []
  71.         for action in actions:
  72.             temp.append(self.getQValue(state,action))
  73.         return max(temp)
  74.  
  75.  
  76.  
  77.         util.raiseNotDefined()
  78.  
  79.     def computeActionFromQValues(self, state):
  80.         """
  81.          Compute the best action to take in a state.  Note that if there
  82.          are no legal actions, which is the case at the terminal state,
  83.          you should return None.
  84.        """
  85.         "*** YOUR CODE HERE ***"
  86.         actions = self.getLegalActions(state)
  87.         if len(actions) == 0:
  88.             return None
  89.         maxAction = actions[0]
  90.         qVal = self.getQValue(state,maxAction)
  91.         for action in actions:
  92.             temp = self.getQValue(state,action)
  93.             if temp > qVal:
  94.                 qVal = temp
  95.                 maxAction = action
  96.        
  97.         return maxAction
  98.  
  99.         util.raiseNotDefined()
  100.  
  101.     def getAction(self, state):
  102.         """
  103.          Compute the action to take in the current state.  With
  104.          probability self.epsilon, we should take a random action and
  105.          take the best policy action otherwise.  Note that if there are
  106.          no legal actions, which is the case at the terminal state, you
  107.          should choose None as the action.
  108.  
  109.          HINT: You might want to use util.flipCoin(prob)
  110.          HINT: To pick randomly from a list, use random.choice(list)
  111.        """
  112.         # Pick Action
  113.         legalActions = self.getLegalActions(state)
  114.         action = None
  115.         "*** YOUR CODE HERE ***"
  116.         if len(legalActions) == 0:
  117.             return action
  118.         if util.flipCoin(self.epsilon):
  119.             action = random.choice(legalActions)
  120.         else:
  121.             action = self.computeActionFromQValues(state)
  122.  
  123.         return action
  124.        
  125.        
  126.  
  127.     def update(self, state, action, nextState, reward):
  128.         """
  129.          The parent class calls this to observe a
  130.          state = action => nextState and reward transition.
  131.          You should do your Q-Value update here
  132.  
  133.          NOTE: You should never call this function,
  134.          it will be called on your behalf
  135.        """
  136.         "*** YOUR CODE HERE ***"
  137.         qVal = self.getQValue(state,action)
  138.         nextQVal = self.computeValueFromQValues(nextState)
  139.         self.qValues[state,action] = qVal + self.alpha*( reward + self.discount*nextQVal - qVal )
  140.  
  141.        
  142.  
  143.     def getPolicy(self, state):
  144.         return self.computeActionFromQValues(state)
  145.  
  146.     def getValue(self, state):
  147.         return self.computeValueFromQValues(state)
  148.  
  149.  
  150. class PacmanQAgent(QLearningAgent):
  151.     "Exactly the same as QLearningAgent, but with different default parameters"
  152.  
  153.     def __init__(self, epsilon=0.05,gamma=0.8,alpha=0.2, numTraining=0, **args):
  154.         """
  155.        These default parameters can be changed from the pacman.py command line.
  156.        For example, to change the exploration rate, try:
  157.            python pacman.py -p PacmanQLearningAgent -a epsilon=0.1
  158.  
  159.        alpha    - learning rate
  160.        epsilon  - exploration rate
  161.        gamma    - discount factor
  162.        numTraining - number of training episodes, i.e. no learning after these many episodes
  163.        """
  164.         args['epsilon'] = epsilon
  165.         args['gamma'] = gamma
  166.         args['alpha'] = alpha
  167.         args['numTraining'] = numTraining
  168.         self.index = 0  # This is always Pacman
  169.         QLearningAgent.__init__(self, **args)
  170.  
  171.     def getAction(self, state):
  172.         """
  173.        Simply calls the getAction method of QLearningAgent and then
  174.        informs parent of action for Pacman.  Do not change or remove this
  175.        method.
  176.        """
  177.         action = QLearningAgent.getAction(self,state)
  178.         self.doAction(state,action)
  179.         return action
  180.  
  181.  
  182. class ApproximateQAgent(PacmanQAgent):
  183.     """
  184.       ApproximateQLearningAgent
  185.  
  186.       You should only have to overwrite getQValue
  187.       and update.  All other QLearningAgent functions
  188.       should work as is.
  189.    """
  190.     def __init__(self, extractor='IdentityExtractor', **args):
  191.         self.featExtractor = util.lookup(extractor, globals())()
  192.         PacmanQAgent.__init__(self, **args)
  193.         self.weights = util.Counter()
  194.  
  195.     def getWeights(self):
  196.         return self.weights
  197.  
  198.     def getQValue(self, state, action):
  199.         """
  200.          Should return Q(state,action) = w * featureVector
  201.          where * is the dotProduct operator
  202.        """
  203.         "*** YOUR CODE HERE ***"
  204.         weights = self.getWeights()
  205.         features = self.featExtractor.getFeatures(state,action)
  206.         temp = 0
  207.         for feature in features:
  208.             temp += weights[(state,action,feature)]*features[feature]
  209.        
  210.         return temp
  211.        
  212.         util.raiseNotDefined()
  213.  
  214.     def update(self, state, action, nextState, reward):
  215.         """
  216.           Should update your weights based on transition
  217.        """
  218.         "*** YOUR CODE HERE ***"
  219.         weights = self.getWeights()
  220.         features = self.featExtractor.getFeatures(state,action)
  221.  
  222.         for feature in features:
  223.             difference = reward + self.discount*self.computeValueFromQValues(nextState) - self.getQValue(state,action)
  224.             self.weights[(state,action,feature)] += self.alpha*difference*features[feature]
  225.        
  226.  
  227.  
  228.  
  229.     def final(self, state):
  230.         "Called at the end of each game."
  231.         # call the super-class final method
  232.         PacmanQAgent.final(self, state)
  233.  
  234.         # did we finish training?
  235.         if self.episodesSoFar == self.numTraining:
  236.             # you might want to print your weights here for debugging
  237.             "*** YOUR CODE HERE ***"
  238.            
  239.             pass
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement