Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
class QLearningAgent(ReinforcementAgent):
    """
    Q-Learning Agent.

    Learns an action-value function Q(state, action) from observed
    (state, action, nextState, reward) transitions and acts
    epsilon-greedily with respect to it.

    Instance variables inherited from ReinforcementAgent:
      - self.epsilon  (exploration probability)
      - self.alpha    (learning rate)
      - self.discount (discount factor)
    Inherited helpers:
      - self.getLegalActions(state) -> legal actions for a state
    """

    def __init__(self, **args):
        """Initialize the agent and an empty Q-value table."""
        ReinforcementAgent.__init__(self, **args)
        # Maps (state, action) -> Q-value; util.Counter returns 0 for
        # missing keys, which gives the required 0.0 default for
        # never-seen state/action pairs.
        self.qValues = util.Counter()

    def getQValue(self, state, action):
        """
        Return Q(state, action), or 0.0 if the pair has never been
        seen (Counter's missing-key default).
        """
        return self.qValues[(state, action)]

    def computeValueFromQValues(self, state):
        """
        Return max_action Q(state, action) over legal actions, or 0.0
        if there are no legal actions (the terminal state).
        """
        legalActions = self.getLegalActions(state)
        if not legalActions:
            return 0.0
        return max(self.getQValue(state, action) for action in legalActions)

    def computeActionFromQValues(self, state):
        """
        Return the best legal action in `state`, breaking ties
        uniformly at random. Return None if there are no legal
        actions (the terminal state).
        """
        legalActions = self.getLegalActions(state)
        if not legalActions:
            return None
        bestValue = max(self.getQValue(state, action) for action in legalActions)
        bestActions = [action for action in legalActions
                       if self.getQValue(state, action) == bestValue]
        # Random tie-breaking avoids a systematic bias toward
        # whichever action happens to come first.
        return random.choice(bestActions)

    def getAction(self, state):
        """
        Epsilon-greedy action selection: with probability self.epsilon
        take a uniformly random legal action, otherwise take the
        greedy policy action. Return None at the terminal state
        (no legal actions).
        """
        legalActions = self.getLegalActions(state)
        if not legalActions:
            return None
        if util.flipCoin(self.epsilon):
            return random.choice(legalActions)
        return self.getPolicy(state)

    def update(self, state, action, nextState, reward):
        """
        Apply the Q-learning update for one observed transition:

            Q(s,a) <- Q(s,a) + alpha * (r + gamma * V(s') - Q(s,a))

        where V(s') = max_a' Q(s', a') (0.0 at terminal states).
        Called by the parent class on every transition; never call it
        directly.
        """
        # BUG FIX: the original ended with util.raiseNotDefined(), so
        # every update raised after writing the new Q-value; the stray
        # placeholder call is removed.
        sample = reward + self.discount * self.getValue(nextState)
        self.qValues[(state, action)] = (
            self.getQValue(state, action)
            + self.alpha * (sample - self.getQValue(state, action))
        )

    def getPolicy(self, state):
        """Return the greedy action argmax_a Q(state, a)."""
        return self.computeActionFromQValues(state)

    def getValue(self, state):
        """Return the state value max_a Q(state, a)."""
        return self.computeValueFromQValues(state)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement