#!/usr/bin/env python3
from misio.optilio.pacman import StdIOPacmanRunner
import json
from misio.pacman.game import *
from misio.pacman.learningAgents import ReinforcementAgent
from misio.pacman.util import CustomCounter, lookup
import random
import math
import datetime


def flipCoin(p):
    # Return True with probability p.
    return random.random() < p

class MySimpleExtractor:
    """
    Returns simple features for a basic reflex Pacman:
    - whether food will be eaten
    - how far away the next food is
    - whether a ghost collision is imminent
    - whether a ghost is one step away
    """
    crossings = None      # cached list of junction positions for the current layout
    food_in_trap = None
    total_fruits = -1

    def del_crossings(self):
        # Reset the per-layout caches between games.
        self.crossings = None
        self.food_in_trap = None

    def find_all_crossings(self, state):
        # A crossing is any cell with more than three legal neighbours,
        # i.e. a junction where Pacman has a real choice of direction.
        walls = state.getWalls()
        self.crossings = []
        for i in range(walls.width):
            for j in range(walls.height):
                if len(Actions.getLegalNeighbors((i, j), walls)) > 3:
                    self.crossings.append((i, j))

    def closest_food(self, pos, food, walls):
        """
        closest_food -- similar to the function from the search project;
        here it's all in one place. BFS from pos to the nearest food dot.
        """
        fringe = [(pos[0], pos[1], 0)]
        expanded = set()
        while fringe:
            pos_x, pos_y, dist = fringe.pop(0)
            if (pos_x, pos_y) in expanded:
                continue
            expanded.add((pos_x, pos_y))
            # if we find food at this location then exit
            if food[pos_x][pos_y]:
                return dist
            # otherwise spread out from the location to its neighbours
            nbrs = Actions.getLegalNeighbors((pos_x, pos_y), walls)
            for nbr_x, nbr_y in nbrs:
                fringe.append((nbr_x, nbr_y, dist + 1))
        # no food found
        return None

    def is_trap(self, pos, walls, ghosts):
        """
        BFS from pos, treating ghost positions as blocked. The position is
        considered a trap unless the search can reach at least two distinct
        crossings, i.e. unless there is more than one way out.
        """
        if (pos[0], pos[1]) in self.crossings:
            return False
        fringe = [(pos[0], pos[1], 0)]
        expanded = set()
        crossing_reached = False
        while fringe:
            pos_x, pos_y, dist = fringe.pop(0)
            if (pos_x, pos_y) in expanded or (pos_x, pos_y) in ghosts:
                continue
            expanded.add((pos_x, pos_y))
            if (pos_x, pos_y) in self.crossings:
                if crossing_reached:
                    # a second crossing is reachable: not a trap
                    return False
                crossing_reached = True
                continue
            nbrs = Actions.getLegalNeighbors((pos_x, pos_y), walls)
            for nbr_x, nbr_y in nbrs:
                fringe.append((nbr_x, nbr_y, dist + 1))
        # fewer than two crossings reachable: treat as a trap
        return True

    def closest_ghost_or_pills(self, pos, ghosts_or_pills, walls):
        """BFS distance from pos to the nearest position in ghosts_or_pills."""
        fringe = [(pos[0], pos[1], 0)]
        expanded = set()
        while fringe:
            pos_x, pos_y, dist = fringe.pop(0)
            if (pos_x, pos_y) in expanded:
                continue
            expanded.add((pos_x, pos_y))
            # if we find a target at this location then exit
            if (pos_x, pos_y) in ghosts_or_pills:
                return dist
            # otherwise spread out from the location to its neighbours
            nbrs = Actions.getLegalNeighbors((pos_x, pos_y), walls)
            for nbr_x, nbr_y in nbrs:
                fringe.append((nbr_x, nbr_y, dist + 1))
        # no target found
        return None

    def getFeatures(self, state, action, last_move):
        # extract the grid of food and wall locations and get the ghost locations
        food = state.getFood()
        walls = state.getWalls()
        ghosts = state.getGhostPositions()
        pills = state.getCapsules()
        ghost_states = state.getGhostStates()
        scared_ghosts = []
        not_scared_ghosts = []
        for n_ghost, ghost in enumerate(ghosts):
            if ghost_states[n_ghost].scaredTimer > 1:
                scared_ghosts.append(ghost)
            else:
                not_scared_ghosts.append(ghost)
        if self.crossings is None:
            self.find_all_crossings(state)
        features = CustomCounter()
        features["bias"] = 1.0
        # compute the location of pacman after he takes the action
        x, y = state.getPacmanPosition()
        dx, dy = Actions.directionToVector(action)
        next_x, next_y = int(x + dx), int(y + dy)
        scared_ghosts_in_neighbourhood = []
        not_scared_ghosts_in_neighbourhood = []
        for g in ghosts:
            if (next_x, next_y) in Actions.getLegalNeighbors(g, walls):
                if g in scared_ghosts:
                    scared_ghosts_in_neighbourhood.append(g)
                else:
                    not_scared_ghosts_in_neighbourhood.append(g)
        # count the ghosts one step away, normalised by the total ghost count
        features["#-of-scared-ghosts-1-step-away"] = len(scared_ghosts_in_neighbourhood) / len(ghosts)
        features["#-of-ghosts-1-step-away"] = len(not_scared_ghosts_in_neighbourhood) / len(ghosts)
        if len(scared_ghosts) == 0:
            dist = self.closest_ghost_or_pills((next_x, next_y), pills, walls)
            if dist is not None:
                features["closest-pill"] = float(dist) / (walls.width * walls.height)
        if not features["#-of-ghosts-1-step-away"] and food[next_x][next_y]:
            features["eats-food"] = 1.0
        if Actions.reverseDirection(last_move) == action:
            # penalise immediately undoing the previous move
            features["loop"] = 1.0
        if self.is_trap((next_x, next_y), walls, scared_ghosts):
            features["trap"] = 1.0
        dist = closestFood((next_x, next_y), food, walls)
        if dist is not None:
            features["closest-food"] = float(dist) / (walls.width * walls.height)
        return features

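# For illustration, a feature vector produced by MySimpleExtractor.getFeatures
# might look like this (the values here are made up):
#   {"bias": 1.0, "#-of-scared-ghosts-1-step-away": 0.5,
#    "eats-food": 1.0, "closest-food": 0.02}
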
def closestFood(pos, food, walls):
    """
    closestFood -- similar to the function from the search project; here
    it's all in one place. Module-level twin of MySimpleExtractor.closest_food.
    """
    fringe = [(pos[0], pos[1], 0)]
    expanded = set()
    while fringe:
        pos_x, pos_y, dist = fringe.pop(0)
        if (pos_x, pos_y) in expanded:
            continue
        expanded.add((pos_x, pos_y))
        # if we find food at this location then exit
        if food[pos_x][pos_y]:
            return dist
        # otherwise spread out from the location to its neighbours
        nbrs = Actions.getLegalNeighbors((pos_x, pos_y), walls)
        for nbr_x, nbr_y in nbrs:
            fringe.append((nbr_x, nbr_y, dist + 1))
    # no food found
    return None

class QLearningAgent(ReinforcementAgent):
    """
    Q-Learning Agent

    Functions you should fill in:
    - computeValueFromQValues
    - computeActionFromQValues
    - getQValue
    - getAction
    - update

    Instance variables you have access to:
    - self.epsilon (exploration probability)
    - self.alpha (learning rate)
    - self.discount (discount rate)

    Functions you should use:
    - self.getLegalActions(state), which returns the legal actions for a state
    """
    def __init__(self, **args):
        "You can initialize Q-values here..."
        ReinforcementAgent.__init__(self, **args)
        self.qvalues = {}

    def getQValue(self, state, action):
        """
        Returns Q(state, action).
        Should return 0.0 if we have never seen a state,
        or the Q node value otherwise.
        """
        return self.qvalues.get((state, action), 0.0)

    def computeValueFromQValues(self, state):
        """
        Returns max_action Q(state, action), where the max is over legal
        actions. Note that if there are no legal actions, which is the case
        at the terminal state, you should return a value of 0.0.
        """
        q_values = [self.getQValue(state, action) for action in self.getLegalActions(state)]
        if not q_values:
            return 0.0
        return max(q_values)

    def computeActionFromQValues(self, state):
        """
        Compute the best action to take in a state. Note that if there
        are no legal actions, which is the case at the terminal state,
        you should return None.
        """
        action_list = self.getLegalActions(state)
        if not action_list:
            return None
        best_q = self.computeValueFromQValues(state)
        # break ties between equally good actions at random
        max_action_list = [action for action in action_list if self.getQValue(state, action) == best_q]
        return random.choice(max_action_list)

    last_move = None

    def getAction(self, state):
        """
        Compute the action to take in the current state. With
        probability self.epsilon, we should take a random action and
        take the best policy action otherwise. Note that if there are
        no legal actions, which is the case at the terminal state, you
        should choose None as the action.

        HINT: You might want to use util.flipCoin(prob)
        HINT: To pick randomly from a list, use random.choice(list)
        """
        legalActions = self.getLegalActions(state)
        if not legalActions:
            # terminal state: no legal actions
            return None
        if flipCoin(self.epsilon):
            action = random.choice(legalActions)
        else:
            action = self.computeActionFromQValues(state)
        self.last_move = action
        return action

    def update(self, state, action, nextState, reward):
        """
        Standard tabular Q-learning update (a minimal sketch; the original
        paste left this unimplemented, and ApproximateQAgent overrides it):
        Q(s, a) <- Q(s, a) + alpha * (r + gamma * max_a' Q(s', a') - Q(s, a))
        """
        q_val = self.getQValue(state, action)
        diff = reward + self.discount * self.computeValueFromQValues(nextState) - q_val
        self.qvalues[(state, action)] = q_val + self.alpha * diff

    def getPolicy(self, state):
        return self.computeActionFromQValues(state)

    def getValue(self, state):
        return self.computeValueFromQValues(state)

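# A self-contained sketch of the tabular update above on a toy two-state
# chain MDP (illustrative only; the states, actions and rewards below are
# made up and independent of the Pacman framework).
def _toy_q_learning_demo(alpha=0.2, gamma=0.8, episodes=100):
    # (state, action) -> (next_state, reward); "B" loops on itself with reward 1
    transitions = {("A", "right"): ("B", 0.0), ("B", "right"): ("B", 1.0)}
    q = {}  # (state, action) -> Q-value, defaulting to 0.0
    for _ in range(episodes):
        state = "A"
        for _ in range(10):
            next_state, reward = transitions[(state, "right")]
            old = q.get((state, "right"), 0.0)
            best_next = q.get((next_state, "right"), 0.0)
            q[(state, "right")] = old + alpha * (reward + gamma * best_next - old)
            state = next_state
    # Q("B", "right") converges towards 1 / (1 - gamma) = 5.0
    return q
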
class PacmanQAgent(QLearningAgent):
    "Exactly the same as QLearningAgent, but with different default parameters"

    def __init__(self, epsilon=0.05, gamma=0.8, alpha=0.2, numTraining=2000, **args):
        """
        These default parameters can be changed from the pacman.py command line.
        For example, to change the exploration rate, try:
            python pacman.py -p PacmanQLearningAgent -a epsilon=0.1

        alpha - learning rate
        epsilon - exploration rate
        gamma - discount factor
        numTraining - number of training episodes, i.e. no learning after these many episodes
        """
        args['epsilon'] = epsilon
        args['gamma'] = gamma
        args['alpha'] = alpha
        args['numTraining'] = numTraining
        self.index = 0  # This is always Pacman
        QLearningAgent.__init__(self, **args)

    def getAction(self, state):
        """
        Simply calls the getAction method of QLearningAgent and then
        informs the parent of the action for Pacman. Do not change or
        remove this method.
        """
        action = QLearningAgent.getAction(self, state)
        self.doAction(state, action)
        return action

class ApproximateQAgent(PacmanQAgent):
    def __init__(self, extractor='MySimpleExtractor', **args):
        self.featExtractor = MySimpleExtractor()
        PacmanQAgent.__init__(self, **args)
        self.weights = {}
        # example of weights learned in a previous training run:
        # self.weights = {'#-of-ghosts-1-step-away': -939.9306551834235,
        #                 '#-of-scared-ghosts-1-step-away': 324.6366491699704,
        #                 'closest-food': -91.15343783290118,
        #                 'bias': 5.405996263103678,
        #                 'loop': -7.181170081949115,
        #                 'eats-food': 27.91072255450381}

    def getWeights(self):
        return self.weights

    def getQValue(self, state, action):
        """
        Should return Q(state, action) = w * featureVector,
        where * is the dot-product operator.
        """
        features = self.featExtractor.getFeatures(state, action, self.last_move)
        q_value = 0.0
        for feature_name, feature_value in features.items():
            q_value += feature_value * self.get_weight_val(feature_name)
        return q_value

    def get_weight_val(self, feature_name):
        # Features seen for the first time get a random initial weight.
        if feature_name not in self.weights:
            self.weights[feature_name] = random.uniform(-100, 100)
        return self.weights[feature_name]

    def update(self, state, action, nextState, reward):
        """
        Should update your weights based on the transition:
        w_i <- w_i + alpha * (r + gamma * max_a' Q(s', a') - Q(s, a)) * f_i(s, a)
        """
        features = self.featExtractor.getFeatures(state, action, self.last_move)
        q_val = self.getQValue(state, action)
        diff = reward + self.discount * self.computeValueFromQValues(nextState) - q_val
        for feature_name, feature_value in features.items():
            # getQValue above has already initialised any unseen weights
            # via get_weight_val, so direct indexing is safe here
            self.weights[feature_name] += self.alpha * diff * feature_value

    def final(self, state):
        "Called at the end of each game."
        # call the super-class final method
        PacmanQAgent.final(self, state)
        self.featExtractor.del_crossings()
        # did we finish training?
        if self.episodesSoFar == self.numTraining:
            # you might want to print your weights here for debugging
            print(self.weights)

    def load_weights_from_json(self, name):
        with open(name) as fp:
            self.weights = json.load(fp)

    def save_weights_to_json(self, score):
        self.last_move = None
        # avoid spaces and colons in the file name so it is valid on every OS
        stamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        f_name = "./weights/agent5_{}-score_{}.json".format(stamp, str(score).replace(".", "_"))
        with open(f_name, 'w') as fp:
            json.dump(self.weights, fp)

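# Hypothetical entry point (not part of the original paste): a sketch that
# assumes StdIOPacmanRunner exposes run_game(agent) and that the judge first
# sends the number of games on stdin, as in the course template. Adjust to
# the actual misio API if it differs.
if __name__ == "__main__":
    runner = StdIOPacmanRunner()
    agent = ApproximateQAgent(epsilon=0.0, numTraining=0)  # greedy, no further learning
    # agent.load_weights_from_json("weights.json")  # hypothetical pre-trained weights file
    games_num = int(input())
    for _ in range(games_num):
        runner.run_game(agent)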