#!/usr/bin/env python3

from misio.optilio.pacman import StdIOPacmanRunner
import json

from misio.pacman.game import *
from misio.pacman.learningAgents import ReinforcementAgent
from misio.pacman.util import CustomCounter, lookup
import random, math
import datetime


def flipCoin(p):
    """Return True with probability p."""
    return random.random() < p


class MySimpleExtractor:
    """
    Returns simple features for a basic reflex Pacman:
    - whether food will be eaten by the proposed move
    - how far away the closest food is
    - how many ghosts (scared and not scared) are one step away
    - distance to the closest power pill when no ghost is scared
    - whether the move reverses the previous move ("loop")
    - whether the move leads into a dead end ("trap")
    """

    crossings = None
    food_in_trap = None
    total_fruits = -1

    def del_crossings(self):
        # reset cached layout information between games
        self.crossings = None
        self.food_in_trap = None

    def find_all_crossings(self, state):
        # cache every grid position with more than three legal neighbours,
        # i.e. every intersection of the maze
        walls = state.getWalls()
        self.crossings = []
        for i in range(walls.width):
            for j in range(walls.height):
                if len(Actions.getLegalNeighbors((i, j), walls)) > 3:
                    self.crossings.append((i, j))

    def closest_food(self, pos, food, walls):
        """
        closest_food -- this is similar to the function that we have
        worked on in the search project; here it's all in one place
        """
        fringe = [(pos[0], pos[1], 0)]
        expanded = set()
        while fringe:
            pos_x, pos_y, dist = fringe.pop(0)
            if (pos_x, pos_y) in expanded:
                continue
            expanded.add((pos_x, pos_y))
            # if we find food at this location then exit
            if food[pos_x][pos_y]:
                return dist
            # otherwise spread out from the location to its neighbours
            nbrs = Actions.getLegalNeighbors((pos_x, pos_y), walls)
            for nbr_x, nbr_y in nbrs:
                fringe.append((nbr_x, nbr_y, dist + 1))
        # no food found
        return None

    def is_trap(self, pos, walls, ghosts):
        """
        BFS heuristic: treat pos as a trap (dead end) unless the search can
        reach more than one crossing without stepping on one of the given
        ghost positions. Crossings themselves are never traps.
        """
        if (pos[0], pos[1]) in self.crossings:
            return False

        fringe = [(pos[0], pos[1], 0)]
        expanded = set()
        trap_cond = [True, True]
        while fringe:
            pos_x, pos_y, dist = fringe.pop(0)
            if (pos_x, pos_y) in expanded or (pos_x, pos_y) in ghosts:
                continue
            expanded.add((pos_x, pos_y))
            if (pos_x, pos_y) in self.crossings:
                # a second reachable crossing means at least two ways out -> not a trap
                if not trap_cond[0]:
                    return False
                else:
                    trap_cond[0] = False
                    continue

            nbrs = Actions.getLegalNeighbors((pos_x, pos_y), walls)
            for nbr_x, nbr_y in nbrs:
                fringe.append((nbr_x, nbr_y, dist + 1))
        return trap_cond[0] or trap_cond[1]

    def closest_ghost_or_pills(self, pos, ghosts_or_pills, walls):
        # BFS distance from pos to the nearest position in ghosts_or_pills,
        # or None if none is reachable
        fringe = [(pos[0], pos[1], 0)]
        expanded = set()
        while fringe:
            pos_x, pos_y, dist = fringe.pop(0)
            if (pos_x, pos_y) in expanded:
                continue
            expanded.add((pos_x, pos_y))
            # if we find a target at this location then exit
            if (pos_x, pos_y) in ghosts_or_pills:
                return dist
            # otherwise spread out from the location to its neighbours
            nbrs = Actions.getLegalNeighbors((pos_x, pos_y), walls)
            for nbr_x, nbr_y in nbrs:
                fringe.append((nbr_x, nbr_y, dist + 1))
        # nothing found
        return None

    def getFeatures(self, state, action, last_move):
        # extract the grid of food and wall locations and get the ghost locations
        food = state.getFood()
        walls = state.getWalls()
        ghosts = state.getGhostPositions()
        pills = state.getCapsules()
        ghost_states = state.getGhostStates()
        scared_ghosts = []
        not_scared_ghosts = []
        for n_ghost, ghost in enumerate(ghosts):
            if ghost_states[n_ghost].scaredTimer > 1:
                scared_ghosts.append(ghost)
            else:
                not_scared_ghosts.append(ghost)
        if self.crossings is None:
            self.find_all_crossings(state)
        features = CustomCounter()
        features["bias"] = 1.0

        # compute the location of pacman after he takes the action
        x, y = state.getPacmanPosition()
        dx, dy = Actions.directionToVector(action)
        next_x, next_y = int(x + dx), int(y + dy)
        scared_ghosts_in_neighbourhood = []
        not_scared_ghosts_in_neighbourhood = []

        for g in ghosts:
            if (next_x, next_y) in Actions.getLegalNeighbors(g, walls):
                if g in scared_ghosts:
                    scared_ghosts_in_neighbourhood.append(g)
                else:
                    not_scared_ghosts_in_neighbourhood.append(g)

        # fraction of ghosts that are one step away
        features["#-of-scared-ghosts-1-step-away"] = len(scared_ghosts_in_neighbourhood) / len(ghosts)
        features["#-of-ghosts-1-step-away"] = len(not_scared_ghosts_in_neighbourhood) / len(ghosts)

        # when no ghost is scared, steer towards the closest power pill
        if len(scared_ghosts) == 0:
            dist = self.closest_ghost_or_pills((next_x, next_y), pills, walls)
            if dist:
                features["closest-pill"] = float(dist) / (walls.width * walls.height)

        if not features["#-of-ghosts-1-step-away"] and food[next_x][next_y]:
            features["eats-food"] = 1.0

        # penalise immediately reversing the previous move
        if Actions.reverseDirection(last_move) == action:
            features["loop"] = 1.0
        if self.is_trap((next_x, next_y), walls, scared_ghosts):
            features["trap"] = 1.0

        # distances are scaled by the board area so all features stay small
        dist = closestFood((next_x, next_y), food, walls)
        if dist is not None:
            features["closest-food"] = float(dist) / (walls.width * walls.height)
        return features

def closestFood(pos, food, walls):
    """
    closestFood -- this is similar to the function that we have
    worked on in the search project; here it's all in one place
    """
    fringe = [(pos[0], pos[1], 0)]
    expanded = set()
    while fringe:
        pos_x, pos_y, dist = fringe.pop(0)
        if (pos_x, pos_y) in expanded:
            continue
        expanded.add((pos_x, pos_y))
        # if we find food at this location then exit
        if food[pos_x][pos_y]:
            return dist
        # otherwise spread out from the location to its neighbours
        nbrs = Actions.getLegalNeighbors((pos_x, pos_y), walls)
        for nbr_x, nbr_y in nbrs:
            fringe.append((nbr_x, nbr_y, dist + 1))
    # no food found
    return None


class QLearningAgent(ReinforcementAgent):
    """
    Q-Learning Agent

    Functions you should fill in:
      - computeValueFromQValues
      - computeActionFromQValues
      - getQValue
      - getAction
      - update

    Instance variables you have access to
      - self.epsilon (exploration prob)
      - self.alpha (learning rate)
      - self.discount (discount rate)

    Functions you should use
      - self.getLegalActions(state)
        which returns legal actions for a state
    """
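
    # For reference, the tabular Q-learning update that this agent family is built
    # around (the approximate variant further below replaces it with a weight update):
    #     Q(s, a) <- Q(s, a) + alpha * [r + gamma * max_a' Q(s', a') - Q(s, a)]
    # where alpha is self.alpha and gamma is self.discount.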
    def __init__(self, **args):
        "You can initialize Q-values here..."
        ReinforcementAgent.__init__(self, **args)
        self.qvalues = {}

    def getQValue(self, state, action):
        """
        Returns Q(state,action).
        Should return 0.0 if we have never seen a state
        or the Q node value otherwise.
        """
        if (state, action) in self.qvalues:
            return self.qvalues[(state, action)]
        else:
            return 0.0

    def computeValueFromQValues(self, state):
        """
        Returns max_action Q(state,action)
        where the max is over legal actions.  Note that if
        there are no legal actions, which is the case at the
        terminal state, you should return a value of 0.0.
        """
        q_values = [self.getQValue(state, action) for action in self.getLegalActions(state)]
        if not q_values:
            return 0.0
        return max(q_values)

    def computeActionFromQValues(self, state):
        """
        Compute the best action to take in a state.  Note that if there
        are no legal actions, which is the case at the terminal state,
        you should return None.
        """
        action_list = self.getLegalActions(state)
        if not action_list:
            return None

        # break ties between equally good actions at random
        best_q = self.computeValueFromQValues(state)
        max_action_list = [action for action in action_list if self.getQValue(state, action) == best_q]
        return random.choice(max_action_list)

    # last action taken, passed to the feature extractor to detect reversals
    last_move = None

    def getAction(self, state):
        """
        Compute the action to take in the current state.  With
        probability self.epsilon, we should take a random action and
        take the best policy action otherwise.  Note that if there are
        no legal actions, which is the case at the terminal state, you
        should choose None as the action.

        HINT: You might want to use util.flipCoin(prob)
        HINT: To pick randomly from a list, use random.choice(list)
        """
        # Pick Action
        legalActions = self.getLegalActions(state)
        action = None

        if legalActions:
            if flipCoin(self.epsilon):
                action = random.choice(legalActions)
            else:
                action = self.computeActionFromQValues(state)
        self.last_move = action
        return action

    def update(self, state, action, nextState, reward):
        raise NotImplementedError()

    def getPolicy(self, state):
        return self.computeActionFromQValues(state)

    def getValue(self, state):
        return self.computeValueFromQValues(state)


class PacmanQAgent(QLearningAgent):
    "Exactly the same as QLearningAgent, but with different default parameters"

    def __init__(self, epsilon=0.05, gamma=0.8, alpha=0.2, numTraining=2000, **args):
        """
        These default parameters can be changed from the pacman.py command line.
        For example, to change the exploration rate, try:
            python pacman.py -p PacmanQLearningAgent -a epsilon=0.1

        alpha    - learning rate
        epsilon  - exploration rate
        gamma    - discount factor
        numTraining - number of training episodes, i.e. no learning after these many episodes
        """
        args['epsilon'] = epsilon
        args['gamma'] = gamma
        args['alpha'] = alpha
        args['numTraining'] = numTraining
        self.index = 0  # This is always Pacman
        QLearningAgent.__init__(self, **args)

    def getAction(self, state):
        """
        Simply calls the getAction method of QLearningAgent and then
        informs parent of action for Pacman.  Do not change or remove this
        method.
        """
        action = QLearningAgent.getAction(self, state)
        self.doAction(state, action)
        return action


class ApproximateQAgent(PacmanQAgent):
    def __init__(self, extractor='MySimpleExtractor', **args):
        self.featExtractor = MySimpleExtractor()
        PacmanQAgent.__init__(self, **args)
        self.weights = {}
        # self.weights = {'#-of-ghosts-1-step-away': -939.9306551834235,
        #                 '#-of-scared-ghosts-1-step-away': 324.6366491699704, 'closest-food': -91.15343783290118,
        #                 'bias': 5.405996263103678, 'loop': -7.181170081949115, 'eats-food': 27.91072255450381}

    def getWeights(self):
        return self.weights

    def getQValue(self, state, action):
        """
        Should return Q(state,action) = w * featureVector
        where * is the dotProduct operator
        """
        features = self.featExtractor.getFeatures(state, action, self.last_move)
        q_function = 0

        for feature_name, feature_value in features.items():
            weight = self.get_weight_val(feature_name)
            q_function += feature_value * weight
        return q_function

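    # Worked example for getQValue above, taking the reference weights commented out
    # in __init__ and a hypothetical feature vector
    # {"bias": 1.0, "closest-food": 0.05, "eats-food": 1.0}:
    #     Q = 1.0 * 5.406 + 0.05 * (-91.153) + 1.0 * 27.911 ≈ 28.76
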
    def get_weight_val(self, feature_name):
        # weights for features seen for the first time are initialised randomly
        if feature_name not in self.weights:
            val = random.uniform(-100, 100)
            self.weights[feature_name] = val
        else:
            val = self.weights[feature_name]
        return val

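    # The update below is the standard approximate Q-learning rule:
    #     delta = r + gamma * max_a' Q(s', a') - Q(s, a)
    #     w_i  <- w_i + alpha * delta * f_i(s, a)
    # i.e. each feature weight moves in proportion to its activation and the TD error.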
    def update(self, state, action, nextState, reward):
        """
        Should update your weights based on transition
        """
        features = self.featExtractor.getFeatures(state, action, self.last_move)
        q_val = self.getQValue(state, action)
        diff = (reward + self.discount * self.computeValueFromQValues(nextState) - q_val)

        for feature_name, feature_value in features.items():
            self.weights[feature_name] += self.alpha * diff * feature_value

    def final(self, state):
        "Called at the end of each game."
        # call the super-class final method
        PacmanQAgent.final(self, state)
        self.featExtractor.del_crossings()

        # did we finish training?
        if self.episodesSoFar == self.numTraining:
            # you might want to print your weights here for debugging
            print(self.weights)

    def load_weights_from_json(self, name):
        with open(name) as fp:
            self.weights = json.load(fp)

    def save_weights_to_json(self, score):
        self.last_move = None
        f_name = "./weights/agent5_" + str(datetime.datetime.now()).replace(".", "_") + "-Score: " + str(score).replace(".", "_") + ".json"
        with open(f_name, 'w') as fp:
            json.dump(self.weights, fp)
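

# Minimal sketch of an entry point for greedy evaluation. Only the classes and
# helpers defined above are used; the StdIOPacmanRunner imported at the top would
# drive the actual game loop, but its protocol is not reproduced here, so no runner
# calls are assumed. The weight file path is hypothetical.
if __name__ == "__main__":
    agent = ApproximateQAgent(epsilon=0.0, numTraining=0)  # no exploration, no further training
    # agent.load_weights_from_json("./weights/agent5_....json")  # hypothetical path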