import random
import pickle
import sys
from pprint import pprint
import time

import numpy as np

GAMMA = 0.9
GRID = None
class State:
    def __init__(self, evader, pursuer1, pursuer2, grid=None):
        self.evader = evader
        self.pursuer1 = pursuer1
        self.pursuer2 = pursuer2
        if grid is not None:
            self.V = 0
            # key = (evader, pursuer1, pursuer2)
            # self.my_actions = actionsE(grid, key)
            # self.op_actions = actionsP(grid, key)

    def get_my_actions(self):
        key = (self.evader, self.pursuer1, self.pursuer2)
        return actionsE(GRID, key)

    def get_op_actions(self):
        key = (self.evader, self.pursuer1, self.pursuer2)
        return actionsP(GRID, key)

    def getV0(self):
        # Immediate reward from the evader's point of view:
        # -1 when caught, +1 for every step it stays free.
        if self.isEnd():
            return -1
        else:
            return 1

    def isEnd(self):
        return self.evader == self.pursuer1 or self.evader == self.pursuer2

    def getV(self):
        return self.V

    def present(self):
        print("[" + str(self.evader) + "|" + str(self.pursuer1) + "," + str(self.pursuer2) +
              "] V=" + str(self.V))  # + str(self.best_action_evader) + "|" + str(self.best_action_pursuer)
    def computeQ(self, states, myA, opA):
        p1s, p2s = opA
        es = myA
        action = sort_key((es, p1s, p2s))
        s = states[action]  # successor state after the joint move
        return self.getV0() + GAMMA * s.getV()

    def updateV(self, states):
        my_actions = self.get_my_actions()
        op_actions = self.get_op_actions()
        if len(my_actions) == 0:
            self.best_action_evader = (-1, -1)
            return
        if self.isEnd():
            self.V = self.getV0()
            return
        # The evader maximises over its moves; the pursuers minimise over joint moves.
        my_max = -10000
        for m in my_actions:
            op_min = 10000
            for o in op_actions:
                q = self.computeQ(states, m, o)
                if q < op_min:
                    op_min = q
            if op_min > my_max:
                my_max = op_min
        self.V = my_max
    def set_best_action_evader(self, states):
        best_value = -10000
        best_action = (-1, -1)
        for a in self.get_my_actions():
            e = a
            p1, p2 = self.pursuer1, self.pursuer2
            key = sort_key((e, p1, p2))
            s = states[key]
            if s.V > best_value:
                best_value = s.V
                best_action = a
        self.best_action_evader = best_action

    def set_best_action_pursuer(self, states):
        best_value = 10000
        best_action = ((-1, -1), (-1, -1))
        for a in self.get_op_actions():
            p1, p2 = a
            e = self.evader
            key = sort_key((e, p1, p2))
            s = states[key]
            if s.V < best_value:
                best_value = s.V
                best_action = a
        self.best_action_pursuer = best_action
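
# Note on the backup implemented by State.updateV above: it is the minimax
# value-iteration update for the evader playing against both pursuers,
#   V(s) = max over evader moves  min over joint pursuer moves  [ getV0(s) + GAMMA * V(s') ],
# where s' is the successor state looked up through sort_key so that the two
# pursuers are always stored in a canonical order.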
class ValueIteration:
    def __init__(self, grid, n):
        global GRID
        print("ValueIteration")
        GRID = grid
        PIK = "pacman.policy"
        try:
            with open(PIK, "rb") as f:
                print("opening a pickle " + PIK + "...")
                arr = pickle.load(f)
                self.states = self.decompress(arr)
            print("pickle loaded")
        except (OSError, pickle.UnpicklingError, EOFError):
            # No saved policy (or it is unreadable): compute one from scratch.
            self.learn(grid, PIK)
        print("DONE")
    def create_states(self, grid):
        self.states = dict()
        for e in grid.all_nodes():
            sys.stdout.write("\r creating state " + str(e))
            sys.stdout.flush()
            e = (np.int8(e[0]), np.int8(e[1]))
            for p1 in grid.all_nodes():
                for p2 in grid.all_nodes():
                    p1s, p2s = sort(p1, p2)
                    if (e, p1s, p2s) in self.states:
                        continue
                    p1s = (np.int8(p1s[0]), np.int8(p1s[1]))
                    p2s = (np.int8(p2s[0]), np.int8(p2s[1]))
                    self.states[(e, p1s, p2s)] = State(e, p1s, p2s, grid)
        print()
    def learn(self, grid, PIK):
        print("computing a policy")
        self.grid = grid
        print("creating states...")
        self.create_states(grid)
        print("created %d states" % len(self.states))
        start_time = time.time()
        total_time = 60 * 10 * 10000  # generous wall-clock budget, in seconds
        i = 0
        # Run value-iteration sweeps until the time budget runs out or 20 sweeps are done.
        while time.time() - start_time < total_time and i < 20:
            sys.stdout.write(
                "\r updating value, run %4d, %5ds / %5ds" % (i, int(time.time() - start_time), total_time))
            sys.stdout.flush()
            self.updateAllV()
            i += 1
        print()
        # Extract the greedy policy for both players from the computed values.
        for s in self.states.values():
            s.set_best_action_evader(self.states)
            s.set_best_action_pursuer(self.states)
        print("compressing...")
        data = self.compress()
        with open(PIK, "wb") as f:
            pickle.dump(data, f)
        print("policy computed and saved")
    def updateAllV(self):
        # Sweep over all states in a random order and back up each value once.
        keys = list(self.states.keys())
        random.shuffle(keys)
        for key in keys:
            s = self.states[key]
            s.updateV(self.states)
        del keys
    def get_best_action_evader(self, e, p1, p2):
        key = sort_key((e, p1, p2))
        return self.states[key].best_action_evader

    def get_best_action_pursuer(self, e, my_p, p2):
        key = sort_key((e, my_p, p2))
        a1, a2 = self.states[key].best_action_pursuer
        # The pursuers are stored in canonical (sorted) order, so return the
        # action that belongs to the calling pursuer's position.
        if my_p == key[1]:
            return a1
        else:
            return a2
    def compress(self):
        # Pack each state into one int8 row: the state key (6 ints), the best
        # evader action (2 ints) and the best pursuer joint action (4 ints).
        new_arr = np.zeros((len(self.states), 12), dtype=np.int8)
        for s in self.states.values():
            # Free memory; only the best actions are still needed below.
            del s.evader
            del s.pursuer1
            del s.pursuer2
            del s.V
        i = 0
        for k in self.states.keys():
            s = self.states[k]
            a = [k[0][0], k[0][1],
                 k[1][0], k[1][1],
                 k[2][0], k[2][1],
                 s.best_action_evader[0], s.best_action_evader[1],
                 s.best_action_pursuer[0][0], s.best_action_pursuer[0][1],
                 s.best_action_pursuer[1][0], s.best_action_pursuer[1][1]]
            new_arr[i, :] = a
            i += 1
        return new_arr
    def decompress(self, arr):
        print("decompressing...")
        states = dict()
        (n, _) = arr.shape
        arr = arr.astype(int)
        print("n = " + str(n))
        for i in range(n):
            s = arr[i, :]
            ex, ey = s[0], s[1]
            p1x, p1y, p2x, p2y = s[2], s[3], s[4], s[5]
            bex, bey = s[6], s[7]
            bp1x, bp1y, bp2x, bp2y = s[8], s[9], s[10], s[11]
            state = State((ex, ey), (p1x, p1y), (p2x, p2y))
            state.best_action_evader = (bex, bey)
            state.best_action_pursuer = ((bp1x, bp1y), (bp2x, bp2y))
            states[((ex, ey), (p1x, p1y), (p2x, p2y))] = state
        print("decompressed")
        return states
def sort(id1, id2):
    # Order two grid positions lexicographically so that pursuer pairs get a
    # canonical ordering.
    x1, y1 = id1
    x2, y2 = id2
    if x1 < x2 or (x1 == x2 and y1 < y2):
        return id1, id2
    else:
        return id2, id1


def sort_key(key):
    e, p1, p2 = key
    p1s, p2s = sort(p1, p2)
    return e, p1s, p2s


def actions(grid, s0):
    # All passable 4-neighbour moves from s0.
    a = grid.neighbors4(s0)
    return [s for s in a if grid.passable(s)]


def actionsE(grid, key):
    # Evader actions: passable moves that do not step onto a pursuer's position.
    e, p1, p2 = key
    return [s for s in actions(grid, e) if s != p1 and s != p2]


def actionsP(grid, key):
    # Pursuer actions: all possible joint (pursuer1, pursuer2) move combinations.
    _, p1, p2 = key
    a = []
    for a1 in actions(grid, p1):
        for a2 in actions(grid, p2):
            a.append((a1, a2))
    return a
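
# Minimal usage sketch. Assumption: a grid object exposing all_nodes(),
# neighbors4(pos) and passable(pos), as used above; the SquareGrid stub below
# is hypothetical and only illustrates that expected interface.
if __name__ == "__main__":
    class SquareGrid:
        def __init__(self, width, height, walls=()):
            self.width = width
            self.height = height
            self.walls = set(walls)

        def all_nodes(self):
            return [(x, y) for x in range(self.width) for y in range(self.height)
                    if (x, y) not in self.walls]

        def neighbors4(self, pos):
            x, y = pos
            return [(x + 1, y), (x - 1, y), (x, y + 1), (x, y - 1)]

        def passable(self, pos):
            x, y = pos
            return 0 <= x < self.width and 0 <= y < self.height and (x, y) not in self.walls

    # Computes (or loads) the policy, then queries it for one configuration.
    vi = ValueIteration(SquareGrid(4, 4), 0)
    print(vi.get_best_action_evader((0, 0), (3, 3), (2, 3)))
    print(vi.get_best_action_pursuer((0, 0), (3, 3), (2, 3)))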