## Run the HFO server first:
##   ./bin/HFO --offense-npcs=2 --defense-agents=1 --defense-npcs=1
## Then run this script; it connects as the single learning defense agent.
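## The agent learns with TD(0) and an epsilon-greedy policy, approximating
## Q(s, a) with one linear SGDRegressor per action over the first 10
## ego-centric high-level features. Rewards are framed from the defender's
## side: +1 when the defense ends the episode, -1 on a conceded goal.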
from hfo import *
import numpy as np
from sklearn.linear_model import SGDRegressor
import matplotlib.pyplot as plt

# Live plot of the running win rate, updated every 50 episodes.
plt.ion()
fig = plt.figure()
ax = fig.add_subplot(111)
ax.set_title('Episode vs Win Rate')
hl, = ax.plot([], [], 'r-')
ax.set_xlim([1, 100])
ax.set_ylim([0, 1])

def update_line(x, y):
    hl.set_xdata(np.append(hl.get_xdata(), x))
    hl.set_ydata(np.append(hl.get_ydata(), y))
    fig.canvas.draw()
    fig.canvas.flush_events()
hfo = HFOEnvironment()
# Connect as a defender on 'base_right'; adjust the config path to your HFO checkout.
hfo.connectToServer(HIGH_LEVEL_FEATURE_SET,
                    '/home/arijitx/cs747/HFO/bin/teams/base/config/formations-dt',
                    6000, 'localhost', 'base_right', False)
actions = [MOVE, INTERCEPT, REDUCE_ANGLE_TO_GOAL, DEFEND_GOAL, GO_TO_BALL, REORIENT]
act_str = ['MOVE', 'INTERCEPT', 'REDUCE_ANGLE_TO_GOAL', 'DEFEND_GOAL', 'GO_TO_BALL', 'REORIENT']
# Rewards from the defender's point of view: conceding a goal is -1, any other
# terminal status (capture, out of bounds, out of time) is +1.
status_to_reward = {IN_GAME: 0, GOAL: -1, CAPTURED_BY_DEFENSE: 1,
                    OUT_OF_BOUNDS: 1, OUT_OF_TIME: 1, SERVER_DOWN: 0}
n_action = len(actions)
n_state = 10  # only the first 10 high-level features feed the regressors
eps = 0.1     # epsilon-greedy exploration rate
def parse_state(T, O, f):
    """Split the high-level feature list f into a readable dict (T teammates,
    O opponents) and return the first 10 features as the learning state."""
    s = {}
    s['pos'] = (f[0], f[1])       # agent x, y
    s['ori'] = f[2]               # body orientation
    s['ball_pos'] = (f[3], f[4])  # ball x, y
    s['can_kick'] = f[5]          # able to kick
    s['gcp'] = f[6]               # goal center proximity
    s['gca'] = f[7]               # goal center angle
    s['goa'] = f[8]               # goal opening angle
    s['po'] = f[9]                # proximity to nearest opponent
    s['enemy'] = []
    # Opponent triples (assumed x, y, uniform number in the high-level set)
    # follow the 6 per-teammate features.
    for i in range(10 + 6 * T, 10 + 6 * T + 3 * O, 3):
        s['enemy'].append((f[i], f[i + 1], f[i + 2]))
    sv = np.array(f[:10])
    return s, sv
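# Quick sanity check of the parsing above on hypothetical dummy features:
# with no teammates (T=0) and one opponent, the opponent triple is assumed
# to sit immediately after the first 10 features.
_s, _sv = parse_state(0, 1, list(np.arange(13, dtype=float)))
assert _sv.shape == (10,)
assert _s['enemy'] == [(10.0, 11.0, 12.0)]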
class Model:
    """Q(s, a) approximated by one linear SGDRegressor per action."""
    def __init__(self, n_state, n_action):
        self.n_state = n_state
        self.n_action = n_action
        self.gamma = 1
        self.alpha = 0.5
        self.models = [SGDRegressor(eta0=self.alpha) for _ in range(n_action)]
        for model in self.models:
            # Fit once on dummy data so predict() works before real updates.
            model.partial_fit([np.random.uniform(size=n_state)], [0])

    def update(self, cur_state, cur_action, next_state, reward, terminal=False):
        # TD(0) target: bootstrap from the greedy value of the next state;
        # at terminal states the target is the reward alone.
        td_0_target = reward
        if not terminal:
            td_0_target += self.gamma * self.best_action(next_state)[0]
        self.models[cur_action].partial_fit([cur_state], [td_0_target])

    def best_action(self, state):
        # Greedy action under the current approximation, with its value.
        preds = np.array([m.predict([state])[0] for m in self.models])
        return np.max(preds), np.argmax(preds)
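# Tiny offline check of the Model API on made-up transitions (hypothetical
# data; the real states come from hfo.getState() below).
_m = Model(n_state, n_action)
_s0, _s1 = np.random.uniform(size=n_state), np.random.uniform(size=n_state)
_m.update(_s0, 0, _s1, reward=0)                 # bootstrapped TD(0) update
_m.update(_s1, 1, _s1, reward=1, terminal=True)  # terminal: target == reward
_val, _idx = _m.best_action(_s0)
assert 0 <= _idx < n_action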
m = Model(n_state, n_action)
n_win = 0
for episode in range(10000):
    status = IN_GAME
    vector = None
    action = None
    reward = 0
    while status == IN_GAME:
        prev_state = vector
        prev_action = action
        features = hfo.getState()
        # 1 teammate NPC and 2 opponents, matching the server command above.
        state, vector = parse_state(1, 2, features)
        cur_state = vector
        # Update the previous transition once its successor state is known.
        if prev_state is not None:
            m.update(prev_state, prev_action, cur_state, reward)
        # Epsilon-greedy action selection.
        val, action = m.best_action(cur_state)
        if np.random.uniform() <= eps:
            action = np.random.randint(0, n_action)
        print('Episode:', episode + 1, 'Action:', act_str[action], val)
        hfo.act(actions[action])
        status = hfo.step()
        reward = status_to_reward[status]
    # Terminal update: credit the last action with the episode's outcome,
    # without bootstrapping past the end of the episode.
    if action is not None:
        m.update(cur_state, action, cur_state, reward, terminal=True)
    if reward == 1:  # the defense kept the offense from scoring
        n_win += 1
    if (episode + 1) % 50 == 0:
        print((episode + 1) / 50, n_win / (episode + 1))
        update_line((episode + 1) / 50, n_win / (episode + 1))
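    # If the server goes down there is nothing left to learn from; quit
    # cleanly instead of spinning on dead episodes (QUIT is part of the hfo
    # action set, as used by the example agents shipped with HFO).
    if status == SERVER_DOWN:
        hfo.act(QUIT)
        break

# Keep the final win-rate plot on screen once training finishes.
plt.ioff()
plt.show()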