## Run server
## ./bin/HFO --offense-npcs=2 --defense-agents=1 --defense-npcs=1
## Then run this script as the defense agent

from hfo import *
import sys
import numpy as np
from sklearn.linear_model import SGDRegressor
import matplotlib.pyplot as plt

# Interactive plot that tracks the running win rate across episodes.
plt.ion()
fig = plt.figure()
ax = fig.add_subplot(111)
ax.set_title('Episode Vs Win Rate')
hl, = ax.plot([0, 0, 0], [0, 0, 0], 'r-')
ax.set_xlim([1, 100])
ax.set_ylim([0, 1])

def update_line(x, y):
    # Append a new (x, y) point to the live plot and redraw it.
    hl.set_xdata(np.append(hl.get_xdata(), x))
    hl.set_ydata(np.append(hl.get_ydata(), y))
    fig.canvas.draw()
    fig.canvas.flush_events()

hfo = HFOEnvironment()
# Arguments: feature set, formations config, server port, server address,
# team name, play_goalie. This agent joins the defending team 'base_right'.
hfo.connectToServer(HIGH_LEVEL_FEATURE_SET,
                    '/home/arijitx/cs747/HFO/bin/teams/base/config/formations-dt',
                    6000, 'localhost', 'base_right', False)

# Defensive action set and matching names for logging.
actions = [MOVE, INTERCEPT, REDUCE_ANGLE_TO_GOAL, DEFEND_GOAL, GO_TO_BALL, REORIENT]
act_str = ['MOVE', 'INTERCEPT', 'REDUCE_ANGLE_TO_GOAL', 'DEFEND_GOAL', 'GO_TO_BALL', 'REORIENT']
# Rewards from the defender's point of view: a conceded goal is -1; a capture,
# ball out of bounds, or running out of time is +1; everything else is 0.
status_to_reward = {IN_GAME: 0, GOAL: -1, CAPTURED_BY_DEFENSE: 1,
                    OUT_OF_BOUNDS: 1, OUT_OF_TIME: 1, SERVER_DOWN: 0}

n_action = len(actions)
n_state = 10  # only the first 10 high-level features are used as the state vector

eps = 0.1     # epsilon-greedy exploration rate

def parse_state(T, O, f):
    # Decode the HFO high-level feature vector f for T teammates and O opponents.
    s = {}
    s['pos'] = (f[0], f[1])       # agent x, y
    s['ori'] = f[2]               # agent orientation
    s['ball_pos'] = (f[3], f[4])  # ball x, y
    s['can_kick'] = f[5]          # able to kick
    s['gcp'] = f[6]               # goal center proximity
    s['gca'] = f[7]               # goal center angle
    s['goa'] = f[8]               # goal opening angle
    s['po'] = f[9]                # proximity to nearest opponent
    s['enemy'] = []
    # Opponent features start after the 10 agent features and the 6*T teammate
    # features, one (x, y, uniform number) triple per opponent.
    for i in range(10 + 6 * T, 10 + 6 * T + 3 * O, 3):
        s['enemy'].append((f[i], f[i + 1], f[i + 2]))

    sv = np.array(f[:10])         # the first 10 features form the state vector
    return s, sv

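# Illustrative check of the indexing above: the training loop below calls
# parse_state(0, 1, features), i.e. T = 0 teammates and O = 1 opponent, so the
# range reduces to [10] and s['enemy'] holds the single triple
# (f[10], f[11], f[12]).
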
class Model():
    # One SGDRegressor per action approximates Q(s, a) over the 10-feature state.
    def __init__(self, n_state, n_action):
        self.n_state = n_state
        self.n_action = n_action
        self.gamma = 1
        self.alpha = 0.5
        self.models = [SGDRegressor(eta0=self.alpha) for _ in range(n_action)]
        # Prime each regressor with one dummy sample so predict() works before
        # the first real update.
        for i in range(len(self.models)):
            self.models[i].partial_fit([np.random.uniform(size=10)], [0])

    def update(self, cur_state, cur_action, next_state, reward):
        # TD(0) target: r + gamma * max_a' Q(next_state, a')
        td_0_target = reward + self.gamma * self.best_action(next_state)[0]
        self.models[cur_action].partial_fit([cur_state], [td_0_target])

    def best_action(self, state):
        # Greedy action: evaluate every per-action regressor and take the argmax.
        preds = np.array([m.predict([state]) for m in self.models])
        value = np.max(preds)
        idx = np.argmax(preds)
        return value, idx

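# Worked example of Model.update (assumed numbers, not from a real run): if a
# transition ends with a conceded goal (reward = -1) and the greedy value of
# the next state is 0.2, the regressor for the chosen action is fit towards
# td_0_target = -1 + 1 * 0.2 = -0.8.
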
m = Model(n_state, n_action)

n_win = 0
for episode in range(10000):
    status = IN_GAME
    vector = None
    action = None

    while status == IN_GAME:
        prev_state = vector
        prev_action = action
        features = hfo.getState()
        state, vector = parse_state(0, 1, features)

        cur_state = vector
        # Update the previous transition with the reward observed at the last
        # step (0 while the episode is still in progress).
        if prev_state is not None:
            m.update(prev_state, prev_action, cur_state, reward)

        # Epsilon-greedy action selection.
        val, action = m.best_action(cur_state)
        if np.random.uniform() <= eps:
            action = np.random.randint(n_action)

        print('Episode: ', episode + 1, ' Action : ', act_str[action], val)

        hfo.act(actions[action])
        status = hfo.step()
        reward = status_to_reward[status]

    # Credit the terminal reward to the last recorded transition.
    if prev_action is not None:
        m.update(prev_state, prev_action, cur_state, reward)

    # Stop cleanly if the server went away.
    if status == SERVER_DOWN:
        hfo.act(QUIT)
        break

    if reward == -1:
        n_win += 1
    if (episode + 1) % 50 == 0:
        print((episode + 1) / 50, n_win / (episode + 1))
        update_line((episode + 1) / 50, n_win / (episode + 1))
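
# A possible extension, not part of the original script: persist the trained
# per-action regressors with joblib so the defender can be reloaded later.
# The file names below are hypothetical.
from joblib import dump, load

for i, regressor in enumerate(m.models):
    dump(regressor, 'defense_q_action_%d.joblib' % i)
# To restore later:
# m.models = [load('defense_q_action_%d.joblib' % i) for i in range(n_action)]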