Advertisement
Guest User

Untitled

a guest
Dec 15th, 2019
129
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.68 KB | None | 0 0
import math
import sys

import numpy as np
import pygame

from ple import PLE
from ple.games.snake import Snake

from agent import Agent
  7.  
  8.  
  9. def get_dist(head_x, head_y, obs_x, obs_y):
  10. return ((head_x - obs_x) ** 2 + (head_y - obs_y) ** 2) ** 0.5
  11.  
  12.  
  13. def get_state(state):
  14. head_x, head_y = state[0], state[1]
  15. min_dist_walls = min(get_dist(head_x, head_y, head_x, 0), get_dist(head_x, head_y, 0, head_y),
  16. get_dist(head_x, head_y, 600, head_y), get_dist(head_x, head_y, head_x, 600))
  17. return [state[0], state[1], state[2], state[3], min(min(state[4][4:]), min_dist_walls)]
  18.  
  19.  
  20. def vision(state, direction):
  21. my_vision = [[0,0] for _ in range(3)]
  22. head_x, head_y = state[0], state[1]
  23. food_x, food_y = state[2], state[3]
  24.  
  25. # food
  26.  
  27. if direction == "Left":
  28. if head_x - food_x >= 0:
  29. my_vision[2][0] = 1
  30. if food_y - head_y < 0:
  31. my_vision[0][0] = 1
  32. else:
  33. my_vision[1][0] = 1
  34.  
  35. # wall
  36. if head_x <= 50:
  37. my_vision[2][1] = -1
  38. if 600 - head_y <= 50:
  39. my_vision[1][1] = -1
  40. if head_y <= 50:
  41. my_vision[0][1] = -1
  42.  
  43. # body
  44. for body_x, body_y in state[5][3:]:
  45. # print(body_x,body_y)
  46. if head_x - body_x >= 0:
  47. my_vision[2][1] = -1
  48. if body_y - head_y < 0:
  49. my_vision[0][1] = -1
  50. else:
  51. my_vision[1][1] = -1
  52.  
  53. elif direction == "Up":
  54. if head_y - food_y >= 0:
  55. my_vision[2][0] = 1
  56. if head_x - food_x < 0:
  57. my_vision[0][0] = 1
  58. else:
  59. my_vision[1][0] = 1
  60.  
  61. # wall
  62. if head_y <= 50:
  63. my_vision[2][1] = -1
  64. if 600 - head_x <= 50:
  65. my_vision[0][1] = -1
  66. if head_x <= 50:
  67. my_vision[1][1] = -1
  68.  
  69. # body
  70. for body_x, body_y in state[5][3:]:
  71. # print(body_x,body_y)
  72. if head_y - body_y >= 0:
  73. my_vision[2][1] = -1
  74. if body_x - head_x < 0:
  75. my_vision[0][1] = -1
  76. else:
  77. my_vision[1][1] = -1
  78.  
  79. elif direction == "Right":
  80. if head_x - food_x <= 0:
  81. my_vision[2][0] = 1
  82. if food_y - head_y >= 0:
  83. my_vision[0][0] = 1
  84. else:
  85. my_vision[1][0] = 1
  86.  
  87. # wall
  88. if 600 - head_x <= 50:
  89. my_vision[2][1] = -1
  90. if head_y <= 50:
  91. my_vision[1][1] = -1
  92. if 600 - head_y <= 50:
  93. my_vision[0][1] = -1
  94.  
  95. # body
  96. for body_x, body_y in state[5][3:]:
  97. if head_x - body_x <= 0:
  98. my_vision[2][1] = -1
  99. if body_y - head_y >= 0:
  100. my_vision[0][1] = -1
  101. else:
  102. my_vision[1][1] = -1
  103.  
  104. else:
  105. if head_y - food_y <= 0:
  106. my_vision[2][0] = 1
  107. if head_x - food_x >= 0:
  108. my_vision[0][0] = 1
  109. else:
  110. my_vision[1][0] = 1
  111.  
  112. # wall
  113. if 600 - head_y <= 50:
  114. my_vision[2][1] = -1
  115. if head_x <= 50:
  116. my_vision[0][1] = -1
  117. if 600 - head_x <= 50:
  118. my_vision[1][1] = -1
  119.  
  120. # body
  121. for body_x, body_y in state[5][3:]:
  122. if head_y - body_y <= 0:
  123. my_vision[2][1] = -1
  124. if body_x - head_x >= 0:
  125. my_vision[0][1] = -1
  126. else:
  127. my_vision[1][1] = -1
  128.  
  129. output = []
  130. [output.extend(item) for item in my_vision]
  131. # output.extend([head_x,head_y,food_x,food_y])
  132. return output
  133.  
  134.  
  135. def prepare_corect_directions(direction):
  136. direction = str(direction)
  137. if direction == "Left":
  138. return {119: "Up", 115: "Down", 97: "Left"}
  139. if direction == "Right":
  140. return {115: "Down", 119: "Up", 100: "Right"}
  141. if direction == "Up":
  142. return {100: "Right", 97: "Left", 119: "Up"}
  143. if direction == "Down":
  144. return {97: "Left", 100: "Right", 115: "Down"}
  145.  
  146.  
  147. def process_state(state):
  148. return np.array([state.values()])
  149.  
  150.  
def run():
    """Train the DQN agent on PLE's Snake for up to 100000 episodes.

    Positional command-line arguments:
        1: alpha (learning rate)    2: gamma (discount factor)
        3: model file name          4, 5: activation names for the network

    Each episode: reset the game, then step until game over — choose an
    action from the 6-value vision encoding, act, store the transition,
    and learn. Per-episode score and hyperparameters are printed and the
    model is saved after every episode.
    """
    game = Snake(600, 600)
    # NOTE(review): reward_values keys follow PLE's reward scheme —
    # "positive" on food, "loss" on death, "tick" as per-frame penalty;
    # confirm against the installed PLE version.
    p = PLE(game, fps=60, state_preprocessor=process_state, force_fps=True, display_screen=True, frame_skip=2,
            reward_values={"positive": 100.0,
                           "negative": -50.0,
                           "tick": -0.1,
                           "loss": -70.0,
                           "win": 5.0})
    # n_actions=3 matches the three legal moves per heading; input_shape=6
    # matches the length of the list vision() returns.
    agent = Agent(alpha=float(sys.argv[1]), gamma=float(sys.argv[2]), n_actions=3, epsilon=0.001, batch_size=100,
                  input_shape=6, epsilon_dec=0.9999,
                  epsilon_end=0.001,
                  memory_size=500000, file_name=sys.argv[3], activations=[str(sys.argv[4]), str(sys.argv[5])])
    p.init()
    agent.load_game()

    scores = []

    for _ in range(100000):
        if p.game_over():
            p.reset_game()
        score = 0
        initial_direction = "Right"
        # getGameState() returns process_state's 1-element array; [0] then
        # list() recovers the raw state values for vision().
        game_state = np.array(vision(list(p.getGameState()[0]), initial_direction))
        # prec_dist only feeds the distance-based reward shaping below,
        # which is currently commented out.
        prec_dist = get_dist(game_state[0], game_state[1], game_state[2], game_state[3])

        while not p.game_over():
            old_state = np.array(vision(list(p.getGameState()[0]), initial_direction))

            action = agent.choose_action(old_state)
            # Translate the action index (0..2) into a (pygame key code,
            # new heading) pair; reversing into the body is never offered.
            possible_directions = prepare_corect_directions(initial_direction)
            possible_directions_tuples = list(zip(possible_directions.keys(), possible_directions.values()))
            direction = possible_directions_tuples[action]
            initial_direction = direction[1]

            reward = p.act(direction[0])
            # Disabled reward shaping: small bonus for moving closer to food.
            # if reward == -0.1:
            #     game_state = np.array(vision(list(p.getGameState()[0]), initial_direction))
            #     curr_dist = get_dist(game_state[0], game_state[1], game_state[2], game_state[3])
            #     if prec_dist > curr_dist: reward = 1.5
            #     prec_dist = curr_dist

            new_state = np.array(vision(list(p.getGameState()[0]), initial_direction))
            agent.add_experience(old_state, action, reward, new_state)
            agent.learn()
            score = p.score()
        scores.append(score)
        print(
            f"Score for model iteration number _ {str(sys.argv[3])} with learning_rate {sys.argv[1]}, gama {sys.argv[2]}, activations: {sys.argv[4], sys.argv[5]} is score {score}. Epsilon is {agent.epsilon}")
        agent.save_game()
  206.  
  207.  
  208. #
  209.  
# Script entry point: start the training loop only when run directly.
if __name__ == '__main__':
    run()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement