Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import math
import sys

import numpy as np
import pygame
from ple import PLE
from ple.games.snake import Snake

from agent import Agent
def get_dist(head_x, head_y, obs_x, obs_y):
    """Return the Euclidean distance between the head and an obstacle.

    Args:
        head_x: x coordinate of the first point.
        head_y: y coordinate of the first point.
        obs_x: x coordinate of the second point.
        obs_y: y coordinate of the second point.

    Returns:
        The straight-line distance between the two points, as a float.
    """
    return math.hypot(head_x - obs_x, head_y - obs_y)
def get_state(state, board_size=600):
    """Reduce a raw game state to head, food and nearest-obstacle distance.

    Args:
        state: Indexable game state. ``state[0]``/``state[1]`` are read as the
            head position, ``state[2]``/``state[3]`` are passed through
            unchanged, and ``state[4]`` is a sequence of distances whose first
            two entries are skipped (presumably the segments adjacent to the
            head -- TODO confirm against the game's state layout).
        board_size: Coordinate of the far walls on both axes; the near walls
            are at 0.

    Returns:
        ``[state[0], state[1], state[2], state[3], nearest]`` where ``nearest``
        is the smaller of the closest remaining ``state[4]`` distance and the
        distance from the head to the closest wall.
    """
    head_x, head_y = state[0], state[1]
    # The walls are axis-aligned, so the distance to each one is just the
    # offset along a single axis.
    min_dist_walls = float(
        min(
            abs(head_x),
            abs(head_y),
            abs(board_size - head_x),
            abs(board_size - head_y),
        )
    )
    # A short snake has no entries past the skipped ones; fall back to the
    # wall distance instead of letting min() raise on an empty sequence.
    min_dist_body = min(state[4][2:], default=float("inf"))
    return [state[0], state[1], state[2], state[3], min(min_dist_body, min_dist_walls)]
def vision(state, board_size=600, food_range=100, danger_range=50):
    """Encode the surroundings of the snake head as 12 binary flags.

    The result is four groups of three flags ``[food, wall, body]``, one group
    per direction, in this order: toward ``y = 0``, toward ``y = board_size``,
    toward ``x = 0``, toward ``x = board_size``. (Presumably this matches the
    w/s/a/d action ordering used by the training loop -- TODO confirm.)

    Args:
        state: Indexable game state. ``state[0]``/``state[1]`` are the head
            position, ``state[2]``/``state[3]`` the food position, and
            ``state[5]`` a sequence of ``(x, y)`` body positions whose first
            three entries are ignored.
        board_size: Coordinate of the far walls on both axes.
        food_range: Food is flagged on an axis only when its offset on the
            other axis is strictly smaller than this value.
        danger_range: Walls and body segments are flagged when their offset is
            at most this value.

    Returns:
        A flat list of 12 integers, each 0 or 1.
    """
    toward_y0, toward_ymax, toward_x0, toward_xmax = range(4)
    food, wall, body = range(3)

    my_vision = [[0, 0, 0] for _ in range(4)]
    head_x, head_y = state[0], state[1]
    food_x, food_y = state[2], state[3]

    # Food: flag a horizontal direction when the food is roughly on the same
    # row, and a vertical direction when it is roughly in the same column.
    dist_x, dist_y = head_x - food_x, head_y - food_y
    if abs(dist_y) < food_range:
        if dist_x < 0:
            my_vision[toward_xmax][food] = 1
        else:
            my_vision[toward_x0][food] = 1
    if abs(dist_x) < food_range:
        if dist_y < 0:
            my_vision[toward_ymax][food] = 1
        else:
            my_vision[toward_y0][food] = 1

    # Walls: at most one wall per axis is flagged.
    if head_x <= danger_range:
        my_vision[toward_x0][wall] = 1
    elif board_size - head_x <= danger_range:
        my_vision[toward_xmax][wall] = 1
    if head_y <= danger_range:
        my_vision[toward_y0][wall] = 1
    elif board_size - head_y <= danger_range:
        my_vision[toward_ymax][wall] = 1

    # Body: the first three positions are skipped.
    # NOTE(review): unlike the food check, each axis is tested on its own, so a
    # segment that is close in x but far away in y still raises a horizontal
    # flag (and vice versa). Kept as-is because a trained agent depends on
    # these features -- confirm whether this is intended.
    for body_x, body_y in state[5][3:]:
        dist_x = head_x - body_x
        dist_y = head_y - body_y
        if abs(dist_x) <= danger_range:
            if dist_x > 0:
                my_vision[toward_x0][body] = 1
            else:
                my_vision[toward_xmax][body] = 1
        if abs(dist_y) <= danger_range:
            if dist_y < 0:
                my_vision[toward_ymax][body] = 1
            else:
                my_vision[toward_y0][body] = 1

    # Flatten the 4x3 grid into a single feature vector.
    return [flag for direction in my_vision for flag in direction]
def process_state(state):
    """Wrap the values of a game-state mapping in a NumPy array.

    Passed to ``PLE`` as its ``state_preprocessor``. The values keep the
    mapping's iteration order, so callers index them positionally.

    Args:
        state: Mapping describing the game state.

    Returns:
        An array whose first element holds the state values; callers use
        ``list(result[0])`` to recover them as a plain list.
    """
    # NOTE(review): on Python 3 ``state.values()`` is a view object, so this
    # presumably yields a one-element object array instead of a numeric
    # matrix -- confirm before relying on the array's shape or dtype.
    return np.array([state.values()])
def run():
    """Train the agent on Snake and append the per-episode scores to a file.

    Command-line arguments (all required):
        1. learning rate passed to the agent as ``alpha`` (float)
        2. discount factor passed to the agent as ``gamma`` (float)
        3. file name passed to the agent as ``file_name``
        4. first activation name
        5. second activation name

    Raises:
        SystemExit: If fewer than five arguments are supplied.
        ValueError: If the learning rate or discount factor is not a number.
    """
    if len(sys.argv) < 6:
        raise SystemExit(
            f"usage: {sys.argv[0]} ALPHA GAMMA FILE_NAME ACTIVATION_1 ACTIVATION_2"
        )
    alpha, gamma, file_name, activation_1, activation_2 = sys.argv[1:6]

    game = Snake(600, 600)
    p = PLE(
        game,
        reward_values={
            "positive": 100.0,
            "negative": -100.0,
            "tick": -0.5,
            "loss": -50.0,
            "win": 5.0,
        },
        display_screen=False,
        state_preprocessor=process_state,
    )
    # Number of training episodes actually played by the loop below.
    n_games = 100000
    print(alpha)
    agent = Agent(
        alpha=float(alpha),
        gamma=float(gamma),
        n_actions=4,
        epsilon=0.99,
        batch_size=64,
        input_shape=12,  # length of the feature vector produced by vision()
        epsilon_dec=0.09,
        epsilon_end=0.01,
        memory_size=1000000,
        file_name=file_name,
        activations=[activation_1, activation_2],
    )
    # Key codes for 'w', 's', 'a', 'd'; the agent's action index selects one.
    actions = [119, 115, 97, 100]
    scores = []

    for _ in range(n_games):
        if p.game_over():
            p.reset_game()
        while not p.game_over():
            old_state = np.array(vision(list(p.getGameState()[0])))
            action = agent.choose_action(old_state)
            reward = p.act(actions[action])
            new_state = np.array(vision(list(p.getGameState()[0])))
            agent.add_experience(old_state, action, reward, new_state)
            agent.learn()
        score = p.score()
        scores.append(score)
        print(f"Score for model iteration number _ {file_name} with learning_rate {alpha}, gama {gamma}, activations: {(activation_1, activation_2)} is score {score}")
        # Save after every episode so an interrupted run keeps its progress.
        agent.save_game()

    with open('scoruri.txt', "a", encoding="utf-8") as my_scores:
        my_scores.write(f'Scorurile pentru rularea cu activarile {(activation_1, activation_2)}')
        # A text file only accepts str; writing the list object itself raises
        # TypeError. The newline keeps appended runs on separate lines.
        my_scores.write(str(scores) + "\n")
# Start training only when the file is executed as a script, not on import.
if __name__ == '__main__':
    run()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement