# Untitled

from ple.games.snake import Snake
from ple import PLE
import numpy as np
from agent import Agent
import pygame
import sys

def get_dist(head_x, head_y, obs_x, obs_y):
    """Return the Euclidean distance between two 2-D points.

    Args:
        head_x, head_y: Coordinates of the first point.
        obs_x, obs_y: Coordinates of the second point.

    Returns:
        The straight-line distance between the two points.
    """
    delta_x = head_x - obs_x
    delta_y = head_y - obs_y
    squared_length = delta_x ** 2 + delta_y ** 2
    return squared_length ** 0.5


def get_state(state):
    """Reduce a raw state sequence to five features.

    Args:
        state: Indexable sequence. Entries 0 and 1 are used as the head
            position and entries 2 and 3 are passed through unchanged.
            Entry 4 must be a sequence with at least three values; everything
            from index 2 on is treated as a distance
            (presumably head-to-body-segment distances -- TODO confirm).

    Returns:
        ``[state[0], state[1], state[2], state[3], nearest]`` where
        ``nearest`` is the smallest of the ``state[4][2:]`` values and the
        distance from the head to the closest edge of a 600x600 board.
    """
    head_x, head_y = state[0], state[1]

    # Foot of the perpendicular from the head onto each of the four edges
    # of the hard-coded 600x600 playing field.
    edge_points = (
        (head_x, 0),
        (0, head_y),
        (600, head_y),
        (head_x, 600),
    )
    nearest_wall = min(
        get_dist(head_x, head_y, edge_x, edge_y) for edge_x, edge_y in edge_points
    )

    nearest_listed = min(state[4][2:])
    nearest = min(nearest_listed, nearest_wall)
    return [state[0], state[1], state[2], state[3], nearest]


def vision(state):
    my_vision = [[0, 0, 0] for _ in range(4)]
    head_x, head_y = state[0], state[1]
    food_x, food_y = state[2], state[3]

    # food
    dist_x, dist_y = head_x - food_x, head_y - food_y
    if abs(dist_y) < 100:
        if dist_x < 0:
            my_vision[3][0] = 1
        else:
            my_vision[2][0] = 1
    if abs(dist_x) < 100:
        if dist_y < 0:
            my_vision[1][0] = 1
        else:
            my_vision[0][0] = 1

    # wall
    if head_x <= 50:
        my_vision[2][1] = 1
    elif 600 - head_x <= 50:
        my_vision[3][1] = 1
    if head_y <= 50:
        my_vision[0][1] = 1
    elif 600 - head_y <= 50:
        my_vision[1][1] = 1

    # body
    for body_x, body_y in state[5][3:]:
        # print(body_x,body_y)
        dist_x = head_x - body_x
        dist_y = head_y - body_y
        if abs(dist_x) <= 50:
            if dist_x > 0:
                my_vision[2][2] = 1
            else:
                my_vision[3][2] = 1
        if abs(dist_y) <= 50:
            if dist_y < 0:
                my_vision[1][2] = 1
            else:
                my_vision[0][2] = 1
    output = []
    [output.extend(item) for item in my_vision]
    return output


def process_state(state):
    """Wrap the values of a state mapping in a one-element NumPy array.

    Args:
        state: Mapping whose ``values()`` are the state features.

    Returns:
        ``np.array`` built from a one-item list holding ``state.values()``.
        The values view is stored as a single element, so callers index
        ``[0]`` and convert that element with ``list(...)``.
    """
    feature_values = state.values()
    wrapped = [feature_values]
    return np.array(wrapped)

def run():
    """Train the agent on PLE Snake and append the game scores to a file.

    Command-line arguments (read from ``sys.argv``):
        1: passed to ``Agent`` as ``alpha`` (float; printed as learning rate)
        2: passed to ``Agent`` as ``gamma`` (float)
        3: passed to ``Agent`` as ``file_name``
        4, 5: the two entries of the ``activations`` list

    After every game the score is printed and ``agent.save_game()`` is
    called. Once all games are done, the collected scores are appended to
    ``scoruri.txt``.

    Raises:
        SystemExit: If fewer than five command-line arguments were given.
    """
    # Fail early with a readable message instead of an IndexError after the
    # environment has already been built.
    if len(sys.argv) < 6:
        raise SystemExit(
            "usage: <script> ALPHA GAMMA FILE_NAME ACTIVATION_1 ACTIVATION_2"
        )

    game = Snake(600, 600)
    p = PLE(game, reward_values={"positive": 100.0,
                                 "negative": -100.0,
                                 "tick": -0.5,
                                 "loss": -50.0,
                                 "win": 5.0}, display_screen=False, state_preprocessor=process_state)
    # NOTE(review): this name used to be set to 10000 and ignored while the
    # loop ran 100000 times; the effective count is kept -- confirm intended.
    n_games = 100000
    print(sys.argv[1])
    # input_shape=12 matches the 12 flags returned by vision().
    agent = Agent(alpha=float(sys.argv[1]), gamma=float(sys.argv[2]), n_actions=4, epsilon=0.99, batch_size=64, input_shape=12, epsilon_dec=0.09,
                  epsilon_end=0.01,
                  memory_size=1000000, file_name=sys.argv[3], activations=[str(sys.argv[4]), str(sys.argv[5])])
    # agent.load_game()
    # ASCII codes of 'w', 's', 'a', 'd'; the agent's action index picks one.
    actions = [119, 115, 97, 100]
    scores = []
    for _ in range(n_games):
        if p.game_over():
            p.reset_game()
        score = 0
        while not p.game_over():
            # The preprocessor wraps the state values in a 1-element array,
            # hence the [0] and the list() conversion.
            old_state = np.array(vision(list(p.getGameState()[0])))

            action = agent.choose_action(old_state)
            reward = p.act(actions[action])
            new_state = np.array(vision(list(p.getGameState()[0])))
            agent.add_experience(old_state, action, reward, new_state)
            agent.learn()
            score = p.score()
        scores.append(score)
        print(f"Score for model iteration number _ {str(sys.argv[3])} with learning_rate {sys.argv[1]}, gama {sys.argv[2]}, activations: {sys.argv[4],sys.argv[5]}  is score {score}")
        agent.save_game()
    with open('scoruri.txt', "a", encoding="utf-8") as my_scores:
        my_scores.write(f'Scorurile pentru rularea cu activarile {sys.argv[4], sys.argv[5]}')
        # file.write() only accepts str; passing the list raised TypeError.
        # The scores go on their own line, and the trailing newline keeps
        # later appended runs apart.
        my_scores.write(f"\n{scores}\n")


# Script entry point: start training only when this file is executed
# directly, not when it is imported.
if __name__ == '__main__':
    run()