# Untitled

from buffer import Buffer, build_dqn, load_game
import numpy as np


class Agent:
    """Epsilon-greedy Q-learning agent built on one network and a replay buffer.

    The agent stores transitions in a ``Buffer``, picks actions with an
    epsilon-greedy policy over the predictions of ``q_eval``, and fits
    ``q_eval`` on sampled batches using a one-step bootstrapped target.
    """

    def __init__(self, alpha, gamma, n_actions, epsilon, batch_size, input_shape, epsilon_dec, epsilon_end,
                 memory_size, file_name, activations):
        """Create the replay buffer and the Q-network.

        Args:
            alpha: Forwarded as the first argument to ``build_dqn``
                (presumably the optimizer learning rate -- confirm there).
            gamma: Factor applied to the best next-state value in ``learn``.
            n_actions: Number of discrete actions; actions are ``0..n_actions-1``.
            epsilon: Initial probability of choosing a random action.
            batch_size: Number of transitions requested from the buffer per
                ``learn`` call.
            input_shape: Forwarded to ``Buffer`` and ``build_dqn``.
            epsilon_dec: Multiplier applied to ``epsilon`` after each
                learning step.
            epsilon_end: Lower bound for ``epsilon``.
            memory_size: Forwarded to ``Buffer`` (presumably its capacity).
            file_name: Location used by ``save_game`` and ``load_game``.
            activations: Forwarded to ``build_dqn``.
        """
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon
        self.epsilon_dec = epsilon_dec
        self.epsilon_end = epsilon_end
        self.action_space = list(range(n_actions))
        self.batch_size = batch_size
        self.experiences = Buffer(memory_size, input_shape, n_actions)

        # The two 256s are passed straight to build_dqn; presumably the
        # hidden-layer widths -- verify against build_dqn.
        self.q_eval = build_dqn(alpha, n_actions, input_shape, 256, 256, activations)
        self.file = file_name

    def add_experience(self, state, action, reward, new_state):
        """Record one transition in the replay buffer.

        Note that ``Buffer.store_transition`` is called with the argument
        order ``(state, new_state, reward, action)``, which differs from this
        method's own parameter order.
        """
        self.experiences.store_transition(state, new_state, reward, action)

    def choose_action(self, state):
        """Return an action index using the epsilon-greedy rule.

        With probability ``epsilon`` a uniformly random action is returned;
        otherwise the index of the largest value predicted by ``q_eval``.

        Args:
            state: A single observation (array-like), without a batch axis.
        """
        # The network is queried with a leading batch axis of size one.
        batched_state = np.asarray(state)[np.newaxis, ...]

        if np.random.random() < self.epsilon:
            return np.random.choice(self.action_space)

        q_values = self.q_eval.predict(batched_state)
        return np.argmax(q_values)

    def learn(self):
        """Fit ``q_eval`` on one sampled batch and decay ``epsilon``.

        Does nothing until the buffer's ``contor`` counter exceeds
        ``batch_size``.

        NOTE(review): no terminal flag is stored or used, so the bootstrap
        term is added for every transition, including any episode-ending
        ones -- confirm this is intended.
        """
        if self.experiences.contor <= self.batch_size:
            return

        state, action, reward, new_state = self.experiences.get_batch(self.batch_size)

        # Assumes ``action`` is one-hot encoded (one row per transition), so
        # the dot product with [0, 1, ..., n-1] recovers the action index.
        # The cast guards against a float-typed buffer: NumPy refuses to
        # index with floating-point arrays.
        action_indices = np.dot(action, self.action_space).astype(np.intp)

        target = self.q_eval.predict(state)
        new_values = self.q_eval.predict(new_state)

        batch_index = np.arange(self.batch_size, dtype=np.int32)

        # Only the Q-value of the action actually taken is moved towards the
        # bootstrapped target; other outputs keep their current prediction,
        # so they contribute no error to the fit.
        target[batch_index, action_indices] = reward + self.gamma * np.max(new_values, axis=1)

        self.q_eval.fit(state, target, verbose=0)

        # Clamp so a decay step can never push epsilon below its floor.
        if self.epsilon > self.epsilon_end:
            self.epsilon = max(self.epsilon * self.epsilon_dec, self.epsilon_end)
        else:
            self.epsilon = self.epsilon_end

    def save_game(self):
        """Save ``q_eval`` to the configured file location."""
        self.q_eval.save(str(self.file))

    def load_game(self):
        """Replace ``q_eval`` with the model returned by the module-level ``load_game``."""
        # Inside a method body the bare name resolves to the imported
        # module-level function, not to this method.
        self.q_eval = load_game(self.file)