import random

import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam


def _build_model(self):
    # Two hidden layers of 24 units; the linear output layer emits one Q-value per action.
    model = Sequential()
    model.add(Dense(24, input_dim=self.state_size, activation="relu"))
    model.add(Dense(24, activation="relu"))
    model.add(Dense(self.action_size, activation="linear"))
    model.compile(optimizer=Adam(lr=self.learning_rate), loss="mse")
    return model


def get_action(self, state):
    # Explore: with probability epsilon, take a random action.
    if np.random.rand() < self.epsilon:
        return random.randrange(self.action_size)

    # Exploit: otherwise use the model to predict the Q-values and select the max.
    q_values = self.model.predict(state)
    return np.argmax(q_values[0])


def replay(self, batch_size):
    # Wait until enough transitions have been collected to fill a minibatch.
    if len(self.memory) < batch_size:
        return

    # Decay the exploration rate.
    self.epsilon *= self.epsilon_decay
    self.epsilon = max(self.epsilon_min, self.epsilon)

    minibatch = random.sample(self.memory, batch_size)

    state_batch, q_values_batch = [], []
    for state, action, reward, next_state, done in minibatch:
        # Get predictions for all actions for the current state.
        q_values = self.model.predict(state)

        # If the episode ended here, the target is just the reward; otherwise add
        # the discounted future reward predicted by the target network.
        if done:
            q_values[0][action] = reward
        else:
            future_reward = max(self.target_model.predict(next_state)[0])
            q_values[0][action] = reward + self.gamma * future_reward

        state_batch.append(state[0])
        q_values_batch.append(q_values[0])

    # Re-fit the model to move it closer to these newly calculated targets.
    self.model.fit(np.array(state_batch), np.array(q_values_batch),
                   batch_size=batch_size, epochs=1, verbose=0)

    self.update_weights()


def update_weights(self):
    # Soft-update (Polyak-average) the target network towards the online network.
    weights = self.model.get_weights()
    target_weights = self.target_model.get_weights()

    for i in range(len(target_weights)):
        target_weights[i] = weights[i] * self.tau + target_weights[i] * (1 - self.tau)

    self.target_model.set_weights(target_weights)
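

# --- Hypothetical surrounding context (not part of the original paste) -----------------
# The four functions above reference attributes (state_size, action_size, memory, epsilon,
# gamma, tau, model, target_model) that are defined elsewhere in the original agent. The
# sketch below is a minimal, assumed reconstruction of that missing context: the DQNAgent
# class name, the hyperparameter values, the remember() helper and the CartPole-v1
# training loop are illustrative guesses, not the author's code.

from collections import deque
import gym


class DQNAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)   # replay buffer of (s, a, r, s', done) tuples
        self.gamma = 0.95                  # discount factor for future rewards
        self.epsilon = 1.0                 # initial exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.tau = 0.125                   # soft-update rate for the target network
        self.model = self._build_model()
        self.target_model = self._build_model()

    def remember(self, state, action, reward, next_state, done):
        # Store one transition for later replay.
        self.memory.append((state, action, reward, next_state, done))


# Attach the functions from the paste as methods (equivalently, they could simply be
# defined inside the class body).
DQNAgent._build_model = _build_model
DQNAgent.get_action = get_action
DQNAgent.replay = replay
DQNAgent.update_weights = update_weights


# Example training loop against CartPole-v1, assuming the classic gym step API.
if __name__ == "__main__":
    env = gym.make("CartPole-v1")
    agent = DQNAgent(env.observation_space.shape[0], env.action_space.n)
    batch_size = 32

    for episode in range(500):
        state = env.reset().reshape(1, -1)
        done = False
        while not done:
            action = agent.get_action(state)
            next_state, reward, done, _ = env.step(action)
            next_state = next_state.reshape(1, -1)
            agent.remember(state, action, reward, next_state, done)
            state = next_state
        agent.replay(batch_size)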