Untitled

import gym
import random
import numpy as np
import tflearn
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.estimator import regression
from statistics import median, mean
from collections import Counter
import tensorflow as tf
import sys

LR = 1e-3
env = gym.make("CartPole-v0")
env.reset()
goal_steps = 500
score_requirement = 70
initial_games = 300

n_nodes_hl1 = 500
n_nodes_hl2 = 500
n_nodes_hl3 = 500
n_classes = 2
batch_size = 200


def create_randoms():
    for episode in range(5):
        env.reset()
        for t in range(goal_steps):
            #env.render()
            action = env.action_space.sample()
            observation, reward, done, info = env.step(action)
            if done:
                break


def create_population():
    training_data = []
    scores = []
    accepted_scores= []
    for iteration in range(initial_games):

        score = 0
        game_memory = []
        prev_observation = []
        for _ in range(goal_steps):
            action = random.randrange(0,2)
            observation, reward, done, info = env.step(action)

            if len(prev_observation) > 0:
                game_memory.append([prev_observation,action])

            prev_observation = observation
            score += reward
            if done:
                break
        if score > score_requirement:
            accepted_scores.append(score)
            for data in game_memory:
                if data[1] == 1:
                    output =[0,1]
                elif data[1] == 0:
                    output = [1,0]

                training_data.append([data[0],output])
        env.reset()
        scores.append(score)

    training_data_save = np.array(training_data)
    np.save('saved.npy',training_data_save)

    print('Average accepted score:',mean(accepted_scores))
    print('Median accepted score:',median(accepted_scores))
    print(Counter(accepted_scores))

    return training_data

def neural_network_modelv2(data):
    # (input_data * weights) +biases
    hidden_1_layer = {'weights': tf.Variable(tf.random_normal([4, n_nodes_hl1])),
                      'biases': tf.Variable(tf.random_normal([n_nodes_hl1]))}

    hidden_2_layer = {'weights': tf.Variable(tf.random_normal([n_nodes_hl1, n_nodes_hl2])),
                      'biases': tf.Variable(tf.random_normal([n_nodes_hl2]))}

    hidden_3_layer = {'weights': tf.Variable(tf.random_normal([n_nodes_hl2, n_nodes_hl3])),
                      'biases': tf.Variable(tf.random_normal([n_nodes_hl3]))}

    output_layer = {'weights': tf.Variable(tf.random_normal([n_nodes_hl3, n_classes])),
                    'biases': tf.Variable(tf.random_normal([n_classes])), }
    data = tf.cast(data, tf.float32)
    l1 = tf.add(tf.matmul(data, hidden_1_layer['weights']), hidden_1_layer['biases'])
    l1 = tf.nn.relu(l1)

    l2 = tf.add(tf.matmul(l1, hidden_2_layer['weights']), hidden_2_layer['biases'])
    l2 = tf.nn.relu(l2)

    l3 = tf.add(tf.matmul(l2, hidden_3_layer['weights']), hidden_3_layer['biases'])
    l3 = tf.nn.relu(l3)

    output_layer = tf.matmul(l3, output_layer['weights']) + output_layer['biases']

    return output_layer

def train_modelv2(training_data, model=False):
    x = tf.placeholder('float')
    y = tf.placeholder('float')

    trainingX = np.array([i[0] for i in training_data]).reshape(-1, len(training_data[0][0]))
    trainingY = [i[1] for i in training_data]


    nn = neural_network_modelv2(trainingX)
    mn = tf.nn.softmax_cross_entropy_with_logits(logits=nn, labels=y)
    cost = tf.reduce_mean(mn)
    optimizer = tf.train.AdamOptimizer().minimize(cost)

    hm_epochs = 10

    with tf.Session() as sess:

        sess.run(tf.initialize_all_variables())

        # Train
        for epoch in range(hm_epochs):
            epoch_loss = 0
            ca, c = sess.run([optimizer, cost], feed_dict={x: trainingX, y: trainingY})
            epoch_loss += c
        print('Training done')

        scores = []
        choices = []
        for each_game in range(10):
            score = 0
            game_memory = []
            prev_obs = []
            env.reset()
            for _ in range(goal_steps):
                # env.render()
                if len(prev_obs) == 0:
                    action = random.randrange(0, 2)
                else:
                    action = (sess.run([nn], feed_dict={x: prev_obs.reshape(-1, len(prev_obs))})[0])
                    print(action)


                choices.append(action)

                new_observation, reward, done, info = env.step(action)

                prev_obs = new_observation
                game_memory.append([new_observation, action])
                score += reward

                if done: break

            scores.append(score)

        print('Average Score:', sum(scores) / len(scores))
        print('choice 1:{}  choice 0:{}'.format(choices.count(1) / len(choices), choices.count(0) / len(choices)))
        print(score_requirement)

training_data = create_population()
model = train_modelv2(training_data)