Untitled

Epoch 50 : cost = nan W = nan b = nan
Epoch 100 : cost = nan W = nan b = nan
Epoch 150 : cost = nan W = nan b = nan
Epoch 200 : cost = nan W = nan b = nan
Epoch 250 : cost = nan W = nan b = nan
Epoch 300 : cost = nan W = nan b = nan
Epoch 350 : cost = nan W = nan b = nan
Epoch 400 : cost = nan W = nan b = nan
Epoch 450 : cost = nan W = nan b = nan
Epoch 500 : cost = nan W = nan b = nan
Traceback (most recent call last):
  File "editabletoworkouterrors.py", line 77, in <module>
    new_green_dur = green_light_duration_new(current_reward, current_green)
  File "editabletoworkouterrors.py", line 66, in green_light_duration_new
    green_light_duration_new = weight * x + bias
TypeError: can't multiply sequence by non-int of type 'numpy.float32'

import numpy as np
import random
import matplotlib.pyplot as plt
import tensorflow as tf
import warnings

warnings.simplefilter(action='once', category=FutureWarning) # future warnings annoy me

# set the epsilon for this episode

# Both histories begin empty
reward, green_light = [], []

# Seed observations: the i-th reward goes with the i-th green light duration
current_reward = [
    -1000,
    -900,
    -950,
]
current_green = [
    10,
    12,
    12,
]

# Pass in reward and green_light
def green_light_duration_new(current_reward, current_green):
    """Fit green light duration against reward with a linear model and predict.

    A one-variable linear regression ``duration = W * reward + b`` is trained
    with TensorFlow's gradient descent optimizer, then the fitted line is
    evaluated at every reward in ``current_reward``. Two matplotlib figures
    are shown: the training data, and the data with the fitted line.

    W and b start from random values, so results differ slightly between
    calls.

    Args:
        current_reward: 1-D sequence of observed rewards (the regressor).
        current_green: 1-D sequence of green light durations, one per reward.

    Returns:
        numpy.ndarray: The fitted duration for each entry of
        ``current_reward``, in the same order.

    Raises:
        ValueError: If the inputs are empty, not one-dimensional, or differ
            in length.
    """
    # Work on float arrays: multiplying a plain list by a numpy float raises
    # TypeError, and the prediction below needs element-wise arithmetic.
    x = np.asarray(current_reward, dtype=np.float64)
    y = np.asarray(current_green, dtype=np.float64)
    if x.ndim != 1 or x.shape != y.shape or x.size == 0:
        raise ValueError(
            "current_reward and current_green must be non-empty 1-D "
            "sequences of equal length"
        )
    n = len(x)

    # Plot of Training Data
    plt.scatter(x, y)
    plt.xlabel('Reward')
    plt.ylabel('Green Light Duration')
    plt.title("Training Data")
    plt.show()

    # Rewards with magnitudes in the hundreds or thousands make gradient
    # descent overshoot at this learning rate until cost, W and b are all NaN.
    # Train on standardised rewards and map W and b back afterwards.
    x_mean = x.mean()
    x_scale = x.std()
    if x_scale == 0:
        # All rewards identical: centring alone keeps the inputs small.
        x_scale = 1.0
    x_scaled = (x - x_mean) / x_scale

    learning_rate = 0.01
    training_epochs = 500

    # Build the model in a private graph so repeated calls do not keep adding
    # variables and ops to the process-wide default graph.
    with tf.Graph().as_default():
        X = tf.placeholder("float")
        Y = tf.placeholder("float")
        W = tf.Variable(np.random.randn(), name="W")
        b = tf.Variable(np.random.randn(), name="b")
        # Hypothesis
        y_pred = tf.add(tf.multiply(X, W), b)
        # Mean Squared Error Cost Function
        cost = tf.reduce_sum(tf.pow(y_pred - Y, 2)) / (2 * n)
        # Gradient Descent Optimizer
        optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
        # Global Variables Initializer
        init = tf.global_variables_initializer()

        with tf.Session() as sess:
            sess.run(init)
            for epoch in range(training_epochs):
                # Feed one (reward, duration) pair at a time
                for _x, _y in zip(x_scaled, y):
                    sess.run(optimizer, feed_dict={X: _x, Y: _y})
                # Report progress every 50 epochs, with W and b converted
                # back to the original reward scale
                if (epoch + 1) % 50 == 0:
                    c = sess.run(cost, feed_dict={X: x_scaled, Y: y})
                    w_scaled, b_scaled = sess.run([W, b])
                    print("Epoch", (epoch + 1), ": cost =", c,
                          "W =", w_scaled / x_scale,
                          "b =", b_scaled - w_scaled * x_mean / x_scale)
            # Pull the values needed after the session closes
            training_cost = sess.run(cost, feed_dict={X: x_scaled, Y: y})
            w_scaled, b_scaled = sess.run([W, b])

    # Undo the standardisation:
    #   w_s * (x - mean) / scale + b_s == (w_s / scale) * x + (b_s - w_s * mean / scale)
    weight = w_scaled / x_scale
    bias = b_scaled - weight * x_mean

    # Calculating the predictions
    predictions = weight * x + bias
    print("Training cost =", training_cost, "Weight =", weight, "bias =", bias, '\n')

    # Plotting the Results
    plt.plot(x, y, 'ro', label='Original data')
    plt.plot(x, predictions, label='Fitted line')
    plt.title('Linear Regression Result')
    plt.legend()
    plt.show()
    return predictions

# Run the regression on the observations collected so far
new_green_dur = green_light_duration_new(
    current_reward=current_reward,
    current_green=current_green,
)

# Record the prediction in the green light list so the regression can be run again later
green_light.extend([new_green_dur])

# Go on to run the rest of the simulation with the new green light duration,
# and append its subsequent reward to current_reward list to run again later.