import math
import random
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as pltclr

#TODO remove this when finished
random.seed(3)
#test: some linear function
T_DATA_IN  = [[1]]
T_DATA_OUT = [[0.8]]

#classification: positive or negative number
#T_DATA_IN  = [[1]  ,[0.5],[-0.8],[-1] ,[-0.2],[0.1]]
#T_DATA_OUT = [[1,0],[1,0],[0,1] ,[0,1],[0,1] ,[0,1]]

#classification: large or small vector
#T_DATA_IN  = [[1,2],[0.5,1],[10,20],[5,10]]
#T_DATA_OUT = [[1,0],[1,0]  ,[0,1]  ,[0,1] ]
#configuration:
HIDDEN_LAYERS = [4]    #node counts for the hidden layers, e.g. [2,3] or [9,2,2,2]
SIGMOID_EXPONENT = 5   #steepness of the activation function
MAX_ITERATIONS = 300   #to prevent non-converging infinite loops
ERROR_THRESHOLD = 0.01 #stop when the error is small enough
LEARNING_RATE = 0.1

#this defines the shape of the network. Training data has to match this shape
LAYER_SIZES = [len(T_DATA_IN[0])] + HIDDEN_LAYERS + [len(T_DATA_OUT[0])]
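#e.g. with the active test data above (one input value, one output value) and
#HIDDEN_LAYERS = [4], LAYER_SIZES becomes [1] + [4] + [1] = [1, 4, 1]:
#a 1-node input layer, a 4-node hidden layer and a 1-node output layer.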
#some useful shorthands
LAYER_COUNT = len(LAYER_SIZES)
plotcolor = [1,0,0] #hsv (1,0,0) has value 0, so the first plotted line is black

#log some information
print("training data:")
for i in range(len(T_DATA_IN)):
    print("input:", T_DATA_IN[i], "output:", T_DATA_OUT[i])
#this for-loop creates one weight matrix per layer, initialised with random values from -1 to 1
#weights holds all of them; weights[i] is the weight matrix between layer i and layer i+1
weights = []
#iterate through all layers except the input layer
for layer_nr in range(1, LAYER_COUNT):
    #initialise the weight matrix for this layer
    weights.append([])
    #number of nodes in the previous layer is the row count of the weight matrix,
    #number of nodes in the current layer is the column count
    row_count = LAYER_SIZES[layer_nr-1]
    col_count = LAYER_SIZES[layer_nr]
    layer_index = layer_nr-1
    #fill the weight matrix
    for row_index in range(row_count):
        weights[layer_index].append([])
        for col_index in range(col_count):
            #random value between -1 and 1 in steps of 0.01
            random_value = random.randint(-100, 100)/100.0
            weights[layer_index][row_index].append(random_value)
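#shape check for the default configuration: with LAYER_SIZES = [1, 4, 1] the
#loop above builds weights[0] as a 1x4 matrix (input -> hidden) and weights[1]
#as a 4x1 matrix (hidden -> output), so a row vector of layer i multiplied
#with weights[i] yields a row vector the size of layer i+1.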
#forward propagation. v is a vector fed into the input layer
def compute(v):
    #drop the interim results of the previous forward pass
    interims.clear()
    #index 0 is the input v itself, so there is nothing to calculate for it;
    #that's why v goes straight into interims
    interims.append(v)
    for layer_nr in range(1, LAYER_COUNT):
        #multiply the layer's output with its weight matrix, one layer at a time
        v = np.array(v).dot(weights[layer_nr-1]).tolist()
        #v is the interim result of this layer (stored by reference, so the
        #activation applied below ends up in interims as well)
        interims.append(v)
        #now transform the computed values with the activation function,
        #iterating over all entries of the vector
        for j in range(len(v)):
            #sigmoid; produces a soft curve
            exponent = -SIGMOID_EXPONENT*v[j]
            exponent = min(exponent, 50) #avoid float overflow for very negative v[j]
            v[j] = 1/(1+math.exp(exponent))
            #linear alternative:
            #v[j] = min(max(v[j]+0.5,0),1)
    #v now has the shape of the output layer, because the last
    #matrix multiplication (.dot()) was with the last weight matrix
    if len(v) != LAYER_SIZES[-1]:
        print("ERROR, len(v) !=", LAYER_SIZES[-1])
    return v
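#a quick trace of the shapes, assuming LAYER_SIZES = [1, 4, 1]: v starts as a
#1-vector, v.dot(weights[0]) gives a 4-vector (hidden layer) and
#v.dot(weights[1]) gives a 1-vector (output layer). Afterwards interims holds
#[input, hidden activations, output activations], which is exactly what the
#backpropagation below needs.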
def plot():
    plotcolor[0] = (plotcolor[0]+0.005)%1.0
    lower_bound = min(np.array(T_DATA_IN).flat[:])-0.5
    upper_bound = max(np.array(T_DATA_IN).flat[:])+0.5
    step = 0.03 #set this to a lower value to get a smoother curve
    input_value = lower_bound
    x1 = []
    x2 = []
    while input_value < upper_bound:
        computed_value = compute([input_value])
        x2.append(computed_value)
        x1.append(input_value)
        input_value += step
    #result
    plt.plot(x1, x2,
             marker = '',
             linestyle = '-',
             color = pltclr.hsv_to_rgb(plotcolor))
    #after the first line has been plotted in black, put some color in for the next lines
    plotcolor[1] = 1
    plotcolor[2] = 1
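#note: plot() sweeps a single scalar input through the network
#(compute([input_value])), so it only produces a meaningful curve while the
#input layer has exactly one node, as with the active test data.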
#train the neural network on the training data
max_iterations = MAX_ITERATIONS
#initialise the error in a way which does not prevent the loop from running
error = ERROR_THRESHOLD+1
interims = [] #layer-by-layer outputs, needed for the backpropagation algorithm

#takes the (b-1)-th iterated square root of a; for b == 1 it returns a unchanged
def root(a, b):
    while b-1 > 0:
        a = np.sqrt(a)
        b -= 1
    return a

print("training...")
#stop when the error is small enough or the computation takes too long
while max_iterations > 0 and abs(error) > ERROR_THRESHOLD:
    max_iterations -= 1
    #iterate over the training data, accumulating the error of this pass
    error = 0
    for i in range(len(T_DATA_IN)):
        #backpropagate
        target_value = T_DATA_OUT[i] #target for the NN to calculate
        computed_value = compute(T_DATA_IN[i])
        #sum up the absolute errors of all output nodes
        for j in range(LAYER_SIZES[-1]):
            error += abs(computed_value[j]-target_value[j])
        #create the shape of the network in delta
        delta = LAYER_SIZES[:] #[:] makes a copy of the content
        for l in range(len(delta)):
            delta[l] = np.zeros(delta[l]).tolist()
        #compute the delta for each output node
        j = -1 #index of the output layer
        for k in range(LAYER_SIZES[j]):
            old_output = interims[j][k]
            delta[j][k] = old_output * (1 - old_output) * (old_output - target_value[k])
            delta[j][k] = root(delta[j][k], 1) #with b == 1 this is an identity
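        #worked example for the output delta: with an output o = 0.7 and a
        #target t = 1, delta = o*(1-o)*(o-t) = 0.7*0.3*(-0.3) = -0.063.
        #o*(1-o) is the derivative of the standard sigmoid; the constant
        #SIGMOID_EXPONENT factor of the scaled sigmoid is omitted here, which
        #only rescales the gradient and can be absorbed into LEARNING_RATE.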
        #compute the delta for each hidden node; the delta for the last layer
        #is already calculated, so walk backwards from the end of the network
        for layer in reversed(range(LAYER_COUNT-1)):
            j = layer+1 #index of the next layer
            for n in range(LAYER_SIZES[layer]): #iterate over the nodes in this layer
                summ = 0 #weighted delta sum, reset for every node
                for k in range(LAYER_SIZES[j]): #iterate over the nodes in the next layer
                    a = weights[layer][n][k] #weight that points from n to k
                    b = delta[j][k]
                    summ += a * b
                #derivative of the node's output times the weighted delta sum
                delta[layer][n] = interims[layer][n] * (1 - interims[layer][n]) * summ
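        #this is the standard backpropagation recurrence: for a hidden node n,
        #delta_n = o_n * (1 - o_n) * sum over k of (w_nk * delta_k), where k
        #runs over the nodes of the following layer, so the error signal flows
        #backwards through the same weights that were used forwards.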
        #now that the deltas are there, update the weights
        for l in range(len(weights)):
            for n in range(len(weights[l])):
                #weights[l][n] is an array; element k in it points to node k in layer l+1
                for k in range(len(weights[l][n])):
                    dweight = LEARNING_RATE * delta[l+1][k] * interims[l][n]
                    weights[l][n][k] -= dweight #subtract dweight from this single weight
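        #gradient-descent delta rule: the weight from node n in layer l to node
        #k in layer l+1 changes by -LEARNING_RATE * delta_k * o_n. For example,
        #delta_k = -0.063, o_n = 0.5 and LEARNING_RATE = 0.1 give
        #dweight = 0.1 * (-0.063) * 0.5 = -0.00315, so that weight grows by 0.00315.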
        if i == 0: #change this number to log a different training sample
            print("output:", computed_value, "error:", error)
            plot()
if max_iterations < 1:
    print("training aborted because MAX_ITERATIONS was reached. The error did not converge to 0")

#show how the NN classifies the training input now
print("result:")
for i in range(len(T_DATA_IN)):
    print("input:", T_DATA_IN[i], "output:", compute(T_DATA_IN[i]))
#training finished
#do some matplotlib stuff to show the neural network's function
plot()
#plot the training datapoints
plt.plot(np.array(T_DATA_IN).flat[:], np.array(T_DATA_OUT).flat[:],
         marker = 'o',
         linestyle = '')
plt.xlabel("input")
plt.ylabel("forward-propagated")
plt.title("training result graph")
plt.margins(0.1)
plt.show()
#now let the user use the trained network
print()
print("use 'q' to quit")
print("input example:", str(np.ones(len(T_DATA_IN[0])).tolist())[1:-1])
while 1:
    user_input = input("your input: ")
    if user_input == "q":
        quit()
    #parse the comma-separated input into a vector of floats
    try:
        input_value = [float(x) for x in user_input.split(",")]
    except ValueError:
        input_value = []
    if len(input_value) != LAYER_SIZES[0]:
        print("wrong input format. You probably entered a character that is neither a number nor ',', or your input vector has the wrong length")
        continue
    #forward-propagate; the computed value is a vector
    computed_value = compute(input_value)
    print("result: " + str(computed_value))