import math
import random
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as pltclr

#TODO remove this when finished
#random.seed(3)
#some linear function
#T_DATA_IN = []
#T_DATA_OUT = []
#points = 50
#for a in range(points+1):
#    T_DATA_IN.append([float(a)/points])
#    T_DATA_OUT.append([float(a)/points])
#CASE = 'regression'

#quadratic function
#T_DATA_IN = []
#T_DATA_OUT = []
#points = 300
#for a in range(points+1):
#    T_DATA_IN.append([float(a)/points])
#    T_DATA_OUT.append([(float(a)/points-0.5)**2*4])
#CASE = 'regression'

#two-point regression example
#T_DATA_IN = [[0],[0.7]]
#T_DATA_OUT = [[0.5],[0.9]]
#CASE = 'regression'

#classification: positive or negative number
#T_DATA_IN = [[1] ,[0.5],[-0.8],[-1] ,[-0.2],[0.1]]
#T_DATA_OUT = [[1,0],[1,0],[0,1] ,[0,1],[0,1] ,[0,1]]
#CASE = 'classification'

#classification: large or small vector
T_DATA_IN = [[0.1,0.2],[0.3,0.1],[0.9,0.8],[0.7,0.9]]
T_DATA_OUT = [[1,0] ,[1,0] ,[0,1] ,[0,1] ]
CASE = 'classification'

#classification: from -0.3 to 0.3:  0,1,0
#                from -inf to -0.3: 1,0,0
#                from 0.3 to inf:   0,0,1
#T_DATA_IN = [[-0.9],[-0.7],[-0.5],[-0.3],[-0.1],[0.2],[0.4],[0.6],[0.8],[1]]
#T_DATA_OUT = [[1,0,0],[1,0,0],[1,0,0],[1,0,0],[0,1,0],[0,1,0],[0,0,1],[0,0,1],[0,0,1],[0,0,1]]
#CASE = 'classification'
#configuration:
HIDDEN_LAYERS = [22,22] #count of nodes for the hidden layers, e.g. [2,3] or [9,2,2,2]
MAX_ITERATIONS = 2200 #to prevent non-converging infinite loops
ERROR_THRESHOLD = 0.01 #stop when the error is small enough
LEARNING_RATE = 0.1 #for regression use '1'

#this defines the shape of the network; input and output layer sizes are taken from the training data
LAYER_SIZES = [len(T_DATA_IN[0])]+HIDDEN_LAYERS+[len(T_DATA_OUT[0])]
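#with the active dataset this evaluates to LAYER_SIZES == [2, 22, 22, 2]:
#2 input features, two hidden layers of 22 nodes each, 2 output classes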
#some useful stuff to make it simpler
LAYER_COUNT = len(LAYER_SIZES)
plotcolor = [1,0,0] #first hsv color should be black
SIGMOID_EXPONENT = 1 #for the activation function (don't change this, dsigma(x) does not take it into account)
#this for-loop creates a weight matrix for each layer, initialised with random values from -1 to 1
#weights is all weights. weights[i] is the weight matrix between layer i and layer i+1.
weights = []
changes = []
#iterate through all layers except the input layer
for layer_nr in range(1, LAYER_COUNT):
    #initialize the weight matrix for this layer
    weights.append([])
    changes.append([])
    #number of nodes in the previous layer is the row count of the weight matrix
    #number of nodes in the current layer is the column count
    row_count = LAYER_SIZES[layer_nr-1]
    col_count = LAYER_SIZES[layer_nr]
    layer_index = layer_nr-1
    #create the weight matrix
    for row_index in range(row_count):
        weights[layer_index].append([])
        changes[layer_index].append([])
        for col_index in range(col_count):
            #random value in [-1, 1] (in steps of 0.01)
            random_value = random.randint(-100,100)/100.0
            weights[layer_index][row_index].append(random_value)
            changes[layer_index][row_index].append(0)
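#quick shape sanity check (an added illustration, not part of the original algorithm):
#weights[l] must be a LAYER_SIZES[l] x LAYER_SIZES[l+1] matrix, so that a row
#vector of activations can be .dot()-multiplied into the next layer
for l in range(LAYER_COUNT-1):
    assert len(weights[l]) == LAYER_SIZES[l]
    assert len(weights[l][0]) == LAYER_SIZES[l+1]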
def sigma(x):
    return 1.0/(1.0+math.exp(-SIGMOID_EXPONENT*x))
def dsigma(x): #x is already sigmoided: sigma'(z) = sigma(z)*(1-sigma(z))
    return x*(1.0-x)
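#sanity checks for the activation function (added, safe to remove):
#sigma(0) is exactly 0.5, and dsigma(0.5) is 0.25, the steepest slope of the logistic curve
assert abs(sigma(0.0) - 0.5) < 1e-9
assert abs(dsigma(sigma(0.0)) - 0.25) < 1e-9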
#forward propagation. v is a vector from the input layer
def fprop(v):
    #start at index 1, because index 0 is the input v and needs no computation
    #that's why v itself is put into interims
    interims.append(v) #here v is a plain (non-numpy) list at first
    for layer_nr in range(1, LAYER_COUNT):
        #v always has the shape of layer layer_nr-1,
        #thanks to the multiplication in the previous iteration
        if len(v) != LAYER_SIZES[layer_nr-1]:
            print "ERROR, len(v) !=",LAYER_SIZES[layer_nr-1],";",v
        #calculate: multiply the layer activations with the weight matrix
        v = np.array(v).dot(weights[layer_nr-1]).tolist()
        #v is now the vector of raw node inputs for this layer
        #transform it according to the activation function
        for j in range(len(v)):
            #sigmoid; produces a soft curve
            v[j] = sigma(v[j])
            #linear alternative:
            #v[j] = min(max(v[j]+0.5,0),1)
        interims.append(v)
    #v now has the shape of the output layer, because the last
    #matrix multiplication (.dot()) used the last weight matrix
    return v
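#usage note (added): fprop expects one value per input node and returns one sigmoid
#activation per output node, e.g. fprop([0.1,0.2]) -> a 2-element list for this network.
#With untrained random weights the returned values are arbitrary numbers in (0,1).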
def plot():
    plotcolor[0] = (plotcolor[0]+1.0/MAX_ITERATIONS)%1.0
    lower_bound = min(np.array(T_DATA_IN).flat[:])
    upper_bound = max(np.array(T_DATA_IN).flat[:])
    step = 0.03 #set this to a lower value to get a smoother curve
    input_value = lower_bound
    x1 = []
    x2 = []
    while input_value < upper_bound:
        fproped_value = fprop([input_value])
        x2.append(fproped_value)
        x1.append(input_value)
        input_value += step
    #result
    plt.plot(x1,x2,
             marker = '',
             linestyle = '-',
             color = pltclr.hsv_to_rgb(plotcolor))
    #after the first line has been plotted in black, put some color in for the next lines
    plotcolor[1] = 1
    plotcolor[2] = 1
#train the neural network on the training data
#initialize the error in a way that does not prevent the loop from running
error = ERROR_THRESHOLD+1
interims = [] #per-sample node activations, needed for the backpropagation algorithm
iterations = 0
print "training..."
#stop when the error is small enough or the computation takes too long
while iterations < MAX_ITERATIONS and error > ERROR_THRESHOLD:
    #iterate over the training data
    #at this point interims is stale because the plot() function calls fprop very often,
    #but it is reset before it can break anything (just to prevent confusion)
    iterations += 1
    for i in range(len(T_DATA_IN)):
        #backpropagate
        interims = [] #reset interims
        target_value = T_DATA_OUT[i] #target for the NN to calculate
        fproped_value = T_DATA_IN[i] #initialize with the input, nothing computed yet
        fproped_value = fprop(fproped_value)
        #create the shape of the network in delta
        delta = LAYER_SIZES[:] #[:]: make a copy of the content
        for l in range(len(delta)):
            delta[l] = np.zeros(delta[l]).tolist()
        #for each output node compute the delta
        j = -1 #index of the output layer
        error = 0.0 #total error of the current sample, used for the stopping criterion
        for k in range(LAYER_SIZES[j]):
            output_node_k = interims[j][k]
            node_error = -(output_node_k - target_value[k])
            delta[j][k] = dsigma(output_node_k) * node_error
            #accumulate the total error for the stopping criterion
            error += abs(fproped_value[k]-target_value[k])
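        #worked example (added): for target 1 and output 0.8,
        #node_error = 0.2 and dsigma(0.8) = 0.16, so delta[j][k] = 0.032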
        #for each hidden node calculate the delta
        #(the layer loop uses l, not i, so the training-sample index i stays intact)
        for l in range(LAYER_COUNT-2, -1, -1): #delta for the last layer is already calculated; walk backwards from there
            j = l+1 #points to the next layer
            for n in range(LAYER_SIZES[l]): #iterate over nodes in layer l
                error_sum = 0.0 #has to be reset for every node
                for k in range(LAYER_SIZES[j]): #iterate over nodes in layer l+1 (k in j)
                    a = delta[j][k] #get the delta from the next layer in the network
                    b = weights[l][n][k] #get the weight in layer l that points from n to k
                    error_sum += a * b
                delta[l][n] = dsigma(interims[l][n]) * error_sum
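        #as a formula (added note): delta_n = dsigma(o_n) * sum_k(delta_k * w_nk),
        #i.e. a hidden node's delta is its activation slope times the deltas of the
        #next layer, weighted by the connections that lead there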
        #update the weights
        for layer in range(LAYER_COUNT-1):
            for node_lp0 in range(LAYER_SIZES[layer]):
                for node_lp1 in range(LAYER_SIZES[layer+1]):
                    change = delta[layer+1][node_lp1] * interims[layer][node_lp0]
                    previous_change = changes[layer][node_lp0][node_lp1]
                    #+= instead of -=: node_error already carries the minus sign,
                    #so adding the change is a gradient descent step
                    weights[layer][node_lp0][node_lp1] += LEARNING_RATE * change + 0.5 * previous_change
                    changes[layer][node_lp0][node_lp1] = change
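                    #the 0.5 * previous_change term acts as momentum (added note):
                    #it carries a fraction of the last update forward, which smooths
                    #the descent and can speed up convergence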
        #older version of the weight update, kept commented out:
        #for i in range(len(weights)): #iterate over layers
        #    for n in range(len(weights[i])): #iterate over nodes1
        #        for n2 in range(len(weights[i][n])): #iterate over nodes2 that are connected to nodes1
        #            #weights[i][n] is an array. each element in it points to a single node in l+1
        #            dweight = LEARNING_RATE * delta[i][n] * interims[i][n]
        #            weights[i][n][n2] -= dweight #subtract dweight from every single weight
        if i == -1: #set this to some datapoint index to log maybe useful information
            print "output:",fproped_value,"error:",error
    if CASE == 'regression':
        plot()
    #print progress in 5% steps (exact multiples of 5% only)
    if (100*iterations) % (5*MAX_ITERATIONS) == 0:
        print iterations*100/MAX_ITERATIONS,"%"
if iterations == MAX_ITERATIONS:
    print "training aborted because MAX_ITERATIONS was reached. Error:",error
else:
    print "training finished"
#training finished
#do some matplotlib stuff to show the neural network's function
if CASE == 'regression':
    plot()
    #plot the training datapoints
    #the transpose selects the first input dimension, so 2D input does not break the plot
    plt.plot(np.array(T_DATA_IN).T[0].T.flat[:],np.array(T_DATA_OUT).flat[:],
             marker = 'o',
             linestyle = '')
    plt.xlabel("input")
    plt.ylabel("forward-propagated")
    plt.title("training result graph")
    plt.margins(0.1)
    plt.show()
if CASE == 'classification':
    for idatapoint in range(len(T_DATA_IN)):
        datapoint = T_DATA_IN[idatapoint]
        #truncate the outputs to two decimals for readable printing
        out = (np.int_(np.array(fprop(datapoint))*100)/100.0).tolist()
        print "in:",datapoint,"out:",out,"target:",T_DATA_OUT[idatapoint]
quit()
#log of errors:
#
# for the hidden deltas, used the whole weight vector from node j to the next layer instead of the single weight that points from j to k
# interims was updated before the activation function was applied
# input data was not normalized, hence the converging to y=1
# interims was not reset to [] at the beginning of each iteration