import math
import random
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as pltclr

#TODO remove this when finished
#random.seed(3)
#some linear function
#T_DATA_IN = []
#T_DATA_OUT = []
#points = 50
#for a in range(points+1):
#    T_DATA_IN.append([float(a)/points])
#    T_DATA_OUT.append([float(a)/points])
#CASE = 'regression'

#quadratic function
#T_DATA_IN = []
#T_DATA_OUT = []
#points = 300
#for a in range(points+1):
#    T_DATA_IN.append([float(a)/points])
#    T_DATA_OUT.append([(float(a)/points-0.5)**2*4])
#CASE = 'regression'

#two-point regression example
#T_DATA_IN = [[0],[0.7]]
#T_DATA_OUT = [[0.5],[0.9]]
#CASE = 'regression'

#classification: positive or negative number
#T_DATA_IN = [[1] ,[0.5],[-0.8],[-1] ,[-0.2],[0.1]]
#T_DATA_OUT = [[1,0],[1,0],[0,1] ,[0,1],[0,1] ,[0,1]]
#CASE = 'classification'

#classification: large or small vector
T_DATA_IN = [[0.1,0.2],[0.3,0.1],[0.9,0.8],[0.7,0.9]]
T_DATA_OUT = [[1,0] ,[1,0] ,[0,1] ,[0,1] ]
CASE = 'classification'

#classification: from -0.3 to 0.3:  0,1,0
#                from -inf to -0.3: 1,0,0
#                from 0.3 to inf:   0,0,1
#T_DATA_IN = [[-0.9],[-0.7],[-0.5],[-0.3],[-0.1],[0.2],[0.4],[0.6],[0.8],[1]]
#T_DATA_OUT = [[1,0,0],[1,0,0],[1,0,0],[1,0,0],[0,1,0],[0,1,0],[0,0,1],[0,0,1],[0,0,1],[0,0,1]]
#CASE = 'classification'
#configuration:
HIDDEN_LAYERS = [22,22] #count of nodes for the hidden layers, e.g. [2,3] or [9,2,2,2]
MAX_ITERATIONS = 2200 #to prevent non-converging infinite loops
ERROR_THRESHOLD = 0.01 #stop when the error is small enough
LEARNING_RATE = 0.1 #for regression use '1'

#this defines the shape of the network; input and output layer sizes are taken from the training data
LAYER_SIZES = [len(T_DATA_IN[0])]+HIDDEN_LAYERS+[len(T_DATA_OUT[0])]
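#with the active dataset this evaluates to LAYER_SIZES == [2, 22, 22, 2]:
#2 input features, two hidden layers of 22 nodes each, 2 output classes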
#some useful stuff to make it simpler
LAYER_COUNT = len(LAYER_SIZES)
plotcolor = [1,0,0] #first hsv color should be black
SIGMOID_EXPONENT = 1 #for the activation function (don't change this, dsigma(x) does not take it into account)
#this for-loop creates a weight matrix for each layer, initialised with random values from -1 to 1
#weights is all weights. weights[i] is the weight matrix between layer i and layer i+1.
weights = []
changes = []
#iterate through all layers except the input layer
for layer_nr in range(1, LAYER_COUNT):
    #initialize the weight matrix for this layer
    weights.append([])
    changes.append([])
    #number of nodes in the previous layer is the row count of the weight matrix
    #number of nodes in the current layer is the column count
    row_count = LAYER_SIZES[layer_nr-1]
    col_count = LAYER_SIZES[layer_nr]
    layer_index = layer_nr-1
    #create the weight matrix
    for row_index in range(row_count):
        weights[layer_index].append([])
        changes[layer_index].append([])
        for col_index in range(col_count):
            #random value in [-1, 1] (in steps of 0.01)
            random_value = random.randint(-100,100)/100.0
            weights[layer_index][row_index].append(random_value)
            changes[layer_index][row_index].append(0)
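#quick shape sanity check (an added illustration, not part of the original algorithm):
#weights[l] must be a LAYER_SIZES[l] x LAYER_SIZES[l+1] matrix, so that a row
#vector of activations can be .dot()-multiplied into the next layer
for l in range(LAYER_COUNT-1):
    assert len(weights[l]) == LAYER_SIZES[l]
    assert len(weights[l][0]) == LAYER_SIZES[l+1]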
def sigma(x):
    return 1.0/(1.0+math.exp(-SIGMOID_EXPONENT*x))
def dsigma(x): #x is already sigmoided: sigma'(z) = sigma(z)*(1-sigma(z))
    return x*(1.0-x)
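#sanity checks for the activation function (added, safe to remove):
#sigma(0) is exactly 0.5, and dsigma(0.5) is 0.25, the steepest slope of the logistic curve
assert abs(sigma(0.0) - 0.5) < 1e-9
assert abs(dsigma(sigma(0.0)) - 0.25) < 1e-9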
#forward propagation. v is a vector from the input layer
def fprop(v):
    #start at index 1, because index 0 is the input v and needs no computation
    #that's why v itself is put into interims
    interims.append(v) #here v is a plain (non-numpy) list at first
    for layer_nr in range(1, LAYER_COUNT):
        #v always has the shape of layer layer_nr-1,
        #thanks to the multiplication in the previous iteration
        if len(v) != LAYER_SIZES[layer_nr-1]:
            print "ERROR, len(v) !=",LAYER_SIZES[layer_nr-1],";",v
        #calculate: multiply the layer activations with the weight matrix
        v = np.array(v).dot(weights[layer_nr-1]).tolist()
        #v is now the vector of raw node inputs for this layer
        #transform it according to the activation function
        for j in range(len(v)):
            #sigmoid; produces a soft curve
            v[j] = sigma(v[j])
            #linear alternative:
            #v[j] = min(max(v[j]+0.5,0),1)
        interims.append(v)
    #v now has the shape of the output layer, because the last
    #matrix multiplication (.dot()) used the last weight matrix
    return v
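#usage note (added): fprop expects one value per input node and returns one sigmoid
#activation per output node, e.g. fprop([0.1,0.2]) -> a 2-element list for this network.
#With untrained random weights the returned values are arbitrary numbers in (0,1).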
def plot():
    plotcolor[0] = (plotcolor[0]+1.0/MAX_ITERATIONS)%1.0
    lower_bound = min(np.array(T_DATA_IN).flat[:])
    upper_bound = max(np.array(T_DATA_IN).flat[:])
    step = 0.03 #set this to a lower value to get a smoother curve
    input_value = lower_bound
    x1 = []
    x2 = []
    while input_value < upper_bound:
        fproped_value = fprop([input_value])
        x2.append(fproped_value)
        x1.append(input_value)
        input_value += step
    #result
    plt.plot(x1,x2,
             marker = '',
             linestyle = '-',
             color = pltclr.hsv_to_rgb(plotcolor))
    #after the first line has been plotted in black, put some color in for the next lines
    plotcolor[1] = 1
    plotcolor[2] = 1
#train the neural network on the training data
#initialize the error in a way that does not prevent the loop from running
error = ERROR_THRESHOLD+1
interims = [] #per-sample node activations, needed for the backpropagation algorithm
iterations = 0
print "training..."
#stop when the error is small enough or the computation takes too long
while iterations < MAX_ITERATIONS and error > ERROR_THRESHOLD:
    #iterate over the training data
    #at this point interims is stale because the plot() function calls fprop very often,
    #but it is reset before it can break anything (just to prevent confusion)
    iterations += 1
    for i in range(len(T_DATA_IN)):
        #backpropagate
        interims = [] #reset interims
        target_value = T_DATA_OUT[i] #target for the NN to calculate
        fproped_value = T_DATA_IN[i] #initialize with the input, nothing computed yet
        fproped_value = fprop(fproped_value)
        #create the shape of the network in delta
        delta = LAYER_SIZES[:] #[:]: make a copy of the content
        for l in range(len(delta)):
            delta[l] = np.zeros(delta[l]).tolist()
        #for each output node compute the delta
        j = -1 #index of the output layer
        error = 0.0 #total error of the current sample, used for the stopping criterion
        for k in range(LAYER_SIZES[j]):
            output_node_k = interims[j][k]
            node_error = -(output_node_k - target_value[k])
            delta[j][k] = dsigma(output_node_k) * node_error
            #accumulate the total error for the stopping criterion
            error += abs(fproped_value[k]-target_value[k])
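        #worked example (added): for target 1 and output 0.8,
        #node_error = 0.2 and dsigma(0.8) = 0.16, so delta[j][k] = 0.032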
        #for each hidden node calculate the delta
        #(the layer loop uses l, not i, so the training-sample index i stays intact)
        for l in range(LAYER_COUNT-2, -1, -1): #delta for the last layer is already calculated; walk backwards from there
            j = l+1 #points to the next layer
            for n in range(LAYER_SIZES[l]): #iterate over nodes in layer l
                error_sum = 0.0 #has to be reset for every node
                for k in range(LAYER_SIZES[j]): #iterate over nodes in layer l+1 (k in j)
                    a = delta[j][k] #get the delta from the next layer in the network
                    b = weights[l][n][k] #get the weight in layer l that points from n to k
                    error_sum += a * b
                delta[l][n] = dsigma(interims[l][n]) * error_sum
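        #as a formula (added note): delta_n = dsigma(o_n) * sum_k(delta_k * w_nk),
        #i.e. a hidden node's delta is its activation slope times the deltas of the
        #next layer, weighted by the connections that lead there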
        #update the weights
        for layer in range(LAYER_COUNT-1):
            for node_lp0 in range(LAYER_SIZES[layer]):
                for node_lp1 in range(LAYER_SIZES[layer+1]):
                    change = delta[layer+1][node_lp1] * interims[layer][node_lp0]
                    previous_change = changes[layer][node_lp0][node_lp1]
                    #+= instead of -=: node_error already carries the minus sign,
                    #so adding the change is a gradient descent step
                    weights[layer][node_lp0][node_lp1] += LEARNING_RATE * change + 0.5 * previous_change
                    changes[layer][node_lp0][node_lp1] = change
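                    #the 0.5 * previous_change term acts as momentum (added note):
                    #it carries a fraction of the last update forward, which smooths
                    #the descent and can speed up convergence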
        #older version of the weight update, kept commented out:
        #for i in range(len(weights)): #iterate over layers
        #    for n in range(len(weights[i])): #iterate over nodes1
        #        for n2 in range(len(weights[i][n])): #iterate over nodes2 that are connected to nodes1
        #            #weights[i][n] is an array. each element in it points to a single node in l+1
        #            dweight = LEARNING_RATE * delta[i][n] * interims[i][n]
        #            weights[i][n][n2] -= dweight #subtract dweight from every single weight
        if i == -1: #set this to some datapoint index to log maybe useful information
            print "output:",fproped_value,"error:",error
    if CASE == 'regression':
        plot()
    #print progress in 5% steps (exact multiples of 5% only)
    if (100*iterations) % (5*MAX_ITERATIONS) == 0:
        print iterations*100/MAX_ITERATIONS,"%"
if iterations == MAX_ITERATIONS:
    print "training aborted because MAX_ITERATIONS was reached. Error:",error
else:
    print "training finished"
#training finished
#do some matplotlib stuff to show the neural network's function
if CASE == 'regression':
    plot()
    #plot the training datapoints
    #the transpose selects the first input dimension, so 2D input does not break the plot
    plt.plot(np.array(T_DATA_IN).T[0].T.flat[:],np.array(T_DATA_OUT).flat[:],
             marker = 'o',
             linestyle = '')
    plt.xlabel("input")
    plt.ylabel("forward-propagated")
    plt.title("training result graph")
    plt.margins(0.1)
    plt.show()
if CASE == 'classification':
    for idatapoint in range(len(T_DATA_IN)):
        datapoint = T_DATA_IN[idatapoint]
        #truncate the outputs to two decimals for readable printing
        out = (np.int_(np.array(fprop(datapoint))*100)/100.0).tolist()
        print "in:",datapoint,"out:",out,"target:",T_DATA_OUT[idatapoint]
quit()
#log of errors:
#
# for the hidden deltas, used the whole weight vector from node j to the next layer instead of the single weight that points from j to k
# interims was updated before the activation function was applied
# input data was not normalized, hence the converging to y=1
# interims was not reset to [] at the beginning of each iteration