import math
import random
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as pltclr

#TODO remove this when finished
random.seed(3)
#test: some linear function
T_DATA_IN  = [[1]]
T_DATA_OUT = [[0.8]]

#classification: positive or negative number
#T_DATA_IN  = [[1]  ,[0.5],[-0.8],[-1] ,[-0.2],[0.1]]
#T_DATA_OUT = [[1,0],[1,0],[0,1] ,[0,1],[0,1] ,[0,1]]

#classification: large or small vector
#T_DATA_IN  = [[1,2],[0.5,1],[10,20],[5,10]]
#T_DATA_OUT = [[1,0],[1,0]  ,[0,1]  ,[0,1] ]
#configuration:
HIDDEN_LAYERS = [4]    #node counts for the hidden layers, e.g. [2,3] or [9,2,2,2]
SIGMOID_EXPONENT = 5   #steepness of the activation function
MAX_ITERATIONS = 300   #to prevent non-converging infinite loops
ERROR_THRESHOLD = 0.01 #stop when the error is small enough
LEARNING_RATE = 0.1

#this defines the shape of the network. Training data has to match this shape
LAYER_SIZES = [len(T_DATA_IN[0])] + HIDDEN_LAYERS + [len(T_DATA_OUT[0])]
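#e.g. with the active test data above (one input value, one output value) and
#HIDDEN_LAYERS = [4], LAYER_SIZES becomes [1] + [4] + [1] = [1, 4, 1]:
#a 1-node input layer, a 4-node hidden layer and a 1-node output layer.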
#some useful shorthands
LAYER_COUNT = len(LAYER_SIZES)
plotcolor = [1,0,0] #hsv (1,0,0) has value 0, so the first plotted line is black

#log some information
print("training data:")
for i in range(len(T_DATA_IN)):
    print("input:", T_DATA_IN[i], "output:", T_DATA_OUT[i])
#this for-loop creates one weight matrix per layer, initialised with random values from -1 to 1
#weights holds all of them; weights[i] is the weight matrix between layer i and layer i+1
weights = []
#iterate through all layers except the input layer
for layer_nr in range(1, LAYER_COUNT):
    #initialise the weight matrix for this layer
    weights.append([])
    #number of nodes in the previous layer is the row count of the weight matrix,
    #number of nodes in the current layer is the column count
    row_count = LAYER_SIZES[layer_nr-1]
    col_count = LAYER_SIZES[layer_nr]
    layer_index = layer_nr-1
    #fill the weight matrix
    for row_index in range(row_count):
        weights[layer_index].append([])
        for col_index in range(col_count):
            #random value between -1 and 1 in steps of 0.01
            random_value = random.randint(-100, 100)/100.0
            weights[layer_index][row_index].append(random_value)
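#shape check for the default configuration: with LAYER_SIZES = [1, 4, 1] the
#loop above builds weights[0] as a 1x4 matrix (input -> hidden) and weights[1]
#as a 4x1 matrix (hidden -> output), so a row vector of layer i multiplied
#with weights[i] yields a row vector the size of layer i+1.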
#forward propagation. v is a vector fed into the input layer
def compute(v):
    #drop the interim results of the previous forward pass
    interims.clear()
    #index 0 is the input v itself, so there is nothing to calculate for it;
    #that's why v goes straight into interims
    interims.append(v)
    for layer_nr in range(1, LAYER_COUNT):
        #multiply the layer's output with its weight matrix, one layer at a time
        v = np.array(v).dot(weights[layer_nr-1]).tolist()
        #v is the interim result of this layer (stored by reference, so the
        #activation applied below ends up in interims as well)
        interims.append(v)
        #now transform the computed values with the activation function,
        #iterating over all entries of the vector
        for j in range(len(v)):
            #sigmoid; produces a soft curve
            exponent = -SIGMOID_EXPONENT*v[j]
            exponent = min(exponent, 50) #avoid float overflow for very negative v[j]
            v[j] = 1/(1+math.exp(exponent))
            #linear alternative:
            #v[j] = min(max(v[j]+0.5,0),1)
    #v now has the shape of the output layer, because the last
    #matrix multiplication (.dot()) was with the last weight matrix
    if len(v) != LAYER_SIZES[-1]:
        print("ERROR, len(v) !=", LAYER_SIZES[-1])
    return v
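#a quick trace of the shapes, assuming LAYER_SIZES = [1, 4, 1]: v starts as a
#1-vector, v.dot(weights[0]) gives a 4-vector (hidden layer) and
#v.dot(weights[1]) gives a 1-vector (output layer). Afterwards interims holds
#[input, hidden activations, output activations], which is exactly what the
#backpropagation below needs.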
def plot():
    plotcolor[0] = (plotcolor[0]+0.005)%1.0
    lower_bound = min(np.array(T_DATA_IN).flat[:])-0.5
    upper_bound = max(np.array(T_DATA_IN).flat[:])+0.5
    step = 0.03 #set this to a lower value to get a smoother curve
    input_value = lower_bound
    x1 = []
    x2 = []
    while input_value < upper_bound:
        computed_value = compute([input_value])
        x2.append(computed_value)
        x1.append(input_value)
        input_value += step
    #result
    plt.plot(x1, x2,
             marker = '',
             linestyle = '-',
             color = pltclr.hsv_to_rgb(plotcolor))
    #after the first line has been plotted in black, put some color in for the next lines
    plotcolor[1] = 1
    plotcolor[2] = 1
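#note: plot() sweeps a single scalar input through the network
#(compute([input_value])), so it only produces a meaningful curve while the
#input layer has exactly one node, as with the active test data.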
#train the neural network on the training data
max_iterations = MAX_ITERATIONS
#initialise the error in a way which does not prevent the loop from running
error = ERROR_THRESHOLD+1
interims = [] #layer-by-layer outputs, needed for the backpropagation algorithm

#takes the (b-1)-th iterated square root of a; for b == 1 it returns a unchanged
def root(a, b):
    while b-1 > 0:
        a = np.sqrt(a)
        b -= 1
    return a

print("training...")
#stop when the error is small enough or the computation takes too long
while max_iterations > 0 and abs(error) > ERROR_THRESHOLD:
    max_iterations -= 1
    #iterate over the training data, accumulating the error of this pass
    error = 0
    for i in range(len(T_DATA_IN)):
        #backpropagate
        target_value = T_DATA_OUT[i] #target for the NN to calculate
        computed_value = compute(T_DATA_IN[i])
        #sum up the absolute errors of all output nodes
        for j in range(LAYER_SIZES[-1]):
            error += abs(computed_value[j]-target_value[j])
        #create the shape of the network in delta
        delta = LAYER_SIZES[:] #[:] makes a copy of the content
        for l in range(len(delta)):
            delta[l] = np.zeros(delta[l]).tolist()
        #compute the delta for each output node
        j = -1 #index of the output layer
        for k in range(LAYER_SIZES[j]):
            old_output = interims[j][k]
            delta[j][k] = old_output * (1 - old_output) * (old_output - target_value[k])
            delta[j][k] = root(delta[j][k], 1) #with b == 1 this is an identity
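        #worked example for the output delta: with an output o = 0.7 and a
        #target t = 1, delta = o*(1-o)*(o-t) = 0.7*0.3*(-0.3) = -0.063.
        #o*(1-o) is the derivative of the standard sigmoid; the constant
        #SIGMOID_EXPONENT factor of the scaled sigmoid is omitted here, which
        #only rescales the gradient and can be absorbed into LEARNING_RATE.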
        #compute the delta for each hidden node; the delta for the last layer
        #is already calculated, so walk backwards from the end of the network
        for layer in reversed(range(LAYER_COUNT-1)):
            j = layer+1 #index of the next layer
            for n in range(LAYER_SIZES[layer]): #iterate over the nodes in this layer
                summ = 0 #weighted delta sum, reset for every node
                for k in range(LAYER_SIZES[j]): #iterate over the nodes in the next layer
                    a = weights[layer][n][k] #weight that points from n to k
                    b = delta[j][k]
                    summ += a * b
                #derivative of the node's output times the weighted delta sum
                delta[layer][n] = interims[layer][n] * (1 - interims[layer][n]) * summ
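        #this is the standard backpropagation recurrence: for a hidden node n,
        #delta_n = o_n * (1 - o_n) * sum over k of (w_nk * delta_k), where k
        #runs over the nodes of the following layer, so the error signal flows
        #backwards through the same weights that were used forwards.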
        #now that the deltas are there, update the weights
        for l in range(len(weights)):
            for n in range(len(weights[l])):
                #weights[l][n] is an array; element k in it points to node k in layer l+1
                for k in range(len(weights[l][n])):
                    dweight = LEARNING_RATE * delta[l+1][k] * interims[l][n]
                    weights[l][n][k] -= dweight #subtract dweight from this single weight
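        #gradient-descent delta rule: the weight from node n in layer l to node
        #k in layer l+1 changes by -LEARNING_RATE * delta_k * o_n. For example,
        #delta_k = -0.063, o_n = 0.5 and LEARNING_RATE = 0.1 give
        #dweight = 0.1 * (-0.063) * 0.5 = -0.00315, so that weight grows by 0.00315.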
        if i == 0: #change this number to log a different training sample
            print("output:", computed_value, "error:", error)
            plot()
if max_iterations < 1:
    print("training aborted because MAX_ITERATIONS was reached. The error did not converge to 0")

#show how the NN classifies the training input now
print("result:")
for i in range(len(T_DATA_IN)):
    print("input:", T_DATA_IN[i], "output:", compute(T_DATA_IN[i]))
#training finished
#do some matplotlib stuff to show the neural network's function
plot()
#plot the training datapoints
plt.plot(np.array(T_DATA_IN).flat[:], np.array(T_DATA_OUT).flat[:],
         marker = 'o',
         linestyle = '')
plt.xlabel("input")
plt.ylabel("forward-propagated")
plt.title("training result graph")
plt.margins(0.1)
plt.show()
#now let the user use the trained network
print()
print("use 'q' to quit")
print("input example:", str(np.ones(len(T_DATA_IN[0])).tolist())[1:-1])
while 1:
    user_input = input("your input: ")
    if user_input == "q":
        quit()
    #parse the comma-separated input into a vector of floats
    try:
        input_value = [float(x) for x in user_input.split(",")]
    except ValueError:
        input_value = []
    if len(input_value) != LAYER_SIZES[0]:
        print("wrong input format. You probably entered a character that is neither a number nor ',', or your input vector has the wrong length")
        continue
    #forward-propagate; the computed value is a vector
    computed_value = compute(input_value)
    print("result: " + str(computed_value))