NN working without bias

a guest | Jan 9th, 2017 | Python
import random
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as pltclr

#TODO remove this when finished
#random.seed(3)

#some linear function
#T_DATA_IN = []
#T_DATA_OUT = []
#points = 50
#for a in range(points+1):
#   T_DATA_IN.append([float(a)/points])
#   T_DATA_OUT.append([float(a)/points])
#CASE = 'regression'

#quadratic function
#T_DATA_IN = []
#T_DATA_OUT = []
#points = 300
#for a in range(points+1):
#   T_DATA_IN.append([float(a)/points])
#   T_DATA_OUT.append([(float(a)/points-0.5)**2*4])
#CASE = 'regression'

#T_DATA_IN = [[0],[0.7]]
#T_DATA_OUT = [[0.5],[0.9]]
#CASE = 'regression'

#classification: positive or negative number
#T_DATA_IN  = [[1]  ,[0.5],[-0.8],[-1] ,[-0.2] ,[0.1]]
#T_DATA_OUT = [[1,0],[1,0],[0,1] ,[0,1],[0,1]  ,[0,1] ]
#CASE = 'classification'

#classification: large or small vector
T_DATA_IN  = [[0.1,0.2],[0.3,0.1],[0.9,0.8],[0.7,0.9]]
T_DATA_OUT = [[1,0]    ,[1,0]    ,[0,1]    ,[0,1]    ]
CASE = 'classification'

#classification: from -0.3 to 0.3: 0,1,0
#        from -inf to -0.3: 1,0,0
#        from 0.3 to inf: 0,0,1
#T_DATA_IN  = [[-0.9],[-0.7],[-0.5],[-0.3],[-0.1],[0.2],[0.4],[0.6],[0.8],[1]]
#T_DATA_OUT = [[1,0,0],[1,0,0],[1,0,0],[1,0,0],[0,1,0],[0,1,0],[0,0,1],[0,0,1],[0,0,1],[0,0,1]]
#CASE = 'classification'

#configuration:
HIDDEN_LAYERS = [22,22] #number of nodes per hidden layer, e.g. [2,3] or [9,2,2,2]
MAX_ITERATIONS = 2200  #to prevent non-converging infinite loops
ERROR_THRESHOLD = 0.01 #stop when the error is small enough
LEARNING_RATE = 0.1 #for regression use '1'

#this defines the shape of the network; the input and output layer sizes are taken from the training data
LAYER_SIZES = [len(T_DATA_IN[0])]+HIDDEN_LAYERS+[len(T_DATA_OUT[0])]

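#e.g. with the active "large or small vector" training set above and HIDDEN_LAYERS = [22,22],
#len(T_DATA_IN[0]) == 2 and len(T_DATA_OUT[0]) == 2, so LAYER_SIZES == [2,22,22,2]:
#2 input nodes, two hidden layers with 22 nodes each, 2 output nodes
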
#some useful stuff to make it simpler
LAYER_COUNT = len(LAYER_SIZES)

plotcolor = [1,0,0] #the first hsv color should be black (saturation and value are 0 here)

SIGMOID_EXPONENT = 1  #for the activation function (don't change this, dsigma(x) does not take it into account)

#this for-loop creates a weight matrix for each layer, initialised with random values from -1 to 1
#weights is all weights. weights[i] is the weight matrix for one layer.
weights = []
changes = []

#iterate through all layers except the input layer
for layer_nr in range(1,LAYER_COUNT):

    #initialize the weight matrix for the given layer
    weights.append([])
    changes.append([])

    #the number of nodes in the previous layer is the row count of the weight matrix
    #the number of nodes in the current layer is the column count
    row_count = LAYER_SIZES[layer_nr-1]
    col_count = LAYER_SIZES[layer_nr]

    layer_index = layer_nr-1

    #create the weight matrix
    for row_index in range(row_count):
        weights[layer_index].append([])
        changes[layer_index].append([])
        for col_index in range(col_count):
            #random_value can also be a vector
            random_value = random.randint(-100,100)/100.0
            weights[layer_index][row_index].append(random_value)
            changes[layer_index][row_index].append(0)

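#with LAYER_SIZES == [2,22,22,2] this produces three weight matrices:
#weights[0] has 2 rows and 22 columns, weights[1] is 22x22, weights[2] is 22x2
#weights[l][r][c] is the weight from node r of layer l to node c of layer l+1
#changes mirrors that structure and remembers the last applied weight change (used for the momentum term below)
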
def sigma(x):
    return 1.0/(1.0+2.71828**(-SIGMOID_EXPONENT*x))

def dsigma(x): #x is already sigmoided
    return x*(1.0-x)

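#why dsigma takes an already-sigmoided value: for y = sigma(x) = 1/(1+e^(-x))
#the derivative is dsigma/dx = y*(1-y), so it can be computed from the output alone
#(this identity assumes SIGMOID_EXPONENT == 1, as noted above)
#e.g. sigma(0) == 0.5 and dsigma(0.5) == 0.25
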
#forward propagation. v is a vector from the input layer
def fprop(v):

    #start at index 1, because index 0 is the input v and therefore needs no calculation
    #that is why v itself is put into interims first
    interims.append(v) #here v is a plain non-numpy list at first
    for layer_nr in range(1,LAYER_COUNT):
        #at this point v has the shape of layer_nr-1,
        #thanks to the multiplication from the previous iteration

        #calculate: multiply the layer with the weights, one layer at a time
        if len(v) != LAYER_SIZES[layer_nr-1]:
            print "ERROR, len(v) !=",LAYER_SIZES[layer_nr-1],";",v

        v = np.array(v).dot(weights[layer_nr-1]).tolist()
        #v is now the vector of raw interim/output results for this layer

        #now transform the values according to the activation function
        #iterate over all values in v
        for j in range(len(v)):
            #sigmoid; produces a soft curve
            v[j] = sigma(v[j])
            #linear alternative:
            #v[j] = min(max(v[j]+0.5,0),1)

        interims.append(v)

    #v now has the shape of the output layer,
    #because the last matrix multiplication (.dot()) with the
    #last weight matrix results in that shape
    return v

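#usage sketch (the global list interims has to exist before fprop is called; it is created further down):
#interims = []
#print fprop([0.1, 0.2]) #returns a 2-element list of sigmoided outputs; the exact values depend on the random weights
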

#plots the network's output over the input range (only used for the 1-D regression cases)
def plot():

    plotcolor[0] = (plotcolor[0]+1.0/MAX_ITERATIONS)%1.0
    lower_bound = min(np.array(T_DATA_IN).flat[:])
    upper_bound = max(np.array(T_DATA_IN).flat[:])
    step = 0.03 #set this to a lower value to get a smoother curve
    input_value = lower_bound
    x1 = []
    x2 = []
    while input_value < upper_bound:
        fproped_value = fprop([input_value])
        x2.append(fproped_value)
        x1.append(input_value)
        input_value += step

    #result
    plt.plot(x1,x2,
            marker = '',
            linestyle = '-',
            color = pltclr.hsv_to_rgb(plotcolor))

    #after the first line has been plotted in black, put some color in for the next lines
    plotcolor[1] = 1
    plotcolor[2] = 1
    #trainingdata

#train the neural network
#initialize the error in a way which does not prevent the loop from running
error = ERROR_THRESHOLD+1
interims = [] #needed for the backpropagation algorithm
iterations = 0

print "training..."
#stop when the error is small enough or the computation takes too long
while iterations < MAX_ITERATIONS and error > ERROR_THRESHOLD:
    #iterate over the training data

    #at this point interims is stale because the plot() function calls fprop very often,
    #but it is reset below before it can break anything (just to prevent confusion)

    iterations += 1

    for i in range(len(T_DATA_IN)):

        #backpropagate
        interims = [] #reset interims

        target_value = T_DATA_OUT[i] #target for the NN to calculate
        fproped_value = T_DATA_IN[i] #initialize with the input, nothing computed yet
        fproped_value = fprop(fproped_value)
        #create the shape of the network in delta
        delta = LAYER_SIZES[:] #[:] makes a copy of the content
        for l in range(len(delta)):
            delta[l] = np.zeros(delta[l]).tolist()

        #for each output node compute its delta
        #the (target - output) convention puts the minus sign of the gradient into the delta,
        #which is why the weight update further down uses '+=' instead of '-='
        error = 0.0 #total absolute error of this training sample, used for the stopping criterion
        j = -1 #index of the output layer
        for k in range(LAYER_SIZES[j]):
            output_node_k = interims[j][k]
            node_error = target_value[k] - output_node_k
            delta[j][k] = dsigma(output_node_k) * node_error
            #accumulate the total error:
            error += abs(fproped_value[k]-target_value[k])

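        #the formulas implemented here are the usual backpropagation rules for a sigmoid network without biases:
        #output layer: delta_k = (t_k - o_k) * o_k * (1 - o_k)
        #hidden layer: delta_n = o_n * (1 - o_n) * sum_k( delta_k * w_nk )   (sum over the nodes k of the next layer)
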
        #for each hidden node calculate its delta
        for layer_i in range(LAYER_COUNT-1)[::-1]: #the delta for the last layer is already calculated; start at the end
            #layer_i points to the current layer (index)
            layer_j = layer_i+1 #points to the next layer
            for n in range(LAYER_SIZES[layer_i]): #iterate over the nodes in layer layer_i
                error_sum = 0.0 #has to start at 0 for every node n
                for k in range(LAYER_SIZES[layer_j]): #iterate over the nodes in the next layer (k in layer_j)
                    a = delta[layer_j][k] #get the delta from the next layer in the network
                    b = weights[layer_i][n][k] #get the weight in layer layer_i that points from n to k
                    error_sum += a * b
                delta[layer_i][n] = dsigma(interims[layer_i][n]) * error_sum

        #update the weights: gradient step plus a momentum term (0.5 * the change from the previous step)
        for layer in range(LAYER_COUNT-1):
            for node_lp0 in range(LAYER_SIZES[layer]):
                for node_lp1 in range(LAYER_SIZES[layer+1]):
                    change = delta[layer+1][node_lp1] * interims[layer][node_lp0]
                    previous_change = changes[layer][node_lp0][node_lp1]
                    #'+=' because the (target - output) sign is already contained in the delta
                    weights[layer][node_lp0][node_lp1] += LEARNING_RATE * change + 0.5 * previous_change
                    changes[layer][node_lp0][node_lp1] = change

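        #written out, the update rule for the weight from node a (with output o_a) to node b in the next layer is:
        #w_ab <- w_ab + LEARNING_RATE * delta_b * o_a + 0.5 * previous_change_ab
        #where previous_change_ab is the raw change (delta_b * o_a) from the previous step,
        #LEARNING_RATE = 0.1 and the momentum factor 0.5 is hardcoded above
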
        #old version of the weight update, kept for reference:
        #for i in range(len(weights)): #iterate over layers
        #   for n in range(len(weights[i])): #iterate over nodes1
        #       for n2 in range(len(weights[i][n])): #iterate over nodes2 that are connected to nodes1
        #           #weights[i][n] is an array. each element in it points to a single node in l+1
        #           dweight = LEARNING_RATE * delta[i][n] * interims[i][n]
        #           weights[i][n][n2] -= dweight #subtract dweight from every single weight

        if i == -1: #set this to a sample index to log possibly useful information
            print "output:",fproped_value,"error:",error

    if CASE == 'regression':
        plot()
    if (100.0*iterations/MAX_ITERATIONS)%5 == 0:
        print iterations*100/MAX_ITERATIONS,"%"

if iterations == MAX_ITERATIONS:
    print "training aborted because MAX_ITERATIONS was reached. Error:",error
else:
    print "training finished"

#training finished
#do some matplotlib stuff to show the neural network's function

if CASE == 'regression':
    plot()

    #plot the training datapoints
    #the transposing makes sure that no 2D input ends up in this plot
    plt.plot(np.array(T_DATA_IN).T[0].T.flat[:],np.array(T_DATA_OUT).flat[:],
            marker = 'o',
            linestyle = '')
    plt.xlabel("input")
    plt.ylabel("forward-propagated")
    plt.title("training result graph")
    plt.margins(0.1)
    plt.show()

if CASE == 'classification':
    for idatapoint in range(len(T_DATA_IN)):
        datapoint = T_DATA_IN[idatapoint]
        #truncate the outputs to two decimal places for readability
        out = (np.int_(np.array(fprop(datapoint))*100)/100.0).tolist()
        print "in:",datapoint,"out:",out,"target:",T_DATA_OUT[idatapoint]

quit()

#log of earlier errors:
#
# was not taking the weight that points from node j to k for the delta, but the whole weight vector from j to the nodes of the next layer
# interims was updated before the activation function was applied
# the input data was not normalized, hence the converging to y=1
# interims was not reset to [] at the beginning of each iteration
#
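#how to run (a sketch, assuming Python 2 with numpy and matplotlib installed,
#because the print statements above use Python 2 syntax):
#   save the script under any name, e.g. nn_no_bias.py, and run:  python2 nn_no_bias.py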