Guest User

Untitled

a guest
Nov 21st, 2017
87
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.91 KB | None | 0 0
  1. def question4_nnet(X, y, N = 30, w_init = 0.01, batch_size = 40, eta = 0.1, alpha = 0.1):
  2. ITER = 50
  3.  
  4. def sigmoid(x):
  5. return 1 / (1 + np.exp(-x))
  6.  
  7.  
  8. def J(x,*args):
  9. X,y_out,split,dim,c = args
  10. W_1_unraveled = x[0:split]
  11. W_2_unraveled = x[split:]
  12.  
  13. W_1 = W_1_unraveled.reshape((dim[0],dim[1]))
  14. W_2 = W_2_unraveled.reshape((dim[2], dim[0]))
  15.  
  16. #calculate total error
  17. J = 0
  18. for i in range(c):
  19. x_n = X[:,:,i].ravel()
  20. x_n = np.append(x_n, 1)
  21. v1 = W_1.dot(x_n)
  22. y = sigmoid(v1)[np.newaxis]
  23. v2 = W_2.dot(y.T)
  24. o = sigmoid(v2)
  25.  
  26. #one hot encode the desired output
  27. y_n = y_out[i]
  28. d = np.zeros((26,1))
  29. d[y_n-1, 0] = 1
  30.  
  31. #error term
  32. e = np.multiply(-d, np.log(o)) - np.multiply((1-d),np.log(1-o))
  33.  
  34. J += np.sum(e)
  35.  
  36. return J/c
  37.  
  38.  
  39. def Jgrad(x,*args):
  40. X,y_out,split,dim,c = args
  41. W_1_unraveled = x[0:split]
  42. W_2_unraveled = x[split:]
  43.  
  44. W_1 = W_1_unraveled.reshape((dim[0],dim[1]))
  45. W_2 = W_2_unraveled.reshape((dim[2], dim[0]))
  46.  
  47. #calculate total gradients
  48. Jgrad = np.zeros_like(x)
  49. for i in range(c):
  50. x_n = X[:,:,i].ravel()
  51. x_n = np.append(x_n, 1)
  52. v1 = W_1.dot(x_n)
  53. y = sigmoid(v1)[np.newaxis]
  54. v2 = W_2.dot(y.T)
  55. o = sigmoid(v2)
  56.  
  57. #one hot encode the desired output
  58. y_n = y_out[i]
  59. d = np.zeros((26,1))
  60. d[y_n-1, 0] = 1
  61.  
  62. #error term
  63. #second layer
  64. gamma2 = np.diag(np.multiply(o, 1-o))
  65. delta2 = (-np.divide(d,o) - np.divide(1-d,1-o)) * np.diag(np.multiply(o, 1-o))
  66. dw2 = delta2 * y
  67.  
  68. #first layer
  69. gamma1 = np.diag(np.multiply(y, 1-y))
  70. delta1 = gamma1 * W_2.T.dot(delta2)
  71. dw1 = delta1 * x_n.T
  72.  
  73. dw2 = dw2.ravel()
  74. dw1 = dw1.ravel()
  75.  
  76. Jgrad += np.append(dw1,dw2)
  77.  
  78. return Jgrad/c
  79.  
  80. p = 26
  81. a, b, c = X.shape
  82.  
  83. w_init = 6/(26+735)
  84. #initialize weights
  85. W_1 = np.random.uniform(-w_init, w_init, (N,a*b+1))
  86. W_2 = np.random.uniform(-w_init, w_init, (p,N))
  87.  
  88. errors = []
  89. total_error = 0
  90.  
  91. split = N*(a*b +1)
  92. dim = [N, a*b+1,p]
  93.  
  94. args = (X,y,split,dim,c)
  95.  
  96. w_current = np.append(W_1.ravel()[np.newaxis].T, W_2.ravel()[np.newaxis].T)
  97.  
  98. for _ in range(ITER):
  99. print(_)
  100. #minimize the error
  101. print(J(w_current, X,y,split,dim,c))
  102. w_current = scipy.optimize.fmin_cg(J, w_current, fprime=Jgrad, args=args, maxiter = 50)
  103.  
  104. W_1_unraveled = x[0:split]
  105. W_2_unraveled = x[split:]
  106.  
  107. W_1 = W_1_unraveled.reshape((dim[0],dim[1]))
  108. W_2 = W_2_unraveled.reshape((dim[2], dim[0]))
  109.  
  110. return (W_1, W_2)
Add Comment
Please, Sign In to add comment