Not a member of Pastebin yet? Sign up — it unlocks many cool features!
- def question4_nnet(X, y, N = 30, w_init = 0.01, batch_size = 40, eta = 0.1, alpha = 0.1):
- ITER = 50
- def sigmoid(x):
- return 1 / (1 + np.exp(-x))
- def J(x,*args):
- X,y_out,split,dim,c = args
- W_1_unraveled = x[0:split]
- W_2_unraveled = x[split:]
- W_1 = W_1_unraveled.reshape((dim[0],dim[1]))
- W_2 = W_2_unraveled.reshape((dim[2], dim[0]))
- #calculate total error
- J = 0
- for i in range(c):
- x_n = X[:,:,i].ravel()
- x_n = np.append(x_n, 1)
- v1 = W_1.dot(x_n)
- y = sigmoid(v1)[np.newaxis]
- v2 = W_2.dot(y.T)
- o = sigmoid(v2)
- #one hot encode the desired output
- y_n = y_out[i]
- d = np.zeros((26,1))
- d[y_n-1, 0] = 1
- #error term
- e = np.multiply(-d, np.log(o)) - np.multiply((1-d),np.log(1-o))
- J += np.sum(e)
- return J/c
- def Jgrad(x,*args):
- X,y_out,split,dim,c = args
- W_1_unraveled = x[0:split]
- W_2_unraveled = x[split:]
- W_1 = W_1_unraveled.reshape((dim[0],dim[1]))
- W_2 = W_2_unraveled.reshape((dim[2], dim[0]))
- #calculate total gradients
- Jgrad = np.zeros_like(x)
- for i in range(c):
- x_n = X[:,:,i].ravel()
- x_n = np.append(x_n, 1)
- v1 = W_1.dot(x_n)
- y = sigmoid(v1)[np.newaxis]
- v2 = W_2.dot(y.T)
- o = sigmoid(v2)
- #one hot encode the desired output
- y_n = y_out[i]
- d = np.zeros((26,1))
- d[y_n-1, 0] = 1
- #error term
- #second layer
- gamma2 = np.diag(np.multiply(o, 1-o))
- delta2 = (-np.divide(d,o) - np.divide(1-d,1-o)) * np.diag(np.multiply(o, 1-o))
- dw2 = delta2 * y
- #first layer
- gamma1 = np.diag(np.multiply(y, 1-y))
- delta1 = gamma1 * W_2.T.dot(delta2)
- dw1 = delta1 * x_n.T
- dw2 = dw2.ravel()
- dw1 = dw1.ravel()
- Jgrad += np.append(dw1,dw2)
- return Jgrad/c
- p = 26
- a, b, c = X.shape
- w_init = 6/(26+735)
- #initialize weights
- W_1 = np.random.uniform(-w_init, w_init, (N,a*b+1))
- W_2 = np.random.uniform(-w_init, w_init, (p,N))
- errors = []
- total_error = 0
- split = N*(a*b +1)
- dim = [N, a*b+1,p]
- args = (X,y,split,dim,c)
- w_current = np.append(W_1.ravel()[np.newaxis].T, W_2.ravel()[np.newaxis].T)
- for _ in range(ITER):
- print(_)
- #minimize the error
- print(J(w_current, X,y,split,dim,c))
- w_current = scipy.optimize.fmin_cg(J, w_current, fprime=Jgrad, args=args, maxiter = 50)
- W_1_unraveled = x[0:split]
- W_2_unraveled = x[split:]
- W_1 = W_1_unraveled.reshape((dim[0],dim[1]))
- W_2 = W_2_unraveled.reshape((dim[2], dim[0]))
- return (W_1, W_2)
Add Comment
Please sign in to add a comment.