import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import copy as cp

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

train_data = pd.read_csv('/kaggle/input/digit-recognizer/train.csv')
train_data = np.array(train_data)     # (42000, 785)

row, col = train_data.shape
np.random.shuffle(train_data)

dev_data = train_data[0:1000].T       # (785, 1000): one example per column
dev_outputs = dev_data[0]             # (1000,)
dev_inputs = dev_data[1:] / 255.      # (784, 1000)

train_data = train_data[1000:].T      # (785, 41000): one example per column
train_outputs = train_data[0]         # (41000,)
train_inputs = train_data[1:] / 255.  # (784, 41000)


def init_params(layer_dims):
    # He initialization: weights scaled by sqrt(2 / fan_in), biases start at zero.
    params = {}
    layers = len(layer_dims)

    for layer in range(1, layers):
        params['Weight' + str(layer)] = np.random.randn(layer_dims[layer], layer_dims[layer-1]) * np.sqrt(2 / layer_dims[layer-1])
        params['Bias' + str(layer)] = np.zeros((layer_dims[layer], 1))
    return params
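

# Quick shape check (illustrative only; check_param_shapes is a name made up here,
# not part of the original pipeline): each Weight{l} should be (layer_dims[l], layer_dims[l-1])
# and each Bias{l} should be (layer_dims[l], 1).
def check_param_shapes(layer_dims):
    params = init_params(layer_dims)
    for layer in range(1, len(layer_dims)):
        assert params['Weight' + str(layer)].shape == (layer_dims[layer], layer_dims[layer-1])
        assert params['Bias' + str(layer)].shape == (layer_dims[layer], 1)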


def activation(name, linear_out):
    # Apply the named activation element-wise to the pre-activation matrix Z.
    if name == "relu":
        return np.maximum(0, linear_out)

    elif name == "leaky relu":
        return np.where(linear_out > 0, linear_out, linear_out * 0.01)

    elif name == "softmax":
        # Subtract the per-column max for numerical stability (softmax is shift-invariant).
        exponent = np.exp(linear_out - np.max(linear_out, axis=0, keepdims=True))
        return exponent / exponent.sum(axis=0)

    else:
        raise ValueError("Unknown activation: " + name)


def activation_deriv(name, linear_out, prev_error):
    # Multiply the upstream error by the derivative of the named activation at Z.
    if name == "relu":
        error = np.array(prev_error, copy=True)
        error[linear_out <= 0] = 0      # derivative is 0 on the non-positive side

    elif name == "leaky relu":
        error = np.array(prev_error, copy=True)
        error[linear_out <= 0] *= 0.01  # derivative is 0.01 (not 1) on the non-positive side

    elif name == "softmax":
        # The softmax derivative is folded into the cross-entropy gradient
        # (final_out - outputs) in backward_propagation, so this branch is not
        # reached for the output layer in this script.
        error = np.array(prev_error, copy=True)

    else:
        raise ValueError("Unknown activation: " + name)
    return error


def forward_propagation(params, inputs, activations):
    # Cache every Z (Linear_out) and A (Activation_out) so backprop can reuse them.
    caches = {}
    layers = len(params) // 2

    caches['Activation_out' + str(0)] = inputs

    for layer in range(1, layers+1):
        weight = params['Weight' + str(layer)]
        bias = params['Bias' + str(layer)]

        caches['Linear_out' + str(layer)] = np.dot(weight, inputs) + bias  # Z = W A_prev + b
        linear_out = caches['Linear_out' + str(layer)]

        caches['Activation_out' + str(layer)] = activation(activations[layer], linear_out)  # A = g(Z)
        inputs = caches['Activation_out' + str(layer)]

    final_out = inputs
    return final_out, caches


def compute_cost(final_out, outputs):
    # Categorical cross-entropy averaged over the batch (outputs is one-hot,
    # one example per column); clipping avoids log(0).
    final_out = np.clip(final_out, 1e-10, 1 - 1e-10)
    logprobs = -np.sum(outputs * np.log(final_out), axis=0)
    cost = np.mean(logprobs)
    return cost
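
# Worked example (illustrative, not part of the training pipeline; _example_cost is
# just a throwaway name): with a one-hot target and predicted probability 0.7 on the
# correct class, the cost is -log(0.7) ~= 0.357.
_example_cost = compute_cost(np.array([[0.7], [0.2], [0.1]]),
                             np.array([[1.0], [0.0], [0.0]]))
assert abs(_example_cost - (-np.log(0.7))) < 1e-6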


def one_hot(Y):
    # Convert integer labels of shape (m,) into a one-hot matrix of shape (classes, m).
    one_hot_Y = np.zeros((Y.size, Y.max() + 1))
    one_hot_Y[np.arange(Y.size), Y] = 1
    one_hot_Y = one_hot_Y.T
    return one_hot_Y
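
# Small example (illustrative, throwaway): one_hot maps labels [2, 0] to a (3, 2)
# matrix whose columns are the one-hot encodings of 2 and 0 respectively.
assert np.array_equal(one_hot(np.array([2, 0])),
                      np.array([[0., 1.],
                                [0., 0.],
                                [1., 0.]]))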


def backward_propagation(final_out, outputs, caches, activations, params):
    grads = {}
    layers = len(caches) // 2

    # Output layer: the combined softmax + cross-entropy gradient is (final_out - outputs).
    grads['Weight_deriv' + str(layers)] = (final_out - outputs) @ caches['Activation_out' + str(layers-1)].T / outputs.shape[1]
    grads['Bias_deriv' + str(layers)] = np.sum(final_out - outputs, axis=1, keepdims=True) / outputs.shape[1]

    prev_error = params['Weight' + str(layers)].T @ (final_out - outputs)

    # Hidden layers: push the error back through each activation's derivative.
    for layer in range(layers-1, 0, -1):
        error = activation_deriv(activations[layer], caches['Linear_out' + str(layer)], prev_error)

        grads['Weight_deriv' + str(layer)] = error @ caches['Activation_out' + str(layer-1)].T / outputs.shape[1]
        grads['Bias_deriv' + str(layer)] = np.sum(error, axis=1, keepdims=True) / outputs.shape[1]

        prev_error = params['Weight' + str(layer)].T @ error

    return grads
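
# Optional numerical gradient check (a sketch; gradient_check, epsilon and the loop
# variables are names introduced here, not part of the original script). It compares
# backward_propagation against central finite differences of compute_cost, so it is
# only practical on a tiny layer_dims / batch, and outputs must already be one-hot.
# Small mismatches are expected near Z = 0 (leaky-relu kink) and where clipping kicks in.
def gradient_check(params, inputs, outputs, activations, epsilon=1e-5):
    final_out, caches = forward_propagation(params, inputs, activations)
    grads = backward_propagation(final_out, outputs, caches, activations, params)
    for layer in range(1, len(params) // 2 + 1):
        for kind, grad_kind in (('Weight', 'Weight_deriv'), ('Bias', 'Bias_deriv')):
            param = params[kind + str(layer)]
            analytic = grads[grad_kind + str(layer)]
            numeric = np.zeros_like(param)
            for idx in np.ndindex(param.shape):
                saved = param[idx]
                param[idx] = saved + epsilon
                cost_plus = compute_cost(forward_propagation(params, inputs, activations)[0], outputs)
                param[idx] = saved - epsilon
                cost_minus = compute_cost(forward_propagation(params, inputs, activations)[0], outputs)
                param[idx] = saved
                numeric[idx] = (cost_plus - cost_minus) / (2 * epsilon)
            denom = np.linalg.norm(analytic) + np.linalg.norm(numeric) + 1e-12
            print(kind + str(layer), 'relative difference:', np.linalg.norm(analytic - numeric) / denom)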


def update_params(params, grads, learning_rate):
    layers = len(params) // 2
    for layer in range(1, layers+1):
        params['Weight' + str(layer)] -= learning_rate * grads['Weight_deriv' + str(layer)]
        params['Bias' + str(layer)] -= learning_rate * grads['Bias_deriv' + str(layer)]
    return params


def run_model(params, inputs, outputs, activations, learning_rate, epochs, num_batches):
    error_list = []
    accuracy_list = []

    outputs = one_hot(outputs)
    batch_size = inputs.shape[1] // num_batches

    for epoch in range(epochs):
        epoch_error = 0
        epoch_accuracy = 0

        for i in range(num_batches):
            # Slice out the mini-batch (one example per column).
            mini_batch_X = inputs[:, i*batch_size:(i+1)*batch_size]
            mini_batch_Y = outputs[:, i*batch_size:(i+1)*batch_size]

            final_out, caches = forward_propagation(params, mini_batch_X, activations)

            error = compute_cost(final_out, mini_batch_Y)
            epoch_error += error

            # Compare predicted and true class indices directly; both are (batch_size,)
            # vectors, so no transpose is needed (transposing one of them would
            # broadcast the comparison into a (batch_size, batch_size) matrix).
            predictions = np.argmax(final_out, axis=0)
            actuals = np.argmax(mini_batch_Y, axis=0)
            accuracy = np.mean(predictions == actuals)
            epoch_accuracy += accuracy

            grads = backward_propagation(final_out, mini_batch_Y, caches, activations, params)

            params = update_params(params, grads, learning_rate)

            print("\nMini batch number " + str(i+1) + " / " + str(num_batches))

        error_list.append(epoch_error / num_batches)
        accuracy_list.append(epoch_accuracy / num_batches)
        print(f"Epoch {epoch+1}/{epochs} completed: Error = {epoch_error / num_batches}, Accuracy = {epoch_accuracy / num_batches}\n")

    return params


layer_dims = [784, 512, 128, 10]
activations = ["None", "leaky relu", "leaky relu", "softmax"]  # index 0 (input layer) is unused
learning_rate = 0.01
epochs = 10
num_batches = 1
params = init_params(layer_dims)

params = run_model(params, train_inputs, train_outputs, activations, learning_rate, epochs, num_batches)
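
# Held-out check (a sketch; dev_final_out, dev_predictions and dev_accuracy are
# names introduced here): reuse forward_propagation on the 1000 dev examples
# split off at the top of the script.
dev_final_out, _ = forward_propagation(params, dev_inputs, activations)
dev_predictions = np.argmax(dev_final_out, axis=0)
dev_accuracy = np.mean(dev_predictions == dev_outputs)
print(f"Dev accuracy: {dev_accuracy}")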