import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import copy as cp
import os

for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
train_data = pd.read_csv('/kaggle/input/digit-recognizer/train.csv')
train_data = np.array(train_data)     # (42000, 785)
row, col = train_data.shape
np.random.shuffle(train_data)
dev_data = train_data[0:1000].T       # (785, 1000) 1 example per column (for 1000 columns)
dev_outputs = dev_data[0]             # (1000,)
dev_inputs = dev_data[1:] / 255.      # (784, 1000)
train_data = train_data[1000:].T      # (785, 41000) 1 example per column (for 41000 columns)
train_outputs = train_data[0]         # (41000,)
train_inputs = train_data[1:] / 255.  # (784, 41000)
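# Optional sanity check (illustrative only, not required for training): each column of
# train_inputs is one flattened 28x28 digit, so reshaping a column back should display a
# recognisable image with its label.
plt.imshow(train_inputs[:, 0].reshape(28, 28), cmap='gray')
plt.title(f"Label: {train_outputs[0]}")
plt.show()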
def init_params(layer_dims):
    params = {}
    layers = len(layer_dims)
    for layer in range(1, layers):
        # He initialisation: scale by sqrt(2 / fan_in), a good default for ReLU-family activations
        params['Weight' + str(layer)] = np.random.randn(layer_dims[layer], layer_dims[layer-1]) * np.sqrt(2 / layer_dims[layer-1])
        params['Bias' + str(layer)] = np.zeros((layer_dims[layer], 1))
    return params
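# Optional shape check (a small sketch; the layer sizes here are arbitrary): Weight_l should be
# (layer_dims[l], layer_dims[l-1]) and Bias_l should be (layer_dims[l], 1).
_demo_params = init_params([784, 64, 10])
for _name, _value in _demo_params.items():
    print(_name, _value.shape)  # Weight1 (64, 784), Bias1 (64, 1), Weight2 (10, 64), Bias2 (10, 1)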
def activation(activation_name, linear_out):
    # Apply the named activation element-wise; softmax is applied column-wise (one column per example)
    if activation_name == "relu":
        return np.maximum(0, linear_out)
    elif activation_name == "leaky relu":
        return np.where(linear_out > 0, linear_out, linear_out * 0.01)
    elif activation_name == "softmax":
        exponent = np.exp(linear_out - np.max(linear_out, axis=0, keepdims=True))  # subtract per-column max for numerical stability
        return exponent / exponent.sum(axis=0)
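# Quick sanity check (illustrative only): softmax normalises each column, so every column
# of its output should sum to 1.
print(activation("softmax", np.random.randn(10, 3)).sum(axis=0))  # ~[1. 1. 1.]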
def activation_deriv(activation_name, linear_out, prev_error):
    # Back-propagate prev_error through this layer's activation
    if activation_name == "relu":
        error = np.array(prev_error, copy=True)
        error[linear_out <= 0] = 0
    elif activation_name == "leaky relu":
        error = np.array(prev_error, copy=True)
        error[linear_out <= 0] *= 0.01  # scale (not overwrite) the error where the unit was in the leaky region
    elif activation_name == "softmax":
        # Only needed if softmax were used in a hidden layer; the output-layer softmax +
        # cross-entropy gradient is handled directly in backward_propagation.
        # Element-wise (diagonal-Jacobian) approximation:
        exponent = np.exp(linear_out - np.max(linear_out, axis=0, keepdims=True))
        softmax_out = exponent / exponent.sum(axis=0)
        error = prev_error * softmax_out * (1 - softmax_out)
    return error
def forward_propagation(params, inputs, activations):
    caches = {}
    layers = len(params) // 2
    caches['Activation_out' + str(0)] = inputs
    for layer in range(1, layers + 1):
        weight = params['Weight' + str(layer)]
        bias = params['Bias' + str(layer)]
        caches['Linear_out' + str(layer)] = np.dot(weight, inputs) + bias  # Z = W A_prev + b
        linear_out = caches['Linear_out' + str(layer)]
        caches['Activation_out' + str(layer)] = activation(activations[layer], linear_out)  # A = g(Z)
        inputs = caches['Activation_out' + str(layer)]
    final_out = inputs
    return final_out, caches
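# Optional shape check (a sketch with an arbitrary tiny network): with one example per column,
# forward_propagation should return an output of shape (output_size, num_examples).
_tiny_params = init_params([784, 64, 10])
_tiny_out, _tiny_caches = forward_propagation(_tiny_params, np.random.rand(784, 5), ["None", "relu", "softmax"])
print(_tiny_out.shape)  # (10, 5)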
def compute_cost(final_out, outputs):
    # Cross-entropy loss against one-hot targets, clipped to avoid log(0)
    final_out = np.clip(final_out, 1e-10, 1 - 1e-10)
    logprobs = -np.sum(outputs * np.log(final_out), axis=0)
    cost = np.mean(logprobs)
    return cost
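# Quick sanity check (illustrative, hand-made probabilities): a confident correct prediction
# should give a near-zero cost, a confident wrong one a large cost.
_probs = np.array([[0.98, 0.01], [0.01, 0.98], [0.01, 0.01]])  # 3 classes, 2 examples (columns)
_right = np.array([[1, 0], [0, 1], [0, 0]])
_wrong = np.array([[0, 1], [1, 0], [0, 0]])
print(compute_cost(_probs, _right))  # ~0.02
print(compute_cost(_probs, _wrong))  # ~4.6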
def one_hot(Y):
    # Convert integer labels of shape (m,) into a (num_classes, m) one-hot matrix
    one_hot_Y = np.zeros((Y.size, Y.max() + 1))
    one_hot_Y[np.arange(Y.size), Y] = 1
    one_hot_Y = one_hot_Y.T
    return one_hot_Y
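# Quick usage check (illustrative): the result matches the column-per-example layout used above.
print(one_hot(np.array([2, 0, 1])))
# [[0. 1. 0.]
#  [0. 0. 1.]
#  [1. 0. 0.]]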
def backward_propagation(final_out, outputs, caches, activations, params):
    grads = {}
    layers = len(caches) // 2
    # Output layer: softmax + cross-entropy simplifies to dZ = (A - Y)
    grads['Weight_deriv' + str(layers)] = (final_out - outputs) @ caches['Activation_out' + str(layers-1)].T / outputs.shape[1]
    grads['Bias_deriv' + str(layers)] = np.sum(final_out - outputs, axis=1, keepdims=True) / outputs.shape[1]
    prev_error = params['Weight' + str(layers)].T @ (final_out - outputs)
    # Hidden layers: push the error back through each activation
    for layer in range(layers - 1, 0, -1):
        error = activation_deriv(activations[layer], caches['Linear_out' + str(layer)], prev_error)
        grads['Weight_deriv' + str(layer)] = error @ caches['Activation_out' + str(layer-1)].T / outputs.shape[1]
        grads['Bias_deriv' + str(layer)] = np.sum(error, axis=1, keepdims=True) / outputs.shape[1]
        prev_error = params['Weight' + str(layer)].T @ error
    return grads
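# Optional numerical gradient check (a sketch, with arbitrary tiny layer sizes and epsilon):
# compares backward_propagation's analytic weight gradients against centred finite differences
# of compute_cost. Relative differences around 1e-7 or smaller suggest backprop is consistent.
def gradient_check(layer_dims=[4, 3, 2], m=5, eps=1e-6):
    acts = ["None", "leaky relu", "softmax"]
    rng = np.random.default_rng(0)
    check_params = init_params(layer_dims)
    X = rng.standard_normal((layer_dims[0], m))
    Y = one_hot(np.arange(m) % layer_dims[-1])  # make sure every class appears
    out, caches = forward_propagation(check_params, X, acts)
    grads = backward_propagation(out, Y, caches, acts, check_params)
    for layer in range(1, len(layer_dims)):
        W = check_params['Weight' + str(layer)]
        numeric = np.zeros_like(W)
        for idx in np.ndindex(W.shape):
            original = W[idx]
            W[idx] = original + eps
            cost_plus = compute_cost(forward_propagation(check_params, X, acts)[0], Y)
            W[idx] = original - eps
            cost_minus = compute_cost(forward_propagation(check_params, X, acts)[0], Y)
            W[idx] = original
            numeric[idx] = (cost_plus - cost_minus) / (2 * eps)
        analytic = grads['Weight_deriv' + str(layer)]
        diff = np.linalg.norm(numeric - analytic) / (np.linalg.norm(numeric) + np.linalg.norm(analytic))
        print(f"Layer {layer} relative difference: {diff:.2e}")
# gradient_check()  # uncomment to run the check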
def update_params(params, grads, learning_rate):
    layers = len(params) // 2
    for layer in range(1, layers + 1):
        params['Weight' + str(layer)] -= learning_rate * grads['Weight_deriv' + str(layer)]
        params['Bias' + str(layer)] -= learning_rate * grads['Bias_deriv' + str(layer)]
    return params
def run_model(params, inputs, outputs, activations, learning_rate, epochs, num_batches):
    error_list = []
    accuracy_list = []
    outputs = one_hot(outputs)
    batch_size = inputs.shape[1] // num_batches
    for epoch in range(epochs):
        epoch_error = 0
        epoch_accuracy = 0
        for i in range(num_batches):
            # Get the mini-batch (columns are examples)
            mini_batch_X = inputs[:, i*batch_size:(i+1)*batch_size]
            mini_batch_Y = outputs[:, i*batch_size:(i+1)*batch_size]
            final_out, caches = forward_propagation(params, mini_batch_X, activations)  # Vectorized forward prop
            error = compute_cost(final_out, mini_batch_Y)
            epoch_error += error
            predictions = np.argmax(final_out, axis=0)
            actuals = np.argmax(mini_batch_Y, axis=0)
            accuracy = np.mean(predictions == actuals)  # compare the (batch_size,) vectors directly; no transpose broadcasting
            epoch_accuracy += accuracy
            grads = backward_propagation(final_out, mini_batch_Y, caches, activations, params)  # Vectorized backprop
            params = update_params(params, grads, learning_rate)
            print("\n" + "Mini batch number " + str(i+1) + " / " + str(num_batches))
        error_list.append(epoch_error / num_batches)
        accuracy_list.append(epoch_accuracy / num_batches)
        print(f"Epoch {epoch+1}/{epochs} completed: Error = {epoch_error / num_batches}, Accuracy = {epoch_accuracy / num_batches}" + "\n")
    return params
layer_dims = [784, 512, 128, 10]
activations = ["None", "leaky relu", "leaky relu", "softmax"]  # index 0 is a placeholder for the input layer
learning_rate = 0.01
epochs = 10
num_batches = 1
params = init_params(layer_dims)
params = run_model(params, train_inputs, train_outputs, activations, learning_rate, epochs, num_batches)
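# Follow-up sketch: the dev split prepared above (dev_inputs, dev_outputs) is never touched
# during training, so a single forward pass over it gives a held-out accuracy estimate.
dev_final_out, _ = forward_propagation(params, dev_inputs, activations)
dev_predictions = np.argmax(dev_final_out, axis=0)
print(f"Dev set accuracy: {np.mean(dev_predictions == dev_outputs):.4f}")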