import numpy as np

class DNNClassifier(object):
    '''
    Parameters: layer_dims -- list of layer dimensions, including the input and output layers
                hidden_layers -- list of activation names for the hidden and output layers:
                                 'relu','sigmoid','tanh','softplus','arctan','elu','identity','softmax'
                                 Note: 1. the last layer must be softmax
                                       2. for relu and elu, pass a tuple to set the alpha value, e.g.
                                          ['tanh',('relu',alpha1),('elu',alpha2),('relu',alpha3),'softmax']
                                          if no tuple is given, alpha defaults to 0
                init_type -- he_normal      --> N(0,sqrt(2/fanin))
                             he_uniform     --> Uniform(-sqrt(6/fanin),sqrt(6/fanin))
                             xavier_normal  --> N(0,2/(fanin+fanout))
                             xavier_uniform --> Uniform(-sqrt(6/(fanin+fanout)),sqrt(6/(fanin+fanout)))
                learning_rate -- learning rate
                optimization_method -- one of 'SGD','SGDM','RMSP','ADAM'
                batch_size -- batch size used for each weight update
                max_epoch -- maximum number of epochs
                             Note: max_iter = max_epoch * (training set size / batch size)
                tolarance -- training stops when abs(previous cost - current cost) < tolarance;
                             if None, no such check is performed
                keep_proba -- keep probability for dropout (1 disables dropout)
                penality -- regularization penalty: 'l1', 'l2' or None (default)
                lamda -- l1 or l2 regularization strength
                beta1 -- momentum parameter for SGDM and ADAM
                beta2 -- decay parameter for RMSP and ADAM
                seed -- random seed to generate randomness
                verbose -- takes 0 or 1
    '''
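    # Illustrative configuration (not from the original paste): a network with
    # 10 input features, two hidden layers and 3 output classes could be set up as
    #   DNNClassifier(layer_dims=[10, 32, 16, 3],
    #                 hidden_layers=['tanh', ('relu', 0.01), 'softmax'])
    # hidden_layers needs one entry per weight layer, i.e. len(layer_dims) - 1.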
    def __init__(self,layer_dims,hidden_layers,init_type='he_normal',learning_rate=0.1,
                 optimization_method='SGD',batch_size=64,max_epoch=100,tolarance=0.00001,
                 keep_proba=1,penality=None,lamda=0,beta1=0.9,
                 beta2=0.999,seed=None,verbose=0):
        self.layer_dims = layer_dims
        self.hidden_layers = hidden_layers
        self.init_type = init_type
        self.learning_rate = learning_rate
        self.optimization_method = optimization_method
        self.batch_size = batch_size
        self.keep_proba = keep_proba
        self.penality = penality
        self.lamda = lamda
        self.beta1 = beta1
        self.beta2 = beta2
        self.seed = seed
        self.max_epoch = max_epoch
        self.tol = tolarance
        self.verbose = verbose
    @staticmethod
    def weights_init(layer_dims,init_type='he_normal',seed=None):
        """
        Arguments:
        layer_dims -- python list containing the dimensions of each layer in the network,
                      i.e. [no of input features, # of neurons in hidden layer-1, ...,
                      # of neurons in hidden layer-n, output size]
        init_type -- he_normal      --> N(0,sqrt(2/fanin))
                     he_uniform     --> Uniform(-sqrt(6/fanin),sqrt(6/fanin))
                     xavier_normal  --> N(0,2/(fanin+fanout))
                     xavier_uniform --> Uniform(-sqrt(6/(fanin+fanout)),sqrt(6/(fanin+fanout)))
        seed -- random seed used to generate the weights
        Returns:
        parameters -- python dictionary containing the parameters "W1", "b1", ..., "WL", "bL":
                      Wl -- weight matrix of shape (layer_dims[l], layer_dims[l-1])
                      bl -- bias vector of shape (layer_dims[l], 1)
        """
        np.random.seed(seed)
        parameters = {}
        L = len(layer_dims)             # number of layers in the network
        if init_type == 'he_normal':
            for l in range(1, L):
                parameters['W' + str(l)] = np.random.normal(0,np.sqrt(2.0/layer_dims[l-1]),(layer_dims[l], layer_dims[l-1]))
                parameters['b' + str(l)] = np.random.normal(0,np.sqrt(2.0/layer_dims[l-1]),(layer_dims[l], 1))
        elif init_type == 'he_uniform':
            for l in range(1, L):
                parameters['W' + str(l)] = np.random.uniform(-np.sqrt(6.0/layer_dims[l-1]),
                                                             np.sqrt(6.0/layer_dims[l-1]),
                                                             (layer_dims[l], layer_dims[l-1]))
                parameters['b' + str(l)] = np.random.uniform(-np.sqrt(6.0/layer_dims[l-1]),
                                                             np.sqrt(6.0/layer_dims[l-1]),
                                                             (layer_dims[l], 1))
        elif init_type == 'xavier_normal':
            for l in range(1, L):
                # np.random.normal takes a standard deviation, so pass
                # sqrt(2/(fanin+fanout)) to get the N(0, 2/(fanin+fanout)) documented above
                parameters['W' + str(l)] = np.random.normal(0,np.sqrt(2.0/(layer_dims[l]+layer_dims[l-1])),
                                                            (layer_dims[l], layer_dims[l-1]))
                parameters['b' + str(l)] = np.random.normal(0,np.sqrt(2.0/(layer_dims[l]+layer_dims[l-1])),
                                                            (layer_dims[l], 1))
        elif init_type == 'xavier_uniform':
            for l in range(1, L):
                parameters['W' + str(l)] = np.random.uniform(-np.sqrt(6.0/(layer_dims[l]+layer_dims[l-1])),
                                                             np.sqrt(6.0/(layer_dims[l]+layer_dims[l-1])),
                                                             (layer_dims[l], layer_dims[l-1]))
                parameters['b' + str(l)] = np.random.uniform(-np.sqrt(6.0/(layer_dims[l]+layer_dims[l-1])),
                                                             np.sqrt(6.0/(layer_dims[l]+layer_dims[l-1])),
                                                             (layer_dims[l], 1))
        return parameters
    @staticmethod
    def sigmoid(X,derivative=False):
        '''Compute sigmoid and its derivative'''
        if not derivative:
            out = 1 / (1 + np.exp(-np.array(X)))
        else:
            s = 1 / (1 + np.exp(-np.array(X)))
            out = s*(1-s)
        return out
    @staticmethod
    def ReLU(X,alpha=0,derivative=False):
        '''Compute the ReLU function and its derivative'''
        X = np.array(X,dtype=np.float64)
        if not derivative:
            return np.where(X<0,alpha*X,X)
        else:
            X_relu = np.ones_like(X,dtype=np.float64)
            X_relu[X < 0] = alpha
            return X_relu
    @staticmethod
    def Tanh(X,derivative=False):
        '''Compute tanh values and the derivative of tanh'''
        X = np.array(X)
        if not derivative:
            return np.tanh(X)
        else:
            return 1 - (np.tanh(X))**2
    @staticmethod
    def softplus(X,derivative=False):
        '''Compute softplus values and the derivative of softplus (the sigmoid)'''
        X = np.array(X)
        if not derivative:
            return np.log(1+np.exp(X))
        else:
            return 1 / (1 + np.exp(-X))
    @staticmethod
    def arctan(X,derivative=False):
        '''Compute tan^-1(X) and its derivative'''
        if not derivative:
            return np.arctan(X)
        else:
            return 1 / (1 + np.square(X))
    @staticmethod
    def identity(X,derivative=False):
        '''Identity function f(x) = x and its derivative'''
        X = np.array(X)
        if not derivative:
            return X
        else:
            return np.ones_like(X)
    @staticmethod
    def elu(X,alpha=0,derivative=False):
        '''Exponential Linear Unit and its derivative'''
        X = np.array(X,dtype=np.float64)
        if not derivative:
            return np.where(X<0,alpha*(np.exp(X)-1),X)
        else:
            return np.where(X<0,alpha*np.exp(X),1)
    @staticmethod
    def softmax(X):
        """Compute softmax values for each set of scores (columns) in X."""
        # subtracting the column-wise max keeps np.exp from overflowing
        # without changing the result
        e = np.exp(X - np.max(X,axis=0,keepdims=True))
        return e / np.sum(e,axis=0)
    @staticmethod
    def forward_propagation(X, hidden_layers,parameters,keep_prob=1,seed=None):
        """
        Arguments:
        X -- data, numpy array of shape (input size, number of examples)
        hidden_layers -- list of hidden layer activation names
        parameters -- output of weights_init (dict of weights and biases)
        keep_prob -- probability of keeping a neuron active during dropout, scalar
        Returns:
        AL -- last post-activation value
        caches -- list with one cache per layer (indexed from 0 to L-1), each holding
                  the layer's linear cache and pre-activation Z
        """
        if seed is not None:
            np.random.seed(seed)
        caches = []
        A = X
        L = len(hidden_layers)
        for l,active_function in enumerate(hidden_layers,start=1):
            A_prev = A
            Z = np.dot(parameters['W' + str(l)],A_prev)+parameters['b' + str(l)]
            if type(active_function) is tuple:
                if active_function[0] == "relu":
                    A = DNNClassifier.ReLU(Z,active_function[1])
                elif active_function[0] == 'elu':
                    A = DNNClassifier.elu(Z,active_function[1])
                else:
                    raise ValueError('Unknown activation: ' + str(active_function))
            else:
                if active_function == "sigmoid":
                    A = DNNClassifier.sigmoid(Z)
                elif active_function == "identity":
                    A = DNNClassifier.identity(Z)
                elif active_function == "arctan":
                    A = DNNClassifier.arctan(Z)
                elif active_function == "softplus":
                    A = DNNClassifier.softplus(Z)
                elif active_function == "tanh":
                    A = DNNClassifier.Tanh(Z)
                elif active_function == "softmax":
                    A = DNNClassifier.softmax(Z)
                elif active_function == "relu":
                    A = DNNClassifier.ReLU(Z)
                elif active_function == 'elu':
                    A = DNNClassifier.elu(Z)
                else:
                    raise ValueError('Unknown activation: ' + str(active_function))
            # inverted dropout on the hidden layers (never on the first or last layer)
            if keep_prob != 1 and l != L and l != 1:
                D = np.random.rand(A.shape[0],A.shape[1])
                D = (D < keep_prob)
                A = np.multiply(A,D)
                A = A / keep_prob
                cache = ((A_prev, parameters['W' + str(l)],parameters['b' + str(l)],D), Z)
                caches.append(cache)
            else:
                cache = ((A_prev, parameters['W' + str(l)],parameters['b' + str(l)]), Z)
                caches.append(cache)
        return A, caches
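    # Note on the dropout above: because surviving activations are divided by
    # keep_prob during training (inverted dropout), the expected activation is
    # unchanged, so prediction can simply run with keep_prob = 1 and no rescaling.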
    @staticmethod
    def compute_cost(A, Y, parameters, lamda=0,penality=None):
        """
        Implement the cross-entropy cost function, with optional l1/l2 regularization.
        Arguments:
        A -- post-activation, output of forward propagation
        Y -- "true" labels vector, of shape (output size, number of examples)
        parameters -- python dictionary containing the parameters of the model
        lamda -- regularization strength
        penality -- 'l1', 'l2' or None
        Returns:
        cost -- value of the (regularized) loss function
        """
        m = Y.shape[1]
        # small epsilon guards against log(0)
        cost = np.squeeze(-np.sum(np.multiply(np.log(A + 1e-8),Y))/m)
        L = len(parameters)//2
        if penality == 'l2' and lamda != 0:
            sum_weights = 0
            for l in range(1, L+1):     # include the last layer's weights
                sum_weights = sum_weights + np.sum(np.square(parameters['W' + str(l)]))
            cost = cost + sum_weights * (lamda/(2*m))
        elif penality == 'l1' and lamda != 0:
            sum_weights = 0
            for l in range(1, L+1):
                sum_weights = sum_weights + np.sum(np.abs(parameters['W' + str(l)]))
            cost = cost + sum_weights * (lamda/(2*m))
        return cost
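    # For reference, the cost computed above is
    #   J = -(1/m) * sum(Y * log(A))                       (softmax cross-entropy)
    # plus, when regularization is enabled,
    #   (lamda/(2m)) * sum_l ||W_l||^2   for 'l2', or
    #   (lamda/(2m)) * sum_l |W_l|       for 'l1'.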
    @staticmethod
    def back_propagation(AL, Y, caches, hidden_layers, keep_prob=1, penality=None,lamda=0):
        """
        Implement the backward propagation
        Arguments:
        AL -- probability vector, output of the forward propagation
        Y -- one-hot true "label" vector
        caches -- list of caches from forward_propagation
        hidden_layers -- hidden layer names
        keep_prob -- probability for dropout
        penality -- regularization penalty, 'l1', 'l2' or None
        Returns:
        grads -- A dictionary with the gradients
                 grads["dA" + str(l)] = ...
                 grads["dW" + str(l)] = ...
                 grads["db" + str(l)] = ...
        """
        grads = {}
        L = len(caches)             # the number of layers
        m = AL.shape[1]
        Y = Y.reshape(AL.shape)
        # Initializing the backpropagation: for softmax with cross-entropy, dZ = AL - Y
        dZL = AL - Y
        linear_cache, Z = caches[L-1]
        A_prev, W, b = linear_cache
        # regularization gradient includes the last layer, matching compute_cost
        if penality == 'l2':
            grads["dW" + str(L)] = np.dot(dZL,A_prev.T)/m + (lamda * W)/m
        elif penality == 'l1':
            grads["dW" + str(L)] = np.dot(dZL,A_prev.T)/m + (lamda * np.sign(W + 1e-8))/m
        else:
            grads["dW" + str(L)] = np.dot(dZL,A_prev.T)/m
        grads["db" + str(L)] = np.sum(dZL,axis=1,keepdims=True)/m
        grads["dA" + str(L-1)] = np.dot(W.T,dZL)
        # Loop from l=L-2 down to l=0
        for l in reversed(range(L-1)):
            linear_cache, Z = caches[l]
            active_function = hidden_layers[l]
            if len(linear_cache) == 4:          # this layer used dropout
                A_prev, W, b, D = linear_cache
                dropped = True
            else:
                A_prev, W, b = linear_cache
                dropped = False
            m = A_prev.shape[1]
            if keep_prob != 1 and dropped:
                # apply the same mask used in the forward pass, then rescale
                dA_prev = np.multiply(grads["dA" + str(l + 1)],D) / keep_prob
            else:
                dA_prev = grads["dA" + str(l + 1)]
            if type(active_function) is tuple:
                if active_function[0] == "relu":
                    dZ = np.multiply(dA_prev,DNNClassifier.ReLU(Z,active_function[1],derivative=True))
                elif active_function[0] == 'elu':
                    dZ = np.multiply(dA_prev,DNNClassifier.elu(Z,active_function[1],derivative=True))
            else:
                if active_function == "sigmoid":
                    dZ = np.multiply(dA_prev,DNNClassifier.sigmoid(Z,derivative=True))
                elif active_function == "relu":
                    dZ = np.multiply(dA_prev,DNNClassifier.ReLU(Z,derivative=True))
                elif active_function == "tanh":
                    dZ = np.multiply(dA_prev,DNNClassifier.Tanh(Z,derivative=True))
                elif active_function == "identity":
                    dZ = np.multiply(dA_prev,DNNClassifier.identity(Z,derivative=True))
                elif active_function == "arctan":
                    dZ = np.multiply(dA_prev,DNNClassifier.arctan(Z,derivative=True))
                elif active_function == "softplus":
                    dZ = np.multiply(dA_prev,DNNClassifier.softplus(Z,derivative=True))
                elif active_function == 'elu':
                    dZ = np.multiply(dA_prev,DNNClassifier.elu(Z,derivative=True))
            grads["dA" + str(l)] = np.dot(W.T,dZ)
            if penality == 'l2':
                grads["dW" + str(l + 1)] = np.dot(dZ,A_prev.T)/m + (lamda * W)/m
            elif penality == 'l1':
                grads["dW" + str(l + 1)] = np.dot(dZ,A_prev.T)/m + (lamda * np.sign(W + 1e-8))/m
            else:
                grads["dW" + str(l + 1)] = np.dot(dZ,A_prev.T)/m
            grads["db" + str(l + 1)] = np.sum(dZ,axis=1,keepdims=True)/m
        return grads
    @staticmethod
    def update_parameters(parameters, grads,learning_rate,iter_no,method='SGD',opt_parameters=None,beta1=0.9,beta2=0.999):
        """
        Update parameters using gradient descent
        Arguments:
        parameters -- python dictionary containing the parameters
        grads -- python dictionary containing the gradients, output of back_propagation
        learning_rate -- learning rate alpha
        iter_no -- current iteration number (used for ADAM bias correction)
        method -- weight update method: 'SGD','SGDM','RMSP','ADAM'
        opt_parameters -- running averages used by SGDM/RMSP/ADAM
        beta1 -- weighted average parameter for SGDM and ADAM
        beta2 -- weighted average parameter for RMSP and ADAM
        Returns:
        parameters -- python dictionary containing the updated parameters
                      parameters["W" + str(l)] = ...
                      parameters["b" + str(l)] = ...
        opt_parameters -- updated running averages
        """
        L = len(parameters) // 2        # number of layers in the neural network
        if method == 'SGD':
            for l in range(L):
                parameters["W" + str(l+1)] = parameters["W" + str(l+1)] - learning_rate*grads["dW" + str(l + 1)]
                parameters["b" + str(l+1)] = parameters["b" + str(l+1)] - learning_rate*grads["db" + str(l + 1)]
            opt_parameters = None
        elif method == 'SGDM':
            for l in range(L):
                opt_parameters['vdb'+str(l+1)] = beta1*opt_parameters['vdb'+str(l+1)] + (1-beta1)*grads["db" + str(l + 1)]
                opt_parameters['vdw'+str(l+1)] = beta1*opt_parameters['vdw'+str(l+1)] + (1-beta1)*grads["dW" + str(l + 1)]
                parameters["W" + str(l+1)] = parameters["W" + str(l+1)] - learning_rate*opt_parameters['vdw'+str(l+1)]
                parameters["b" + str(l+1)] = parameters["b" + str(l+1)] - learning_rate*opt_parameters['vdb'+str(l+1)]
        elif method == 'RMSP':
            for l in range(L):
                opt_parameters['sdb'+str(l+1)] = beta2*opt_parameters['sdb'+str(l+1)] + \
                                                 (1-beta2)*np.square(grads["db" + str(l + 1)])
                opt_parameters['sdw'+str(l+1)] = beta2*opt_parameters['sdw'+str(l+1)] + \
                                                 (1-beta2)*np.square(grads["dW" + str(l + 1)])
                parameters["W" + str(l+1)] = parameters["W" + str(l+1)] - \
                                             learning_rate*(grads["dW" + str(l + 1)]/(np.sqrt(opt_parameters['sdw'+str(l+1)])+1e-8))
                parameters["b" + str(l+1)] = parameters["b" + str(l+1)] - \
                                             learning_rate*(grads["db" + str(l + 1)]/(np.sqrt(opt_parameters['sdb'+str(l+1)])+1e-8))
        elif method == 'ADAM':
            # compute the bias-corrected step size once per call; re-assigning
            # learning_rate inside the loop would wrongly compound it across layers
            lr_t = learning_rate * np.sqrt((1-beta2**iter_no)/((1-beta1**iter_no)+1e-8))
            for l in range(L):
                opt_parameters['vdb'+str(l+1)] = beta1*opt_parameters['vdb'+str(l+1)] + (1-beta1)*grads["db" + str(l + 1)]
                opt_parameters['vdw'+str(l+1)] = beta1*opt_parameters['vdw'+str(l+1)] + (1-beta1)*grads["dW" + str(l + 1)]
                opt_parameters['sdb'+str(l+1)] = beta2*opt_parameters['sdb'+str(l+1)] + \
                                                 (1-beta2)*np.square(grads["db" + str(l + 1)])
                opt_parameters['sdw'+str(l+1)] = beta2*opt_parameters['sdw'+str(l+1)] + \
                                                 (1-beta2)*np.square(grads["dW" + str(l + 1)])
                parameters["W" + str(l+1)] = parameters["W" + str(l+1)] - \
                                             lr_t*(opt_parameters['vdw'+str(l+1)]/\
                                             (np.sqrt(opt_parameters['sdw'+str(l+1)])+1e-8))
                parameters["b" + str(l+1)] = parameters["b" + str(l+1)] - \
                                             lr_t*(opt_parameters['vdb'+str(l+1)]/\
                                             (np.sqrt(opt_parameters['sdb'+str(l+1)])+1e-8))
        return parameters,opt_parameters
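    # For reference, the ADAM update implemented above (per layer, step t) is
    #   v = beta1*v + (1-beta1)*g            (first moment)
    #   s = beta2*s + (1-beta2)*g^2          (second moment)
    #   lr_t = lr * sqrt(1-beta2^t)/(1-beta1^t)
    #   theta = theta - lr_t * v / (sqrt(s) + 1e-8)
    # i.e. bias correction is folded into the step size rather than applied
    # to v and s individually.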
    def fit(self,X,y):
        '''
        X -- data, numpy array of shape (input size, number of examples)
        y -- labels, numpy array of shape (no of classes, number of examples)
        '''
        np.random.seed(self.seed)
        self.grads = {}
        self.costs = []
        M = X.shape[1]
        opt_parameters = {}
        if self.verbose == 1:
            print('Initializing Weights...')
        self.parameters = self.weights_init(self.layer_dims,self.init_type,self.seed)
        self.iter_no = 0
        idx = np.arange(0,M)
        if self.optimization_method != 'SGD':
            for l in range(1, len(self.layer_dims)):
                opt_parameters['vdw' + str(l)] = np.zeros((self.layer_dims[l], self.layer_dims[l-1]))
                opt_parameters['vdb' + str(l)] = np.zeros((self.layer_dims[l], 1))
                opt_parameters['sdw' + str(l)] = np.zeros((self.layer_dims[l], self.layer_dims[l-1]))
                opt_parameters['sdb' + str(l)] = np.zeros((self.layer_dims[l], 1))
        if self.verbose == 1:
            print('Starting Training...')
        for epoch_no in range(1,self.max_epoch+1):
            np.random.shuffle(idx)
            X = X[:,idx]
            y = y[:,idx]
            for i in range(0,M, self.batch_size):
                self.iter_no = self.iter_no + 1
                X_batch = X[:,i:i + self.batch_size]
                y_batch = y[:,i:i + self.batch_size]
                # Forward propagation; seed=None so each batch draws a fresh
                # dropout mask (the RNG was already seeded above)
                AL, cache = self.forward_propagation(X_batch,self.hidden_layers,self.parameters,self.keep_proba,None)
                # cost
                cost = self.compute_cost(AL, y_batch, self.parameters,self.lamda,self.penality)
                self.costs.append(cost)
                if self.tol is not None and len(self.costs) > 1:
                    if abs(cost - self.costs[-2]) < self.tol:
                        return self
                # back propagation
                grads = self.back_propagation(AL, y_batch, cache,self.hidden_layers,self.keep_proba,self.penality,self.lamda)
                # update parameters (iter_no starts at 1 so ADAM's bias
                # correction is well defined on the first step)
                self.parameters,opt_parameters = self.update_parameters(self.parameters,grads,self.learning_rate,
                                                                        self.iter_no,self.optimization_method,
                                                                        opt_parameters,self.beta1,self.beta2)
                if self.verbose == 1:
                    if self.iter_no % 100 == 0:
                        print("Cost after iteration {}: {}".format(self.iter_no, cost))
        return self
    def predict(self,X,proba=False):
        '''Predict values
        Arguments: X -- input data
                   proba -- if False, return the predicted class labels;
                            if True, return the class probabilities
        '''
        # dropout is disabled at prediction time (keep_prob=1); inverted dropout
        # already rescaled activations during training
        out, _ = self.forward_propagation(X,self.hidden_layers,self.parameters,1,None)
        if proba:
            return out.T
        else:
            return np.argmax(out, axis=0)
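# --- Usage sketch (illustrative; the data, shapes and hyperparameters below
# --- are made up for demonstration, not taken from the original paste) ---
if __name__ == '__main__':
    rng = np.random.RandomState(0)
    X = rng.randn(4, 500)                       # (n_features, n_examples)
    labels = rng.randint(0, 2, 500)
    Y = np.eye(2)[labels].T                     # one-hot, shape (n_classes, n_examples)
    clf = DNNClassifier(layer_dims=[4, 8, 2],
                        hidden_layers=[('relu', 0), 'softmax'],
                        learning_rate=0.05,
                        optimization_method='ADAM',
                        batch_size=64, max_epoch=50,
                        seed=42, verbose=1)
    clf.fit(X, Y)
    preds = clf.predict(X)                      # class indices, shape (n_examples,)
    probs = clf.predict(X, proba=True)          # probabilities, shape (n_examples, n_classes)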