import numpy as np


class DNNClassifier(object):
    '''
    Parameters:
    layer_dims -- list of layer dimensions, including the input and output layers
    hidden_layers -- list of activation functions, one per layer:
                     'relu','sigmoid','tanh','softplus','arctan','elu','identity','softmax'
                     Note: 1. the last layer must be 'softmax'
                           2. for relu and elu an alpha value can be given as a tuple, e.g.
                              ['tanh',('relu',alpha1),('elu',alpha2),('relu',alpha3),'softmax']
                              if no tuple is given, alpha defaults to 0
    init_type -- he_normal      --> N(0, sqrt(2/fanin))
                 he_uniform     --> Uniform(-sqrt(6/fanin), sqrt(6/fanin))
                 xavier_normal  --> N(0, sqrt(2/(fanin+fanout)))
                 xavier_uniform --> Uniform(-sqrt(6/(fanin+fanout)), sqrt(6/(fanin+fanout)))
    learning_rate -- learning rate
    optimization_method -- optimization method: 'SGD','SGDM','RMSP','ADAM'
    batch_size -- batch size used to update the weights
    max_epoch -- maximum number of epochs
                 Note: max_iter = max_epoch * (size of training set / batch size)
    tolarance -- if abs(previous cost - current cost) < tolarance, training is stopped;
                 if None, no check is performed
    keep_proba -- keep probability for dropout; if 1, no dropout is applied
    penality -- regularization penalty: 'l1', 'l2' or None (default)
    lamda -- l1 or l2 regularization strength
    beta1 -- parameter for SGDM and ADAM
    beta2 -- parameter for RMSP and ADAM
    seed -- random seed
    verbose -- takes 0 or 1
    '''

    def __init__(self, layer_dims, hidden_layers, init_type='he_normal', learning_rate=0.1,
                 optimization_method='SGD', batch_size=64, max_epoch=100, tolarance=0.00001,
                 keep_proba=1, penality=None, lamda=0, beta1=0.9,
                 beta2=0.999, seed=None, verbose=0):
        self.layer_dims = layer_dims
        self.hidden_layers = hidden_layers
        self.init_type = init_type
        self.learning_rate = learning_rate
        self.optimization_method = optimization_method
        self.batch_size = batch_size
        self.keep_proba = keep_proba
        self.penality = penality
        self.lamda = lamda
        self.beta1 = beta1
        self.beta2 = beta2
        self.seed = seed
        self.max_epoch = max_epoch
        self.tol = tolarance
        self.verbose = verbose

    @staticmethod
    def weights_init(layer_dims, init_type='he_normal', seed=None):
        """
        Arguments:
        layer_dims -- python list containing the dimensions of each layer in the network,
                      i.e. [no. of input features, no. of neurons in hidden layer 1, ...,
                      no. of neurons in hidden layer n, no. of outputs]
        init_type -- he_normal      --> N(0, sqrt(2/fanin))
                     he_uniform     --> Uniform(-sqrt(6/fanin), sqrt(6/fanin))
                     xavier_normal  --> N(0, sqrt(2/(fanin+fanout)))
                     xavier_uniform --> Uniform(-sqrt(6/(fanin+fanout)), sqrt(6/(fanin+fanout)))
        seed -- random seed used to generate the weights

        Returns:
        parameters -- python dictionary containing the parameters "W1", "b1", ..., "WL", "bL":
                      Wl -- weight matrix of shape (layer_dims[l], layer_dims[l-1])
                      bl -- bias vector of shape (layer_dims[l], 1)
        """
        np.random.seed(seed)
        parameters = {}
        L = len(layer_dims)  # number of layers in the network

        if init_type == 'he_normal':
            for l in range(1, L):
                parameters['W' + str(l)] = np.random.normal(0, np.sqrt(2.0/layer_dims[l-1]),
                                                            (layer_dims[l], layer_dims[l-1]))
                parameters['b' + str(l)] = np.random.normal(0, np.sqrt(2.0/layer_dims[l-1]),
                                                            (layer_dims[l], 1))
        elif init_type == 'he_uniform':
            for l in range(1, L):
                parameters['W' + str(l)] = np.random.uniform(-np.sqrt(6.0/layer_dims[l-1]),
                                                             np.sqrt(6.0/layer_dims[l-1]),
                                                             (layer_dims[l], layer_dims[l-1]))
                parameters['b' + str(l)] = np.random.uniform(-np.sqrt(6.0/layer_dims[l-1]),
                                                             np.sqrt(6.0/layer_dims[l-1]),
                                                             (layer_dims[l], 1))
        elif init_type == 'xavier_normal':
            for l in range(1, L):
                # Xavier/Glorot normal: the standard deviation is sqrt(2/(fanin+fanout))
                parameters['W' + str(l)] = np.random.normal(0, np.sqrt(2.0/(layer_dims[l]+layer_dims[l-1])),
                                                            (layer_dims[l], layer_dims[l-1]))
                parameters['b' + str(l)] = np.random.normal(0, np.sqrt(2.0/(layer_dims[l]+layer_dims[l-1])),
                                                            (layer_dims[l], 1))
        elif init_type == 'xavier_uniform':
            for l in range(1, L):
                parameters['W' + str(l)] = np.random.uniform(-np.sqrt(6.0/(layer_dims[l]+layer_dims[l-1])),
                                                             np.sqrt(6.0/(layer_dims[l]+layer_dims[l-1])),
                                                             (layer_dims[l], layer_dims[l-1]))
                parameters['b' + str(l)] = np.random.uniform(-np.sqrt(6.0/(layer_dims[l]+layer_dims[l-1])),
                                                             np.sqrt(6.0/(layer_dims[l]+layer_dims[l-1])),
                                                             (layer_dims[l], 1))
        return parameters

    @staticmethod
    def sigmoid(X, derivative=False):
        '''Compute the sigmoid function and its derivative'''
        if not derivative:
            out = 1 / (1 + np.exp(-np.array(X)))
        else:
            s = 1 / (1 + np.exp(-np.array(X)))
            out = s * (1 - s)
        return out

    @staticmethod
    def ReLU(X, alpha=0, derivative=False):
        '''Compute the (leaky) ReLU function and its derivative'''
        X = np.array(X, dtype=np.float64)
        if not derivative:
            return np.where(X < 0, alpha*X, X)
        X_relu = np.ones_like(X, dtype=np.float64)
        X_relu[X < 0] = alpha
        return X_relu

    @staticmethod
    def Tanh(X, derivative=False):
        '''Compute tanh and its derivative'''
        X = np.array(X)
        if not derivative:
            return np.tanh(X)
        return 1 - (np.tanh(X))**2

    @staticmethod
    def softplus(X, derivative=False):
        '''Compute softplus and its derivative (the sigmoid)'''
        X = np.array(X)
        if not derivative:
            return np.log(1 + np.exp(X))
        return 1 / (1 + np.exp(-X))

    @staticmethod
    def arctan(X, derivative=False):
        '''Compute arctan(X) and its derivative'''
        if not derivative:
            return np.arctan(X)
        return 1 / (1 + np.square(X))

    @staticmethod
    def identity(X, derivative=False):
        '''Identity function f(x) = x and its derivative'''
        X = np.array(X)
        if not derivative:
            return X
        return np.ones_like(X)

    @staticmethod
    def elu(X, alpha=0, derivative=False):
        '''Exponential Linear Unit and its derivative'''
        X = np.array(X, dtype=np.float64)
        if not derivative:
            return np.where(X < 0, alpha*(np.exp(X) - 1), X)
        return np.where(X < 0, alpha*np.exp(X), 1)

    @staticmethod
    def softmax(X):
        '''Compute column-wise softmax values for each set of scores in X'''
        # Shift by the column max for numerical stability; this does not change the result.
        e = np.exp(X - np.max(X, axis=0, keepdims=True))
        return e / np.sum(e, axis=0)

    @staticmethod
    def forward_propagation(X, hidden_layers, parameters, keep_prob=1, seed=None):
        """
        Arguments:
        X -- data, numpy array of shape (input size, number of examples)
        hidden_layers -- list of hidden layer activations
        parameters -- output of weights_init (dict of weights and biases)
        keep_prob -- probability of keeping a neuron active during dropout, scalar

        Returns:
        AL -- last post-activation value
        caches -- list of caches, one per layer, each of the form ((A_prev, W, b[, D]), Z)
        """
        if seed is not None:
            np.random.seed(seed)
        caches = []
        A = X
        L = len(hidden_layers)
        for l, active_function in enumerate(hidden_layers, start=1):
            A_prev = A
            Z = np.dot(parameters['W' + str(l)], A_prev) + parameters['b' + str(l)]

            if type(active_function) is tuple:
                if active_function[0] == "relu":
                    A = DNNClassifier.ReLU(Z, active_function[1])
                elif active_function[0] == 'elu':
                    A = DNNClassifier.elu(Z, active_function[1])
            else:
                if active_function == "sigmoid":
                    A = DNNClassifier.sigmoid(Z)
                elif active_function == "identity":
                    A = DNNClassifier.identity(Z)
                elif active_function == "arctan":
                    A = DNNClassifier.arctan(Z)
                elif active_function == "softplus":
                    A = DNNClassifier.softplus(Z)
                elif active_function == "tanh":
                    A = DNNClassifier.Tanh(Z)
                elif active_function == "softmax":
                    A = DNNClassifier.softmax(Z)
                elif active_function == "relu":
                    A = DNNClassifier.ReLU(Z)
                elif active_function == 'elu':
                    A = DNNClassifier.elu(Z)

            # Inverted dropout on the hidden layers (never on the first or the output layer)
            if keep_prob != 1 and l != L and l != 1:
                D = np.random.rand(A.shape[0], A.shape[1])
                D = (D < keep_prob)
                A = np.multiply(A, D)
                A = A / keep_prob
                cache = ((A_prev, parameters['W' + str(l)], parameters['b' + str(l)], D), Z)
            else:
                cache = ((A_prev, parameters['W' + str(l)], parameters['b' + str(l)]), Z)
            caches.append(cache)
        return A, caches

    @staticmethod
    def compute_cost(A, Y, parameters, lamda=0, penality=None):
        """
        Implement the cross-entropy cost with optional l1/l2 regularization.

        Arguments:
        A -- post-activation, output of forward propagation
        Y -- "true" labels vector, of shape (output size, number of examples)
        parameters -- python dictionary containing the parameters of the model

        Returns:
        cost -- value of the (regularized) loss function
        """
        m = Y.shape[1]

        # Small epsilon inside the log guards against log(0)
        cost = np.squeeze(-np.sum(np.multiply(np.log(A + 1e-8), Y)) / m)

        L = len(parameters) // 2

        if penality == 'l2' and lamda != 0:
            sum_weights = 0
            # include the weights of every layer, 1..L
            for l in range(1, L + 1):
                sum_weights = sum_weights + np.sum(np.square(parameters['W' + str(l)]))
            cost = cost + sum_weights * (lamda / (2 * m))
        elif penality == 'l1' and lamda != 0:
            sum_weights = 0
            for l in range(1, L + 1):
                sum_weights = sum_weights + np.sum(np.abs(parameters['W' + str(l)]))
            cost = cost + sum_weights * (lamda / (2 * m))
        return cost

    @staticmethod
    def back_propagation(AL, Y, caches, hidden_layers, keep_prob=1, penality=None, lamda=0):
        """
        Implement backward propagation.

        Arguments:
        AL -- probability vector, output of the forward propagation
        Y -- true "label" vector (one-hot, same shape as AL)
        caches -- list of caches from forward_propagation
        hidden_layers -- list of hidden layer activations
        keep_prob -- keep probability for dropout
        penality -- regularization penalty: 'l1', 'l2' or None

        Returns:
        grads -- a dictionary with the gradients
                 grads["dA" + str(l)] = ...
                 grads["dW" + str(l)] = ...
                 grads["db" + str(l)] = ...
        """
        grads = {}
        L = len(caches)  # the number of layers
        m = AL.shape[1]
        Y = Y.reshape(AL.shape)

        # Initializing the backpropagation: softmax + cross-entropy gives dZL = AL - Y
        dZL = AL - Y

        cache = caches[L - 1]
        linear_cache, activation_cache = cache
        A_prev, W, b = linear_cache
        grads["dW" + str(L)] = np.dot(dZL, A_prev.T) / m
        # apply the regularization term to the output layer weights as well
        if penality == 'l2':
            grads["dW" + str(L)] += (lamda * W) / m
        elif penality == 'l1':
            grads["dW" + str(L)] += (lamda * np.sign(W + 10**-8)) / m
        grads["db" + str(L)] = np.sum(dZL, axis=1, keepdims=True) / m
        grads["dA" + str(L - 1)] = np.dot(W.T, dZL)

        # Loop from l=L-2 to l=0
        v_dropout = 0
        for l in reversed(range(L - 1)):
            cache = caches[l]
            active_function = hidden_layers[l]

            linear_cache, Z = cache
            try:
                A_prev, W, b = linear_cache
            except ValueError:
                A_prev, W, b, D = linear_cache
                v_dropout = 1

            m = A_prev.shape[1]

            if keep_prob != 1 and v_dropout == 1:
                dA_prev = np.multiply(grads["dA" + str(l + 1)], D)
                dA_prev = dA_prev / keep_prob
                v_dropout = 0
            else:
                dA_prev = grads["dA" + str(l + 1)]
                v_dropout = 0

            if type(active_function) is tuple:
                if active_function[0] == "relu":
                    dZ = np.multiply(dA_prev, DNNClassifier.ReLU(Z, active_function[1], derivative=True))
                elif active_function[0] == 'elu':
                    dZ = np.multiply(dA_prev, DNNClassifier.elu(Z, active_function[1], derivative=True))
            else:
                if active_function == "sigmoid":
                    dZ = np.multiply(dA_prev, DNNClassifier.sigmoid(Z, derivative=True))
                elif active_function == "relu":
                    dZ = np.multiply(dA_prev, DNNClassifier.ReLU(Z, derivative=True))
                elif active_function == "tanh":
                    dZ = np.multiply(dA_prev, DNNClassifier.Tanh(Z, derivative=True))
                elif active_function == "identity":
                    dZ = np.multiply(dA_prev, DNNClassifier.identity(Z, derivative=True))
                elif active_function == "arctan":
                    dZ = np.multiply(dA_prev, DNNClassifier.arctan(Z, derivative=True))
                elif active_function == "softplus":
                    dZ = np.multiply(dA_prev, DNNClassifier.softplus(Z, derivative=True))
                elif active_function == 'elu':
                    dZ = np.multiply(dA_prev, DNNClassifier.elu(Z, derivative=True))

            grads["dA" + str(l)] = np.dot(W.T, dZ)

            if penality == 'l2':
                grads["dW" + str(l + 1)] = (np.dot(dZ, A_prev.T) / m) + ((lamda * W) / m)
            elif penality == 'l1':
                grads["dW" + str(l + 1)] = (np.dot(dZ, A_prev.T) / m) + ((lamda * np.sign(W + 10**-8)) / m)
            else:
                grads["dW" + str(l + 1)] = np.dot(dZ, A_prev.T) / m

            grads["db" + str(l + 1)] = np.sum(dZ, axis=1, keepdims=True) / m
        return grads

    @staticmethod
    def update_parameters(parameters, grads, learning_rate, iter_no, method='SGD',
                          opt_parameters=None, beta1=0.9, beta2=0.999):
        """
        Update parameters using gradient descent.

        Arguments:
        parameters -- python dictionary containing the parameters
        grads -- python dictionary containing the gradients, output of back_propagation
        method -- method used to update the weights: 'SGD','SGDM','RMSP','ADAM'
        learning_rate -- learning rate alpha
        beta1 -- weighted average parameter for SGDM and ADAM
        beta2 -- weighted average parameter for RMSP and ADAM

        Returns:
        parameters -- python dictionary containing the updated parameters
                      parameters["W" + str(l)] = ...
                      parameters["b" + str(l)] = ...
        opt_parameters -- python dictionary with the running averages used by the optimizer
        """
        L = len(parameters) // 2  # number of layers in the neural network
        if method == 'SGD':
            for l in range(L):
                parameters["W" + str(l+1)] = parameters["W" + str(l+1)] - learning_rate*grads["dW" + str(l+1)]
                parameters["b" + str(l+1)] = parameters["b" + str(l+1)] - learning_rate*grads["db" + str(l+1)]
            opt_parameters = None
        elif method == 'SGDM':
            for l in range(L):
                opt_parameters['vdb'+str(l+1)] = beta1*opt_parameters['vdb'+str(l+1)] + (1-beta1)*grads["db" + str(l+1)]
                opt_parameters['vdw'+str(l+1)] = beta1*opt_parameters['vdw'+str(l+1)] + (1-beta1)*grads["dW" + str(l+1)]
                parameters["W" + str(l+1)] = parameters["W" + str(l+1)] - learning_rate*opt_parameters['vdw'+str(l+1)]
                parameters["b" + str(l+1)] = parameters["b" + str(l+1)] - learning_rate*opt_parameters['vdb'+str(l+1)]
        elif method == 'RMSP':
            for l in range(L):
                opt_parameters['sdb'+str(l+1)] = beta2*opt_parameters['sdb'+str(l+1)] + \
                    (1-beta2)*np.square(grads["db" + str(l+1)])
                opt_parameters['sdw'+str(l+1)] = beta2*opt_parameters['sdw'+str(l+1)] + \
                    (1-beta2)*np.square(grads["dW" + str(l+1)])
                parameters["W" + str(l+1)] = parameters["W" + str(l+1)] - \
                    learning_rate*(grads["dW" + str(l+1)]/(np.sqrt(opt_parameters['sdw'+str(l+1)])+10**-8))
                parameters["b" + str(l+1)] = parameters["b" + str(l+1)] - \
                    learning_rate*(grads["db" + str(l+1)]/(np.sqrt(opt_parameters['sdb'+str(l+1)])+10**-8))
        elif method == 'ADAM':
            # Bias-corrected step size, computed once per call rather than once per layer
            lr_t = learning_rate * np.sqrt((1-beta2**iter_no)/((1-beta1**iter_no)+10**-8))
            for l in range(L):
                opt_parameters['vdb'+str(l+1)] = beta1*opt_parameters['vdb'+str(l+1)] + (1-beta1)*grads["db" + str(l+1)]
                opt_parameters['vdw'+str(l+1)] = beta1*opt_parameters['vdw'+str(l+1)] + (1-beta1)*grads["dW" + str(l+1)]
                opt_parameters['sdb'+str(l+1)] = beta2*opt_parameters['sdb'+str(l+1)] + \
                    (1-beta2)*np.square(grads["db" + str(l+1)])
                opt_parameters['sdw'+str(l+1)] = beta2*opt_parameters['sdw'+str(l+1)] + \
                    (1-beta2)*np.square(grads["dW" + str(l+1)])
                parameters["W" + str(l+1)] = parameters["W" + str(l+1)] - \
                    lr_t*(opt_parameters['vdw'+str(l+1)]/(np.sqrt(opt_parameters['sdw'+str(l+1)])+10**-8))
                parameters["b" + str(l+1)] = parameters["b" + str(l+1)] - \
                    lr_t*(opt_parameters['vdb'+str(l+1)]/(np.sqrt(opt_parameters['sdb'+str(l+1)])+10**-8))

        return parameters, opt_parameters

    def fit(self, X, y):
        '''
        X -- data, numpy array of shape (input size, number of examples)
        y -- labels, numpy array of shape (number of classes, number of examples)
        '''
        np.random.seed(self.seed)
        self.grads = {}
        self.costs = []
        M = X.shape[1]
        opt_parameters = {}

        if self.verbose == 1:
            print('Initializing Weights...')
        self.parameters = self.weights_init(self.layer_dims, self.init_type, self.seed)
        self.iter_no = 0
        idx = np.arange(0, M)

        if self.optimization_method != 'SGD':
            for l in range(1, len(self.layer_dims)):
                opt_parameters['vdw' + str(l)] = np.zeros((self.layer_dims[l], self.layer_dims[l-1]))
                opt_parameters['vdb' + str(l)] = np.zeros((self.layer_dims[l], 1))
                opt_parameters['sdw' + str(l)] = np.zeros((self.layer_dims[l], self.layer_dims[l-1]))
                opt_parameters['sdb' + str(l)] = np.zeros((self.layer_dims[l], 1))

        if self.verbose == 1:
            print('Starting Training...')

        for epoch_no in range(1, self.max_epoch + 1):
            np.random.shuffle(idx)
            X = X[:, idx]
            y = y[:, idx]
            for i in range(0, M, self.batch_size):
                self.iter_no = self.iter_no + 1
                X_batch = X[:, i:i + self.batch_size]
                y_batch = y[:, i:i + self.batch_size]
                # Forward propagation
                AL, cache = self.forward_propagation(X_batch, self.hidden_layers, self.parameters,
                                                     self.keep_proba, self.seed)
                # Cost
                cost = self.compute_cost(AL, y_batch, self.parameters, self.lamda, self.penality)
                self.costs.append(cost)

                # Early stopping on the change in cost between consecutive batches
                if self.tol is not None and len(self.costs) > 1:
                    if abs(cost - self.costs[-2]) < self.tol:
                        return self
                # Backward propagation
                grads = self.back_propagation(AL, y_batch, cache, self.hidden_layers,
                                              self.keep_proba, self.penality, self.lamda)
                # Update parameters (the iteration count starts at 1 for the ADAM bias correction)
                self.parameters, opt_parameters = self.update_parameters(self.parameters, grads, self.learning_rate,
                                                                         self.iter_no, self.optimization_method,
                                                                         opt_parameters, self.beta1, self.beta2)

                if self.verbose == 1:
                    if self.iter_no % 100 == 0:
                        print("Cost after iteration {}: {}".format(self.iter_no, cost))

        return self

    def predict(self, X, proba=False):
        '''Predict values.
        Arguments: X -- input data
                   proba -- if False, return the predicted class labels;
                            if True, return the class probabilities
        '''
        # Dropout is disabled at prediction time (keep_prob=1); inverted dropout
        # already rescales activations during training.
        out, _ = self.forward_propagation(X, self.hidden_layers, self.parameters, 1, self.seed)
        if proba:
            return out.T
        return np.argmax(out, axis=0)
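

# ----------------------------------------------------------------------------
# Example usage: a minimal sketch, not part of the original paste. The toy
# dataset, layer sizes, and hyperparameters below are illustrative assumptions.
# Note that labels must be one-hot encoded and that both X and y are
# column-major, i.e. of shape (features, examples) and (classes, examples).
if __name__ == '__main__':
    np.random.seed(0)

    # Toy 3-class problem: 4 input features, 300 examples
    n_features, n_classes, n_samples = 4, 3, 300
    X = np.random.randn(n_features, n_samples)
    labels = np.random.randint(0, n_classes, n_samples)
    y = np.zeros((n_classes, n_samples))
    y[labels, np.arange(n_samples)] = 1          # one-hot encode

    clf = DNNClassifier(layer_dims=[n_features, 16, 8, n_classes],
                        hidden_layers=['tanh', ('relu', 0.01), 'softmax'],
                        init_type='he_normal',
                        learning_rate=0.01,
                        optimization_method='ADAM',
                        batch_size=32,
                        max_epoch=50,
                        seed=42,
                        verbose=1)
    clf.fit(X, y)

    preds = clf.predict(X)                       # class indices, shape (n_samples,)
    probs = clf.predict(X, proba=True)           # probabilities, shape (n_samples, n_classes)
    print('training accuracy:', np.mean(preds == labels))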