Guest User

Untitled

a guest
Apr 21st, 2018
114
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 9.16 KB | None | 0 0
  1. from ml import utilities
  2. import numpy as np
  3. import sys
  4. from abc import ABC,abstractmethod
  5. from functools import wraps
  6.  
  7. class ML(ABC):
  8. coef_ = []
  9. classes_ = []
  10. intercept_ = []
  11.  
  12. @abstractmethod
  13. def fit(self,X,y):
  14. pass
  15.  
  16. @abstractmethod
  17. def predict(self,X):
  18.  
  19. pass
  20.  
  21. @abstractmethod
  22. def score(self,X,y):
  23. pass
  24.  
  25.  
  26. class FisherLinearDiscriminant(ML):
  27. '''
  28. w = S^-1 * (m2 − m1)
  29. S = sum n -> c1 ((xn - m1) + (xn -m1)^-1) + sum n -> c2 ((xn - m2) + (xn -m2)^-1)
  30. w0 = -w^T * (m1 + m2)/2
  31. '''
  32.  
  33. means_ = []
  34.  
  35. def fit(self,X,y):
  36.  
  37. """Fit process classification model
  38. Parameters
  39. ----------
  40. X : array-like, shape = (n_samples, n_features)
  41. Training data
  42. y : array-like, shape = (n_samples,)
  43. Target values, must be binary
  44. Returns
  45. -------
  46. self : returns an instance of self.
  47. """
  48. self.classes_ = list(set(y))
  49. sum_samples = dict.fromkeys(self.classes_, [0]* len(X[0]))
  50. n_samples = dict.fromkeys(self.classes_, 0)
  51. for index, value in enumerate(y):
  52. sum_samples[value] = [old_val + new_val for old_val, new_val in zip(sum_samples[value], X[index])]
  53. n_samples[value] += 1
  54. for index, value in enumerate(self.classes_): #calculate means of each class
  55. self.means_.append(np.divide(sum_samples[value],n_samples[value]))
  56.  
  57. for class_index,class_ in enumerate(self.classes_):
  58. rest_means = [0]*len(X[0])
  59. rest_sample_size = 0
  60. for index, value in enumerate(self.classes_): #calculate means of each class
  61. if value != class_:
  62. rest_means =np.add(rest_means,sum_samples[value])
  63. rest_sample_size += 1
  64. rest_means = np.divide(rest_means,rest_sample_size)
  65. class_variance = [[0]*len(X[0]) for i in range(len(X[0]))]
  66. rest_classes_variance = [[0]*len(X[0]) for i in range(len(X[0]))]
  67. for i, j in enumerate(y):
  68. if j == class_:
  69. class_variance = np.add(class_variance,self._sample_variance(self.means_[class_index],X[i]))
  70. else:
  71. rest_classes_variance = np.add(rest_classes_variance,self._sample_variance(rest_means,X[i]))
  72. sw =np.add(class_variance,rest_classes_variance)
  73. sw_inverse = np.linalg.pinv(sw)
  74. sub_means = np.subtract(rest_means,self.means_[class_index])
  75. wieghts = np.matmul(sw_inverse,utilities.transpose_matrix(sub_means))
  76. self.coef_.append(np.transpose(wieghts)[0])
  77. avg_means = np.divide(np.add(rest_means,self.means_[class_index]),2)
  78. self.intercept_.append(np.multiply(np.dot(self.coef_[class_index],avg_means),-1))
  79. def predict(self,X):
  80. """Perform classification on an array of test vectors X.
  81. Parameters
  82. ----------
  83. X : array-like, shape = (n_samples, n_features)
  84. Returns
  85. -------
  86. C : array, shape = (n_samples,)
  87. Predicted target values for X, values are from ``classes_``
  88. """
  89. y = []
  90. for features in X:
  91. predicted_y = sys.maxsize
  92. predicted_class_index = -1
  93. for index,coef in enumerate(self.coef_):
  94. tmp_predicted_y =np.dot(coef,features) + self.intercept_[index]
  95. if predicted_y > tmp_predicted_y:
  96. predicted_y = tmp_predicted_y
  97. predicted_class_index = index
  98. y.append(self.classes_[predicted_class_index])
  99.  
  100. return y
  101.  
  102.  
  103. def score(self,X,y):
  104. """Returns the mean accuracy on the given test data and labels.
  105. In multi-label classification, this is the subset accuracy
  106. which is a harsh metric since you require for each sample that
  107. each label set be correctly predicted.
  108. Parameters
  109. ----------
  110. X : array-like, shape = (n_samples, n_features)
  111. Test samples.
  112. y : array-like, shape = (n_samples) or (n_samples, n_outputs)
  113. True labels for X.
  114. sample_weight : array-like, shape = [n_samples], optional
  115. Sample weights.
  116. Returns
  117. -------
  118. score : float
  119. Mean accuracy of self.predict(X) wrt. y.
  120. """
  121. confusion_matrix = [[0]*len(self.classes_) for i in range(len(self.classes_))]
  122. correct_predict = 0
  123. for real_class_index,features in enumerate(X):
  124. predicted_y = sys.maxsize
  125.  
  126. predicted_class_index = -1
  127. for index,coef in enumerate(self.coef_):
  128. tmp_predicted_y =np.dot(coef,features) + self.intercept_[index]
  129. if predicted_y > tmp_predicted_y:
  130. predicted_y = tmp_predicted_y
  131. predicted_class_index = index
  132. y_index = self.classes_.index(y[real_class_index])
  133. if y_index == predicted_class_index:
  134. correct_predict += 1
  135. confusion_matrix[y_index][predicted_class_index] += 1
  136. return float(correct_predict/len(X)) , confusion_matrix
  137.  
  138. def _sample_variance(self,class_mean,sample):
  139. sample_sub_mean_transpose = np.subtract(sample,class_mean)
  140. sample_sub_mean =np.transpose([sample_sub_mean_transpose])
  141. return np.matmul(sample_sub_mean,[sample_sub_mean_transpose])
  142.  
  143. class LeastSquares(ML):
  144. def fit(self,X,y):
  145. pass
  146.  
  147. def score(self,X,y):
  148. pass
  149.  
  150. def predict(self,X):
  151. pass
  152.  
  153. def multipy_matrix(matrix_1,matrix_2):
  154. '''
  155. multiply two matrices
  156. --------------------
  157. matrix_1 :
  158. 2 dimantions array
  159. matrix_2 :
  160. 2 dimantion array
  161.  
  162. return :
  163. 2 dimantion array matrix_1 rows * matrix2 columns
  164. '''
  165.  
  166. if len(matrix_1[0]) != len(matrix_2): # check if columns of matrix_1 equal rows of matrix_2
  167. raise ValueError('Number of column of vector 1 must equals Number of rows in second vector')
  168.  
  169. result_rows ,result_columns = len(matrix_1) , len(matrix_2[0])
  170. result_matrix = [[0]*result_columns for i in range(result_rows)]
  171.  
  172. matrix_2_transpose = transpose_matrix(matrix_2)
  173.  
  174. for row_num in range(result_rows):
  175. for column_num in range(result_columns):
  176. result_matrix[row_num][column_num] = dot_produnct(matrix_1[row_num],matrix_2_transpose[column_num])
  177.  
  178. return result_matrix
  179.  
  180.  
  181. def transpose_matrix(matrix):
  182. '''
  183. transpose of a matrix is an operator which flips a matrix over its diagonal
  184. ---------------------------------------------------------------------------
  185. matrix :
  186. disrable transpose matricx (multi dimantion array or vector)
  187.  
  188. return :
  189. transposed matrix
  190. '''
  191. if not type(matrix[0]) == list: # check if the matrix is vector
  192. result_rows = len(matrix)
  193. result_columns = 1
  194. else:
  195. result_rows = len(matrix[0])
  196. result_columns = len(matrix)
  197.  
  198.  
  199. transpose_matrix = [[0]*result_columns for i in range(result_rows)]
  200. for row in range(result_rows):
  201. transpose_matrix[row] = column(matrix,row)
  202.  
  203. return transpose_matrix
  204.  
  205.  
  206. def column(matrix, i):
  207. '''
  208. get column from matrix
  209. --------------------------------------
  210. matrix:
  211. (multi dimantion array or vector)
  212. i :
  213. column index
  214.  
  215. return :
  216. column list
  217. '''
  218. if not type(matrix[0]) == list:
  219. return [matrix[i]]
  220. else:
  221. return [row[i] for row in matrix]
  222.  
  223. def dot_produnct(vector_1,vector_2):
  224. if len(vector_1) != len(vector_2):
  225. raise ValueError('vector_1 and vector_2 must have the same lenght')
  226.  
  227. return sum( [vector_1[i]*vector_2[i] for i in range(len(vector_2))] )
  228.  
  229. def sum_two_vect(f_vect,s_vect):
  230. '''
  231. summation of two vectors
  232. -------------------------
  233. f_vect :
  234. array of first vector
  235. s_vect :
  236. array of second vector
  237. return
  238. summ vector
  239. '''
  240.  
  241. return [x + y for x, y in zip(f_vect, s_vect)]
  242.  
  243. def subtract_two_vect(f_vect,s_vect):
  244. '''
  245. summation of two vectors
  246. -------------------------
  247. f_vect :
  248. array of first vector
  249. s_vect :
  250. array of second vector
  251. return
  252. summ vector
  253. '''
  254.  
  255. return [x - y for x, y in zip(f_vect, s_vect)]
  256.  
  257. def sum_matrices(matrix_1,matrix_2):
  258. if len(matrix_1) != len(matrix_2) | len(matrix_1[0]) != len(matrix_2[0]) :
  259. raise ValueError("The two matrices must be the same size")
  260.  
  261. result_matrix = [[0]*len(matrix_1[0]) for i in range(len(matrix_1))]
  262. for row_num,m1_row in enumerate(matrix_1):
  263. result_matrix[row_num] = sum_two_vect(matrix_1[row_num],matrix_2[row_num])
  264.  
  265. return result_matrix
  266.  
  267. def div_arr_by_num(arr,num):
  268. '''
  269. divide array by number
  270. ----------------------
  271. arr :
  272. array contains the numbers
  273. num :
  274. number you want divide array elemets by it
  275. return :
  276. new array divided by the given number
  277. '''
  278. return [x / num for x in arr]
  279.  
  280. def matmult(a,b):
  281. zip_b = zip(*b)
  282. # uncomment next line if python 3 :
  283. zip_b = list(zip_b)
  284. return [[sum(ele_a*ele_b for ele_a, ele_b in zip(row_a, col_b))
  285. for col_b in zip_b] for row_a in a]
Add Comment
Please, Sign In to add comment