__author__ = 'Pavel Yurgin'

import numpy as np


def read_data(path):
    # Load a CSV whose last column is the class label; the first column of X is
    # a constant -1 bias feature, and the 0/1 labels are remapped to -1/+1.
    Xy = np.genfromtxt(path, delimiter=',')
    X = np.empty(Xy.shape)
    X[:, 0] = -1
    X[:, 1:] = Xy[:, :-1]
    y = Xy[:, -1]
    y[y == 0] -= 1
    return X, y


def normalize(X):
    X = (X - np.mean(X)) / np.std(X)
    return X


def log_loss(M):
    # Logistic loss of the margin M = y * <w, x> and its derivative w.r.t. M.
    return np.log2(1 + np.exp(-M)), -1 / ((np.exp(M) + 1) * np.log(2))


def sigmoid_loss(M):
    # Sigmoid loss of the margin and its derivative w.r.t. M.
    return 2 / (1 + np.exp(M)), -2 * np.exp(M) / (np.exp(M) + 1) ** 2


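# Worked example (added for illustration, not part of the original paste): both
# loss functions take a vector of margins M = y * <w, x> and return the pair
# (loss values, derivatives). For margins [-1, 0, 1] the logistic loss is
# roughly [1.89, 1.00, 0.45], i.e. largest for the misclassified object:
#
#     loss, dloss = log_loss(np.array([-1.0, 0.0, 1.0]))
#     # loss  ~ [ 1.89,  1.00,  0.45]
#     # dloss ~ [-1.06, -0.72, -0.39]

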
def euclid_distance(X, Y):
    dist = np.sqrt(np.sum((X - Y) ** 2))
    return dist


class GradientDescent:
    def __init__(self, *, alpha, threshold=1e-3, loss=sigmoid_loss):
        self.weights = []
        if alpha <= 0:
            raise ValueError("alpha should be positive")
        if threshold <= 0:
            raise ValueError("threshold should be positive")
        self.alpha = alpha
        self.threshold = threshold
        self.loss = loss

    def fit(self, X, y):
        n = X.shape[1]
        self.weights = np.random.uniform(-1 / (2 * n), 1 / (2 * n), size=n)
        errors = []
        it = 0
        while True:
            M = np.dot(X, self.weights) * y               # margins of all objects
            loss, derivative = self.loss(M)
            grad_q = np.sum((derivative.T * (X.T * y)).T, axis=0)
            tmp = self.weights - self.alpha * grad_q      # full gradient step
            errors.append(np.sum(loss))
            if euclid_distance(tmp, self.weights) < self.threshold:
                break
            self.weights = tmp
            it += 1
            if it % 10000 == 0:
                print(it, np.sum(loss))
        return errors

    def predict(self, X):
        return np.sign(np.dot(X, self.weights))


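# Minimal usage sketch (assumed, not in the original paste): GradientDescent
# does full-batch gradient descent and stops once the weight update is smaller
# than `threshold`.
#
#     clf = GradientDescent(alpha=1e-2, threshold=1e-3, loss=sigmoid_loss)
#     history = clf.fit(X, y)      # summed loss per iteration
#     labels = clf.predict(X)      # predictions in {-1, +1}

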
class SGD:
    def __init__(self, *, alpha, loss=log_loss, k=1, n_iter=100):
        if alpha <= 0:
            raise ValueError("alpha should be positive")
        if k <= 0 or not isinstance(k, int):
            raise ValueError("k should be a positive integer")
        if n_iter <= 0 or not isinstance(n_iter, int):
            raise ValueError("n_iter should be a positive integer")
        self.k = k
        self.n_iter = n_iter
        self.alpha = alpha
        self.loss = loss

    def fit(self, X, y):
        n = X.shape[1]
        self.weights = np.random.uniform(-1 / (2 * n), 1 / (2 * n), size=n)
        errors = []
        eta = 1 / len(X)
        q = self.loss(np.dot(X, self.weights) * y)[0].sum()  # initial estimate of the loss functional
        for i in range(self.n_iter):
            idx = np.random.choice(X.shape[0], size=self.k)  # random mini-batch of size k
            x = X[idx]
            sub_y = y[idx]
            M = np.dot(x, self.weights) * sub_y
            loss, derivative = self.loss(M)
            grad_q = np.sum((derivative.T * (x.T * sub_y)).T, axis=0)
            self.weights = self.weights - self.alpha * grad_q
            q = (1 - eta) * q + eta * np.sum(loss)           # exponential moving average of the loss
            errors.append(q)
        return errors

    def predict(self, X):
        return np.sign(np.dot(X, self.weights))


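# Usage sketch (illustrative, not in the original paste): with k=1 this is plain
# stochastic gradient descent; larger k averages the gradient over a random
# mini-batch, and the returned list holds the smoothed loss per iteration.
#
#     clf = SGD(alpha=1e-2, loss=log_loss, k=10, n_iter=1000)
#     history = clf.fit(X, y)
#     labels = clf.predict(X)

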
def get_precision_and_recall(y_pred, y_test, c):
    TP = len([True for i, j in zip(y_pred, y_test) if i == j and j == c])
    FP = len([True for i, j in zip(y_pred, y_test) if i == c and j != c])
    FN = len([True for i, j in zip(y_pred, y_test) if i != c and j == c])

    if TP + FP == 0:
        precision = 0
    else:
        precision = TP / (TP + FP)

    if TP + FN == 0:
        recall = 0
    else:
        recall = TP / (TP + FN)

    return precision, recall


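# Worked example (added for illustration): for y_pred = [1, 1, -1, -1] and
# y_test = [1, -1, -1, 1] with c = 1 there is one true positive, one false
# positive and one false negative, so precision = recall = 1/2:
#
#     get_precision_and_recall([1, 1, -1, -1], [1, -1, -1, 1], 1)  # -> (0.5, 0.5)

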
def print_precision_recall(y_pred, y_test):
    classes = np.unique(y_test)
    for c in classes:
        precision, recall = get_precision_and_recall(y_pred, y_test, c)
        print(c, precision, recall)


def train_test_split(X, y, ratio):
    # Sort by label and cut off the first `ratio` fraction as the test set.
    sorted_args = np.argsort(y)
    X = X[sorted_args]
    y = y[sorted_args]
    bound = int(len(X) * ratio)
    X_test, y_test = X[:bound], y[:bound]
    X_train, y_train = X[bound:], y[bound:]
    return X_test, y_test, X_train, y_train

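# Hypothetical end-to-end sketch (not in the original script): evaluating on a
# held-out split produced by train_test_split.
#
#     X_test, y_test, X_train, y_train = train_test_split(X, y, ratio=0.3)
#     clf = GradientDescent(alpha=1e-2)
#     clf.fit(X_train, y_train)
#     print_precision_recall(clf.predict(X_test), y_test)

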
def test_algorithm(X, y):
    alpha = [1e-6, 1e-4, 1e-2, 1]
    loss_functions = [log_loss, sigmoid_loss]
    for loss in loss_functions:
        for a in alpha:
            gd = GradientDescent(alpha=a, loss=loss)
            q = gd.fit(X, y)


if __name__ == '__main__':
    X, y = read_data("pima-indians-diabetes.csv")
    X = normalize(X)
    gd = GradientDescent(alpha=1e-2, threshold=1e-3, loss=sigmoid_loss)
    # err = gd.fit(X, y)
    # print(err)
    # pred = gd.predict(X)
    # print(pred)
    # print(y)
    # err = gd.fit(X, y)
    # print(err)
    # pred = gd.predict(X)
    # print(pred)
    # print(get_precision_and_recall(pred, y, 1))
    # print(get_precision_and_recall(pred, y, -1))

    sgd = SGD(alpha=1e-2, k=10, n_iter=1000)
    err = sgd.fit(X, y)
    print(err)
    pred = sgd.predict(X)
    print(get_precision_and_recall(pred, y, 1))
    print(get_precision_and_recall(pred, y, -1))