Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- __author__ = 'Pavel Yurgin'
- import numpy as np
def read_data(path):
    """Load a CSV dataset and prepare it for a linear classifier.

    Reads a comma-separated file whose last column is the class label and
    whose preceding columns are features.  A constant bias feature of -1
    is prepended to the feature matrix, and labels are remapped from
    {0, 1} to {-1, 1}.

    Parameters
    ----------
    path : str
        Path to the CSV file.

    Returns
    -------
    X : ndarray, shape (n_samples, n_features + 1)
        Feature matrix whose first column is the constant -1 bias term.
    y : ndarray, shape (n_samples,)
        Labels in {-1, 1}.
    """
    Xy = np.genfromtxt(path, delimiter=',')
    # np.empty_like replaces the discouraged direct np.ndarray(shape)
    # constructor; every cell is overwritten below, so no zero-fill needed.
    X = np.empty_like(Xy)
    X[:, 0] = -1            # bias column
    X[:, 1:] = Xy[:, :-1]   # original features, shifted right by one
    y = Xy[:, -1]
    y[y == 0] -= 1          # map class 0 -> -1 (class 1 stays 1)
    return X, y
def normalize(X):
    """Standardize X to zero mean and unit variance.

    NOTE(review): the mean and std are computed over the entire array,
    not per column, so the constant bias column is rescaled too —
    confirm this is intended.
    """
    centered = X - np.mean(X)
    return centered / np.std(X)
def log_loss(M):
    """Logistic loss on margins M.

    Returns a pair: the element-wise loss log2(1 + e^{-M}) and its
    derivative with respect to M.
    """
    value = np.log2(1 + np.exp(-M))
    slope = -1 / ((np.exp(M) + 1) * np.log(2))
    return value, slope
def sigmoid_loss(M):
    """Sigmoid loss on margins M.

    Returns a pair: the element-wise loss 2 / (1 + e^M) and its
    derivative with respect to M.
    """
    value = 2 / (1 + np.exp(M))
    slope = -2 * np.exp(M) / (np.exp(M) + 1) ** 2
    return value, slope
def euclid_distance(X, Y):
    """Return the Euclidean (L2) distance between X and Y."""
    diff = X - Y
    return np.sqrt((diff * diff).sum())
class GradientDescent:
    """Full-batch gradient descent for a linear classifier.

    Repeatedly steps against the summed loss gradient over the whole
    training set and stops once the weight update becomes smaller than
    ``threshold`` (Euclidean norm).
    """

    def __init__(self, *, alpha, threshold=1e-3, loss=sigmoid_loss):
        self.weights = []
        if alpha <= 0:
            raise ValueError("alpha should be positive")
        if threshold <= 0:
            raise ValueError("threshold should be positive")
        self.alpha = alpha
        self.threshold = threshold
        self.loss = loss

    def fit(self, X, y):
        """Train on X / y (labels in {-1, 1}); return the loss history."""
        n_features = X.shape[1]
        # Small symmetric random init, scaled by the feature count.
        bound = 1 / (2 * n_features)
        self.weights = np.random.uniform(-bound, bound, size=n_features)
        history = []
        step = 0
        while True:
            margins = np.dot(X, self.weights) * y
            loss, derivative = self.loss(margins)
            # Gradient of total loss: sum_i derivative_i * y_i * x_i
            grad_q = np.sum((derivative.T * (X.T * y)).T, axis=0)
            candidate = self.weights - self.alpha * grad_q
            history.append(np.sum(loss))
            # Converged once the step is shorter than the threshold.
            if euclid_distance(candidate, self.weights) < self.threshold:
                break
            self.weights = candidate
            step += 1
            if step % 10000 == 0:
                print(step, np.sum(loss))
        return history

    def predict(self, X):
        """Return sign of the linear score for each row of X."""
        return np.sign(np.dot(X, self.weights))
class SGD:
    """Mini-batch stochastic gradient descent for a linear classifier.

    Each of ``n_iter`` iterations samples ``k`` training rows (with
    replacement) and takes one gradient step on that mini-batch.
    """

    def __init__(self, *, alpha, loss=log_loss, k=1, n_iter=100):
        if alpha <= 0:
            raise ValueError("alpha should be positive")
        if k <= 0 or not isinstance(k, int):
            raise ValueError("k should be a positive integer")
        if n_iter <= 0 or not isinstance(n_iter, int):
            raise ValueError("n_iter should be a positive integer")
        self.k = k
        self.n_iter = n_iter
        self.alpha = alpha
        self.loss = loss

    def fit(self, X, y):
        """Train on X / y (labels in {-1, 1}); return smoothed loss history."""
        n_features = X.shape[1]
        bound = 1 / (2 * n_features)
        self.weights = np.random.uniform(-bound, bound, size=n_features)
        history = []
        eta = 1 / len(X)  # smoothing rate for the running loss estimate
        q = self.loss(np.dot(X, self.weights) * y)[0].sum()
        for _ in range(self.n_iter):
            batch = np.random.choice(X.shape[0], size=self.k)
            xb = X[batch]
            yb = y[batch]
            margins = np.dot(xb, self.weights) * yb
            loss, derivative = self.loss(margins)
            # Mini-batch gradient: sum_i derivative_i * y_i * x_i
            grad_q = np.sum((derivative.T * (xb.T * yb)).T, axis=0)
            self.weights = self.weights - self.alpha * grad_q
            # Exponential moving average of the batch loss.
            q = (1 - eta) * q + eta * np.sum(loss)
            history.append(q)
        return history

    def predict(self, X):
        """Return sign of the linear score for each row of X."""
        return np.sign(np.dot(X, self.weights))
def get_precision_and_recall(y_pred, y_test, c):
    """Compute precision and recall for class ``c``.

    Parameters
    ----------
    y_pred : sequence
        Predicted labels.
    y_test : sequence
        True labels.
    c : hashable
        The class of interest.

    Returns
    -------
    (precision, recall) : tuple
        Each is 0 when its denominator would be zero.
    """
    pairs = list(zip(y_pred, y_test))
    TP = sum(1 for pred, true in pairs if pred == c and true == c)
    FP = sum(1 for pred, true in pairs if pred == c and true != c)
    # Bug fix: a false negative is a true member of class c predicted as
    # something else.  The original test (pred != c and true != pred) only
    # coincided with this in the binary {-1, 1} case.
    FN = sum(1 for pred, true in pairs if pred != c and true == c)
    precision = TP / (TP + FP) if TP + FP else 0
    # Bug fix: recall's zero-guard must check TP + FN, not TP + FP.
    recall = TP / (TP + FN) if TP + FN else 0
    return precision, recall
def print_precision_recall(y_pred, y_test):
    """Print per-class precision and recall, one line per class."""
    for label in np.unique(y_test):
        precision, recall = get_precision_and_recall(y_pred, y_test, label)
        print(label, precision, recall)
def train_test_split(X, y, ratio):
    """Randomly split (X, y) into test and train partitions.

    Parameters
    ----------
    X : ndarray
        Sample matrix.
    y : ndarray
        Labels, aligned with X.
    ratio : float in [0, 1]
        Fraction of samples placed in the test partition.

    Returns
    -------
    X_test, y_test, X_train, y_train

    Bug fix: the original sorted the data by label before slicing, so
    the test set consisted exclusively of the lowest-label samples.  The
    data is now shuffled with a random permutation so both partitions
    keep the overall class mix.
    """
    order = np.random.permutation(len(X))
    X = X[order]
    y = y[order]
    bound = int(len(X) * ratio)
    X_test, y_test = X[:bound], y[:bound]
    X_train, y_train = X[bound:], y[bound:]
    return X_test, y_test, X_train, y_train
def test_alghoritm(X, y):
    """Grid sweep: fit a GradientDescent model for every (loss, alpha) pair."""
    alphas = [1e-6, 1e-4, 1e-2, 1]
    for loss in (log_loss, sigmoid_loss):
        for a in alphas:
            model = GradientDescent(alpha=a, loss=loss)
            q = model.fit(X, y)
if __name__ == '__main__':
    # Load the dataset, standardize the features, then train and
    # evaluate a mini-batch SGD classifier on the full data.
    X, y = read_data("pima-indians-diabetes.csv")
    X = normalize(X)

    # Constructed but not trained here; kept for experimentation.
    gd = GradientDescent(alpha=1e-2, threshold=1e-3, loss=sigmoid_loss)

    sgd = SGD(alpha=1e-2, k=10, n_iter=1000)
    err = sgd.fit(X, y)
    print(err)
    pred = sgd.predict(X)
    print(get_precision_and_recall(pred, y, 1))
    print(get_precision_and_recall(pred, y, -1))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement