Advertisement
Not a member of Pastebin yet? Sign Up — it unlocks many cool features!
import numpy as np
import matplotlib.pyplot as plt

from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
# FIX: sklearn.cross_validation was removed in scikit-learn 0.20;
# train_test_split now lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split
- data = load_breast_cancer()
- np.random.seed(0)
- X = data.data
- Y = data.target
- X = X / X.max(axis=0)
- X_train, X_test, Y_train, Y_test = train_test_split(X, Y.ravel(), test_size=0.33)
class LogReg(object):
    """Binary logistic regression trained with batch gradient descent.

    Usage: ``fit(X, Y, alpha, itr)`` then ``predict(X)``. The first call
    to ``fit`` initializes the weight vector ``theta`` to zeros; ``cst``
    records the cross-entropy cost at every iteration (see ``costgraph``).
    """

    def __init__(self):
        pass

    def sigmoid(self, X):
        """Elementwise logistic function 1 / (1 + e^(-x))."""
        return 1 / (1 + np.exp(-X))

    def hypothesis(self, Xp):
        """Predicted probability P(y = 1 | Xp) for each row of Xp.

        BUG FIX: the original returned the raw linear score
        ``Xp.dot(self.theta)`` and never used ``sigmoid`` at all, so the
        model was effectively linear regression thresholded at 0.5. The
        logistic hypothesis is sigmoid(X . theta).
        """
        return self.sigmoid(Xp.dot(self.theta))

    def cost(self):
        """Mean binary cross-entropy over the stored training set.

        BUG FIX: the original rescaled predictions by their max, took an
        absolute value, and computed ``log(1 - log(h))`` instead of
        ``log(1 - h)`` — workarounds for the unbounded linear hypothesis.
        With a proper sigmoid output h in (0, 1) the standard form applies:
            J = -mean( y*log(h) + (1-y)*log(1-h) )
        Predictions are clipped away from 0/1 to avoid log(0).
        """
        h = self.hypothesis(self.x)
        eps = 1e-12  # numerical floor so np.log never sees 0 or 1 exactly
        h = np.clip(h, eps, 1 - eps)
        return float(-np.mean(self.y * np.log(h) + (1 - self.y) * np.log(1 - h)))

    def fit(self, X, Y, alpha, itr):
        """Fit theta by batch gradient descent.

        X     : (n_samples, n_features) design matrix.
        Y     : length-n array of 0/1 labels (reshaped to a column).
        alpha : learning rate.
        itr   : number of gradient-descent iterations.

        The gradient of the cross-entropy cost is X.T (h - y) / n, which
        is exactly the update below now that ``hypothesis`` applies the
        sigmoid.
        """
        self.x = np.array(X)
        self.y = np.array(Y).reshape(-1, 1)
        self.alpha = alpha
        self.cst = []  # cost history, one entry per iteration
        self.theta = np.zeros([self.x.shape[1], self.y.shape[1]])
        for i in range(itr):
            self.error = self.hypothesis(self.x) - self.y
            self.theta = self.theta - (self.alpha / len(self.x)) * self.x.T.dot(self.error)
            self.cst.append(self.cost())

    def costgraph(self):
        """Plot the recorded cost per iteration (requires matplotlib)."""
        plt.plot(range(len(self.cst)), self.cst)
        plt.grid(True)
        plt.show()

    def predict(self, X):
        """Return hard 0/1 labels: 1 where P(y=1) >= 0.5, else 0."""
        output = self.hypothesis(X)
        output[output >= 0.5] = 1
        output[output < 0.5] = 0
        return output

    # Backward-compatible alias: the original method name was misspelled
    # and existing callers (the driver below) use it.
    preditc = predict
# Train the from-scratch model and report its held-out accuracy.
a = LogReg()
a.fit(X_train, Y_train, alpha=.07, itr=5000)
output = a.preditc(X_test)
print("Scratch Accuracy : ", accuracy_score(Y_test, output))

# Baseline: scikit-learn's LogisticRegression with default settings.
model = LogisticRegression()
model.fit(X_train, Y_train)
# FIX: the original label string was missing its closing parenthesis.
print("Scikit Accuracy (With Default Parameters): ", accuracy_score(Y_test, model.predict(X_test)))

# Show the cost-per-iteration curve for the from-scratch model.
a.costgraph()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement