Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
class MyLogReg:
    """Binary logistic regression trained with (mini-batch) gradient descent.

    Supports L1 / L2 / elastic-net regularization, an optional stochastic
    sample per step (``sgd_sample``), a callable learning-rate schedule, and
    evaluation metrics (accuracy / precision / recall / f1 / roc_auc).

    Parameters
    ----------
    n_iter : int            -- number of gradient steps.
    learning_rate : float or callable(iteration) -> float.
    weights : ndarray or None -- overwritten by ``fit`` (re-initialized to ones).
    metric : str or None    -- metric name tracked during/after training.
    verbose : int           -- print loss every ``verbose`` iterations; 0 = silent.
    score : float           -- last metric value (set by ``fit``).
    reg : str or None       -- "l1", "l2", "elasticnet" or None.
    l1_coef, l2_coef : float -- regularization strengths.
    sgd_sample : int, float or None -- mini-batch size (float = fraction of rows);
                                       None = full batch.
    random_state : int      -- seed for the mini-batch sampler.
    """

    def __init__(self, n_iter=100, learning_rate=0.1, weights=None, metric=None,
                 verbose=1, score=0, reg=None, l1_coef=0, l2_coef=0,
                 sgd_sample=None, random_state=42):
        self.n_iter = n_iter
        self.learning_rate = learning_rate
        self.weights = weights
        self.verbose = verbose
        self.metric = metric
        self.score = score
        self.reg = reg
        self.l1_coef = l1_coef
        self.l2_coef = l2_coef
        self.sgd_sample = sgd_sample
        self.random_state = random_state

    def __str__(self):
        attributes = ', '.join(f"{key}={value}" for key, value in vars(self).items())
        return f"MyLogReg class: {attributes}"

    def __repr__(self):
        # Same human-readable form as __str__.
        return self.__str__()

    @staticmethod
    def _sigmoid(z):
        """Logistic function, element-wise."""
        return 1.0 / (1.0 + np.exp(-z))

    def _regularization(self):
        """Gradient contribution of the configured regularizer (0 if none)."""
        if self.reg == "l1":
            return self.l1_coef * np.sign(self.weights)
        if self.reg == "l2":
            return self.l2_coef * 2 * self.weights
        if self.reg == "elasticnet":
            return (self.l1_coef * np.sign(self.weights)
                    + self.l2_coef * 2 * self.weights)
        return 0

    @staticmethod
    def confusion_matrix_np(y_true, y_pred):
        """Confusion matrix indexed as [true_label, predicted_label].

        Inputs are converted to plain integer numpy arrays first, so pandas
        Series with arbitrary indices are handled correctly (the original
        ``y_true[i]`` did a label-based lookup and could raise KeyError).
        """
        y_true = np.asarray(y_true).astype(int)
        y_pred = np.asarray(y_pred).astype(int)
        n_classes = len(np.unique(y_true))
        conf_matrix = np.zeros((n_classes, n_classes))
        for t, p in zip(y_true, y_pred):
            conf_matrix[t, p] += 1
        return conf_matrix

    @staticmethod
    def metrics(conf_matrix, metrics, y, y_pred_vec):
        """Compute a classification metric.

        ``conf_matrix`` is used for the threshold-based metrics; ``y`` /
        ``y_pred_vec`` (true labels and predicted probabilities) are used
        for "roc_auc" only. Returns None for an unknown metric name.
        """
        if metrics == "accuracy":
            # trace == TP + TN for the binary case, generalizes to K classes
            return np.trace(conf_matrix) / np.sum(conf_matrix)
        if metrics == "precision":
            return conf_matrix[1, 1] / (conf_matrix[1, 1] + conf_matrix[0, 1])
        if metrics == "recall":
            return conf_matrix[1, 1] / (conf_matrix[1, 1] + conf_matrix[1, 0])
        if metrics == "f1":
            prec = conf_matrix[1, 1] / (conf_matrix[1, 1] + conf_matrix[0, 1])
            rec = conf_matrix[1, 1] / (conf_matrix[1, 1] + conf_matrix[1, 0])
            return 2 * (prec * rec) / (prec + rec)
        if metrics == "roc_auc":
            # AUC via the Mann–Whitney U statistic: for every negative,
            # count positives ranked strictly above it plus 0.5 per tie,
            # normalized by (#pos * #neg).  Fixes two bugs in the original:
            # the denominator used a loop-local ones count instead of the
            # total number of positives, and np.unique dropped duplicate
            # positive scores so ties were under-counted.
            y_arr = np.asarray(y)
            scores = np.round(np.asarray(y_pred_vec), 10)  # round to make float ties detectable
            pos = scores[y_arr == 1]
            neg = scores[y_arr == 0]
            wins = 0.0
            for s in neg:
                wins += np.count_nonzero(pos > s) + 0.5 * np.count_nonzero(pos == s)
            return wins / (len(pos) * len(neg))
        return None

    def fit(self, X_fit: pd.DataFrame, y_fit: pd.Series, n_iter=None, lr=None, verbose=None):
        """Train the model with gradient descent on the log-loss.

        A bias column of ones is prepended to ``X_fit``; weights are
        re-initialized to ones. ``n_iter`` / ``lr`` / ``verbose`` override
        the constructor values when given. After training, ``self.score``
        holds ``self.metric`` evaluated with the FINAL weights (the original
        scored the weights from before the last update).
        """
        random.seed(self.random_state)
        X = X_fit.copy()
        X.insert(0, "One", 1)  # bias column
        n = X.shape[0]
        self.weights = np.ones(X.shape[1])

        if n_iter is not None:
            self.n_iter = n_iter
        if lr is not None:
            self.learning_rate = lr
        if verbose is not None:
            self.verbose = verbose

        # Resolve the mini-batch size locally: the original overwrote
        # self.sgd_sample, silently changing the hyperparameter for later fits.
        batch_size = self.sgd_sample
        if batch_size is None:
            batch_size = n
        elif isinstance(batch_size, float):
            batch_size = int(n * batch_size)

        X_np = X.to_numpy()
        y_np = np.asarray(y_fit)

        for i in range(1, self.n_iter + 1):
            step = self.learning_rate(i) if callable(self.learning_rate) else self.learning_rate

            batch_idx = random.sample(range(n), batch_size)
            X_batch = X_np[batch_idx]
            y_batch = y_np[batch_idx]

            y_pred_batch = self._sigmoid(X_batch @ self.weights)
            grad = (y_pred_batch - y_batch) @ X_batch / batch_size + self._regularization()
            self.weights -= step * grad

            # Loss/metric are only computed when actually printed (the
            # original recomputed both on every iteration regardless).
            if self.verbose and i % self.verbose == 0:
                y_pred_all = self._sigmoid(X_np @ self.weights)
                clipped = np.clip(y_pred_all, 1e-15, 1 - 1e-15)  # avoid log(0)
                log_loss = -np.mean(y_np * np.log(clipped) + (1 - y_np) * np.log(1 - clipped))
                if self.metric is not None:
                    conf = self.confusion_matrix_np(y_np, np.where(y_pred_all > 0.5, 1, 0))
                    print(f"{i}:{log_loss}|{self.metric}:{self.metrics(conf, self.metric, y_np, y_pred_all)}")
                else:
                    print(f"{i}:{log_loss}")

        # Final score from the fully trained weights.
        y_pred_all = self._sigmoid(X_np @ self.weights)
        conf = self.confusion_matrix_np(y_np, np.where(y_pred_all > 0.5, 1, 0))
        self.score = self.metrics(conf, self.metric, y_np, y_pred_all)

    def get_coef(self):
        """Learned feature weights, excluding the bias term."""
        return self.weights[1:]

    def predict(self, X_test: pd.DataFrame):
        """Hard class labels (0/1) at the 0.5 probability threshold."""
        return np.where(self.predict_proba(X_test) > 0.5, 1, 0)

    def predict_proba(self, X_test: pd.DataFrame):
        """Predicted probability of class 1 for each row of ``X_test``."""
        X = X_test.copy()
        X.insert(0, "One", 1)  # same bias column as in fit
        return self._sigmoid(X.to_numpy() @ self.weights)

    def get_best_score(self):
        """Last metric value from fit, or 0 when no metric was tracked."""
        if self.score is not None:
            return float(self.score)
        return 0
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement