Advertisement
Dicere

LR

May 30th, 2023 (edited)
944
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 5.47 KB | Source Code | 0 0
  1. class MyLogReg():
  2.     def __init__(self, n_iter=100, learning_rate=0.1, weights=None, metric=None, verbose=1, score=0, reg=None, l1_coef=0, l2_coef=0,sgd_sample=None,random_state=42):
  3.         self.n_iter = n_iter
  4.         self.learning_rate = learning_rate
  5.         self.weights = weights
  6.         self.verbose = verbose
  7.         self.metric = metric
  8.         self.score = score
  9.         self.reg = reg
  10.         self.l1_coef = l1_coef
  11.         self.l2_coef = l2_coef
  12.         self.sgd_sample  = sgd_sample
  13.         self.random_state  = random_state
  14.     def __str__(self):
  15.         attributes = ', '.join(f"{key}={value}" for key, value in vars(self).items())
  16.         return f"MyLogReg class: {attributes}"
  17.  
  18.     def __repr__(self):
  19.         attributes = ', '.join(f"{key}={value}" for key, value in vars(self).items())
  20.         return f"MyLogReg class: {attributes}"
  21.  
  22.     @staticmethod
  23.     def confusion_matrix_np(y_true, y_pred):
  24.         K = len(np.unique(y_true))
  25.         conf_matrix = np.zeros((K, K))
  26.         for i in range(len(y_true)):
  27.             conf_matrix[y_true[i]][y_pred[i]] += 1
  28.         return conf_matrix
  29.        
  30.     @staticmethod
  31.     def metrics(conf_matrix,metrics,y,y_pred_vec):
  32.       if metrics == "accuracy":
  33.         res = (conf_matrix[1,1]+conf_matrix[0,0])/np.sum(conf_matrix)
  34.         return res
  35.       if metrics == "precision":
  36.         res = conf_matrix[1,1]/(conf_matrix[1,1]+conf_matrix[0,1])
  37.         return res
  38.       if metrics == "recall":
  39.         res = conf_matrix[1,1]/(conf_matrix[1,1]+conf_matrix[1,0])
  40.         return res
  41.       if metrics == "f1":
  42.         prec = conf_matrix[1,1]/(conf_matrix[1,1]+conf_matrix[0,1])
  43.         rec = conf_matrix[1,1]/(conf_matrix[1,1]+conf_matrix[1,0])
  44.         res = 2*(prec*rec)/(prec+rec)
  45.         return res
  46.       if metrics == "roc_auc":
  47.         concat = np.transpose(np.vstack((y,np.round(y_pred_vec,10))))
  48.         concat = concat[np.argsort(concat[:, 1])[::-1]]
  49.         zero_indices = np.where(concat[:, 0] == 0)[0]
  50.         arr_res=[]
  51.  
  52.         for i in zero_indices:
  53.             ones_indices = np.where(concat[:i, 0] == 1)[0]
  54.             unique_ones_indices = np.unique(concat[ones_indices, 1])
  55.             c = np.count_nonzero(concat[i, 1] != unique_ones_indices) + 0.5 * np.count_nonzero(concat[i, 1] == unique_ones_indices)
  56.             arr_res.append(c)
  57.  
  58.         auc_roc=sum(arr_res)/(len(zero_indices)*len(ones_indices))
  59.         return auc_roc
  60.  
  61.     def fit(self, X_fit: pd.DataFrame, y_fit: pd.Series, n_iter=None, lr=None, verbose=None):
  62.       random.seed(self.random_state)
  63.       X = X_fit.copy()
  64.       y = y_fit.copy()
  65.       X.insert(0, "One", 1)
  66.       n = X.shape[0]
  67.       self.weights =  np.ones(X.shape[1])
  68.  
  69.       def regularization(self):
  70.           if self.reg == "l1":
  71.               return self.l1_coef * np.sign(self.weights)
  72.           elif self.reg == "l2":
  73.               return self.l2_coef * 2 * self.weights
  74.           elif self.reg == "elasticnet":
  75.               return self.l1_coef * np.sign(self.weights) + self.l2_coef * 2 * self.weights
  76.           else:
  77.               return 0
  78.  
  79.       if n_iter is not None:
  80.           self.n_iter = n_iter
  81.       if lr is not None:
  82.           self.learning_rate = lr
  83.       if verbose is not None:
  84.           self.verbose = verbose
  85.       if self.sgd_sample is None:
  86.           self.sgd_sample = X.shape[0]
  87.  
  88.       if type(self.sgd_sample) == float:
  89.               self.sgd_sample = int(len(X)*self.sgd_sample)  
  90.  
  91.       for i in range(1, self.n_iter + 1):
  92.  
  93.         if callable(self.learning_rate):
  94.             lr = self.learning_rate(i)
  95.         else:
  96.             lr = self.learning_rate    
  97.  
  98.         sample_rows_idx = random.sample(range(X.shape[0]), self.sgd_sample)
  99.  
  100.         X_sgd = X.iloc[sample_rows_idx]
  101.         y_sgd = y.iloc[sample_rows_idx]
  102.  
  103.         ### pred vec on sample
  104.         y_pred_vec_sgd = 1 / (1+ np.exp(-1*(X_sgd.to_numpy() @ self.weights)))
  105.        
  106.         ### LogLoss on all data
  107.         y_pred_vec = 1 / (1+ np.exp(-1*(X.to_numpy() @ self.weights)))
  108.         y_without_null = np.clip(y_pred_vec, 1e-15, 1 - 1e-15)
  109.         LogLoss = -1/n*sum(y*np.log(y_without_null)+(1-y)*np.log(1-y_without_null))
  110.  
  111.         err=np.subtract(y_pred_vec_sgd,y_sgd)
  112.         grad_vec = (1 / X_sgd.shape[0]) * (err @ (X_sgd.to_numpy())) + regularization(self)
  113.  
  114.         # calculate metrics
  115.         predict_bin = np.where( y_pred_vec > 0.5, 1, 0 )
  116.         conf_matrix = self.confusion_matrix_np(y,predict_bin)
  117.  
  118.         self.weights -= lr * grad_vec
  119.         self.score = self.metrics(conf_matrix,self.metric,y,y_pred_vec)
  120.  
  121.         if i !=0 and self.verbose != 0:
  122.           if i % self.verbose == 0 and self.metric != None:
  123.             print(f"{i}:{LogLoss}|{self.metric}:{self.metrics(conf_matrix,self.metric,y,y_pred_vec)}")
  124.           elif i % self.verbose == 0 and self.metric == None:
  125.             print(f"{i}:{LogLoss}")
  126.  
  127.     def get_coef(self):
  128.         return self.weights[1:]
  129.  
  130.     def predict(self, X_test: pd.DataFrame):
  131.       X = X_test.copy()
  132.       X.insert(0, "One", 1)
  133.       y_pred_vec = 1 / (1+ np.exp(-1*(X.to_numpy() @ self.weights)))
  134.       res = np.where( y_pred_vec > 0.5, 1, 0 )
  135.       return res
  136.      
  137.     def predict_proba(self, X_test: pd.DataFrame):
  138.       X = X_test.copy()
  139.       X.insert(0, "One", 1)
  140.       y_pred_vec = 1 / (1+ np.exp(-1*(X.to_numpy() @ self.weights)))
  141.       return y_pred_vec
  142.    
  143.     def get_best_score(self):
  144.         if self.score is not None:
  145.               return float(self.score)
  146.         else:
  147.               return 0
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement