Advertisement
brainuser5705

i have no idea what i'm doing

Jun 23rd, 2022
770
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. from imblearn.datasets import make_imbalance
  2.  
  3. max_sample_size = min(sur_total, not_total)
  4.  
  5. def get_equal_dist(feature, size_per, min_size):
  6.    
  7.     fin_count = round(max_sample_size * size_per)
  8.     assert min_size >= fin_count
  9.    
  10.     ratios = {}
  11.     for cat in train_set[feature].unique():
  12.        
  13.         sur_count = train_sur[train_sur[feature] == cat]
  14.         not_count = train_not[train_not[feature] == cat]
  15.        
  16.         if (sur_count >= min_size) and (not_count >= min_size):
  17.             ratios[cat] = fin_count
  18.    
  19.     sample_sur, sample_not = make_imbalance(train_sur, train_not[:max_sample_size],
  20.                                             sampling_strategy=ratios,
  21.                                             random_state=42)
  22.     return sample_sur, sample_not
  23.    
Advertisement
RAW Paste Data Copied
Advertisement