Advertisement
brainuser5705

I still have no idea what I'm doing

Jun 23rd, 2022
47
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.71 KB | None | 0 0
  1. def get_feat_dist(feature):
  2.  
  3. # truncate to max size first
  4.  
  5. # must be from each category
  6. sample_sur, _, sample_not, _ = train_test_split(train_sur, train_not[:max_sample_size], train_size=0.5, random_state=42, shuffle=True)
  7.  
  8. print(len(sample_sur.values))
  9. print(len(sample_not.values))
  10.  
  11. feature_sur = sample_sur[feature]
  12. feature_not = sample_not[feature]
  13.  
  14. results = {}
  15.  
  16. for cat in sorted(train_set[feature].unique()):
  17. sur_num = len(feature_sur[feature_sur==cat].values) / max_sample_size
  18. not_num = len(feature_not[feature_not==cat].values) / max_sample_size
  19. results[cat] = [sur_num, not_num]
  20.  
  21. return results
  22.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement