Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from imblearn.datasets import make_imbalance
# Cap used for sampling below: the smaller of ``sur_total`` and ``not_total``.
max_sample_size = min((sur_total, not_total))
def get_equal_dist(feature, size_per, min_size):
    """Build a per-category sampling plan for ``feature`` and apply it.

    Reads the module-level names ``train_set``, ``train_sur``, ``train_not``
    and ``max_sample_size``.

    Args:
        feature: Column name whose distinct values (taken from ``train_set``)
            define the categories.
        size_per: Fraction of ``max_sample_size`` requested for each
            category; the target count is ``round(max_sample_size * size_per)``.
        min_size: Minimum number of rows a category must have in *both*
            ``train_sur`` and ``train_not`` to be included in the plan.

    Returns:
        The two values returned by ``make_imbalance``.

    Raises:
        AssertionError: If ``min_size`` is smaller than the per-category
            target count, i.e. a qualifying category could not be guaranteed
            to supply enough rows.
    """
    fin_count = round(max_sample_size * size_per)
    assert min_size >= fin_count, (
        f"min_size ({min_size}) must be >= per-category target ({fin_count})"
    )

    ratios = {}
    for cat in train_set[feature].unique():
        # Count matching rows. Comparing the filtered frames themselves to an
        # integer is element-wise and cannot be used in an ``if``.
        sur_count = int((train_sur[feature] == cat).sum())
        not_count = int((train_not[feature] == cat).sum())
        if sur_count >= min_size and not_count >= min_size:
            ratios[cat] = fin_count

    # NOTE(review): make_imbalance is documented as taking (X, y); here the
    # second argument is a row slice of ``train_not`` rather than a target
    # vector, and ``ratios`` is keyed by values of ``feature``. Left as in the
    # original -- confirm the intended X / y before relying on the output.
    sample_sur, sample_not = make_imbalance(
        train_sur,
        train_not[:max_sample_size],
        sampling_strategy=ratios,
        random_state=42,
    )
    return sample_sur, sample_not
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement