Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Bind the source frame to a shorter name. This is an alias, not a copy, so
# the column added below also appears on elder_with_time_nonan_test.
# NOTE(review): presumably a pandas DataFrame with a 'rating' column -- confirm.
test = elder_with_time_nonan_test

# Binary flag: 1 where rating is exactly "Recommended", 0 for anything else.
test['recommended'] = test['rating'].map(
    lambda rating: int(rating == "Recommended")
)
def balanced_stratified_sample(df, col="recommended", random_state=None):
    """Downsample every group of ``col`` to the size of the smallest group.

    Rows are drawn without replacement, so each distinct value of ``col``
    ends up with the same number of rows in the result.

    Args:
        df: pandas DataFrame to sample from.
        col: Name of the column whose distinct values define the groups.
        random_state: Optional seed (or ``numpy.random.RandomState``) passed
            to ``DataFrame.sample`` for reproducible draws. The default
            ``None`` keeps the original non-deterministic behaviour. Note
            that an integer seed is reused for every group.

    Returns:
        A new DataFrame holding the sampled rows of each group, stacked in
        group order, with the original index labels preserved. If ``df``
        yields no groups, an empty frame with the columns of ``df``.
    """
    grouped = df.groupby(col)
    group_sizes = grouped.size()

    # No groups (empty frame, or every key is NaN): there is nothing to
    # balance, and pd.concat([]) would raise, so return an empty frame that
    # still carries the input's columns.
    if group_sizes.empty:
        return df.iloc[0:0].copy()

    smallest = int(group_sizes.min())

    # Collect the per-group samples and concatenate once. Seeding the result
    # with an empty DataFrame and concatenating inside the loop copies the
    # accumulated rows on every pass and can disturb column order/dtypes.
    samples = [
        group.sample(smallest, random_state=random_state)
        for _, group in grouped
    ]
    return pd.concat(samples)
# Build the class-balanced subsample of `test`.
stratified_df = balanced_stratified_sample(test)

# Report the per-class row counts after and before balancing.
# print is called with one pre-formatted string so the statement is valid on
# both Python 2 and Python 3 and emits the same text as the old
# `print "label", value` form (which inserted a space between its items).
# The value is wrapped in a one-element tuple so `%` treats it as a single
# argument.
print("stratified sample:  %s" % (stratified_df.groupby("recommended").size(),))
print("original sample: %s" % (test.groupby("recommended").size(),))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement