Advertisement
Guest User

Untitled

a guest
Sep 28th, 2016
60
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.66 KB | None | 0 0
  # ``test`` is an alias of ``elder_with_time_nonan_test`` (same object, not a
  # copy), so the column added below is visible through both names.
  test = elder_with_time_nonan_test
  # Binary label: 1 where the rating is exactly "Recommended", 0 otherwise
  # (missing ratings compare unequal and therefore become 0). The vectorized
  # comparison replaces a per-row Python lambda.
  test['recommended'] = (test['rating'] == "Recommended").astype(int)
  3.  
  def balanced_stratified_sample(df, col="recommended", random_state=None):
      """Down-sample ``df`` so every group of ``col`` has the same row count.

      Rows are grouped by ``col`` and each group is randomly sampled down to
      the size of the smallest group.

      Args:
          df: DataFrame to sample from.
          col: Column whose distinct values define the groups.
          random_state: Optional seed (or ``RandomState``) forwarded to
              ``DataFrame.sample`` for reproducible draws. The default
              ``None`` keeps the sampling non-deterministic.

      Returns:
          A new DataFrame holding the sampled rows of each group, stacked in
          the order ``groupby`` yields the groups and keeping their original
          index labels. When there are no groups (for example ``df`` is
          empty) an empty frame with ``df``'s columns is returned.
      """
      grouped = df.groupby(col)
      group_sizes = grouped.size()

      if group_sizes.empty:
          # Nothing to sample from; ``pd.concat`` would raise on an empty list.
          return df.iloc[0:0].copy()

      minimum_sample_size = int(group_sizes.min())

      # Collect the per-group samples and concatenate once, instead of growing
      # a frame inside the loop (repeated copying, and concatenating onto an
      # empty DataFrame can disturb column order and dtypes).
      samples = [
          g_df.sample(minimum_sample_size, random_state=random_state)
          for _, g_df in grouped
      ]
      return pd.concat(samples)
  17.  
  18.  
  # Build the class-balanced sample and report the per-class row counts before
  # and after balancing.
  stratified_df = balanced_stratified_sample(test)
  stratified_counts = stratified_df.groupby("recommended").size()
  original_counts = test.groupby("recommended").size()
  # Single-argument print(...) runs on both Python 2 and Python 3; the format
  # strings reproduce exactly what the old print statements emitted (the first
  # label ended in a space, hence the two spaces before the counts).
  print("stratified sample:  %s" % (stratified_counts,))
  print("original sample: %s" % (original_counts,))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement