Guest User

Untitled

a guest
Nov 15th, 2018
95
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.01 KB | None | 0 0
  1. def cdf_diff(df, var, grp='label', col=None, rm_outlier=None, hard_lim=None, ax=None, xlim=None):
  2. '''Plot cummulative distributions of multiple groups for comparison.
  3. Arguments:
  4. df: DataFrame
  5. var: string, name of column to be plotted
  6. grp: string, grouping variable
  7. col: list, colors to use for each group
  8. rm_outlier: None|float, remove datapoints beyond this many sigma.
  9. ax: axis on which to plot. Default none will return a new figure
  10.  
  11. Examples:
  12. cdf_diff(feats_labeled,var='Creatinine' ,rm_outlier=4.0)
  13. fig, ax = plt.subplots(1, 2)
  14. psLearn.cdf_diff(feats_labeled,var='Creatinine' ,ax=ax[0],rm_outlier=4.0)
  15. psLearn.cdf_diff(feats_labeled,var='Sodium Level',ax=ax[1])
  16. '''
  17. if col is None:
  18. col = ['green', 'red']
  19. import statsmodels.api as sm
  20. if ax is None:
  21. fig, ax = plt.subplots(1, 1)
  22. grps = df[grp].unique()
  23. if len(df[var].unique()) == 2:
  24. df.groupby(grp)[var].mean().plot(ax = ax,kind='bar',color=col)
  25. ax.set_title(var)
  26. else:
  27. for g in grps:
  28. sample = df[df[grp]==g][var]
  29. sample = sample[np.isfinite(sample.values)]
  30. if rm_outlier is not None:
  31. sigma = sample.std()
  32. mu = sample.mean()
  33. sample = sample[sample > mu - rm_outlier * sigma ]
  34. sample = sample[sample < mu + rm_outlier * sigma ]
  35. if hard_lim is not None:
  36. sample = sample[sample > hard_lim[0] ]
  37. sample = sample[sample < hard_lim[1] ]
  38.  
  39. ecdf = sm.distributions.ECDF(sample)
  40. sample = sample[ecdf(sample) < 0.99]
  41.  
  42. x = np.linspace(min(sample), max(sample), 1000)
  43. y = ecdf(x)
  44. #x = np.append(x, [max(sample)])
  45. #y = np.append(y, [0])
  46. ax.step(x, y,label='%s = %s' % (grp,str(g)),c=col[int(g)])
  47. ax.set_title(var)
  48. ax.set_ylim([0,1])
  49. if xlim:
  50. ax.set_xlim(xlim)
Add Comment
Please, Sign In to add comment