# Untitled

def cdf_diff(df, var, grp='label', col=None, rm_outlier=None, hard_lim=None, ax=None, xlim=None):
    '''Plot cumulative distributions of multiple groups for comparison.

    If `var` takes exactly two distinct values, a bar chart of the per-group
    mean is drawn instead of the empirical CDFs.

    Arguments:
        df: DataFrame
        var: string, name of column to be plotted
        grp: string, grouping variable
        col: list, colors to use for each group. Default ['green', 'red'].
            A group label for which col[int(label)] is valid uses that color;
            any other label takes the color at its position in
            df[grp].unique(), wrapping around the list.
        rm_outlier: None|float, remove datapoints beyond this many sigma
            (mean and sigma are computed per group).
        hard_lim: None|(low, high), keep only datapoints strictly inside
            this interval.
        ax: axis on which to plot. Default None creates a new figure.
        xlim: None|(low, high), x-axis limits applied to the CDF plot.

    Returns:
        None. The plot is drawn on `ax`.

    Examples:
        cdf_diff(feats_labeled,var='Creatinine' ,rm_outlier=4.0)
        fig, ax = plt.subplots(1, 2)
        psLearn.cdf_diff(feats_labeled,var='Creatinine' ,ax=ax[0],rm_outlier=4.0)
        psLearn.cdf_diff(feats_labeled,var='Sodium Level',ax=ax[1])
    '''
    if col is None:
        col = ['green', 'red']
    if ax is None:
        _, ax = plt.subplots(1, 1)

    # A two-valued variable has no useful CDF; show the group means instead.
    if len(df[var].unique()) == 2:
        df.groupby(grp)[var].mean().plot(ax=ax, kind='bar', color=col)
        ax.set_title(var)
        return

    for position, g in enumerate(df[grp].unique()):
        sample = df[df[grp] == g][var]
        sample = sample[np.isfinite(sample.values)]
        if rm_outlier is not None:
            sigma = sample.std()
            mu = sample.mean()
            sample = sample[(sample > mu - rm_outlier * sigma)
                            & (sample < mu + rm_outlier * sigma)]
        if hard_lim is not None:
            sample = sample[(sample > hard_lim[0]) & (sample < hard_lim[1])]
        if sample.empty:
            # Nothing left to plot for this group after filtering.
            continue

        ordered = np.sort(sample.values)

        # Drop the top 1% of points when choosing the plotted x range so a
        # long right tail does not squash the curve. The ECDF itself is still
        # computed from the full (filtered) sample.
        core = sample[_ecdf_at(ordered, sample.values) < 0.99]
        if core.empty:
            # Happens when every value is identical (all sit at ECDF == 1.0).
            core = sample

        x = np.linspace(core.min(), core.max(), 1000)
        y = _ecdf_at(ordered, x)
        ax.step(x, y, label='%s = %s' % (grp, str(g)),
                c=_group_color(col, g, position))

    ax.set_title(var)
    ax.set_ylim([0, 1])
    # Explicit check: `if xlim:` is ambiguous for numpy arrays.
    if xlim is not None and len(xlim) > 0:
        ax.set_xlim(xlim)


def _ecdf_at(ordered, points):
    '''Return, for each point, the fraction of the sorted array `ordered` that is <= it.'''
    return np.searchsorted(ordered, points, side='right') / float(ordered.size)


def _group_color(col, g, position):
    '''Return col[int(g)] when that lookup works, else the color at `position` (wrapping).'''
    try:
        return col[int(g)]
    except (TypeError, ValueError, OverflowError, IndexError, KeyError):
        # Non-numeric or out-of-range label: assign colors in order of appearance.
        return col[position % len(col)]