Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Sample data: one cost observation per row, keyed by a categorical code.
data = {
    'code': ['a', 'b', 'a', 'c', 'c', 'c', 'c'],
    'cost': [10, 20, 100, 10, 10, 500, 10],
}
df = pd.DataFrame(data)

# Per-code total and average cost. agg() with a list of functions already
# returns a DataFrame indexed by 'code' with one column per function, so no
# further conversion to Series is needed.
grouped = df.groupby('code')['cost'].agg(['sum', 'mean'])
def is_outlier(s):
    """Return the fraction of values in *s* that are upper-tail outliers.

    A value counts as an outlier when it is greater than or equal to the
    third quartile plus 1.5 times the interquartile range. Missing values
    are ignored for the sample-size check, the quartiles and the fraction.

    Args:
        s: A pandas Series of numeric values.

    Returns:
        The share of non-missing values at or above the upper fence, or
        NaN when fewer than 100 non-missing observations are available.
    """
    valid = s.dropna()
    # Only calculate outliers when we have at least 100 observations.
    if valid.count() < 100:
        return np.nan
    # Both quartiles come from the NaN-free data so that a single missing
    # value cannot turn the fence into NaN (which would silently report a
    # 0.0 outlier rate, since every comparison against NaN is False).
    q1 = valid.quantile(0.25)
    q3 = valid.quantile(0.75)
    upper_fence = q3 + 1.5 * (q3 - q1)
    return float((valid >= upper_fence).mean())
Add Comment
Please, Sign In to add comment