Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from sklearn.ensemble import IsolationForest
def print_anomalies(query, column, *, device='desktop', lookback_days=14):
    """Print recent rows of ``df`` that an Isolation Forest flags as outliers.

    The module-level DataFrame ``df`` is filtered down to the rows whose
    ``query`` and ``device`` columns match the arguments. An
    ``IsolationForest`` is fitted on the single ``column`` of that subset,
    and the rows it labels as outliers are printed, restricted to the
    trailing ``lookback_days`` window.

    Args:
        query: Value to match against ``df['query']``.
        column: Name of the column whose values are scored.
        device: Value to match against ``df['device']``.
        lookback_days: Size, in days, of the trailing window of outliers
            that gets printed.

    Returns:
        None. The resulting DataFrame is printed to stdout.

    Note:
        The forest is built without a fixed ``random_state``, so the exact
        set of flagged rows may differ between runs.
    """
    df_anom = df[(df['query'] == query) & (df['device'] == device)]
    if df_anom.empty:
        # IsolationForest.fit rejects an empty sample set; with no rows
        # there is nothing to score, so report the empty subset and stop.
        print(df_anom)
        return

    # scikit-learn estimators expect a 2-D (n_samples, n_features) array.
    features = df_anom[column].values.reshape(-1, 1)
    isolation_forest = IsolationForest(n_estimators=100)
    isolation_forest.fit(features)

    # predict() labels inliers as +1 and outliers as -1.
    is_outlier = isolation_forest.predict(features) < 0
    df_outliers = df_anom[is_outlier]

    # The window is anchored on the newest date in the whole of ``df``,
    # not on the newest date of this query/device subset.
    cutoff = df.date.max() - datetime.timedelta(days=lookback_days)
    df_outliers = df_outliers[df_outliers.date >= cutoff]
    print(df_outliers)
# Report impression outliers for every query in top_queries_by_clicks.
for q in top_queries_by_clicks:
    print_anomalies(q, 'impressions')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement