Advertisement
Guest User

Untitled

a guest
Jul 22nd, 2019
74
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.98 KB | None | 0 0
  1. from sklearn.ensemble import IsolationForest
  2.  
  3. def plot_anomalies(query,column):
  4. df_anom = df[(df['query'] == query) & (df['device'] == 'desktop')]
  5. x=df_anom[column].values
  6. xx = np.linspace(df_anom[column].min(), df_anom[column].max(), len(df)).reshape(-1,1)
  7.  
  8. isolation_forest = IsolationForest(n_estimators=100)
  9. isolation_forest.fit(x.reshape(-1, 1))
  10.  
  11. anomaly_score = isolation_forest.decision_function(xx)
  12. # 1 = inlier, 0 = outlier
  13. outlier = isolation_forest.predict(xx)
  14.  
  15. plt.figure(figsize=(10,4))
  16. plt.plot(xx, anomaly_score, label='anomaly score')
  17. plt.fill_between(xx.T[0], np.min(anomaly_score), np.max(anomaly_score),
  18. where=outlier==-1, color='r',
  19. alpha=.4, label='outlier region')
  20. plt.legend()
  21. plt.ylabel('anomaly score')
  22. plt.xlabel(column)
  23. plt.title("{query} {column} Anomalies".format(query=query,column=column))
  24.  
  25. for q in top_queries_by_clicks:
  26. plot_anomalies(q,'impressions')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement