Untitled

from sklearn.ensemble import IsolationForest

def plot_anomalies(query, column, device='desktop', random_state=None):
    """Plot the Isolation Forest anomaly score of ``column`` for one query.

    Rows of the module-level ``df`` whose ``'query'`` equals ``query`` and
    whose ``'device'`` equals ``device`` are selected. An ``IsolationForest``
    is fitted on their ``column`` values and then evaluated on an evenly
    spaced grid running from the subset's minimum to its maximum. The score
    curve is drawn on a new figure via ``plt`` (presumably
    ``matplotlib.pyplot``), and the grid regions the model predicts as
    outliers are shaded in red.

    Args:
        query: Value compared against ``df['query']``.
        column: Name of the ``df`` column to analyse.
        device: Value compared against ``df['device']``. Defaults to
            ``'desktop'``, the value that used to be hard-coded.
        random_state: Forwarded to ``IsolationForest``. Pass an int to get
            the same plot on every run; ``None`` (the default) keeps the
            previous unseeded behaviour.

    Raises:
        ValueError: If no row of ``df`` matches ``query`` and ``device``.
    """
    df_anom = df[(df['query'] == query) & (df['device'] == device)]
    if df_anom.empty:
        # Fail early with a readable message; otherwise the estimator raises
        # a less helpful ValueError about an array with 0 samples.
        raise ValueError(
            "no rows for query={query!r} and device={device!r}".format(
                query=query, device=device))

    # scikit-learn estimators expect a 2-D (n_samples, n_features) array.
    observed = df_anom[column].values.reshape(-1, 1)

    # NOTE(review): the number of grid points follows the size of the full
    # frame (len(df)), not of the filtered subset -- kept as in the original.
    grid = np.linspace(df_anom[column].min(), df_anom[column].max(),
                       len(df)).reshape(-1, 1)

    isolation_forest = IsolationForest(n_estimators=100,
                                       random_state=random_state)
    isolation_forest.fit(observed)

    anomaly_score = isolation_forest.decision_function(grid)
    # predict() labels: +1 = inlier, -1 = outlier
    outlier = isolation_forest.predict(grid)

    grid_1d = grid[:, 0]
    plt.figure(figsize=(10, 4))
    plt.plot(grid_1d, anomaly_score, label='anomaly score')
    # Shade the full score range wherever the model flags the grid point.
    plt.fill_between(grid_1d, np.min(anomaly_score), np.max(anomaly_score),
                     where=outlier == -1, color='r',
                     alpha=.4, label='outlier region')
    plt.legend()
    plt.ylabel('anomaly score')
    plt.xlabel(column)
    plt.title("{query} {column} Anomalies".format(query=query, column=column))

# Draw one 'impressions' anomaly chart per entry of top_queries_by_clicks.
for top_query in top_queries_by_clicks:
    plot_anomalies(top_query, 'impressions')