Advertisement
Guest User

Untitled

a guest
May 26th, 2019
103
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.18 KB | None | 0 0
  1. import pandas as pd
  2. import numpy as np
  3. from sklearn.decomposition import PCA
  4. import math
  5. from pandas import DataFrame
  6. import datetime
  7. from itertools import repeat
  8. from sklearn.ensemble import IsolationForest
  9. from matplotlib import pyplot as pltå
  10. import seaborn as sns
  11.  
  12. '''hbos and isolation forest,
  13. evaluation , model performance comparison from kaggle'''
  14.  
  15. data = pd.read_csv("truck.csv")
  16. print(data)
  17. print(data.shape)
  18. del data['Send_Date']
  19. del data['PARTITIONING']
  20. del data['VEHICLE_ID']
  21. del data['All_Fault_in_3_months']
  22.  
  23.  
  24.  
  25. orig = data.copy()
  26. print(orig[:10])
  27. print(data[:10])
  28.  
  29.  
  30. iforest = IsolationForest()
  31. iforest.fit(data)
  32. iforest_result = iforest.decision_function(data)
  33.  
  34. print(iforest_result[:10])
  35.  
  36. iforest_orig = orig.copy()
  37. iforest_orig['if'] = iforest_result
  38.  
  39. print(iforest_orig[:10])
  40.  
  41. iforest_top1000_data=iforest_orig.sort_values(by=['if'],ascending=True)[:1000]
  42.  
  43. print(iforest_top1000_data[:15])
  44.  
  45. print(len(iforest_top1000_data[lambda x:x['Class']==1]))
  46.  
  47. print(iforest_top1000_data['Class'].cumsum().sum())
  48. plt.scatter(range(1000),iforest_top1000_data['Class'].cumsum(),marker='1')
  49. plt.xlabel('Top N data')
  50. plt.ylabel('Anomalies found')
  51. plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement