Advertisement
Guest User

Untitled

a guest
Jul 18th, 2019
73
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.55 KB | None | 0 0
import copy
import datetime
from itertools import groupby

import numpy as np
import scipy
import scipy.signal
import tensorflow as tf
from statsmodels.tsa.seasonal import seasonal_decompose

import aux_fn
  9.  
  10. class data_analyzer:
  11. def get_specificity(self):
  12. act=self.anomaly_actual
  13. mask=copy.copy(self.anomaly_pred)
  14. idx_zero = self.get_intervals(mask, 0)
  15. for i in idx_zero:
  16. if i[1] - i[0] < self.window_width:
  17. mask[i[0]:i[1]] = 1
  18.  
  19. ress1=self.tmp_fun(act,mask)
  20. ress2=self.tmp_fun(mask,act)
  21. ress=np.zeros(4) # (TP,TN,FP,FN)
  22. ress[0] = (ress1[0]+ress2[0])//2
  23. ress[2] = ress1[1]
  24. ress[3] = ress2[1]
  25. ress[1] = len(self.anomaly_pred) // self.window_width - ress[0] - ress[1] - ress[3]
  26. return ress
  27.  
  28. def get_intervals(self,data,value):
  29. lenss = [len(list(group)) for key, group in groupby(data)]
  30. keys = np.array([key for key, group in groupby(data)])
  31. lenss = np.cumsum(lenss)
  32. lenss = np.insert(lenss, 0, 0)
  33. idx = []
  34. for i in np.where(keys == value)[0]:
  35. idx.append(lenss[i:i + 2])
  36. return idx
  37.  
  38. def tmp_fun(self,act,mask):
  39. idx = self.get_intervals(act, 1)
  40. ress=[0,0]
  41. for i in idx:
  42. end=min(i[1],len(mask))
  43. mask_sl = mask[i[0]:end]
  44. if (max(mask_sl)) == 1:
  45. ress[0] += 1
  46. else:
  47. ress[1] += 1
  48.  
  49. return ress
  50.  
  51.  
  52. def get_prediction_from_score_quant(self,data):
  53. #if max(data)<3: #to be adjusted
  54. # return np.zeros(len(data), dtype=bool)
  55. threshold = np.quantile(data,0.999)
  56. pred = np.zeros(len(data), dtype=bool)
  57. pred[data > threshold] = True
  58. return pred
  59.  
  60.  
  61. def __init__(self,filename_raw,rolling_step=None,is_remove_trend=True):
  62. self.saved_column, timestamps = aux_fn.get_csv_data(filename_raw + '.csv')
  63. dates_list = [datetime.datetime.strptime(date, '%Y-%m-%d %H:%M:%S') for date in timestamps]
  64. lab = aux_fn.get_label_data(filename_raw + '.csv')
  65. self.anomaly_actual = aux_fn.get_anomaly_mask(dates_list, lab)
  66. self.model = tf.keras.models.load_model(filename_raw+'.h5')
  67. #model.summary()
  68. self.window_width = self.model.layers[0].input_shape[1]
  69. if rolling_step is None:
  70. rolling_step = self.window_width
  71.  
  72. data = aux_fn.rolling(self.saved_column, self.window_width, rolling_step)
  73. data = np.expand_dims(data, axis=2)
  74.  
  75. yhat = self.model.predict(data)
  76.  
  77. len1 = yhat.shape[1] + rolling_step * (yhat.shape[0] - 1)
  78. self.saved_column = self.saved_column[0:len1]
  79. self.anomaly_actual = self.anomaly_actual[0:len1]
  80. self.resst = np.zeros(len1)
  81. cnt = np.zeros(len1)
  82.  
  83. for i in range(yhat.shape[0]):
  84. self.resst[i * rolling_step:i * rolling_step + self.window_width] += yhat[i, :, 0]
  85. cnt[i * rolling_step:i * rolling_step + self.window_width] += 1
  86. self.resst = np.divide(self.resst, cnt)
  87. # resst=aux_fn.scaleData(resst)
  88. xc = aux_fn.xcorr(self.saved_column, self.resst, rolling_step)
  89. self.opt_shift = np.argmax(xc) - rolling_step
  90. if self.opt_shift > 0:
  91. self.saved_column = np.pad(self.saved_column, (0, self.opt_shift), 'mean')
  92. self.anomaly_actual = np.pad(self.anomaly_actual, (0, self.opt_shift), 'minimum')
  93. self.resst = np.pad(self.resst, (self.opt_shift, 0), 'mean')
  94. elif self.opt_shift < 0:
  95. self.saved_column = np.pad(self.saved_column, (-self.opt_shift, 0), 'mean')
  96. self.anomaly_actual = np.pad(self.anomaly_actual, (-self.opt_shift, 0),'minimum')
  97. self.resst = np.pad(self.resst, (0, -self.opt_shift), 'mean')
  98.  
  99. self.anomaly_score = (self.resst - self.saved_column)
  100. self.anomaly_score = aux_fn.smooth(self.anomaly_score,
  101. 50) # Smoothing parameter. Can be changed to get better results, but usually 20 works well
  102.  
  103. if is_remove_trend:
  104. _ , psd = scipy.signal.periodogram(self.anomaly_score)
  105. psd[0:self.window_width//2] = 0
  106. result = seasonal_decompose(self.anomaly_score, model='additive', freq=np.argmax(psd))
  107. self.anomaly_score = result.resid
  108. self.anomaly_score=np.nan_to_num(self.anomaly_score)
  109.  
  110. self.anomaly_score = (self.anomaly_score-np.mean(self.anomaly_score))/np.std(self.anomaly_score)
  111. self.anomaly_score = abs(self.anomaly_score)
  112. self.anomaly_pred = self.get_prediction_from_score_quant(self.anomaly_score)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement