SHARE
TWEET

Untitled

a guest Jul 18th, 2019 55 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
import copy
import datetime
from itertools import groupby

import numpy as np
import scipy
import scipy.signal
import tensorflow as tf
from statsmodels.tsa.seasonal import seasonal_decompose

import aux_fn
  9.  
  10. class data_analyzer:
  11.     def get_specificity(self):
  12.         act=self.anomaly_actual
  13.         mask=copy.copy(self.anomaly_pred)
  14.         idx_zero = self.get_intervals(mask, 0)
  15.         for i in idx_zero:
  16.             if i[1] - i[0] < self.window_width:
  17.                 mask[i[0]:i[1]] = 1
  18.  
  19.         ress1=self.tmp_fun(act,mask)
  20.         ress2=self.tmp_fun(mask,act)
  21.         ress=np.zeros(4) # (TP,TN,FP,FN)
  22.         ress[0] = (ress1[0]+ress2[0])//2
  23.         ress[2] = ress1[1]
  24.         ress[3] = ress2[1]
  25.         ress[1] = len(self.anomaly_pred) // self.window_width - ress[0] - ress[1] - ress[3]
  26.         return ress
  27.  
  28.     def get_intervals(self,data,value):
  29.         lenss = [len(list(group)) for key, group in groupby(data)]
  30.         keys = np.array([key for key, group in groupby(data)])
  31.         lenss = np.cumsum(lenss)
  32.         lenss = np.insert(lenss, 0, 0)
  33.         idx = []
  34.         for i in np.where(keys == value)[0]:
  35.             idx.append(lenss[i:i + 2])
  36.         return idx
  37.  
  38.     def tmp_fun(self,act,mask):
  39.         idx = self.get_intervals(act, 1)
  40.         ress=[0,0]
  41.         for i in idx:
  42.             end=min(i[1],len(mask))
  43.             mask_sl = mask[i[0]:end]
  44.             if (max(mask_sl)) == 1:
  45.                 ress[0] += 1
  46.             else:
  47.                 ress[1] += 1
  48.  
  49.         return ress
  50.  
  51.  
  52.     def get_prediction_from_score_quant(self,data):
  53.         #if max(data)<3: #to be adjusted
  54.         #    return np.zeros(len(data), dtype=bool)
  55.         threshold = np.quantile(data,0.999)
  56.         pred = np.zeros(len(data), dtype=bool)
  57.         pred[data > threshold] = True
  58.         return pred
  59.  
  60.  
  61.     def __init__(self,filename_raw,rolling_step=None,is_remove_trend=True):
  62.         self.saved_column, timestamps = aux_fn.get_csv_data(filename_raw + '.csv')
  63.         dates_list = [datetime.datetime.strptime(date, '%Y-%m-%d %H:%M:%S') for date in timestamps]
  64.         lab = aux_fn.get_label_data(filename_raw + '.csv')
  65.         self.anomaly_actual = aux_fn.get_anomaly_mask(dates_list, lab)
  66.         self.model = tf.keras.models.load_model(filename_raw+'.h5')
  67.         #model.summary()
  68.         self.window_width = self.model.layers[0].input_shape[1]
  69.         if rolling_step is None:
  70.              rolling_step = self.window_width
  71.  
  72.         data = aux_fn.rolling(self.saved_column, self.window_width, rolling_step)
  73.         data = np.expand_dims(data, axis=2)
  74.  
  75.         yhat = self.model.predict(data)
  76.  
  77.         len1 = yhat.shape[1] + rolling_step * (yhat.shape[0] - 1)
  78.         self.saved_column = self.saved_column[0:len1]
  79.         self.anomaly_actual = self.anomaly_actual[0:len1]
  80.         self.resst = np.zeros(len1)
  81.         cnt = np.zeros(len1)
  82.  
  83.         for i in range(yhat.shape[0]):
  84.             self.resst[i * rolling_step:i * rolling_step + self.window_width] += yhat[i, :, 0]
  85.             cnt[i * rolling_step:i * rolling_step + self.window_width] += 1
  86.         self.resst = np.divide(self.resst, cnt)
  87.         # resst=aux_fn.scaleData(resst)
  88.         xc = aux_fn.xcorr(self.saved_column, self.resst, rolling_step)
  89.         self.opt_shift = np.argmax(xc) - rolling_step
  90.         if self.opt_shift > 0:
  91.             self.saved_column = np.pad(self.saved_column, (0, self.opt_shift), 'mean')
  92.             self.anomaly_actual = np.pad(self.anomaly_actual, (0, self.opt_shift), 'minimum')
  93.             self.resst = np.pad(self.resst, (self.opt_shift, 0), 'mean')
  94.         elif self.opt_shift < 0:
  95.             self.saved_column = np.pad(self.saved_column, (-self.opt_shift, 0), 'mean')
  96.             self.anomaly_actual = np.pad(self.anomaly_actual, (-self.opt_shift, 0),'minimum')
  97.             self.resst = np.pad(self.resst, (0, -self.opt_shift), 'mean')
  98.  
  99.         self.anomaly_score = (self.resst - self.saved_column)
  100.         self.anomaly_score = aux_fn.smooth(self.anomaly_score,
  101.                                       50)  # Smoothing parameter. Can be changed to get better results, but usually 20 works well
  102.  
  103.         if is_remove_trend:
  104.             _ , psd = scipy.signal.periodogram(self.anomaly_score)
  105.             psd[0:self.window_width//2] = 0
  106.             result = seasonal_decompose(self.anomaly_score, model='additive', freq=np.argmax(psd))
  107.             self.anomaly_score = result.resid
  108.             self.anomaly_score=np.nan_to_num(self.anomaly_score)
  109.  
  110.         self.anomaly_score = (self.anomaly_score-np.mean(self.anomaly_score))/np.std(self.anomaly_score)
  111.         self.anomaly_score = abs(self.anomaly_score)
  112.         self.anomaly_pred = self.get_prediction_from_score_quant(self.anomaly_score)
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top