SHARE
TWEET

Untitled

a guest Mar 24th, 2019 51 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import numpy as np
  2. import pandas as pd
  3. import plotly.offline
  4. from statsmodels.tsa.arima_model import ARMA
  5. from statsmodels.tsa.ar_model import AR
  6. from sklearn.metrics import mean_squared_error as mse
  7. import plotly.plotly as py
  8. import plotly.graph_objs as go
  9. import warnings
  10.  
  11.  
  12.  
  13.  
  14. # simple moving average
  def SMA(deg, threshold, data) -> tuple:
      """Simulate threshold-triggered transmission with a moving-average predictor.

      The first ``deg`` samples are always counted as sent. For every later
      sample the receiver's prediction is the mean of the previous ``deg``
      values of the *reconstructed* series (which may itself contain earlier
      predictions). The real sample is sent only when it differs from that
      prediction by at least ``threshold``; otherwise the prediction is kept.

      Args:
          deg: Window length of the moving average (positive integer).
          threshold: Minimum absolute prediction error that triggers a send.
          data: 1-D sequence of numeric samples.

      Returns:
          ``(packets_sent, mse)`` where ``mse`` is the mean squared error
          between ``data`` and the reconstructed series.

      Raises:
          ValueError: If ``deg`` is smaller than 1 or ``data`` is empty.
      """
      if deg < 1:
          raise ValueError("deg must be a positive integer")
      actual = np.asarray(data, dtype=float)
      if actual.size == 0:
          raise ValueError("data must contain at least one sample")

      # Start from a copy of the real series and overwrite only the samples
      # that are NOT transmitted; this avoids re-allocating on every step.
      reconstructed = actual.copy()
      # Never count more warm-up packets than there are samples.
      packets_sent = min(deg, actual.size)

      for i in range(deg, actual.size):
          # A constant-only (order (0, 0)) model predicts the window mean, so
          # compute it directly instead of fitting a model per sample.
          pred = reconstructed[i - deg:i].mean()
          if abs(actual[i] - pred) >= threshold:
              packets_sent += 1
          else:
              reconstructed[i] = pred

      return packets_sent, float(np.mean((actual - reconstructed) ** 2))
  29.  
  30.  
  31. # auto regressive model
  def ARModel(deg, threshold, data) -> tuple:
      """Simulate threshold-triggered transmission with an autoregressive predictor.

      The first ``deg + 2`` samples seed the history and are counted as sent.
      For each later index an ``AR`` model is refitted on the history with
      ``maxlag=deg`` and asked for the value at that index. The real sample
      is appended (and counted) when it is at least ``threshold`` away from
      the prediction; otherwise the prediction is appended instead.

      Returns:
          ``(packets_sent, mse)`` where ``mse`` compares ``data`` with the
          reconstructed history.
      """
      warmup = deg + 2
      history = data[:warmup]
      sent = warmup
      for idx in range(warmup, len(data)):
          fitted = AR(history).fit(disp=False, maxlag=deg)
          # history holds idx values here, so idx is the first unseen position.
          forecast = fitted.predict(idx, idx)
          exceeds = abs(data[idx] - forecast) >= threshold
          if exceeds:
              history = np.append(history, [data[idx]])
              sent += 1
          else:
              history = np.append(history, forecast)
      return sent, mse(data, history)
  45.  
  46.  
  47. # calculate packages sent and MSE with different algorithms at different frequencies(sampling)
  48. # Eg. frequency = 4 means we take every 4 data point(slower sampling)
  def calculations(data, maxThreshold, step, data_type, algos_list, plot=True, freq=1):
      """Evaluate SMA(2), SMA(4), AR(2) and AR(3) over a range of thresholds.

      Args:
          data: 1-D sequence of samples.
          maxThreshold: Largest threshold to evaluate (inclusive).
          step: Increment between consecutive thresholds.
          data_type: Label of the measured quantity, used in plot titles.
          algos_list: Legend names, in the order SMA(2), SMA(4), AR(2), AR(3).
          plot: When true, draw the packets-sent and MSE comparison plots.
          freq: Keep every ``freq``-th sample before evaluating (1 = all).

      Returns:
          ``(vals, err)``: two lists with one inner list per algorithm (same
          order as above). ``vals`` holds the percentage of samples sent for
          each threshold, ``err`` the corresponding MSE.
      """
      if freq > 1:
          data = data[::freq]

      thresholds = range(0, maxThreshold + 1, step)
      # (predictor, degree) pairs, in the order the result lists are returned.
      predictors = ((SMA, 2), (SMA, 4), (ARModel, 2), (ARModel, 3))
      vals = [[] for _ in predictors]
      err = [[] for _ in predictors]
      total = len(data)

      for thresh in thresholds:
          for sent_series, err_series, (algo, deg) in zip(vals, err, predictors):
              # Every predictor runs on the same (possibly subsampled) data,
              # so the percentage below is always relative to the right length.
              ps, er = algo(deg, thresh, data)
              sent_series.append((ps / total) * 100)
              err_series.append(er)

      if plot:
          x = np.array(thresholds)
          ps_algo_comparison(x, vals, data_type, algos_list)
          err_algo_comparison(x, err, data_type, algos_list)

      return vals, err
  83.  
  84.  
  85. # compare packages sent percentage over different algorithms
  def ps_algo_comparison(x, y_list, data_type, algos_list):
      """Plot the percentage of packets sent against the threshold.

      One trace is drawn per entry of ``y_list``; the entry at position ``k``
      is labelled with ``algos_list[k]``. The figure is written through
      ``plotly.offline.plot`` under the name ``ps-algorithms``.
      """
      traces = [
          go.Scatter(x=x, y=series, name=algos_list[idx], showlegend=True)
          for idx, series in enumerate(y_list)
      ]
      axes = dict(
          xaxis=dict(title='Threshold'),
          yaxis=dict(title='% of sent packages'),
      )
      layout = go.Layout(title='Packages Sent/Threshold ratio for '+data_type, **axes)
      figure = go.Figure(traces, layout)
      plotly.offline.plot(figure, filename='ps-algorithms')
  94.  
  95.  
  96. # compare MSE over different algorithms
  def err_algo_comparison(x, errs, data_type, algos_list):
      """Plot the mean squared error against the threshold.

      One trace is drawn per entry of ``errs``; the entry at position ``k``
      is labelled with ``algos_list[k]``. The figure is written through
      ``plotly.offline.plot`` under the name ``mse-algorithms``.
      """
      traces = [
          go.Scatter(x=x, y=series, name=algos_list[idx], showlegend=True)
          for idx, series in enumerate(errs)
      ]
      axes = dict(
          xaxis=dict(title='Threshold'),
          yaxis=dict(title='MSE'),
      )
      layout = go.Layout(title='MSE/Threshold ratio for '+data_type, **axes)
      figure = go.Figure(traces, layout)
      plotly.offline.plot(figure, filename='mse-algorithms')
  105.  
  106.  
  107. # compare packages sent percentage over data sampled at different frequencies for certain algorithm
  def ps_plot_frequencies(x, algo, y_list, freq, data_type):
      """Plot packets-sent percentages of one algorithm at several sampling rates.

      ``y_list[k]`` is drawn as one trace and labelled from ``freq[k]``:
      a value of 1 is shown as "Normal Frequency", anything else as
      "Frequency(<value>)". The output name is built from ``algo`` and
      ``data_type``.
      """
      traces = []
      for idx, series in enumerate(y_list):
          if freq[idx] == 1:
              label = "Normal Frequency"
          else:
              label = f'Frequency({freq[idx]})'
          traces.append(go.Scatter(x=x, y=series, name=label, showlegend=True))

      title = 'Packages Sent/Threshold with different data frequencies for ' + data_type+' with '+algo
      layout = go.Layout(
          title=title,
          xaxis=dict(title='Threshold'),
          yaxis=dict(title='% of sent packages'),
      )
      figure = go.Figure(traces, layout)
      plotly.offline.plot(figure, filename=algo+' frequency for '+data_type)
  117.  
  118.  
  def calc_freq(data, maxThreshold, step, algorithms_list, data_header):
      """Compare each algorithm's packets-sent curve across sampling frequencies.

      Runs ``calculations`` (without plotting) on ``data`` subsampled by 1, 2
      and 4, then draws one figure per algorithm showing how the percentage
      of sent packets changes with the sampling frequency.

      Args:
          data: 1-D sequence of samples.
          maxThreshold: Largest threshold to evaluate (inclusive).
          step: Increment between consecutive thresholds.
          algorithms_list: Algorithm names, in the order ``calculations``
              returns its per-algorithm results.
          data_header: Label of the measured quantity, used in plot titles.
      """
      freq = [1, 2, 4]
      # Only the packets-sent percentages are needed here; the MSE lists
      # returned as the second element are discarded.
      per_freq = [
          calculations(data, maxThreshold, step, algos_list=algorithms_list,
                       data_type=data_header, freq=f, plot=False)[0]
          for f in freq
      ]

      x = np.array(range(0, maxThreshold + 1, step))
      # per_freq is indexed [frequency][algorithm]; zip(*...) regroups it so
      # each item holds one algorithm's curves for all frequencies.
      for algorithm, rez in zip(algorithms_list, zip(*per_freq)):
          ps_plot_frequencies(x, algorithm, rez, freq, data_header)
  128.  
  129.  
  # Load the dataset and keep only the rows/columns used below.
  df = pd.read_csv("April.csv")
  # NOTE(review): the station name ends with a ")" - confirm that this matches
  # the value stored in the CSV, otherwise the filter selects nothing.
  df = df.loc[df["Station Name"] == "Stripa Granite)"]
  # Work on an independent copy so the column assignment below does not
  # operate on a slice of the original frame.
  df = df[["Data Time", "Room Temperature °C"]].copy()
  df["Data Time"] = pd.to_datetime(df["Data Time"])
  df = df.loc[(df["Data Time"] >= '5/28/2014  8:31:00') & (df["Data Time"] <= '6/21/2014  3:49:00')]
  df = df.sort_values(by=["Data Time"], ascending=True)
  temperatures = df["Room Temperature °C"].values
  columns = df.columns.tolist()

  # Plot the temperature time series.
  temperature = go.Scatter(x=df["Data Time"], y=temperatures, name='Room Temperature °C('+'\u2103'+')', showlegend=True)
  data = [temperature]
  # The frame has exactly two columns, so only columns[0] and columns[1]
  # exist; the title uses the temperature column name.
  layout = go.Layout(title=columns[1], xaxis=dict(title='Date'))
  fig = go.Figure(data=data, layout=layout)
  plotly.offline.plot(fig, filename='time-series-temperature')

  # Threshold sweep settings for the temperature series.
  maxThresh_temp = 8
  temp_step = 1


  # Legend names, in the order the algorithms are evaluated.
  algs = ['SMA(2)', 'SMA(4)', 'AR(2)', 'AR(3)']
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top