• Sign Up
• Login
• API
• FAQ
• Tools
• Archive
daily pastebin goal
69%
SHARE
TWEET

# Untitled

a guest Mar 24th, 2019 49 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
1. import numpy as np
2. import pandas as pd
3. import plotly.offline
4. from statsmodels.tsa.arima_model import ARMA
5. from statsmodels.tsa.ar_model import AR
6. from sklearn.metrics import mean_squared_error as mse
7. import plotly.plotly as py
8. import plotly.graph_objs as go
9. import warnings
10.
11.
12.
13.
14. # simple moving average
def SMA(deg, threshold, data) -> tuple:
    """Simulate threshold-based sending with a moving-average style predictor.

    The first ``deg`` samples of ``data`` are counted as sent. For each later
    sample an ``ARMA`` model of order ``(0, 0)`` is fitted on the last ``deg``
    values of the reconstructed series and its prediction at index 0 is
    compared with the real sample. If the absolute difference is at least
    ``threshold`` the real sample is appended and counted as sent; otherwise
    the prediction is appended instead.

    Returns:
        ``(packets_sent, error)`` where ``error`` is ``mse`` computed between
        ``data`` and the reconstructed series.
    """
    reconstructed = data[:deg]
    sent = deg
    # Silences all warnings process-wide from here on (not only during fitting).
    warnings.filterwarnings('ignore')
    for idx in range(deg, len(data)):
        # The reconstructed series has exactly ``idx`` entries here, so this
        # slice is its trailing window of ``deg`` values.
        window = reconstructed[idx - deg:]
        fitted = ARMA(window, order=(0, 0)).fit(disp=False)
        forecast = fitted.predict(0, 0)
        must_send = abs(data[idx] - forecast) >= threshold
        if must_send:
            sent += 1
        next_values = [data[idx]] if must_send else [forecast]
        reconstructed = np.append(reconstructed, next_values)
    return sent, mse(data, reconstructed)
29.
30.
31. # auto regressive model
def ARModel(deg, threshold, data) -> tuple:
    """Simulate threshold-based sending with an autoregressive predictor.

    The first ``deg + 2`` samples of ``data`` are counted as sent. For each
    later index an ``AR`` model is fitted on the whole reconstructed series
    with ``maxlag=deg`` and asked to predict that index. If the prediction
    differs from the real sample by at least ``threshold`` the real sample is
    appended and counted as sent; otherwise the prediction is appended.

    Returns:
        ``(packets_sent, error)`` where ``error`` is ``mse`` computed between
        ``data`` and the reconstructed series.
    """
    warmup = deg + 2
    reconstructed = data[:warmup]
    sent = warmup
    for idx in range(warmup, len(data)):
        fitted = AR(reconstructed).fit(disp=False, maxlag=deg)
        # The reconstructed series holds ``idx`` values, so index ``idx`` is
        # the first position past its end.
        forecast = fitted.predict(idx, idx)
        must_send = abs(data[idx] - forecast) >= threshold
        if must_send:
            sent += 1
        tail = [data[idx]] if must_send else forecast
        reconstructed = np.append(reconstructed, tail)
    return sent, mse(data, reconstructed)
45.
46.
47. # calculate packages sent and MSE with different algorithms at different frequencies(sampling)
48. # E.g. freq = 4 means we take every 4th data point (slower sampling)
def calculations(data, maxThreshold, step, data_type, algos_list, plot=True, freq=1):
    """Run SMA(2), SMA(4), AR(2) and AR(3) over a range of thresholds.

    Args:
        data: the series to evaluate; sliced as ``data[::freq]`` when
            ``freq > 1``.
        maxThreshold: largest threshold evaluated (inclusive).
        step: increment between successive thresholds, starting from 0.
        data_type: label forwarded to the plotting helpers for the titles.
        algos_list: trace names forwarded to the plotting helpers, expected in
            the order SMA(2), SMA(4), AR(2), AR(3).
        plot: when true, draw the packets-sent and MSE comparison plots.
        freq: keep every ``freq``-th sample before evaluating.

    Returns:
        ``(vals, err)``: two lists of four lists each (one per algorithm, in
        the order above). ``vals`` holds the percentage of samples sent at
        each threshold, ``err`` the matching MSE values.
    """
    if freq > 1:
        data = data[::freq]

    thresholds = range(0, maxThreshold + 1, step)
    # (predictor, degree) pairs; the order defines the order of the results.
    estimators = ((SMA, 2), (SMA, 4), (ARModel, 2), (ARModel, 3))
    vals = [[] for _ in estimators]
    err = [[] for _ in estimators]
    sample_count = len(data)

    for thresh in thresholds:
        for slot, (predictor, deg) in enumerate(estimators):
            # Every predictor must see the same (possibly down-sampled) data;
            # AR(2) previously read a module-level series instead of ``data``.
            ps, er = predictor(deg, thresh, data)
            vals[slot].append((ps / sample_count) * 100)
            err[slot].append(er)

    if plot:
        x = np.array(thresholds)
        ps_algo_comparison(x, vals, data_type, algos_list)
        err_algo_comparison(x, err, data_type, algos_list)

    return vals, err
83.
84.
85. # compare packages sent percentage over different algorithms
def ps_algo_comparison(x, y_list, data_type, algos_list):
    """Plot the percentage of sent packages per threshold, one trace per algorithm.

    ``y_list[k]`` is drawn against ``x`` and labelled ``algos_list[k]``. The
    figure is rendered with ``plotly.offline.plot`` under the file name
    ``'ps-algorithms'``.
    """
    traces = [
        go.Scatter(x=x, y=series, name=algos_list[idx], showlegend=True)
        for idx, series in enumerate(y_list)
    ]
    figure_layout = go.Layout(
        title='Packages Sent/Threshold ratio for '+data_type,
        xaxis=dict(title='Threshold'),
        yaxis=dict(title='% of sent packages'),
    )
    figure = go.Figure(traces, figure_layout)
    plotly.offline.plot(figure, filename='ps-algorithms')
94.
95.
96. # compare MSE over different algorithms
def err_algo_comparison(x, errs, data_type, algos_list):
    """Plot the MSE per threshold, one trace per algorithm.

    ``errs[k]`` is drawn against ``x`` and labelled ``algos_list[k]``. The
    figure is rendered with ``plotly.offline.plot`` under the file name
    ``'mse-algorithms'``.
    """
    traces = [
        go.Scatter(x=x, y=series, name=algos_list[idx], showlegend=True)
        for idx, series in enumerate(errs)
    ]
    figure_layout = go.Layout(
        title='MSE/Threshold ratio for '+data_type,
        xaxis=dict(title='Threshold'),
        yaxis=dict(title='MSE'),
    )
    figure = go.Figure(traces, figure_layout)
    plotly.offline.plot(figure, filename='mse-algorithms')
105.
106.
107. # compare packages sent percentage over data sampled at different frequencies for certain algorithm
def ps_plot_frequencies(x, algo, y_list, freq, data_type):
    """Plot sent-package percentages of one algorithm at several sampling frequencies.

    ``y_list[k]`` is drawn against ``x`` and labelled from ``freq[k]``: a value
    of 1 is shown as "Normal Frequency", anything else as ``Frequency(<value>)``.
    The figure is rendered with ``plotly.offline.plot`` using a file name built
    from ``algo`` and ``data_type``.
    """
    traces = []
    for idx, series in enumerate(y_list):
        if freq[idx] == 1:
            label = "Normal Frequency"
        else:
            label = f'Frequency({freq[idx]})'
        traces.append(go.Scatter(x=x, y=series, name=label, showlegend=True))

    heading = 'Packages Sent/Threshold with different data frequencies for ' + data_type+' with '+algo
    figure_layout = go.Layout(
        title=heading,
        xaxis=dict(title='Threshold'),
        yaxis=dict(title='% of sent packages'),
    )
    figure = go.Figure(traces, figure_layout)
    plotly.offline.plot(figure, filename=algo+' frequency for '+data_type)
117.
118.
def calc_freq(data, maxThreshold, step, algorithms_list, data_header, frequencies=(1, 2, 4)):
    """Compare each algorithm's sent-package percentage across sampling frequencies.

    ``calculations`` is run once per entry of ``frequencies`` (without
    plotting), then one figure per algorithm is drawn with
    ``ps_plot_frequencies``, holding one trace per frequency.

    Args:
        data: the series to evaluate.
        maxThreshold: largest threshold evaluated (inclusive).
        step: increment between successive thresholds, starting from 0.
        algorithms_list: algorithm names, in the order ``calculations``
            returns its results.
        data_header: label of the data series, used in titles and file names.
        frequencies: sampling frequencies to compare; defaults to the
            previously hard-coded ``1, 2, 4``.
    """
    freq = list(frequencies)
    per_frequency = []
    for f in freq:
        ps_algos, _ = calculations(data, maxThreshold, step, algos_list=algorithms_list,
                                   data_type=data_header, freq=f, plot=False)
        per_frequency.append(ps_algos)

    # Use this function's own arguments for the axis, names and label rather
    # than module-level globals, so it works for any data set and range.
    x = np.array(range(0, maxThreshold + 1, step))
    # zip(*per_frequency) regroups the results from "per frequency" to
    # "per algorithm": one tuple of per-frequency series for each algorithm.
    for algorithm, rez in zip(algorithms_list, zip(*per_frequency)):
        ps_plot_frequencies(x, algorithm, rez, freq, data_header)
128.
129.
130. # getting the dataset and preprocess it
df = pd.read_csv("April.csv")
# NOTE(review): the trailing ')' in the station name looks like a typo, but it
# may match the CSV verbatim -- confirm against the data before changing it.
df = df.loc[df["Station Name"] == "Stripa Granite)"]
# .copy() gives an independent frame so the column assignment below does not
# operate on a slice of the original DataFrame.
df = df[["Data Time", "Room Temperature °C"]].copy()
df["Data Time"] = pd.to_datetime(df["Data Time"])
df = df.loc[(df["Data Time"] >= '5/28/2014  8:31:00') & (df["Data Time"] <= '6/21/2014  3:49:00')]
df = df.sort_values(by=["Data Time"], ascending=True)
temperatures = df["Room Temperature °C"].values
columns = df.columns.tolist()

# plot the time series for the room temperature
temperature = go.Scatter(x=df["Data Time"], y=temperatures, name='Room Temperature °C('+'\u2103'+')', showlegend=True)
data = [temperature]
# df only keeps two columns, so columns[2] raised IndexError; the title now
# pairs the value column with the time column.
layout = go.Layout(title=columns[1]+"/"+columns[0], xaxis=dict(title='Date'))
fig = go.Figure(data=data, layout=layout)
plotly.offline.plot(fig, filename='time-series-temperature')

# threshold sweep settings for the temperature series
maxThresh_temp = 8
temp_step = 1


# display names of the compared algorithms, in the order calculations() returns them
algs = ['SMA(2)', 'SMA(4)', 'AR(2)', 'AR(3)']
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy.

Top