Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import numpy as np
- import pandas as pd
- import plotly.offline
- import statsmodels
- from statsmodels.tsa.arima_model import ARMA
- from statsmodels.tsa.ar_model import AR
- from sklearn.metrics import mean_squared_error as mse
- import plotly.plotly as py
- import plotly.graph_objs as go
- import warnings
def SMA(deg, threshold, data) -> tuple:
    """Simulate threshold-triggered sending with a simple moving average.

    The first ``deg`` samples are always counted as sent.  For each later
    sample, an ``ARMA`` model of order ``(0, 0)`` is fitted to the last
    ``deg`` values of the reconstructed series and ``predict(0, 0)`` is taken
    as the forecast.  When the real sample differs from the forecast by at
    least ``threshold`` the real sample is stored and counted as sent;
    otherwise the forecast is stored in its place.

    Note: each call installs a process-wide ``'ignore'`` warnings filter.

    Args:
        deg: window length, and number of leading samples always sent.
        threshold: minimum absolute deviation that forces a send.
        data: sequence of observations (sliced and indexed by position).

    Returns:
        ``(packets_sent, error)`` where ``error`` is ``mse`` between ``data``
        and the reconstructed series.
    """
    history = data[:deg]
    packets_sent = deg
    warnings.filterwarnings('ignore')
    for i in range(deg, len(data)):
        # len(history) == i here, so this slice is the most recent `deg` values.
        window = history[i - deg:]
        fitted = ARMA(window, order=(0, 0)).fit(disp=False)
        forecast = fitted.predict(0, 0)
        must_send = abs(data[i] - forecast) >= threshold
        if not must_send:
            # Receiver can reproduce the forecast itself; nothing is sent.
            history = np.append(history, [forecast])
            continue
        history = np.append(history, [data[i]])
        packets_sent += 1
    return packets_sent, mse(data, history)
def ARModel(deg, threshold, data) -> tuple:
    """Simulate threshold-triggered sending with an autoregressive model.

    The first ``deg + 4`` samples are always counted as sent.  For each later
    index ``i`` an ``AR`` model is fitted to the whole reconstructed series
    with ``maxlag=deg`` and ``predict(i, i)`` is taken as the forecast.  When
    the real sample differs from the forecast by at least ``threshold`` the
    real sample is stored and counted as sent; otherwise the forecast is
    stored in its place.

    Args:
        deg: maximum lag passed to the AR fit.
        threshold: minimum absolute deviation that forces a send.
        data: sequence of observations (sliced and indexed by position).

    Returns:
        ``(packets_sent, error)`` where ``error`` is ``mse`` between ``data``
        and the reconstructed series.
    """
    warmup = deg + 4
    history = data[:warmup]
    packets_sent = warmup
    for i in range(warmup, len(data)):
        fitted = AR(history).fit(disp=False, maxlag=deg)
        forecast = fitted.predict(i, i)
        if abs(data[i] - forecast) >= threshold:
            history = np.append(history, [data[i]])
            packets_sent += 1
            continue
        # Deviation below threshold: keep the forecast instead of sending.
        history = np.append(history, forecast)
    return packets_sent, mse(data, history)
def calculations(data, maxThreshold, step, data_type, algos_list, plot=True, freq=1):
    """Compute sent-packet percentage and MSE for each algorithm and threshold.

    The algorithms evaluated are, in this order, ``SMA(2)``, ``SMA(4)``,
    ``AR(2)`` and ``AR(3)``.  Thresholds run over
    ``range(0, maxThreshold + 1, step)``, so ``maxThreshold`` and ``step``
    must be integers.

    Args:
        data: sequence of observations.
        maxThreshold: largest threshold to evaluate (inclusive upper bound).
        step: increment between consecutive thresholds.
        data_type: label of the measured quantity, used in plot titles.
        algos_list: display names for the four algorithms, used as legends.
        plot: when true, draw the packet and MSE comparison plots.
        freq: sampling stride; e.g. ``freq=4`` keeps every 4th data point
            (slower sampling).

    Returns:
        ``(vals, err)``: two lists with one inner list per algorithm; each
        inner list holds one entry per threshold (percentage of packets sent
        and MSE respectively).
    """
    if freq > 1:
        data = data[::freq]

    # (predictor, degree) pairs; order must match the names in algos_list.
    models = ((SMA, 2), (SMA, 4), (ARModel, 2), (ARModel, 3))
    vals = [[] for _ in models]
    err = [[] for _ in models]

    thresholds = range(0, maxThreshold + 1, step)
    for thresh in thresholds:
        for sent_pct, errors, (predictor, deg) in zip(vals, err, models):
            # Every model runs on the (possibly down-sampled) `data`.  The
            # AR(2) run previously used the global `temperatures` array,
            # which ignored `freq` and any non-temperature input.
            ps, er = predictor(deg, thresh, data)
            sent_pct.append((ps / len(data)) * 100)
            errors.append(er)

    if plot:
        x = np.array(thresholds)
        ps_algo_comparison(x, vals, data_type, algos_list)
        err_algo_comparison(x, err, data_type, algos_list)
    return vals, err
def ps_algo_comparison(x, y_list, data_type, algos_list):
    """Plot the percentage of packets sent per threshold, one line per algorithm.

    Args:
        x: threshold values for the horizontal axis.
        y_list: one series of percentages per algorithm.
        data_type: label of the measured quantity, appended to the title.
        algos_list: legend names, indexed in step with ``y_list``.
    """
    traces = [
        go.Scatter(x=x, y=series, name=algos_list[idx], showlegend=True)
        for idx, series in enumerate(y_list)
    ]
    title = 'Packages Sent/Threshold ratio for '+data_type
    x_axis = dict(title='Threshold')
    y_axis = dict(title='% of sent packages')
    layout = go.Layout(title=title, xaxis=x_axis, yaxis=y_axis)
    plotly.offline.plot(go.Figure(traces, layout), filename='ps-algorithms')
def err_algo_comparison(x, errs, data_type, algos_list):
    """Plot the MSE per threshold, one line per algorithm.

    Args:
        x: threshold values for the horizontal axis.
        errs: one series of MSE values per algorithm.
        data_type: label of the measured quantity, appended to the title.
        algos_list: legend names, indexed in step with ``errs``.
    """
    curves = []
    for position, mse_series in enumerate(errs):
        curves.append(
            go.Scatter(
                x=x,
                y=mse_series,
                name=algos_list[position],
                showlegend=True,
            )
        )
    figure = go.Figure(
        curves,
        go.Layout(
            title='MSE/Threshold ratio for '+data_type,
            xaxis=dict(title='Threshold'),
            yaxis=dict(title='MSE'),
        ),
    )
    plotly.offline.plot(figure, filename='mse-algorithms')
def ps_plot_frequencies(x, algo, y_list, freq, data_type):
    """Plot sent-packet percentages of one algorithm at several sampling rates.

    Args:
        x: threshold values for the horizontal axis.
        algo: algorithm name, used in the title and the output file name.
        y_list: one series of percentages per sampling frequency.
        freq: sampling strides, indexed in step with ``y_list``; a stride of
            1 is labelled "Normal Frequency".
        data_type: label of the measured quantity, used in the title and the
            output file name.
    """
    traces = []
    for idx, series in enumerate(y_list):
        stride = freq[idx]
        if stride == 1:
            label = "Normal Frequency"
        else:
            label = f'Frequency({stride})'
        traces.append(go.Scatter(x=x, y=series, name=label, showlegend=True))

    title = 'Packages Sent/Threshold with different data frequencies for ' + data_type+' with '+algo
    layout = go.Layout(
        title=title,
        xaxis=dict(title='Threshold'),
        yaxis=dict(title='% of sent packages'),
    )
    fig = go.Figure(traces, layout)
    plotly.offline.plot(fig, filename=algo+' frequency for '+data_type)
def calc_freq(data, maxThreshold, step, algorithms_list, data_header):
    """Compare each algorithm's sent-packet percentage across sampling rates.

    Runs ``calculations`` (without plotting) at sampling strides 1, 2 and 4,
    then draws one plot per algorithm with one line per stride.

    Args:
        data: sequence of observations.
        maxThreshold: largest threshold to evaluate (inclusive upper bound).
        step: integer increment between consecutive thresholds.
        algorithms_list: algorithm display names, in the order in which
            ``calculations`` returns its per-algorithm results.
        data_header: label of the measured quantity, used in plot titles.
    """
    freq = [1, 2, 4]
    per_freq = []
    for f in freq:
        ps_algos, _ = calculations(data, maxThreshold, step, algos_list=algorithms_list,
                                   data_type=data_header, freq=f, plot=False)
        per_freq.append(ps_algos)

    # Use this function's own arguments; the previous version read the
    # module-level algs / maxThresh_temp / temp_step / columns[1] instead,
    # so it only worked for the temperature run of the driver script.
    thresholds = np.array(range(0, maxThreshold + 1, step))
    # zip(*per_freq) regroups the results from per-frequency to per-algorithm.
    for algorithm, rez in zip(algorithms_list, zip(*per_freq)):
        ps_plot_frequencies(thresholds, algorithm, rez, freq, data_header)


# getting the dataset and preprocess it
df = pd.read_csv("beach-water-quality-automated-sensors-1.csv")
df = df.loc[
    df["Beach Name"] == "Montrose Beach",
    ["Measurement Timestamp", "Water Temperature", "Wave Height"],
].copy()  # explicit copy so the column assignment below is not chained
df["Measurement Timestamp"] = pd.to_datetime(df["Measurement Timestamp"])
df = df.loc[(df["Measurement Timestamp"] >= '2014-01-01 00:00:00') & (df["Measurement Timestamp"] <= '2014-06-01 00:00:00')]
df = df.sort_values(by=["Measurement Timestamp"], ascending=True)

temperatures = df["Water Temperature"].values
wave_height = df["Wave Height"].values
columns = df.columns.tolist()

# Optional: plot the raw time series for temperature and wave height
# (disabled in the original script).
# temperature = go.Scatter(x=df["Measurement Timestamp"], y=temperatures, name='Temperature('+'\u2103'+')', showlegend=True)
# ws = go.Scatter(x=df["Measurement Timestamp"], y=wave_height, name=columns[2]+'(mph)', showlegend=True)
# data = [temperature, ws]
# layout = go.Layout(title=columns[1]+"/"+columns[2], xaxis=dict(title='Date'))
# fig = go.Figure(data=data, layout=layout)
# plotly.offline.plot(fig, filename='time-series-temperature')

maxThresh_temp = 8
maxThresh_ws = 2
temp_step = 1
ws_step = 0.25
algs = ['SMA(2)', 'SMA(4)', 'AR(2)', 'AR(3)']

calculations(temperatures, maxThresh_temp, temp_step, "Water Temperature", algs)
# columns[1] is "Water Temperature": the header must describe the data that
# is actually passed in (the call previously passed the wave-height header).
calc_freq(temperatures, maxThresh_temp, temp_step, algs, columns[1])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement