Advertisement
Guest User

Untitled

a guest
Mar 24th, 2019
80
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.27 KB | None | 0 0
  1. import numpy as np
  2. import pandas as pd
  3. import plotly.offline
  4. import statsmodels
  5. from statsmodels.tsa.arima_model import ARMA
  6. from statsmodels.tsa.ar_model import AR
  7. from sklearn.metrics import mean_squared_error as mse
  8. import plotly.plotly as py
  9. import plotly.graph_objs as go
  10. import warnings
  11.  
  12.  
  13. # simple moving average
  14. def SMA(deg, threshold, data) -> tuple:
  15. packets_sent = deg
  16. data_copy = data[:deg]
  17. warnings.filterwarnings('ignore')
  18. for i in range(deg, len(data)):
  19. model = ARMA(data_copy[i - deg:], order=(0, 0))
  20. model = model.fit(disp=False)
  21. pred = model.predict(0, 0)
  22. if abs(data[i] - pred) >= threshold:
  23. data_copy = np.append(data_copy, [data[i]])
  24. packets_sent += 1
  25. else:
  26. data_copy = np.append(data_copy, [pred])
  27. return packets_sent, mse(data, data_copy)
  28.  
  29.  
  30. # auto regressive model
  31. def ARModel(deg, threshold, data) -> tuple:
  32. packets_sent = deg + 4
  33. data_copy = data[:deg + 4]
  34. for i in range(deg + 4, len(data)):
  35. model = AR(data_copy)
  36. model = model.fit(disp=False, maxlag=deg)
  37. pred = model.predict(i, i)
  38. if abs(data[i] - pred) >= threshold:
  39. data_copy = np.append(data_copy, [data[i]])
  40. packets_sent += 1
  41. else:
  42. data_copy = np.append(data_copy, pred)
  43. return packets_sent, mse(data, data_copy)
  44.  
  45.  
  46. # Calculate packets sent and MSE with different algorithms at different sampling frequencies.
  47. # E.g. frequency = 4 means we take every 4th data point (slower sampling).
  48. def calculations(data, maxThreshold, step, data_type, algos_list, plot=True, freq=1):
  49. if freq > 1:
  50. data = data[::freq]
  51. packets_sent_thresh2 = []
  52. packets_sent_thresh4 = []
  53. packets_sent_ar2 = []
  54. packets_sent_ar3 = []
  55. mse_err2 = []
  56. mse_err4 = []
  57. mse_err_ar2 = []
  58. mse_err_ar3 = []
  59. for thresh in range(0, maxThreshold + 1, step):
  60. ps, er = SMA(2, thresh, data)
  61. packets_sent_thresh2.append((ps / len(data)) * 100)
  62. mse_err2.append(er)
  63. ps, er = SMA(4, thresh, data)
  64. packets_sent_thresh4.append((ps / len(data)) * 100)
  65. mse_err4.append(er)
  66. ps, er = ARModel(2, thresh, temperatures)
  67. packets_sent_ar2.append((ps / len(data)) * 100)
  68. mse_err_ar2.append(er)
  69. ps, er = ARModel(3, thresh, data)
  70. packets_sent_ar3.append((ps / len(data)) * 100)
  71. mse_err_ar3.append(er)
  72.  
  73. vals = [packets_sent_thresh2, packets_sent_thresh4, packets_sent_ar2, packets_sent_ar3]
  74. err = [mse_err2, mse_err4, mse_err_ar2, mse_err_ar3]
  75.  
  76. if plot:
  77. x = np.array(range(0, maxThreshold + 1, step))
  78. ps_algo_comparison(x, vals, data_type, algos_list)
  79. err_algo_comparison(x, err, data_type, algos_list)
  80.  
  81. return vals, err
  82.  
  83.  
  84. # compare packages sent percentage over different algorithms
  85. def ps_algo_comparison(x, y_list, data_type, algos_list):
  86. data = []
  87. for i in range(len(y_list)):
  88. obj = go.Scatter(x=x, y=y_list[i], name=algos_list[i], showlegend=True)
  89. data.append(obj)
  90. layout = go.Layout(title='Packages Sent/Threshold ratio for '+data_type, xaxis=dict(title='Threshold'), yaxis=dict(title='% of sent packages'))
  91. fig = go.Figure(data, layout)
  92. plotly.offline.plot(fig, filename='ps-algorithms')
  93.  
  94.  
  95. # compare MSE over different algorithms
  96. def err_algo_comparison(x, errs, data_type, algos_list):
  97. data = []
  98. for i in range(len(errs)):
  99. obj = go.Scatter(x=x, y=errs[i], name=algos_list[i], showlegend=True)
  100. data.append(obj)
  101. layout = go.Layout(title='MSE/Threshold ratio for '+data_type, xaxis=dict(title='Threshold'), yaxis=dict(title='MSE'))
  102. fig = go.Figure(data, layout)
  103. plotly.offline.plot(fig, filename='mse-algorithms')
  104.  
  105.  
  106. # compare packages sent percentage over data sampled at different frequencies for certain algorithm
  107. def ps_plot_frequencies(x, algo, y_list, freq, data_type):
  108. data = []
  109. for i, y in enumerate(y_list):
  110. obj = go.Scatter(x=x, y=y, name="Normal Frequency" if freq[i] == 1 else f'Frequency({freq[i]})', showlegend=True)
  111. data.append(obj)
  112. layout = go.Layout(title='Packages Sent/Threshold with different data frequencies for ' + data_type+' with '+algo, xaxis=dict(title='Threshold'),
  113. yaxis=dict(title='% of sent packages'))
  114. fig = go.Figure(data, layout)
  115. plotly.offline.plot(fig, filename=algo+' frequency for '+data_type)
  116.  
  117.  
  118. def calc_freq(data, maxThreshold, step, algorithms_list, data_header):
  119. tt = []
  120. freq = [1, 2, 4]
  121. for f in freq:
  122. ps_algos, err_algos = calculations(data, maxThreshold, step, algos_list=algorithms_list, data_type=data_header, freq=f, plot=False)
  123. tt.append(ps_algos)
  124.  
  125. for algorithm, rez in zip(algs, zip(*tt)):
  126. ps_plot_frequencies(np.array(range(0, maxThresh_temp + 1, temp_step)), algorithm, rez, freq, columns[1])
  127.  
  128.  
  129. # getting the dataset and preprocess it
  130. df = pd.read_csv("beach-water-quality-automated-sensors-1.csv")
  131. df = df.loc[df["Beach Name"] == "Montrose Beach"]
  132. df = df[["Measurement Timestamp", "Water Temperature", "Wave Height"]]
  133. df["Measurement Timestamp"] = pd.to_datetime(df["Measurement Timestamp"])
  134. df = df.loc[(df["Measurement Timestamp"] >= '2014-01-01 00:00:00') & (df["Measurement Timestamp"] <= '2014-06-01 00:00:00')]
  135. df.sort_values(by=["Measurement Timestamp"], inplace=True, ascending=True)
  136. temperatures = df["Water Temperature"].values
  137. wave_height = df["Wave Height"].values
  138. columns = df.columns.tolist()
  139. """
  140. # plot the timeseries for temperature and wind speed
  141. temperature = go.Scatter(x=df["Measurement Timestamp"], y=temperatures, name='Temperature('+'\u2103'+')', showlegend=True)
  142. ws = go.Scatter(x=df["Measurement Timestamp"], y=wave_height, name=columns[2]+'(mph)', showlegend=True)
  143. data = [temperature, ws]
  144. layout = go.Layout(title=columns[1]+"/"+columns[2], xaxis=dict(title='Date'))
  145. fig = go.Figure(data=data, layout=layout)
  146. plotly.offline.plot(fig, filename='time-series-temperature')
  147. """
  148. maxThresh_temp = 8
  149. maxThresh_ws = 2
  150. temp_step = 1
  151. ws_step = 0.25
  152.  
  153. algs = ['SMA(2)', 'SMA(4)', 'AR(2)', 'AR(3)']
  154.  
  155. calculations(temperatures,maxThresh_temp,temp_step,"Water Temperature",algs)
  156.  
  157. calc_freq(temperatures,maxThresh_temp,temp_step,algs,columns[2])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement