Advertisement
Guest User

Untitled

a guest
May 11th, 2024
40
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.34 KB | None | 0 0
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Sat May 11 17:06:39 2024
  5.  
  6. @author: annamusterfrau
  7. """
  8.  
  9. import pandas as pd
  10. import numpy as np
  11. import matplotlib.pyplot as plt
  12. import warnings
  13. from statsmodels.tsa.api import (kpss, adfuller,
  14. seasonal_decompose, STL)
  15. from statsmodels.tools.eval_measures import rmspe, rmse
  16. from sklearn.metrics import mean_absolute_percentage_error as mape
  17. from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
  18. from itertools import product
  19. from pathlib import Path
  20. warnings.filterwarnings('ignore')
  21. plt.rcParams["figure.figsize"] = [12, 5]
  22. life_file = Path('/Users/annamusterfrau/Desktop/life_expectancy_birth.csv')
  23. milk_file = Path('/Users/annamusterfrau/Desktop/milk_production.csv')
  24. life = pd.read_csv(life_file,index_col='year',parse_dates=True,skipfooter=1)
  25. milk = pd.read_csv(milk_file,index_col='month', parse_dates=True)
  26. fig, ax = plt.subplots(2, 1, figsize=(16, 12))
  27. life.plot(title='Annual Life Expectancy',
  28. legend=False, ax=ax[0])
  29. milk.plot(title='Monthly Milk Production',
  30. legend=False, ax=ax[1])
  31. def split_data(data, test_split):
  32. l = len(data)
  33. t_idx = round(l*(1-test_split))
  34. train, test = data[ : t_idx], data[t_idx : ]
  35. print(f'train: {len(train)} , test: {len(test)}')
  36. return train, test
  37. test_split = 0.15
  38. milk_train, milk_test = split_data(milk, test_split)
  39. life_train, life_test = split_data(life, test_split)
  40. def check_stationarity(df):
  41. results = adfuller(df)[1:3]
  42. s = 'Non-Stationary'
  43. if results[0] < 0.05:
  44. s = 'Stationary'
  45. print(f"'{s}\t p-value:{results[0]} \t lags:{results[1]}")
  46. return (s, results[0])
  47. def get_best_model(score, c='AIC'):
  48. initial_score = score[0][c]
  49. best_model = 0
  50. for k,v in score.items():
  51. if v[c] < initial_score:
  52. initial_score = v[c]
  53. best_model = k
  54. print(f'Best model: {best_model} with lowest {c} score: {initial_score}')
  55. return score[best_model]['model']
  56. def plot_forecast(model, start, train, test):
  57. forecast = pd.DataFrame(model.forecast(test.shape[0]), index=test.index)
  58. ax = train.loc[start:].plot(style='--')
  59. test.plot(ax=ax)
  60. forecast.plot(ax=ax, style = '-.')
  61. ax.legend(['orig_train', 'orig_test', 'forecast'])
  62. plt.show()
  63. def combinator(items):
  64. combo = [i for i in product(*items)]
  65. return combo
  66. life_diff = life.diff().dropna()
  67. fig, ax = plt.subplots(2,1, figsize=(12,8))
  68. plot_acf(life_diff, zero=False, ax=ax[0])
  69. plot_pacf(life_diff, zero=False, ax=ax[1])
  70. plt.show()
  71. milk_diff_12 = milk.diff(12).dropna()
  72. check_stationarity(milk_diff_12)
  73. milk_diff_12_1 = milk.diff(12).diff(1).dropna()
  74. check_stationarity(milk_diff_12_1)
  75. fig, ax = plt.subplots(1,2)
  76. plot_acf(milk_diff_12_1, zero=False, ax=ax[0], lags=36)
  77. plot_pacf(milk_diff_12_1, zero=False, ax=ax[1], lags=36)
  78. plt.show()
  79. from statsmodels.tsa.statespace.sarimax import SARIMAX
  80. model = SARIMAX(milk, order=(0,1,1),
  81. seasonal_order=(0,1,1, 12)).fit(disp=False)
  82. plot_acf(model.resid[1:], zero=False)
  83. plt.show()
  84. from statsmodels.tsa.api import ExponentialSmoothing
  85. trend = ['add', 'mul']
  86. damped = [True, False]
  87. life_ex_comb = combinator([trend, damped])
  88. life_ex_comb
  89. [('add', True), ('add', False), ('mul', True), ('mul',
  90. False)]
  91. train = life_train.values.ravel()
  92. y = life_test.values.ravel()
  93. score = {}
  94. for i, (t, dp) in enumerate(life_ex_comb):
  95. exp = ExponentialSmoothing(train,
  96. trend=t,
  97. damped_trend=dp,
  98. seasonal=None)
  99. model = exp.fit(use_brute=True, optimized=True)
  100. y_hat = model.forecast(len(y))
  101. score[i] = {'trend':t,
  102. 'damped':dp,
  103. 'AIC':model.aic,
  104. 'BIC':model.bic,
  105. 'AICc':model.aicc,
  106. 'RMSPE': rmspe(y, y_hat),
  107. 'RMSE' : rmse(y, y_hat),
  108. 'MAPE' : mape(y, y_hat),
  109. 'model': model}
  110. life_best_model = get_best_model(score, 'AIC')
  111. life_best_model.summary()
  112. plot_forecast(life_best_model, '2000', life_train, life_test)
  113. trend , damped= ['add', 'mul'], [True, False]
  114. seasonal, periods = ['add' , 'mul'], [4, 6, 12]
  115. milk_exp_comb = combinator([trend, damped, seasonal, periods])
  116. train = milk_train.values.ravel()
  117. y = milk_test.values.ravel()
  118. milk_model_scores = {}
  119. for i, (t, dp, s, sp) in enumerate(milk_exp_comb):
  120. exp = ExponentialSmoothing(train,
  121. trend=t,
  122. damped_trend=dp,
  123. seasonal=s,
  124. seasonal_periods=sp)
  125. model = exp.fit(use_brute=True, optimized=True)
  126. y_hat = model.forecast(len(y))
  127. milk_model_scores[i] = {'trend':t,
  128. 'damped':dp,
  129. 'AIC':model.aic,
  130. 'BIC':model.bic,
  131. 'AICc': model.aicc,
  132. 'RMSPE': rmspe(y, y_hat),
  133. 'RMSE' : rmse(y, y_hat),
  134. 'MAPE' : mape(y, y_hat),
  135. 'model': model}
  136. milk_model = get_best_model(milk_model_scores)
  137. milk_model.summary()
  138. plot_forecast(milk_model, '1969', milk_train, milk_test)
  139. from darts.models import ExponentialSmoothing
  140. from darts import TimeSeries
  141. model = ExponentialSmoothing(seasonal_periods=12)
  142. ts = TimeSeries.from_dataframe(milk.reset_index(), time_col='month', value_cols='production', freq='MS')
  143. train, test = split_data(ts, 0.15)
  144. model.fit(train)
  145. forecast = model.predict(len(test), num_samples=100)
  146. train.plot()
  147. forecast.plot(label='forecast', low_quantile=0.05, high_quantile=0.95)
  148. model.model.summary()
  149. from statsmodels.tsa.arima.model import ARIMA
  150. from statsmodels.stats.diagnostic import acorr_ljungbox
  151. decomposed = seasonal_decompose(life)
  152. decomposed.plot();
  153. plt.show()
  154. life_df1 = life.diff().dropna()
  155. check_stationarity(life_df1)
  156. life_df1.plot()
  157. fig, ax = plt.subplots(1,2)
  158. plot_acf(life_df1, ax=ax[0])
  159. plot_pacf(life_df1, ax=ax[1])
  160. plt.show()
  161. model = ARIMA(life_train, order=(0,1,1))
  162. results = model.fit()
  163. results.summary()
  164. (acorr_ljungbox(results.resid,
  165. lags=25,
  166. return_df=True) < 0.05)['lb_pvalue'].sum()
  167. plot_acf(results.resid, zero=False)
  168. plt.show()
  169. results.plot_diagnostics()
  170. plt.show()
  171. plot_forecast(results, '2000', life_train,life_test)
  172. print(life_test.head())
  173. print(life_train.head())
  174.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement