#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat May 11 17:06:39 2024

@author: annamusterfrau
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
from statsmodels.tsa.api import (kpss, adfuller,
                                 seasonal_decompose, STL)
from statsmodels.tools.eval_measures import rmspe, rmse
from sklearn.metrics import mean_absolute_percentage_error as mape
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from itertools import product
from pathlib import Path

warnings.filterwarnings('ignore')
plt.rcParams["figure.figsize"] = [12, 5]

life_file = Path('/Users/annamusterfrau/Desktop/life_expectancy_birth.csv')
milk_file = Path('/Users/annamusterfrau/Desktop/milk_production.csv')

# skipfooter is only supported by the python parser engine
life = pd.read_csv(life_file, index_col='year', parse_dates=True,
                   skipfooter=1, engine='python')
milk = pd.read_csv(milk_file, index_col='month', parse_dates=True)
fig, ax = plt.subplots(2, 1, figsize=(16, 12))
life.plot(title='Annual Life Expectancy', legend=False, ax=ax[0])
milk.plot(title='Monthly Milk Production', legend=False, ax=ax[1])
def split_data(data, test_split):
    n = len(data)
    t_idx = round(n * (1 - test_split))
    train, test = data[:t_idx], data[t_idx:]
    print(f'train: {len(train)} , test: {len(test)}')
    return train, test

test_split = 0.15
milk_train, milk_test = split_data(milk, test_split)
life_train, life_test = split_data(life, test_split)
def check_stationarity(df):
    # Augmented Dickey-Fuller: p-value < 0.05 rejects the unit-root null
    results = adfuller(df)[1:3]
    s = 'Non-Stationary'
    if results[0] < 0.05:
        s = 'Stationary'
    print(f"{s}\t p-value:{results[0]} \t lags:{results[1]}")
    return (s, results[0])
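
# Complementary check (sketch, not part of the original script): kpss is
# already imported above, and its null hypothesis is stationarity (the
# reverse of adfuller's), so agreement between both tests is a stronger signal.
def check_stationarity_kpss(df):
    # KPSS: p-value < 0.05 rejects the stationarity null -> non-stationary
    stat, p_value, lags, _ = kpss(df)
    s = 'Stationary' if p_value >= 0.05 else 'Non-Stationary'
    print(f"KPSS: {s}\t p-value:{p_value} \t lags:{lags}")
    return (s, p_value)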
def get_best_model(score, c='AIC'):
    initial_score = score[0][c]
    best_model = 0
    for k, v in score.items():
        if v[c] < initial_score:
            initial_score = v[c]
            best_model = k
    print(f'Best model: {best_model} with lowest {c} score: {initial_score}')
    return score[best_model]['model']
def plot_forecast(model, start, train, test):
    forecast = pd.DataFrame(model.forecast(test.shape[0]), index=test.index)
    ax = train.loc[start:].plot(style='--')
    test.plot(ax=ax)
    forecast.plot(ax=ax, style='-.')
    ax.legend(['orig_train', 'orig_test', 'forecast'])
    plt.show()
def combinator(items):
    return list(product(*items))
life_diff = life.diff().dropna()
fig, ax = plt.subplots(2, 1, figsize=(12, 8))
plot_acf(life_diff, zero=False, ax=ax[0])
plot_pacf(life_diff, zero=False, ax=ax[1])
plt.show()
milk_diff_12 = milk.diff(12).dropna()
check_stationarity(milk_diff_12)
milk_diff_12_1 = milk.diff(12).diff(1).dropna()
check_stationarity(milk_diff_12_1)
fig, ax = plt.subplots(1, 2)
plot_acf(milk_diff_12_1, zero=False, ax=ax[0], lags=36)
plot_pacf(milk_diff_12_1, zero=False, ax=ax[1], lags=36)
plt.show()
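
# STL is imported at the top but never used; a minimal sketch of a robust STL
# decomposition of the monthly milk series (period=12 and the 'production'
# column name are assumptions taken from the rest of the script):
stl_result = STL(milk['production'], period=12, robust=True).fit()
stl_result.plot()
plt.show()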
from statsmodels.tsa.statespace.sarimax import SARIMAX

model = SARIMAX(milk, order=(0, 1, 1),
                seasonal_order=(0, 1, 1, 12)).fit(disp=False)
plot_acf(model.resid[1:], zero=False)
plt.show()
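
# Optional residual diagnostic (sketch): the Ljung-Box test applied to the
# ARIMA residuals further below can be used here as well; large p-values
# suggest no autocorrelation is left in the SARIMAX residuals.
from statsmodels.stats.diagnostic import acorr_ljungbox
print(acorr_ljungbox(model.resid[1:], lags=24, return_df=True))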
from statsmodels.tsa.api import ExponentialSmoothing

trend = ['add', 'mul']
damped = [True, False]
life_ex_comb = combinator([trend, damped])
print(life_ex_comb)
# [('add', True), ('add', False), ('mul', True), ('mul', False)]
train = life_train.values.ravel()
y = life_test.values.ravel()
score = {}
for i, (t, dp) in enumerate(life_ex_comb):
    exp = ExponentialSmoothing(train,
                               trend=t,
                               damped_trend=dp,
                               seasonal=None)
    model = exp.fit(use_brute=True, optimized=True)
    y_hat = model.forecast(len(y))
    score[i] = {'trend': t,
                'damped': dp,
                'AIC': model.aic,
                'BIC': model.bic,
                'AICc': model.aicc,
                'RMSPE': rmspe(y, y_hat),
                'RMSE': rmse(y, y_hat),
                'MAPE': mape(y, y_hat),
                'model': model}

life_best_model = get_best_model(score, 'AIC')
print(life_best_model.summary())
plot_forecast(life_best_model, '2000', life_train, life_test)
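
# Quick side-by-side comparison (sketch): view the collected metrics as a
# table, leaving out the fitted model objects themselves.
print(pd.DataFrame(score).T.drop(columns='model'))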
trend, damped = ['add', 'mul'], [True, False]
seasonal, periods = ['add', 'mul'], [4, 6, 12]
milk_exp_comb = combinator([trend, damped, seasonal, periods])

train = milk_train.values.ravel()
y = milk_test.values.ravel()
milk_model_scores = {}
for i, (t, dp, s, sp) in enumerate(milk_exp_comb):
    exp = ExponentialSmoothing(train,
                               trend=t,
                               damped_trend=dp,
                               seasonal=s,
                               seasonal_periods=sp)
    model = exp.fit(use_brute=True, optimized=True)
    y_hat = model.forecast(len(y))
    milk_model_scores[i] = {'trend': t,
                            'damped': dp,
                            'AIC': model.aic,
                            'BIC': model.bic,
                            'AICc': model.aicc,
                            'RMSPE': rmspe(y, y_hat),
                            'RMSE': rmse(y, y_hat),
                            'MAPE': mape(y, y_hat),
                            'model': model}

milk_model = get_best_model(milk_model_scores)
print(milk_model.summary())
plot_forecast(milk_model, '1969', milk_train, milk_test)
# darts' ExponentialSmoothing wrapper (this import shadows the statsmodels
# class used above)
from darts.models import ExponentialSmoothing
from darts import TimeSeries

model = ExponentialSmoothing(seasonal_periods=12)
ts = TimeSeries.from_dataframe(milk.reset_index(), time_col='month',
                               value_cols='production', freq='MS')
train, test = split_data(ts, 0.15)
model.fit(train)
forecast = model.predict(len(test), num_samples=100)
train.plot()
forecast.plot(label='forecast', low_quantile=0.05, high_quantile=0.95)
print(model.model.summary())
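
# Visual check (sketch): overlay the held-out darts series on the same axes
# as the probabilistic forecast drawn above.
test.plot(label='actual')
plt.show()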
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.stats.diagnostic import acorr_ljungbox

decomposed = seasonal_decompose(life)
decomposed.plot()
plt.show()
life_df1 = life.diff().dropna()
check_stationarity(life_df1)
life_df1.plot()
fig, ax = plt.subplots(1, 2)
plot_acf(life_df1, ax=ax[0])
plot_pacf(life_df1, ax=ax[1])
plt.show()
model = ARIMA(life_train, order=(0, 1, 1))
results = model.fit()
print(results.summary())

# Count how many of the first 25 Ljung-Box p-values fall below 0.05
print((acorr_ljungbox(results.resid,
                      lags=25,
                      return_df=True) < 0.05)['lb_pvalue'].sum())

plot_acf(results.resid, zero=False)
plt.show()
results.plot_diagnostics()
plt.show()
plot_forecast(results, '2000', life_train, life_test)
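
# Out-of-sample accuracy for the ARIMA fit (sketch), reusing the metric
# helpers imported at the top of the script:
y_true = life_test.values.ravel()
y_pred = np.asarray(results.forecast(life_test.shape[0])).ravel()
print('RMSE :', rmse(y_true, y_pred))
print('RMSPE:', rmspe(y_true, y_pred))
print('MAPE :', mape(y_true, y_pred))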
print(life_test.head())
print(life_train.head())