SHARE
TWEET

Untitled

a guest Apr 23rd, 2019 68 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. library(tseries)
  2. library(forecast)
  3. library(cluster)
  4. ### In case it is your first time in R, install them as: ###
  5. #install.packages('tseries')
  6. #install.packages('forecast')
  7. #install.packages('cluster')
  8.  
  9. path <- '/Users/heidaoskpetursdottir/Documents/DecisionMaking/'
  10.  
  11. PassengersData <- na.omit(read.csv(paste0(path,'passengers.csv'),sep=','))$Passengers.thousands.
  12.  
  13. DataSet <- PassengersData
  14.  
  15. # Use the last year as test set (validation set) but remember that you need to redict for the following year, so you wont have the real data to compare.
  16.  
  17. smp_size <- length(DataSet)-12
  18. train_ind <- c(1:smp_size)
  19. train <- DataSet[train_ind]
  20. test <- DataSet[-train_ind]
  21.  
  22. plot(train,col='blue',pch=20,main = '',xlab = 'Time [months]',ylab='# thousands of passengers',xlim=c(1,length(train)),ylim = c(min(DataSet),max(DataSet)))
  23. lines(train,col='blue',lwd=1)
  24.  
  25. #Data for number of passengers during the past 11 years
  26.  
  27. plot(train,col='blue',pch=20,main = '',xlab = 'Time [months]',ylab='# thousands of passengers',xlim=c(1,length(DataSet)),ylim = c(min(DataSet),max(DataSet)))
  28. lines(train,col='blue',lwd=1)
  29. points(c((length(train)+1):(length(DataSet))),test,pch=20,col='black')
  30. lines(c((length(train)+1):(length(DataSet))),test,lwd=1,col='black')
  31. abline(v=length(train),col='grey')
  32. legend('topleft',legend=c('Training Set','Test Set'),col=c('blue','black'),lwd=c(2,2))
  33.  
  34. ##############################
  35. ### TIME SERIES ANLYSIS ######
  36. ##############################
  37.  
  38. # Plot ACF and PACF to check stationarity and correlations:
  39. acf(train,main='ACF for Data',length(train)/2)
  40. pacf(train,main='PACF for Data',length(train)/2)
  41.  
  42. # Apply transformations if required:
  43. #log(train)
  44. #diff(train,1)
  45. #diff(log(train),1)
  46. #diff(log(train),S)
  47.  
  48. ###########################################################################################
  49. # choose model Parameters
  50. # Redifine train set
  51. train2 <- diff(diff(log(train),12),1) # differentiate once for the mean and once for the seasonality.
  52. # log to remove the variance. We want all our data to be random noice. We are removing all structure we have in our data to get the random walk to see the correlation between the lacks.
  53.  
  54. acf(train2,main='ACF for Data after differentiation',length(train)/2)
  55. # look at the signficant lag
  56. pacf(train2,main='PACF for Data after differentiation',length(train)/2)
  57.  
  58. plot(train2,col="blue",pch=20,main = "",xlab = "Time [months]",ylab="# thousands of passengers",xlim=c(1,length(DataSet)),ylim = c(min(diff(DataSet)),max(diff(DataSet))))
  59. lines(train2,col="blue",lwd=1)
  60.  
  61. ###########################################################################################
  62. # Create ARIMA order=(p,d,q) or SARIMA order=(p,d,q)x(P,D,Q)S:
  63.   p = 1 # AR(p)
  64.   d = 1 # differentiate order
  65.   q = 1 # MA(q)
  66.   # Seasonal
  67.   S = 12# Seasonality  
  68.   P = 1 # AR(P) Seasonal
  69.   D = 1 # differentiate order Seasonal
  70.   Q = 1 # MA(Q) Seasonal if there is a spike in 12 then 1, if in 24 then 2...
  71. ###########################################################################################
  72. #  train2 %>%
  73. #    Arima(order=c(p,d,q), seasonal=c(P,D,Q)) %>%
  74. #    residuals() %>% ggtsdisplay()
  75. #  
  76. #  Arima(train2, order=c(p,d,q), seasonal=c(P,D,Q))
  77.  
  78. TS_Model <- arima(log(train),order = c(p,d,q), seasonal = list(order = c(P,D,Q), period = S))
  79. # Check residuals of the model:
  80. hist(TS_Model$residuals,prob = T,breaks = 20,col="deepskyblue1",main="Histogram residuals")
  81. curve(dnorm(x, mean(TS_Model$residuals), sd(TS_Model$residuals)), add=TRUE, col="red", lwd=2)
  82.  
  83. qqnorm(TS_Model$residuals,main="Q-Q plot residuals")
  84. qqline(TS_Model$residuals)
  85.  
  86. plot(c(fitted(TS_Model)),c(TS_Model$residuals),pch=20,col="red",xlab = "Fitted Values",ylab="Residuals",main="Residual vs Fitted residuals")
  87. abline(h=0)
  88.  
  89. acf(TS_Model$residuals,length(train)/2,main="ACF for residuals")
  90.  
  91. ##############################################
  92. ### LETS MAKE PREDICTIONS AND PLOT THEM ######
  93. ##############################################
  94.  
  95. # Predict n.ahead steps:
  96. Predictions <- predict(TS_Model, n.ahead = length(test)+12)
  97. # Plot predictions:
  98. plot(train,type = 'l',col='blue',lwd=2,main = 'Fill Data',xlab = 'Time',ylab='Data',xlim=c(1,length(DataSet)),ylim = c(0,max(DataSet)))
  99. lines(c((length(train)+1):(length(DataSet)+12)),test,lwd=2,col='gray60')
  100. lines(c((length(train)+1):(length(DataSet)+12)),exp(Predictions$pred),lwd=1,col='red')
  101. lines(c((length(train)+1):(length(DataSet)+12)),exp(Predictions$pred+Predictions$se),lwd=1,lty=2,col='red')
  102. lines(c((length(train)+1):(length(DataSet)+12)),exp(Predictions$pred-Predictions$se),lwd=1,lty=2,col='red')
  103. abline(v=length(train),col='grey')
  104. legend('topleft',legend=c('Past observations','Real Observations','Mean Forecast','95% Conf. Intervals Forecast'),col=c('blue','gray60','red','red'),lwd=c(2,2,1,1),lty=c(1,1,1,2))
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top