Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #load dataset
- data <- structure(list(Year = c(1997, 1997, 1997, 1997, 1997, 1998, 1998,
- 1998, 1998, 1998, 1999, 1999, 1999, 1999, 1999, 2000, 2000, 2000,
- 2000, 2000, 2001, 2001, 2001, 2001, 2001, 2002, 2002, 2002, 2002,
- 2002, 2003, 2003, 2003, 2003, 2003, 2004, 2004, 2004, 2004, 2004,
- 2005, 2005, 2005, 2005, 2005, 2006, 2006, 2006, 2006, 2006, 2007,
- 2007, 2007, 2007, 2007, 2008, 2008, 2008, 2008, 2008, 2009, 2009,
- 2009, 2009, 2009, 2010, 2010, 2010, 2010, 2010, 2011, 2011, 2011,
- 2011, 2011, 2012, 2012, 2012, 2012, 2012, 2013, 2013, 2013, 2013,
- 2013, 2014, 2014, 2014, 2014, 2014, 2015, 2015, 2015, 2015, 2015,
- 2016, 2016, 2016, 2016, 2016, 2017, 2017, 2017, 2017, 2017, 2018,
- 2018, 2018, 2018, 2018, 2019, 2019), Term_no = c(1, 2, 3, 4,
- 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5,
- 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1,
- 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2,
- 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3,
- 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4,
- 5, 1, 2), Terms = c("19971", "19972", "19973", "19974", "19975",
- "19981", "19982", "19983", "19984", "19985", "19991", "19992",
- "19993", "19994", "19995", "20001", "20002", "20003", "20004",
- "20005", "20011", "20012", "20013", "20014", "20015", "20021",
- "20022", "20023", "20024", "20025", "20031", "20032", "20033",
- "20034", "20035", "20041", "20042", "20043", "20044", "20045",
- "20051", "20052", "20053", "20054", "20055", "20061", "20062",
- "20063", "20064", "20065", "20071", "20072", "20073", "20074",
- "20075", "20081", "20082", "20083", "20084", "20085", "20091",
- "20092", "20093", "20094", "20095", "20101", "20102", "20103",
- "20104", "20105", "20111", "20112", "20113", "20114", "20115",
- "20121", "20122", "20123", "20124", "20125", "20131", "20132",
- "20133", "20134", "20135", "20141", "20142", "20143", "20144",
- "20145", "20151", "20152", "20153", "20154", "20155", "20161",
- "20162", "20163", "20164", "20165", "20171", "20172", "20173",
- "20174", "20175", "20181", "20182", "20183", "20184", "20185",
- "20191", "20192"), Enrollment = c(100, 122, 75, 102, 100, 78,
- 69, 54, 66, 72, 74, 66, 38, 55, 56, 62, 71, 56, 70, 53, 50, 56,
- 46, 54, 52, 45, 42, 34, 40, 41, 40, 40, 40, 40, 44, 36, 35, 29,
- 29, 36, 36, 20, 28, 22, 32, 40, 48, 53, 52, 62, 60, 52, 43, 37,
- 47, 54, 62, 69, 108, 98, 80, 84, 61, 87, 86, 59, 58, 67, 75,
- 98, 77, 100, 108, 103, 116, 96, 109, 112, 91, 107, 113, 77, 81,
- 122, 135, 135, 147, 111, 171, 169, 174, 167, 92, 92, 145, 102,
- 91, 53, 101, 100, 78, 63, 38, 42, 39, 42, 50, 43, 47, 28, 30,
- 53), var1 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), var2 = c(0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1)), row.names = c(NA, -112L), class = c("tbl_df", "tbl",
- "data.frame"))
- #set timeseries 1 (full)
- enroll = ts(data=data$Enrollment, frequency = 5,
- start=c(1997,1), end=c(2019,2))
- #set testing set (partial)
- enroll_partial = ts(data=data$Enrollment, frequency = 5,
- start=c(1997,1), end=c(2017,2))
- summary(enroll) #summary stats - note mean: 71.39
- plot(enroll) #visualize ts
- decomposed <- decompose(enroll, type="mult") # use type = "additive" for additive components
- plot (decomposed) # see plot below
- ggseasonplot(enroll)
- #timeseries diagnostics
- plot(diff(enroll)) #attempt to detrend TS - looks good
- acf(diff(enroll))
- pacf(diff(enroll))
- #load regressors
- x_var1 <- ts(data=data$var1, frequency = 5,
- start=c(1997,1), end=c(2019,2))
- x_var2 <- ts(data=data$var2, frequency = 5,
- start=c(1997,1), end=c(2019,2))
- x <- ts.union(x_var1, x_var2)
- x_var1_par <- ts(data=data$var1, frequency = 5,
- start=c(1997,1), end=c(2017,2))
- x_var2_par <- ts(data=data$var2, frequency = 5,
- start=c(1997,1), end=c(2017,2))
- x_par <- ts.union(x_var1_par, x_var2_par)
- #ARIMA
- model <- auto.arima(enroll, xreg=x)
- model
- coeftest(model)
- checkresiduals(model)
- tsdisplay(residuals(model), lag.max=45, main='(1,1,1)(1,0,0)[5] Model Residuals')
- #testing model
- model_par <-arima((enroll_partial), c(1, 1, 1),seasonal = list(order = c(1, 0, 0), period = 5), xreg=x_par)
- model_par
- #TEST PREDICTIONS
- fcast_par <- forecast(model_par, h=10) #error
- fcast_par <- forecast(model_par, h=10, xreg=x_par) #error
- fcast_par <- forecast(model_par, h=10, xreg=forecast(x_par,h=10)) #error
- model_x_par <- arima((x_par), c(1, 1, 1),seasonal = list(order = c(1, 0, 0), period = 5)) #error
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement