SHARE
TWEET

Untitled

a guest Sep 9th, 2019 102 in 327 days
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. get_statistics2 <- function(ts_df, indep, dep, h=1, start=1872, end=2018, est_periods_OOS = 20) {
  2.  
  3.   #### IS ANALYSIS
  4.  
  5.   #1. Historical mean model
  6.   avg   <- mean(window(ts_df, start, end)[, dep], na.rm=TRUE)
  7.   IS_error_N <- (window(ts_df, start, end)[, dep] - avg)
  8.  
  9.   #2. OLS model
  10.   reg <- dyn$lm(eval(parse(text=dep)) ~ lag(eval(parse(text=indep)), -1), data=window(ts_df, start, end))
  11.   IS_error_A <- reg$residuals
  12.   ###
  13.   #3. RF
  14.   rf = dyn$randomForest(eval(parse(text=dep)) ~ lag(eval(parse(text=indep)), -1),
  15.                         data=window(ts_df, start, end))
  16.  
  17.   ####OOS ANALYSIS
  18.   OOS_error_N <- numeric(end - start - est_periods_OOS)
  19.   OOS_error_A <- numeric(end - start - est_periods_OOS)
  20.   #Only use information that is available up to the time at which the forecast is made
  21.   j <- 0
  22.   for (i in (start + est_periods_OOS):(end-1)) {
  23.     j <- j + 1
  24.     #Get the actual ERP that you want to predict
  25.     actual_ERP <- as.numeric(window(ts_df, i+1, i+1)[, dep])
  26.    
  27.     #1. Historical mean model
  28.     OOS_error_N[j] <- actual_ERP - mean(window(ts_df, start, i)[, dep], na.rm=TRUE)
  29.    
  30.     #2. OLS model
  31.     reg_OOS <- dyn$lm(eval(parse(text=dep)) ~ lag(eval(parse(text=indep)), -1),
  32.                       data=window(ts_df, start, i))
  33.     #Compute_error
  34.     df <- data.frame(x=as.numeric(window(ts_df, i, i)[, indep]))
  35.     names(df) <- indep
  36.     pred_ERP   <- predict.lm(reg_OOS, newdata=df)
  37.     OOS_error_A[j] <-  pred_ERP - actual_ERP
  38.    
  39.   }
  40.  
  41.  
  42.   #Compute statistics
  43.   MSE_N <- mean(OOS_error_N^2)
  44.   MSE_A <- mean(OOS_error_A^2)
  45.   T <- length(!is.na(ts_df[, dep]))
  46.   OOS_R2  <- 1 - MSE_A/MSE_N
  47.   #Is the -1 enough (maybe -2 needed because of lag)?
  48.   OOS_oR2 <- OOS_R2 - (1-OOS_R2)*(reg$df.residual)/(T - 1)
  49.   dRMSE <- sqrt(MSE_N) - sqrt(MSE_A)
  50.   ##
  51.  
  52.   #### CREATE PLOT
  53.   IS  <- cumsum(IS_error_N[2:length(IS_error_N)]^2)-cumsum(IS_error_A^2)
  54.   OOS <- cumsum(OOS_error_N^2)-cumsum(OOS_error_A^2)
  55.   df  <- data.frame(x=seq.int(from=start + 1 + est_periods_OOS, to=end),
  56.                     IS=IS[(1 + est_periods_OOS):length(IS)],
  57.                     OOS=OOS) #Because you lose one observation due to the lag
  58.   #Shift IS errors vertically, so that the IS line begins
  59.   # at zero on the date of first OOS prediction. (see Goyal/Welch (2008, p. 1465))
  60.   df$IS <- df$IS - df$IS[1]
  61.   df  <- melt(df, id.var="x")
  62.   plotGG <- ggplot(df) +
  63.     geom_line(aes(x=x, y=value,color=variable)) +
  64.     geom_rect(data=data.frame(),#Needed by ggplot2, otherwise not transparent
  65.               aes(xmin=1973, xmax=1975,ymin=-0.2,ymax=0.2),
  66.               fill='red',
  67.               alpha=0.1) +
  68.     scale_y_continuous('Cumulative SSE Difference', limits=c(-0.2, 0.2)) +
  69.     scale_x_continuous('Year')
  70.   ##
  71.  
  72.   return(list(IS_error_N = IS_error_N,
  73.               IS_error_A = reg$residuals,
  74.               OOS_error_N = OOS_error_N,
  75.               OOS_error_A = OOS_error_A,
  76.               IS_R2 = summary(reg)$r.squared,
  77.               IS_aR2 = summary(reg)$adj.r.squared,
  78.               OOS_R2  = OOS_R2,
  79.               OOS_oR2 = OOS_oR2,
  80.               dRMSE = dRMSE,
  81.               plotGG = plotGG,
  82.               print.rf = print(rf)))
  83.  
  84. }
  85.  
  86. dp_stat2 <- get_statistics2(ts_annual, "dp", "rp_div", start=1872)
  87. dp_stat2$print.rf
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top