Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# --- Package setup -----------------------------------------------------------
# Historical offline-install path, kept for reference only:
# get_os <- function(){
#   sysinf <- Sys.info()
#   if (!is.null(sysinf)){
#     os <- sysinf['sysname']
#     if (os == 'Darwin') os <- "osx"
#   } else { ## mystery machine
#     os <- .Platform$OS.type
#     if (grepl("^darwin", R.version$os)) os <- "osx"
#     if (grepl("linux-gnu", R.version$os)) os <- "linux"
#   }
#   as.character(tolower(os))
# }
# sourceName <- paste0("pacman_0.5.0", ifelse(get_os() == "windows", ".zip", ".tgz"))
# if (!"pacman" %in% rownames(installed.packages())) install.packages(sourceName, repos = NULL, type = "win.binary")

# Bootstrap 'pacman' from CRAN if missing, then let it install/load every
# package this script depends on in one call.
if (!"pacman" %in% rownames(installed.packages())) {
  install.packages("pacman")
}
pacman::p_load(fUnitRoots, urca, vars, aod, zoo, tseries, igraph, visNetwork, abind, Hmisc)
# --- Analysis parameters -----------------------------------------------------
TIME <- Sys.time()  # wall-clock start; elapsed time is printed at the end

Sig_95 <- 0.05  # significance level for ADF / KPSS stationarity testing
Sig_90 <- 0.1   # significance level for Wald-test G-causality hypotheses
maxLag <- 12    # maximum lag considered during VAR lag-order selection
nAhead <- 24    # horizon (periods) for the IRF and FEVD plots
totOrd <- 3     # highest order of integration tried when differencing
maxChr <- 25    # legend labels shorter than this are laid out horizontally

G_test <- c('chi2', 'Ftest')  # Wald statistics to compute for causality
names(G_test) <- G_test       # self-named so results can be indexed by test

useLinReg <- FALSE  # estimate the Wald statistic from an explicit lm() fit?
savePlots <- FALSE  # write plots to .wmf files under the csvFileName folder?
# --- Load data ---------------------------------------------------------------
# Move into the directory containing this script so the relative CSV path
# resolves. sys.frame(1)$ofile is only populated when the file is source()d
# (the original author noted it fails on macOS / interactive runs), so a
# failure here is tolerated instead of aborting the whole script.
try(setwd(dirname(sys.frame(1)$ofile)), silent = TRUE)

csvFileName <- "LT_2003M07_2013M02"  # also used as the plot output directory
#csvFileName <- "coffee_data_from_1976"
myData <- read.csv('sutvarkytas_ir_isglaudytas.csv', header = TRUE)
# Smooth year-boundary discontinuities in a monthly series.
#
# The input is assumed to hold nYears consecutive years of nMonths monthly
# observations (6 x 12 = 72 values by default, matching the original
# hard-coded loops). Every last month of a year (except the final one) is
# replaced by the average of itself and the following January, and every
# January (except the very first) repeats the smoothed December before it,
# removing the artificial jump between years. Finally the very last
# observation is scaled by `coef` (this matches the original code, which
# rescaled only the final element).
#
# Args:
#   data_arr: numeric vector of length nYears * nMonths
#   coef:     multiplier applied to the final observation only
#   nYears:   number of years covered by the series (default 6)
#   nMonths:  number of observations per year (default 12)
# Returns:
#   numeric vector of the same length as nYears * nMonths
fixData <- function(data_arr, coef, nYears = 6, nMonths = 12) {
  total <- nYears * nMonths
  res_arr <- numeric(total)  # preallocate instead of growing an array()
  for (index in seq_len(total)) {
    monthInYear <- ((index - 1) %% nMonths) + 1
    if (monthInYear == nMonths && index != total) {
      # Last month of a (non-final) year: average with the next January.
      res_arr[index] <- (data_arr[index] + data_arr[index + 1]) / 2
    } else if (monthInYear == 1 && index != 1) {
      # January (not the first): repeat the smoothed December before it.
      res_arr[index] <- res_arr[index - 1]
    } else {
      res_arr[index] <- data_arr[index]
    }
  }
  # Only the final observation is rescaled (original behaviour preserved).
  res_arr[total] <- res_arr[total] * coef
  return(res_arr)
}
# Apply year-boundary smoothing to both GPM series (final value scaled by 0.83).
myData$Y1_GPM <- fixData(myData$Y1_GPM, 0.83)
myData$Y2_GPM <- fixData(myData$Y2_GPM, 0.83)

myDataN <- names(myData)
cat(myDataN)  # echo the column names for a quick sanity check

# Date column is formatted like "2003M07"; convert to Date "2003-07-01".
myDataX <- as.Date(paste(sub("M", "-", myData$Date), "-01", sep = ""))
#myDataX <- as.Date(as.yearqtr(sub("Q","-",myData$Date)))
#myDataX <- as.Date(sub("\\.","-",sub("\\.","-",myData$Date)))
#myDataX <- as.Date(paste(sub(".","-",myData$Date),sep=""))

if (savePlots) dir.create(file.path(csvFileName), showWarnings = FALSE)

# Every unordered pair of data columns (date column excluded) gets tested.
testingPairs <- t(combn(myDataN[-1], 2))
#mainCols <- c('ESI','NetMig') # leave only variables of interest
#testingPairs <- testingPairs[testingPairs[,1] %in% mainCols | testingPairs[,2] %in% mainCols,]

# Square result matrices (FROM x TO), one copy per Wald statistic.
v <- myDataN[-1]
vMat <- matrix(0, nrow = length(v), ncol = length(v), dimnames = list(FROM = v, TO = v))
crossImpact <- lapply(G_test, function(x) vMat)
sumOfCoefs <- crossImpact

# Expose each CSV column as a top-level variable named after it.
# assign()/[[ replaces the original eval(parse(text = ...)) construction.
for (colName in myDataN) {
  assign(colName, myData[[colName]])
}
# --- Pairwise Granger-causality analysis -------------------------------------
# For every pair of series: plot both on a shared x-axis, determine the order
# of integration (ADF/KPSS), select a VAR lag order, fit an augmented
# VAR(p + m) (Toda-Yamamoto style), Wald-test both causality directions, and
# record significant links in crossImpact / sumOfCoefs. Significant links
# optionally get IRF and FEVD plots.
for (nr in seq_len(nrow(testingPairs))) {
  myPair <- testingPairs[nr, ]
  data1 <- get(myPair[1])  # columns were exposed as variables above
  data2 <- get(myPair[2])
  varPair <- cbind(data1, data2)
  colnames(varPair) <- myPair

  # Visualize the pair: red series on the left axis, blue on the right.
  ylim1 <- c(min(data1) * (1 - sign(min(data1)) * 0.9), max(data1) * (1 + sign(max(data1)) / 10))
  plot(x = myDataX, y = data1, ylim = ylim1, col = 'red', type = 'l', lwd = 1, xaxt = 'n', axes = FALSE, xlab = 'Time (quarterly periodicity)', ylab = '', main = '')
  axis(1, pretty(myDataX), labels = format(pretty(myDataX), '%Y'))
  axis(2, pretty(ylim1), col = 'red')
  par(new = TRUE)
  ylim2 <- c(min(data2) * (1 - sign(min(data2)) * 0.9), max(data2) * (1 + sign(max(data2)) / 10))
  plot(x = myDataX, y = data2, ylim = ylim2, col = 'blue', type = 'l', lwd = 1, xaxt = 'n', axes = FALSE, ylab = '', xlab = '')
  axis(4, pretty(ylim2), col = 'blue')
  # Dotted vertical guides at every January (the no-op h = '' was dropped).
  abline(v = myDataX[which(as.integer(format(myDataX, '%m')) == 1)], col = "gray", lty = 3)
  box()
  legend('bottom', myPair, col = c('red', 'blue'), lwd = c(1, 1), horiz = (nchar(myPair[1]) < maxChr && nchar(myPair[2]) < maxChr))

  options(warn = -1) # suppress 'p-value greater than printed p-value' warning
  # 1. Test for integration: ADF (H0: unit root) and KPSS (H0: stationary)
  #    on the raw series and on lagged differences up to lag totOrd.
  p_ADF <- matrix(0, 2, totOrd + 1)
  p_KPSS <- matrix(0, 2, totOrd + 1)
  p_ADF[1, 1] <- adf.test(data1)$p.value
  p_ADF[2, 1] <- adf.test(data2)$p.value
  p_KPSS[1, 1] <- kpss.test(data1)$p.value
  p_KPSS[2, 1] <- kpss.test(data2)$p.value
  for (i in 1:totOrd) {
    # NOTE(review): diff(x, i) sets lag = i (not differencing order i),
    # exactly as the original eval(parse()) version did.
    d_data1 <- diff(data1, i)
    d_data2 <- diff(data2, i)
    p_ADF[1, i + 1] <- adf.test(d_data1)$p.value
    p_ADF[2, i + 1] <- adf.test(d_data2)$p.value
    p_KPSS[1, i + 1] <- kpss.test(d_data1)$p.value
    p_KPSS[2, i + 1] <- kpss.test(d_data2)$p.value
  }
  # First column index at which each series looks stationary
  # (ADF: reject the unit root; KPSS: fail to reject stationarity).
  ordADF <- c(which(p_ADF[1, ] <= Sig_95)[1], which(p_ADF[2, ] <= Sig_95)[1])
  ordADF[is.na(ordADF)] <- 1
  ordKPSS <- c(which(p_KPSS[1, ] >= Sig_95)[1], which(p_KPSS[2, ] >= Sig_95)[1])
  ordKPSS[is.na(ordKPSS)] <- 1
  options(warn = 1) # restore warning reporting (original used 1, not the default 0)

  # 2. Maximum order of integration m across both series and both tests.
  #    If neither series is integrated (m = 0) this reduces to the usual
  #    Granger-causality test.
  maxOrd <- max(c(ordADF, ordKPSS)) - 1

  # 3. VAR lag-order selection on the UNdifferenced data.
  bestLag <- VARselect(varPair, lag = maxLag, type = "both")
  varResid <- NULL
  for (i in 1:length(bestLag$selection)) {
    varModel <- VAR(varPair, p = bestLag$selection[i], type = "both")
    # Portmanteau / Breusch-Godfrey test for residual autocorrelation.
    # Compute it once; a failing fit simply scores 0.
    serialResult <- try(serial.test(varModel))
    if (inherits(serialResult, "try-error")) {
      varResid[i] <- 0
    } else {
      varResid[i] <- serialResult$serial$p.value
    }
  }
  # 4. Among the candidate lag orders pick the one whose residuals are the
  #    least autocorrelated (largest Portmanteau p-value).
  optLag <- bestLag$selection[which(varResid == max(varResid))[1]]

  # 5. Augmented VAR(p + m); the m extra lags are never Wald-tested.
  varModel <- VAR(varPair, p = optLag + maxOrd, type = "both")
  if (useLinReg) {
    # Re-estimate the VAR equations as explicit linear models so the Wald
    # test can be applied to plain lm() fits.
    zoo1 <- zoo(varPair[, 1])
    zoo2 <- zoo(varPair[, 2])
    idx <- 2:(optLag + maxOrd + 1)
    zoo1.l <- lag(zoo1, -(0:(optLag + maxOrd)), na.pad = TRUE)
    zoo2.l <- lag(zoo2, -(0:(optLag + maxOrd)), na.pad = TRUE)
    lm1 <- lm(zoo1 ~ zoo2.l[, idx] + zoo1.l[, idx] + index(zoo1))
    lm1$fitted.values <- na.omit(lm1$fitted.values)
    lm2 <- lm(zoo2 ~ zoo1.l[, idx] + zoo2.l[, idx] + index(zoo2))
    lm2$fitted.values <- na.omit(lm2$fitted.values)
  }

  # 6. Wald test on the first p lag coefficients only (p degrees of freedom).
  #    wt1: H0 "series 2 does not Granger-cause series 1"
  #    wt2: H0 "series 1 does not Granger-cause series 2"
  #    sum1/sum2: sums of lag coefficients, used later for response direction.
  myTitle <- ''
  arTitle <- paste('\n\nAR(', optLag, '+', maxOrd, ')', sep = '')
  wt1 <- NULL; wt2 <- NULL
  if (useLinReg) {
    wt1 <- wald.test(b = coef(lm1), Sigma = vcov(lm1), Terms = c(2:(optLag + 1)), df = (length(myDataX) - length(coef(lm1)) - optLag + maxOrd * 2)[[1]])
    wt2 <- wald.test(b = coef(lm2), Sigma = vcov(lm2), Terms = c(2:(optLag + 1)), df = (length(myDataX) - length(coef(lm2)) - optLag + maxOrd * 2)[[1]])
    sum1 <- sum(coef(lm1)[c(2:(optLag + 1 + maxOrd))])
    sum2 <- sum(coef(lm2)[c(2:(optLag + 1 + maxOrd))])
  } else {
    # In the VAR coefficient vector the two series' lags alternate, hence
    # the by = 2 strides selecting only the cross-series terms.
    wt1 <- wald.test(b = coef(varModel$varresult[[1]]), Sigma = vcov(varModel$varresult[[1]]), Terms = seq(2, 2 * optLag, by = 2), df = (length(myDataX) - length(coef(varModel$varresult[[1]])) - optLag + maxOrd * 2)[[1]])
    wt2 <- wald.test(b = coef(varModel$varresult[[2]]), Sigma = vcov(varModel$varresult[[2]]), Terms = seq(1, 2 * optLag, by = 2), df = (length(myDataX) - length(coef(varModel$varresult[[2]])) - optLag + maxOrd * 2)[[1]])
    sum1 <- sum(coef(varModel$varresult[[1]])[seq(2, 2 * optLag + 2 * maxOrd, by = 2)])
    sum2 <- sum(coef(varModel$varresult[[2]])[seq(1, 2 * optLag + 2 * maxOrd, by = 2)])
  }
  myTitle <- paste0(myTitle, sprintf('\n(coefs): %s {%5.3f} & %s {%5.3f}', myPair[1], sum2, myPair[2], sum1))

  p_wt1 <- NULL; p_wt2 <- NULL
  for (t in 1:length(G_test)) {
    # Extract the p-value for the requested statistic ('chi2' or 'Ftest');
    # [[ indexing replaces the original eval(parse(text = ...)).
    p_wt1[t] <- wt1$result[[G_test[t]]][['P']]
    p_wt2[t] <- wt2$result[[G_test[t]]][['P']]
    G <- ' \227 '
    if ((p_wt1[t] < Sig_90 && p_wt2[t] < Sig_90)) {
      # Bidirectional causality: record both directions.
      crossImpact[[G_test[t]]][myPair[1], myPair[2]] <- 1 - p_wt2[t]
      crossImpact[[G_test[t]]][myPair[2], myPair[1]] <- 1 - p_wt1[t]
      sumOfCoefs[[G_test[t]]][myPair[1], myPair[2]] <- sum2
      sumOfCoefs[[G_test[t]]][myPair[2], myPair[1]] <- sum1
      G <- ' \U2194 '
      #G <- ' <-> '
    } else if (p_wt1[t] < Sig_90) {
      # Series 2 Granger-causes series 1.
      crossImpact[[G_test[t]]][myPair[2], myPair[1]] <- 1 - p_wt1[t]
      sumOfCoefs[[G_test[t]]][myPair[2], myPair[1]] <- sum1
      G <- ' \U2190 '
      #G <- ' <- '
    } else if (p_wt2[t] < Sig_90) {
      # Series 1 Granger-causes series 2.
      crossImpact[[G_test[t]]][myPair[1], myPair[2]] <- 1 - p_wt2[t]
      sumOfCoefs[[G_test[t]]][myPair[1], myPair[2]] <- sum2
      G <- ' \U2192 '
      #G <- ' -> '
    }
    myTitle <- paste0(myTitle, sprintf('\n(%s): %s [%5.3f]%s%s [%5.3f]', G_test[t], myPair[1], p_wt2[t], G, myPair[2], p_wt1[t]))
  }
  cat(paste(arTitle, myTitle, sep = ''))
  title(myTitle, cex.main = 0.8)

  # 7. IRF and FEVD plots for significant causal links (VAR fit only).
  if (!useLinReg) {
    if (any(p_wt1 < Sig_90) && any(p_wt2 < Sig_90)) {
      if (savePlots) win.metafile(paste0(csvFileName, '/', myPair[2], '_response_to_', myPair[1], '.wmf'), width = 8, height = 6)
      plot(irf(varModel, impulse = myPair[1], response = myPair[2], ci = 1 - Sig_90, runs = 1000, n.ahead = nAhead))
      if (savePlots) dev.off()
      if (savePlots) win.metafile(paste0(csvFileName, '/', myPair[1], '_response_to_', myPair[2], '.wmf'), width = 8, height = 6)
      plot(irf(varModel, impulse = myPair[2], response = myPair[1], ci = 1 - Sig_90, runs = 1000, n.ahead = nAhead))
      if (savePlots) dev.off()
      if (savePlots) win.metafile(paste0(csvFileName, '/', myPair[1], '_', myPair[2], '_combined_FEVD.wmf'), width = 6, height = 8)
      plot(fevd(varModel, n.ahead = nAhead))
      if (savePlots) dev.off()
    } else if (any(p_wt1 < Sig_90)) {
      if (savePlots) win.metafile(paste0(csvFileName, '/', myPair[1], '_response_to_', myPair[2], '.wmf'), width = 8, height = 6)
      plot(irf(varModel, impulse = myPair[2], response = myPair[1], ci = 1 - Sig_90, runs = 1000, n.ahead = nAhead))
      if (savePlots) dev.off()
      if (savePlots) win.metafile(paste0(csvFileName, '/', myPair[1], '_', myPair[2], '_combined_FEVD.wmf'), width = 6, height = 8)
      plot(fevd(varModel, n.ahead = nAhead))
      if (savePlots) dev.off()
    } else if (any(p_wt2 < Sig_90)) {
      if (savePlots) win.metafile(paste0(csvFileName, '/', myPair[2], '_response_to_', myPair[1], '.wmf'), width = 8, height = 6)
      plot(irf(varModel, impulse = myPair[1], response = myPair[2], ci = 1 - Sig_90, runs = 1000, n.ahead = nAhead))
      if (savePlots) dev.off()
      if (savePlots) win.metafile(paste0(csvFileName, '/', myPair[1], '_', myPair[2], '_combined_FEVD.wmf'), width = 6, height = 8)
      plot(fevd(varModel, n.ahead = nAhead))
      if (savePlots) dev.off()
    }
  }
}
# 8. Construct and display a directed graph from each cross-impact matrix.
for (t in seq_along(G_test)) {
  # Edge weights are (1 - p-value) of the causal link.
  iGraph <- graph_from_adjacency_matrix(crossImpact[[G_test[t]]], weighted = TRUE)
  visGraph <- toVisNetworkData(iGraph)
  # A parallel graph built from the coefficient sums supplies edge signs.
  iiGraph <- graph_from_adjacency_matrix(sumOfCoefs[[G_test[t]]], weighted = TRUE)
  vvisGraph <- toVisNetworkData(iiGraph)
  visGraph$nodes$shadow <- TRUE
  # Green edges: positive summed response; red: negative.
  visGraph$edges$color <- "red"
  visGraph$edges$color[vvisGraph$edges$weight > 0] <- "green"
  # The ^9 power exaggerates widths of links whose (1 - p) is close to 1.
  visGraph$edges$width <- 2 * visGraph$edges$weight ^ 9
  visGraph$edges$title <- paste0('p-value = ', format(1 - visGraph$edges$weight, digits = 1))
  #mutate(visGraph$edges, width = weight/5 + 1)
  #plot.igraph(iGraph,main=G_test[t])
  visnet <- visNetwork(nodes = visGraph$nodes, edges = visGraph$edges, main = paste0("G-causality by ", G_test[t])) %>%
    #visIgraphLayout(layout = "layout_nicely") %>%
    visLayout(improvedLayout = TRUE) %>% visNodes(size = 20) %>% visEdges(arrows = "middle", physics = TRUE) %>%
    visOptions(highlightNearest = list(enabled = TRUE, labelOnly = FALSE, algorithm = "hierarchical"), nodesIdSelection = list(enabled = TRUE, values = v))
  print(visnet)
}
# 9. Scatterplot of "active" vs "passive" sums from the cross-impact matrix.
# Pooled score per link (0..3): significant correlation + causality at the
# 90% level + causality at the 95% level.
crossImpactArray <- abind(crossImpact, along = 3)
crossImpactBest <- apply(crossImpactArray, c(1, 2), max)  # best of chi2/Ftest
correlationResult <- rcorr(as.matrix(myData[, -1]))
pooledCrossImpact <- (correlationResult$P < Sig_95) + (crossImpactBest > (1 - Sig_90)) + (crossImpactBest > (1 - Sig_95))
# Asum (row sums): how strongly a variable drives the others.
# Psum (column sums): how strongly it is driven by them.
XY <- data.frame(Asum = rowSums(pooledCrossImpact, na.rm = TRUE), Psum = colSums(pooledCrossImpact, na.rm = TRUE))
plot(Asum ~ Psum, XY, ylim = c(0, max(XY$Asum) + 1), xlim = c(0, max(XY$Psum) + 1), main = csvFileName)
abline(v = mean(XY$Psum), h = mean(XY$Asum), col = "red", lwd = 1, lty = 2)
text(Asum ~ Psum, XY, labels = row.names(XY), pos = 4)
cat('\n\n')
print(Sys.time() - TIME)  # total wall-clock runtime of the script
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement