Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- library(quantmod)
- library(JADE)
- library(BSSasymp)
- library(dtw)
- library(xts)
- start <- as.Date("2016-01-01")
- end <- Sys.Date()
- # Please, note that the end date is used dynamically, the results obtained in the end can differ, if
- # executed in another day.
- pool <- getSymbols(Symbols = c( "AAPL", "JPM", "COST", "DIS", "MSFT", "AXP", "CAT", "GS", "FDX", "GOOG"),
- src = "yahoo", from = start, to = end)
- portfolio1 <- 0.5 * AAPL + 0.5 * GOOG
- portfolio2 <- 0.15 * AAPL + 0.85 * GOOG
- colnames(portfolio1) <- c("Open", "High", "Low", "Close", "Volume", "Adjusted")
- colnames(portfolio2) <- c("Open", "High", "Low", "Close", "Volume", "Adjusted")
- # making Close data
- portClose1 <- portfolio1$Close
- portClose2 <- portfolio2$Close
- Close <- cbind(portClose1, portClose2)
- #View(Close)
- #### Running JADE algorithm ####
- plot(Close)
- normClose <- diff(Close)
- plot(normClose)
- jadeClose <- JADE(normClose, n.comp = 2, eps = 1e-06, maxiter = 200, na.action = na.omit)
- plot.ts(bss.components(jadeClose), nc = 1, main = "JADE solution")
- ICclose <- as.data.frame(rownames(jadeClose$S))
- rownames(jadeClose$S) <- NULL
- ICclose <- cbind(ICclose, as.data.frame(jadeClose$S))
- class(ICclose)
- #View(ICclose)
- class(as.numeric(normClose$Close))
- #### Aligning the Results ####
- num1 <- as.data.frame(as.numeric(diff(AAPL$AAPL.Close)))
- num1 <- num1[-1,]
- num2 <- as.data.frame(as.numeric(diff(JPM$JPM.Close)))
- num2 <- num2[-1,]
- num3 <- as.data.frame(as.numeric(diff(COST$COST.Close)))
- num3 <- num3[-1,]
- num4 <- as.data.frame(as.numeric(diff(DIS$DIS.Close)))
- num4 <- num4[-1,]
- num5 <- as.data.frame(as.numeric(diff(MSFT$MSFT.Close)))
- num5 <- num5[-1,]
- num6 <- as.data.frame(as.numeric(diff(AXP$AXP.Close)))
- num6 <- num6[-1,]
- num7 <- as.data.frame(as.numeric(diff(CAT$CAT.Close)))
- num7 <- num7[-1,]
- num8 <- as.data.frame(as.numeric(diff(GS$GS.Close)))
- num8 <- num8[-1,]
- num9 <- as.data.frame(as.numeric(diff(FDX$FDX.Close)))
- num9 <- num9[-1,]
- num10 <- as.data.frame(as.numeric(diff(GOOG$GOOG.Close)))
- num10 <- num10[-1,]
- num_list <- list(num1, num2, num3, num4, num5, num6, num7, num8, num9, num10)
- num_list
- ######################
- #### Based on DTW ####
- ######################
- # 1st component with DTW
- list_distance_matrix1 <- list()
- for (i in 1:length(num_list)) {
- list_distance_matrix1[[i]] <- dtw(ICclose$IC.1, num_list[[i]])
- }
- min <- list_distance_matrix1[[1]]$distance
- index <- 1
- for (i in 2:length(num_list)) {
- if(min > list_distance_matrix1[[i]]$distance){
- min <- list_distance_matrix1[[i]]$distance
- index <- i
- }
- }
- min
- index
- # I decided to divide the overall data into epsiodes, then compare them separately.
- # The episodewise implementation can be found at the end of the file, however it have not provided any
- # significant results. The final version was left with the general case.
- err11 <- c()
- for ( k in 1:25){
- x <- sum((k*ICclose$IC.1-num_list[[index]])^2)
- err11 <- c(err11,x)
- }
- plot(err11)
- which.min(err11)
- plot(which.min(err11) * ICclose$IC.1, type = "l", col = "red", ylim = c(-10,10))
- par(new = TRUE)
- plot(as.numeric(num_list[[index]]), type = "l", ylim = c(-10,10))
- par(new = FALSE)
- # 2nd component with DTW
- list_distance_matrix2 <- list()
- for (i in 1:length(num_list)) {
- list_distance_matrix2[[i]] <- dtw(ICclose$IC.2, num_list[[i]])
- }
- min2 <- list_distance_matrix2[[1]]$distance
- index2 <- 1
- for (i in 2:length(num_list)) {
- if(min2 > list_distance_matrix1[[i]]$distance){
- min2 <- list_distance_matrix1[[i]]$distance
- index2 <- i
- }
- }
- min2
- index2
- # Again the 5th component
- err12 <- c()
- for ( k in 1:25){
- x <- sum((k*ICclose$IC.2-num_list[[index2]])^2)
- err12 <- c(err12,x)
- }
- plot(err12)
- which.min(err12)
- plot(which.min(err12) * ICclose$IC.2, type = "l", col = "red", ylim = c(-10,10))
- par(new = TRUE)
- plot(as.numeric(num_list[[index2]]), type = "l", ylim = c(-10,10))
- par(new = FALSE)
- # Indeed, it has much similarities and visually they are very similar too. However, there are some aspects
- # seen in comparison with the other graphs that the model has not taken into account.
- #############################################
- #### Resuts Based on Visual Similarities ####
- #############################################
- ## 1st component ##
- num1 <- as.data.frame(as.numeric(diff(AAPL$AAPL.Close)))
- num1 <- num1[-1,]
- # err1 <- sum((ICclose$IC.1-num1)^2)/length(num1) # in neighborhood of this value
- err1 <- c()
- for ( k in 1:25){
- x <- sum((k*ICclose$IC.1-num1)^2)
- err1 <- c(err1,x)
- }
- plot(err1)
- which.min(err1)
- plot(which.min(err1) * ICclose$IC.1, type = "l", col = "red", ylim = c(-7,7))
- par(new = TRUE)
- plot(as.numeric(diff(AAPL$AAPL.Close)), type = "l", ylim = c(-7,7))
- par(new = FALSE)
- # 2nd component
- num2 <- as.data.frame(as.numeric(diff(GOOG$GOOG.Close)))
- num2 <- num2[-1,]
- err2 <- c()
- for ( k in 1:20){
- x <- sum((k*ICclose$IC.2-num2)^2)
- err2 <- c(err2,x)
- }
- plot(err2)
- which.min(err2)
- plot( which.min(err2) * ICclose$IC.2, type = "l", col = "red", ylim = c(-50,50))
- par(new = TRUE)
- plot(as.numeric(diff(GOOG$GOOG.Close)), type = "l", ylim = c(-50,50))
- par(new = FALSE)
- # Even multiplied the IC2 by the factor assessed above, the momdel aligned with the 5th component that is MSFT
- #############################
- #### EPISODE application ####
- #############################
- num5_episodes <- list(num_list[[5]][1:100], num_list[[5]][101:200], num_list[[5]][201:300],
- num_list[[5]][301:400],num_list[[5]][401:500],num_list[[5]][501:length(num_list[[5]])])
- num1_episodes <- list(num_list[[1]][1:100], num_list[[1]][101:200], num_list[[1]][201:300],
- num_list[[1]][301:400],num_list[[1]][401:500],num_list[[1]][501:length(num_list[[1]])])
- IC1_episodes <- list(ICclose$IC.1[1:100], ICclose$IC.1[101:200], ICclose$IC.1[201:300],
- ICclose$IC.1[301:400],ICclose$IC.1[401:500],ICclose$IC.1[501:length(num_list[[1]])])
- # num1 episodes THE RIGHT ONE
- list_distance_matrix1 <- list()
- for (i in 1:length(IC1_episodes)) {
- list_distance_matrix1[[i]] <- dtw(IC1_episodes[[i]], num1_episodes[[i]])
- }
- dist_data <- as.data.frame(list_distance_matrix1[[1]]$distance)
- for (i in 2:length(list_distance_matrix1)) {
- dist_data <- rbind(dist_data, list_distance_matrix1[[i]]$distance)
- }
- colnames(dist_data) <- "num1"
- # num5 episodes
- list_distance_matrix1 <- list()
- for (i in 1:length(IC1_episodes)) {
- list_distance_matrix1[[i]] <- dtw(IC1_episodes[[i]], num5_episodes[[i]])
- }
- dist_data[1,2] <- as.data.frame(list_distance_matrix1[[1]]$distance)
- for (i in 2:length(list_distance_matrix1)) {
- dist_data[i,2] <- list_distance_matrix1[[i]]$distance
- }
- colnames(dist_data)[2] <- "num5"
- # it can be seen that the data is more likely to be num 5 which is not the the one
- # there is not any significant difference in episode application.
- # As it gavw no significat results then we will not execute the same procedure for the 2nd component.
- # View(dist_data)
Add Comment
Please, Sign In to add comment