Guest User

Untitled

a guest
May 27th, 2018
75
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.73 KB | None | 0 0
  1. library(quantmod)
  2. library(JADE)
  3. library(BSSasymp)
  4. library(dtw)
  5. library(xts)
  6.  
  7. start <- as.Date("2016-01-01")
  8. end <- Sys.Date()
  9. # Please, note that the end date is used dynamically, the results obtained in the end can differ, if
  10. # executed in another day.
  11.  
  12. pool <- getSymbols(Symbols = c( "AAPL", "JPM", "COST", "DIS", "MSFT", "AXP", "CAT", "GS", "FDX", "GOOG"),
  13. src = "yahoo", from = start, to = end)
  14.  
  15.  
  16. portfolio1 <- 0.5 * AAPL + 0.5 * GOOG
  17. portfolio2 <- 0.15 * AAPL + 0.85 * GOOG
  18.  
  19.  
  20. colnames(portfolio1) <- c("Open", "High", "Low", "Close", "Volume", "Adjusted")
  21. colnames(portfolio2) <- c("Open", "High", "Low", "Close", "Volume", "Adjusted")
  22.  
  23. # making Close data
  24. portClose1 <- portfolio1$Close
  25. portClose2 <- portfolio2$Close
  26.  
  27. Close <- cbind(portClose1, portClose2)
  28. #View(Close)
  29.  
  30. #### Running JADE algorithm ####
  31. plot(Close)
  32. normClose <- diff(Close)
  33. plot(normClose)
  34. jadeClose <- JADE(normClose, n.comp = 2, eps = 1e-06, maxiter = 200, na.action = na.omit)
  35. plot.ts(bss.components(jadeClose), nc = 1, main = "JADE solution")
  36.  
  37. ICclose <- as.data.frame(rownames(jadeClose$S))
  38. rownames(jadeClose$S) <- NULL
  39. ICclose <- cbind(ICclose, as.data.frame(jadeClose$S))
  40. class(ICclose)
  41. #View(ICclose)
  42. class(as.numeric(normClose$Close))
  43.  
  44.  
  45. #### Aligning the Results ####
  46. num1 <- as.data.frame(as.numeric(diff(AAPL$AAPL.Close)))
  47. num1 <- num1[-1,]
  48. num2 <- as.data.frame(as.numeric(diff(JPM$JPM.Close)))
  49. num2 <- num2[-1,]
  50. num3 <- as.data.frame(as.numeric(diff(COST$COST.Close)))
  51. num3 <- num3[-1,]
  52. num4 <- as.data.frame(as.numeric(diff(DIS$DIS.Close)))
  53. num4 <- num4[-1,]
  54. num5 <- as.data.frame(as.numeric(diff(MSFT$MSFT.Close)))
  55. num5 <- num5[-1,]
  56. num6 <- as.data.frame(as.numeric(diff(AXP$AXP.Close)))
  57. num6 <- num6[-1,]
  58. num7 <- as.data.frame(as.numeric(diff(CAT$CAT.Close)))
  59. num7 <- num7[-1,]
  60. num8 <- as.data.frame(as.numeric(diff(GS$GS.Close)))
  61. num8 <- num8[-1,]
  62. num9 <- as.data.frame(as.numeric(diff(FDX$FDX.Close)))
  63. num9 <- num9[-1,]
  64. num10 <- as.data.frame(as.numeric(diff(GOOG$GOOG.Close)))
  65. num10 <- num10[-1,]
  66.  
  67. num_list <- list(num1, num2, num3, num4, num5, num6, num7, num8, num9, num10)
  68. num_list
  69.  
  70.  
  71. ######################
  72. #### Based on DTW ####
  73. ######################
  74.  
  75. # 1st component with DTW
  76. list_distance_matrix1 <- list()
  77. for (i in 1:length(num_list)) {
  78. list_distance_matrix1[[i]] <- dtw(ICclose$IC.1, num_list[[i]])
  79. }
  80.  
  81. min <- list_distance_matrix1[[1]]$distance
  82. index <- 1
  83. for (i in 2:length(num_list)) {
  84. if(min > list_distance_matrix1[[i]]$distance){
  85. min <- list_distance_matrix1[[i]]$distance
  86. index <- i
  87. }
  88. }
  89. min
  90. index
  91. # I decided to divide the overall data into epsiodes, then compare them separately.
  92. # The episodewise implementation can be found at the end of the file, however it have not provided any
  93. # significant results. The final version was left with the general case.
  94.  
  95. err11 <- c()
  96. for ( k in 1:25){
  97. x <- sum((k*ICclose$IC.1-num_list[[index]])^2)
  98. err11 <- c(err11,x)
  99. }
  100. plot(err11)
  101. which.min(err11)
  102.  
  103.  
  104. plot(which.min(err11) * ICclose$IC.1, type = "l", col = "red", ylim = c(-10,10))
  105. par(new = TRUE)
  106. plot(as.numeric(num_list[[index]]), type = "l", ylim = c(-10,10))
  107. par(new = FALSE)
  108.  
  109.  
  110. # 2nd component with DTW
  111. list_distance_matrix2 <- list()
  112. for (i in 1:length(num_list)) {
  113. list_distance_matrix2[[i]] <- dtw(ICclose$IC.2, num_list[[i]])
  114. }
  115.  
  116. min2 <- list_distance_matrix2[[1]]$distance
  117. index2 <- 1
  118. for (i in 2:length(num_list)) {
  119. if(min2 > list_distance_matrix1[[i]]$distance){
  120. min2 <- list_distance_matrix1[[i]]$distance
  121. index2 <- i
  122. }
  123. }
  124. min2
  125. index2
  126. # Again the 5th component
  127.  
  128. err12 <- c()
  129. for ( k in 1:25){
  130. x <- sum((k*ICclose$IC.2-num_list[[index2]])^2)
  131. err12 <- c(err12,x)
  132. }
  133. plot(err12)
  134. which.min(err12)
  135.  
  136.  
  137. plot(which.min(err12) * ICclose$IC.2, type = "l", col = "red", ylim = c(-10,10))
  138. par(new = TRUE)
  139. plot(as.numeric(num_list[[index2]]), type = "l", ylim = c(-10,10))
  140. par(new = FALSE)
  141.  
  142. # Indeed, it has much similarities and visually they are very similar too. However, there are some aspects
  143. # seen in comparison with the other graphs that the model has not taken into account.
  144.  
  145.  
  146. #############################################
  147. #### Resuts Based on Visual Similarities ####
  148. #############################################
  149.  
  150. ## 1st component ##
  151. num1 <- as.data.frame(as.numeric(diff(AAPL$AAPL.Close)))
  152. num1 <- num1[-1,]
  153. # err1 <- sum((ICclose$IC.1-num1)^2)/length(num1) # in neighborhood of this value
  154.  
  155. err1 <- c()
  156. for ( k in 1:25){
  157. x <- sum((k*ICclose$IC.1-num1)^2)
  158. err1 <- c(err1,x)
  159. }
  160. plot(err1)
  161. which.min(err1)
  162.  
  163.  
  164. plot(which.min(err1) * ICclose$IC.1, type = "l", col = "red", ylim = c(-7,7))
  165. par(new = TRUE)
  166. plot(as.numeric(diff(AAPL$AAPL.Close)), type = "l", ylim = c(-7,7))
  167. par(new = FALSE)
  168.  
  169.  
  170. # 2nd component
  171. num2 <- as.data.frame(as.numeric(diff(GOOG$GOOG.Close)))
  172. num2 <- num2[-1,]
  173.  
  174.  
  175. err2 <- c()
  176. for ( k in 1:20){
  177. x <- sum((k*ICclose$IC.2-num2)^2)
  178. err2 <- c(err2,x)
  179. }
  180. plot(err2)
  181. which.min(err2)
  182.  
  183.  
  184. plot( which.min(err2) * ICclose$IC.2, type = "l", col = "red", ylim = c(-50,50))
  185. par(new = TRUE)
  186. plot(as.numeric(diff(GOOG$GOOG.Close)), type = "l", ylim = c(-50,50))
  187. par(new = FALSE)
  188.  
  189. # Even multiplied the IC2 by the factor assessed above, the momdel aligned with the 5th component that is MSFT
  190.  
  191.  
  192.  
  193.  
  194. #############################
  195. #### EPISODE application ####
  196. #############################
  197.  
  198. num5_episodes <- list(num_list[[5]][1:100], num_list[[5]][101:200], num_list[[5]][201:300],
  199. num_list[[5]][301:400],num_list[[5]][401:500],num_list[[5]][501:length(num_list[[5]])])
  200.  
  201. num1_episodes <- list(num_list[[1]][1:100], num_list[[1]][101:200], num_list[[1]][201:300],
  202. num_list[[1]][301:400],num_list[[1]][401:500],num_list[[1]][501:length(num_list[[1]])])
  203. IC1_episodes <- list(ICclose$IC.1[1:100], ICclose$IC.1[101:200], ICclose$IC.1[201:300],
  204. ICclose$IC.1[301:400],ICclose$IC.1[401:500],ICclose$IC.1[501:length(num_list[[1]])])
  205.  
  206.  
  207.  
  208. # num1 episodes THE RIGHT ONE
  209. list_distance_matrix1 <- list()
  210. for (i in 1:length(IC1_episodes)) {
  211. list_distance_matrix1[[i]] <- dtw(IC1_episodes[[i]], num1_episodes[[i]])
  212. }
  213.  
  214. dist_data <- as.data.frame(list_distance_matrix1[[1]]$distance)
  215. for (i in 2:length(list_distance_matrix1)) {
  216. dist_data <- rbind(dist_data, list_distance_matrix1[[i]]$distance)
  217. }
  218. colnames(dist_data) <- "num1"
  219.  
  220. # num5 episodes
  221. list_distance_matrix1 <- list()
  222. for (i in 1:length(IC1_episodes)) {
  223. list_distance_matrix1[[i]] <- dtw(IC1_episodes[[i]], num5_episodes[[i]])
  224. }
  225.  
  226. dist_data[1,2] <- as.data.frame(list_distance_matrix1[[1]]$distance)
  227. for (i in 2:length(list_distance_matrix1)) {
  228. dist_data[i,2] <- list_distance_matrix1[[i]]$distance
  229. }
  230. colnames(dist_data)[2] <- "num5"
  231. # it can be seen that the data is more likely to be num 5 which is not the the one
  232. # there is not any significant difference in episode application.
  233. # As it gavw no significat results then we will not execute the same procedure for the 2nd component.
  234. # View(dist_data)
Add Comment
Please, Sign In to add comment