Advertisement
Guest User

Correlation of Tesla share quote and Robintrack popularity

a guest
Jul 18th, 2019
95
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 6.82 KB | None | 0 0
  1. # robintrack_correlation.R
  2. #
  3. # FC, 18.07.2019
  4. #
  5. # env.: R version 3.6.0 (2019-04-26) -- "Planting of a Tree"
  6. #
  7. # Platform: x86_64-w64-mingw32/x64 (64-bit)
  8. #
  9. #
  10. # test: OK
  11. #
  12.  
  13. library(chron) # loads as.POSIXlt, as.POSIXct
  14. library(tidyquant) # loads tidyquant, tidyverse, lubridate, xts, quantmod, TTRlibrary(xts)
  15. library(cowplot)
  16. library(scales)  # for scale_y_continuous()
  17.  
  18.  
  19. ###############################################
  20. ## user input:
  21. # how many years of daily quotes:
  22. curr1 <- c('USD')
  23. tckr1 <- c('TSLA')
  24. security_name1 <- paste("Tesla (", tckr1, ")", sep ="")
  25. ###############################################
  26.  
  27.  
  28. # class(popularity): data.frame
  29. popularity <- read.csv("Robintrack popularity, Tesla, 2018-2019.csv", header = T, dec = ".", sep = ";")
  30. # input:
  31. # timestamp;popularity
  32. # 2018-05-02 04:53:19 UTC;81038
  33. # 2018-05-02 06:38:31 UTC;81038
  34. # 2018-05-03 00:34:51 UTC;80137
  35. # 2018-05-03 06:33:24 UTC;80137
  36. # 2018-05-03 06:48:25 UTC;80137
  37. # 2018-05-03 07:06:33 UTC;80137
  38. # 2018-05-03 07:33:50 UTC;80137
  39. # 2018-05-03 07:43:02 UTC;80137
  40. # 2018-05-03 11:19:16 UTC;80138
  41. # 2018-05-03 12:42:40 UTC;80138
  42. # 2018-05-03 13:42:40 UTC;80149
  43. # 2018-05-03 14:42:41 UTC;81223
  44.  
  45. # > head(popularity)
  46. #                 timestamp popularity
  47. # 1 2018-05-02 04:53:19 UTC      81038
  48. # 2 2018-05-02 06:38:31 UTC      81038
  49. # 3 2018-05-03 00:34:51 UTC      80137
  50. # 4 2018-05-03 06:33:24 UTC      80137
  51. # 5 2018-05-03 06:48:25 UTC      80137
  52. # 6 2018-05-03 07:06:33 UTC      80137
  53. # >
  54.  
  55.  
  56. times1 <- as.POSIXlt(popularity$timestamp, tz = "UTC")
  57.  
  58. df1 <- data.frame(time = times1, popularity = popularity$popularity)
  59. # > head(df1)
  60. #                  time popularity
  61. # 1 2018-05-02 04:53:19      81038
  62. # 2 2018-05-02 06:38:31      81038
  63. # 3 2018-05-03 00:34:51      80137
  64. # 4 2018-05-03 06:33:24      80137
  65. # 5 2018-05-03 06:48:25      80137
  66. # 6 2018-05-03 07:06:33      80137
  67. #
  68.  
  69.  
  70. # keep only the last popularity data for a day:
  71. df1_xts <- as.xts(df1, order.by = df1$time)
  72. # > head(df1_xts)
  73. #                     time                  popularity
  74. # 2018-05-02 04:53:19 "2018-05-02 04:53:19" " 81038"
  75. # 2018-05-02 06:38:31 "2018-05-02 06:38:31" " 81038"
  76. # 2018-05-03 00:34:51 "2018-05-03 00:34:51" " 80137"
  77. # 2018-05-03 06:33:24 "2018-05-03 06:33:24" " 80137"
  78. # 2018-05-03 06:48:25 "2018-05-03 06:48:25" " 80137"
  79. # 2018-05-03 07:06:33 "2018-05-03 07:06:33" " 80137"
  80. # Warnmeldung:
  81. # timezone of object (UTC) is different than current timezone ().
  82.  
  83.  
  84. ep       <- endpoints(df1_xts, on = "days")
  85. df1_xts1 <- df1_xts[ep]
  86. # > head(df1_xts1)
  87. #                     time                  popularity
  88. # 2018-05-02 06:38:31 "2018-05-02 06:38:31" " 81038"
  89. # 2018-05-03 23:42:40 "2018-05-03 23:42:40" " 83646"
  90. # 2018-05-04 23:42:42 "2018-05-04 23:42:42" " 83240"
  91. # 2018-05-05 23:42:41 "2018-05-05 23:42:41" " 83237"
  92. # 2018-05-06 23:42:42 "2018-05-06 23:42:42" " 83237"
  93. # 2018-05-07 23:42:41 "2018-05-07 23:42:41" " 81129"
  94. # Warnmeldung:
  95. # timezone of object (UTC) is different than current timezone ().
  96.  
  97.  
  98. df1a <- data.frame(date       = as.Date(index(df1_xts1)),
  99.                    popularity = as.numeric(df1_xts1$popularity))
  100. # > head(df1a)
  101. #         date popularity
  102. # 1 2018-05-02      81038
  103. # 2 2018-05-03      83646
  104. # 3 2018-05-04      83240
  105. # 4 2018-05-05      83237
  106. # 5 2018-05-06      83237
  107. # 6 2018-05-07      81129
  108. #
  109. # > str(df1a)
  110. # 'data.frame':   435 obs. of  2 variables:
  111. #  $ date      : Date, format: "2018-05-02" "2018-05-03" "2018-05-04" ...
  112. #  $ popularity: num  81038 83646 83240 83237 83237 ...
  113. # >
  114.  
  115. first_date <- df1b[1, ]$date
  116. last_date  <- last(df1b$date)
  117.  
  118.  
  119. dat1 <- tq_get(tckr1[1], get = "stock.prices", from = first_date, to = last_date)
  120. # > dat1
  121. # # A tibble: 304 x 7
  122. #    date        open  high   low close   volume adjusted
  123. #    <date>     <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>
  124. #  1 2018-05-02  299.  307.  298.  301.  8970400     301.
  125. #  2 2018-05-03  279.  288.  275.  284. 17352100     284.
  126. #  3 2018-05-04  283   297.  280.  294.  8569400     294.
  127. #  4 2018-05-07  298.  306.  295.  303.  8678200     303.
  128. #  5 2018-05-08  301.  308.  299   302.  5930000     302.
  129. #  6 2018-05-09  300.  307.  300.  307.  5727400     307.
  130. #  7 2018-05-10  308.  313.  304.  305.  5651600     305.
  131. #  8 2018-05-11  308.  309.  299.  301.  4679600     301.
  132. #  9 2018-05-14  303.  305.  292.  292.  7286800     292.
  133. # 10 2018-05-15  285.  287.  280.  284.  9519200     284.
  134. # # ... with 294 more rows
  135.  
  136.  
  137. # merge df1b (popularity) with daily close quotes:
  138. df1b <- as_tibble(df1a)
  139.  
  140. # df1c is a data.frame:
  141. df1c <- merge(x = df1b, y = dat1, by = "date")
  142. # > head(df1c)
  143. #         date popularity   open   high    low  close   volume adjusted
  144. # 1 2018-05-02      81038 298.57 306.85 297.78 301.15  8970400   301.15
  145. # 2 2018-05-03      83646 278.79 288.04 275.23 284.45 17352100   284.45
  146. # 3 2018-05-04      83240 283.00 296.86 279.52 294.09  8569400   294.09
  147. # 4 2018-05-07      81129 297.50 305.96 295.17 302.77  8678200   302.77
  148. # 5 2018-05-08      80656 300.80 307.75 299.00 301.97  5930000   301.97
  149. # 6 2018-05-09      79994 300.41 307.01 300.05 306.85  5727400   306.85
  150. # >
  151. #
  152.  
  153.  
  154. df1d <- as_tibble(df1c)  # have a tibble here for plotting
  155.  
  156.  
  157. ######################
  158. ######################
  159. # correlation:
  160. x1 <- df1c$adjusted
  161. y1 <- df1c$popularity
  162. cor.test(x1, y1)
  163. # > cor.test(x1, y1)
  164. #
  165. #         Pearson's product-moment correlation
  166. #
  167. # data:  x1 and y1
  168. # t = -34.092, df = 297, p-value < 2.2e-16
  169. # alternative hypothesis: true correlation is not equal to 0
  170. # 95 percent confidence interval:
  171. #  -0.9134175 -0.8667673
  172. # sample estimates:
  173. #        cor
  174. # -0.8924537
  175. #
  176. ######################
  177. ######################
  178.  
  179.  
  180. # plot the multi chart:
  181. plot_title1 <- paste(security_name1, ": ", first_date, " - ", last_date, sep = "")
  182.  
  183. ylabel  <- paste("[", curr1, "]")
  184. ylabel2 <- paste("Volume")
  185. ylabel3 <- paste("Popularity [accounts] (red)")
  186.  
  187.  
  188. # plot price:
  189. p1 <- df1d %>%
  190.       ggplot(aes(x = date)) +
  191.  
  192.       # plot daily prices as a line:
  193.       geom_line(aes(y = adjusted), colour = "grey") +
  194.  
  195.       # plot daily popularity prices as a line:
  196.       geom_line(aes(y = popularity / 400), colour = "red") +
  197.       scale_y_continuous(sec.axis = sec_axis(~ (. * 400),
  198.                          name = ylabel3,
  199.                          label = comma)) +
  200.  
  201.       labs(title = plot_title1, y = ylabel) +
  202.  
  203.       theme_tq() +
  204.  
  205.       theme(
  206.         axis.title.x = element_blank(),
  207.         axis.text.x = element_blank()
  208.       )
  209.  
  210.  
  211. # plot volume below:
  212. p2 <- df1d %>%
  213.       ggplot(aes(x = date, y = volume)) +
  214.  
  215.       # plot daily volume data as bars:
  216.       geom_bar(stat = "identity") +
  217.  
  218.       labs(y = ylabel2) +
  219.       scale_y_continuous(label = comma) +
  220.  
  221.       theme_tq() +
  222.  
  223.       theme(
  224.         axis.title.x = element_blank()
  225.       )
  226.  
  227.  
  228. plot_grid(p1, p2, ncol = 1, align = 'v')
  229.  
  230.  
  231. # end of robintrack_correlation.R
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement