Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # robintrack_correlation.R
- #
- # FC, 18.07.2019
- #
- # env.: R version 3.6.0 (2019-04-26) -- "Planting of a Tree"
- #
- # Platform: x86_64-w64-mingw32/x64 (64-bit)
- #
- #
- # test: OK
- #
- library(chron) # loads as.POSIXlt, as.POSIXct
- library(tidyquant) # loads tidyquant, tidyverse, lubridate, xts, quantmod, TTRlibrary(xts)
- library(cowplot)
- library(scales) # for scale_y_continuous()
- ###############################################
- ## user input:
- # how many years of daily quotes:
- curr1 <- c('USD')
- tckr1 <- c('TSLA')
- security_name1 <- paste("Tesla (", tckr1, ")", sep ="")
- ###############################################
- # class(popularity): data.frame
- popularity <- read.csv("Robintrack popularity, Tesla, 2018-2019.csv", header = T, dec = ".", sep = ";")
- # input:
- # timestamp;popularity
- # 2018-05-02 04:53:19 UTC;81038
- # 2018-05-02 06:38:31 UTC;81038
- # 2018-05-03 00:34:51 UTC;80137
- # 2018-05-03 06:33:24 UTC;80137
- # 2018-05-03 06:48:25 UTC;80137
- # 2018-05-03 07:06:33 UTC;80137
- # 2018-05-03 07:33:50 UTC;80137
- # 2018-05-03 07:43:02 UTC;80137
- # 2018-05-03 11:19:16 UTC;80138
- # 2018-05-03 12:42:40 UTC;80138
- # 2018-05-03 13:42:40 UTC;80149
- # 2018-05-03 14:42:41 UTC;81223
- # > head(popularity)
- # timestamp popularity
- # 1 2018-05-02 04:53:19 UTC 81038
- # 2 2018-05-02 06:38:31 UTC 81038
- # 3 2018-05-03 00:34:51 UTC 80137
- # 4 2018-05-03 06:33:24 UTC 80137
- # 5 2018-05-03 06:48:25 UTC 80137
- # 6 2018-05-03 07:06:33 UTC 80137
- # >
- times1 <- as.POSIXlt(popularity$timestamp, tz = "UTC")
- df1 <- data.frame(time = times1, popularity = popularity$popularity)
- # > head(df1)
- # time popularity
- # 1 2018-05-02 04:53:19 81038
- # 2 2018-05-02 06:38:31 81038
- # 3 2018-05-03 00:34:51 80137
- # 4 2018-05-03 06:33:24 80137
- # 5 2018-05-03 06:48:25 80137
- # 6 2018-05-03 07:06:33 80137
- #
- # keep only the last popularity data for a day:
- df1_xts <- as.xts(df1, order.by = df1$time)
- # > head(df1_xts)
- # time popularity
- # 2018-05-02 04:53:19 "2018-05-02 04:53:19" " 81038"
- # 2018-05-02 06:38:31 "2018-05-02 06:38:31" " 81038"
- # 2018-05-03 00:34:51 "2018-05-03 00:34:51" " 80137"
- # 2018-05-03 06:33:24 "2018-05-03 06:33:24" " 80137"
- # 2018-05-03 06:48:25 "2018-05-03 06:48:25" " 80137"
- # 2018-05-03 07:06:33 "2018-05-03 07:06:33" " 80137"
- # Warnmeldung:
- # timezone of object (UTC) is different than current timezone ().
- ep <- endpoints(df1_xts, on = "days")
- df1_xts1 <- df1_xts[ep]
- # > head(df1_xts1)
- # time popularity
- # 2018-05-02 06:38:31 "2018-05-02 06:38:31" " 81038"
- # 2018-05-03 23:42:40 "2018-05-03 23:42:40" " 83646"
- # 2018-05-04 23:42:42 "2018-05-04 23:42:42" " 83240"
- # 2018-05-05 23:42:41 "2018-05-05 23:42:41" " 83237"
- # 2018-05-06 23:42:42 "2018-05-06 23:42:42" " 83237"
- # 2018-05-07 23:42:41 "2018-05-07 23:42:41" " 81129"
- # Warnmeldung:
- # timezone of object (UTC) is different than current timezone ().
- df1a <- data.frame(date = as.Date(index(df1_xts1)),
- popularity = as.numeric(df1_xts1$popularity))
- # > head(df1a)
- # date popularity
- # 1 2018-05-02 81038
- # 2 2018-05-03 83646
- # 3 2018-05-04 83240
- # 4 2018-05-05 83237
- # 5 2018-05-06 83237
- # 6 2018-05-07 81129
- #
- # > str(df1a)
- # 'data.frame': 435 obs. of 2 variables:
- # $ date : Date, format: "2018-05-02" "2018-05-03" "2018-05-04" ...
- # $ popularity: num 81038 83646 83240 83237 83237 ...
- # >
- first_date <- df1b[1, ]$date
- last_date <- last(df1b$date)
- dat1 <- tq_get(tckr1[1], get = "stock.prices", from = first_date, to = last_date)
- # > dat1
- # # A tibble: 304 x 7
- # date open high low close volume adjusted
- # <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
- # 1 2018-05-02 299. 307. 298. 301. 8970400 301.
- # 2 2018-05-03 279. 288. 275. 284. 17352100 284.
- # 3 2018-05-04 283 297. 280. 294. 8569400 294.
- # 4 2018-05-07 298. 306. 295. 303. 8678200 303.
- # 5 2018-05-08 301. 308. 299 302. 5930000 302.
- # 6 2018-05-09 300. 307. 300. 307. 5727400 307.
- # 7 2018-05-10 308. 313. 304. 305. 5651600 305.
- # 8 2018-05-11 308. 309. 299. 301. 4679600 301.
- # 9 2018-05-14 303. 305. 292. 292. 7286800 292.
- # 10 2018-05-15 285. 287. 280. 284. 9519200 284.
- # # ... with 294 more rows
- # merge df1b (popularity) with daily close quotes:
- df1b <- as_tibble(df1a)
- # df1c is a data.frame:
- df1c <- merge(x = df1b, y = dat1, by = "date")
- # > head(df1c)
- # date popularity open high low close volume adjusted
- # 1 2018-05-02 81038 298.57 306.85 297.78 301.15 8970400 301.15
- # 2 2018-05-03 83646 278.79 288.04 275.23 284.45 17352100 284.45
- # 3 2018-05-04 83240 283.00 296.86 279.52 294.09 8569400 294.09
- # 4 2018-05-07 81129 297.50 305.96 295.17 302.77 8678200 302.77
- # 5 2018-05-08 80656 300.80 307.75 299.00 301.97 5930000 301.97
- # 6 2018-05-09 79994 300.41 307.01 300.05 306.85 5727400 306.85
- # >
- #
- df1d <- as_tibble(df1c) # have a tibble here for plotting
- ######################
- ######################
- # correlation:
- x1 <- df1c$adjusted
- y1 <- df1c$popularity
- cor.test(x1, y1)
- # > cor.test(x1, y1)
- #
- # Pearson's product-moment correlation
- #
- # data: x1 and y1
- # t = -34.092, df = 297, p-value < 2.2e-16
- # alternative hypothesis: true correlation is not equal to 0
- # 95 percent confidence interval:
- # -0.9134175 -0.8667673
- # sample estimates:
- # cor
- # -0.8924537
- #
- ######################
- ######################
- # plot the multi chart:
- plot_title1 <- paste(security_name1, ": ", first_date, " - ", last_date, sep = "")
- ylabel <- paste("[", curr1, "]")
- ylabel2 <- paste("Volume")
- ylabel3 <- paste("Popularity [accounts] (red)")
- # plot price:
- p1 <- df1d %>%
- ggplot(aes(x = date)) +
- # plot daily prices as a line:
- geom_line(aes(y = adjusted), colour = "grey") +
- # plot daily popularity prices as a line:
- geom_line(aes(y = popularity / 400), colour = "red") +
- scale_y_continuous(sec.axis = sec_axis(~ (. * 400),
- name = ylabel3,
- label = comma)) +
- labs(title = plot_title1, y = ylabel) +
- theme_tq() +
- theme(
- axis.title.x = element_blank(),
- axis.text.x = element_blank()
- )
- # plot volume below:
- p2 <- df1d %>%
- ggplot(aes(x = date, y = volume)) +
- # plot daily volume data as bars:
- geom_bar(stat = "identity") +
- labs(y = ylabel2) +
- scale_y_continuous(label = comma) +
- theme_tq() +
- theme(
- axis.title.x = element_blank()
- )
- plot_grid(p1, p2, ncol = 1, align = 'v')
- # end of robintrack_correlation.R
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement