Advertisement
lvalnegri

data2fst

May 28th, 2018
290
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 2.41 KB | None | 0 0
  # load packages
  # library() stops immediately when a package is missing, whereas require()
  # only returns FALSE and lets the script fail later on an unrelated line.
  # The argument name is spelled out instead of relying on partial matching.
  pkg <- c('data.table', 'dygraphs', 'fst', 'RMySQL', 'xts')
  invisible(lapply(pkg, library, character.only = TRUE))
  4.  
  # load all data
  # The query runs inside tryCatch() so that the connection is closed even
  # when the query fails; on.exit() is not available at script top level.
  dbc <- dbConnect(MySQL(), group = 'dataOps', dbname = 'cycle_hire_ldn')
  dts <- tryCatch(
    data.table(dbGetQuery(dbc,
      "SELECT start_station_id, start_day, start_hour, end_station_id, end_day, end_hour, duration FROM hires"
    )),
    finally = dbDisconnect(dbc)
  )
  11.  
  # Station-ordered copy: sort so that all hires leaving the same station sit
  # in one contiguous run, then store the per-station row counts (the index)
  # followed by the sorted data itself.
  setorder(dts, start_station_id, start_day, start_hour, end_station_id, end_day, end_hour)
  y <- dts[, .N, by = .(station = start_station_id)]
  write.fst(y, 'D:/tmp/fst/idxs')
  write.fst(dts, 'D:/tmp/fst/dtss')

  # Month-ordered copy: re-sort with start_day leading, count rows per month
  # key (the first six characters of start_day), then store data and index.
  setorder(dts, start_day, start_station_id, start_hour, end_station_id, end_day, end_hour)
  y <- dts[, .N, by = .(month = substr(start_day, start = 1, stop = 6))]
  write.fst(dts, 'D:/tmp/fst/dtsm')
  write.fst(y, 'D:/tmp/fst/idxm')
  23.  
  # Read one contiguous slice of the hire data from the pre-sorted fst files.
  #
  # The data file is sorted by station ('dtss') or by day ('dtsm') and the
  # matching index file ('idxs' / 'idxm') holds the number of rows per key in
  # column N, so the wanted rows form a single block whose first and last row
  # numbers are sums of those counts.
  #
  # Args:
  #   x          by_station = TRUE: a single station id.
  #              by_station = FALSE: one month key, or c(from, to) where either
  #              end may be '.' to leave that side of the range open.
  #   by_station TRUE reads the station-ordered files, FALSE the month-ordered
  #              ones.
  #   cols       character vector of columns to read; NULL reads all columns.
  #   path       directory that holds the fst files.
  #
  # Returns the matching rows as read by read.fst(as.data.table = TRUE); a
  # zero-row table when no key matches. Stops when 'x' is empty, or when it
  # has more than one element while by_station = TRUE.
  get_station_data <- function(x, by_station = TRUE, cols = NULL, path = 'D:/tmp/fst') {
    if (length(x) < 1L || (by_station && length(x) != 1L)) {
      stop("'x' must be a single station id, or one or two month keys", call. = FALSE)
    }

    # Plain if/else: the flag is a scalar, ifelse() is meant for vectors.
    suffix <- if (by_station) 's' else 'm'
    key_col <- if (by_station) 'station' else 'month'

    idx <- read.fst(file.path(path, paste0('idx', suffix)), as.data.table = TRUE)
    keys <- idx[[key_col]]
    counts <- idx[['N']]

    # A single key is the degenerate range [x, x].
    lo <- x[1L]
    hi <- if (length(x) == 1L) x[1L] else x[2L]

    # '.' only acts as an open end when a two-element month range is given.
    is_range <- !by_station && length(x) > 1L
    open_lo <- is_range && isTRUE(as.character(lo) == '.')
    open_hi <- is_range && isTRUE(as.character(hi) == '.')

    # which() drops NA comparisons instead of propagating them into the sums.
    rs <- if (open_lo) 1 else sum(counts[which(keys < lo)]) + 1
    re <- if (open_hi) sum(counts) else sum(counts[which(keys <= hi)])

    data_file <- file.path(path, paste0('dts', suffix))
    if (re < rs) {
      # No rows for the requested key(s). An inverted range is not valid for
      # read.fst(), so read a single row for the column layout and drop it.
      one_row <- read.fst(data_file, columns = cols, from = 1, to = 1, as.data.table = TRUE)
      return(head(one_row, 0L))
    }
    read.fst(data_file, columns = cols, from = rs, to = re, as.data.table = TRUE)
  }
  48.  
  # test
  # Count the hires starting at station 12 per calendar day (start_day is
  # parsed as yyyymmdd) and turn the counts into a date-indexed xts series.
  y <- get_station_data(12)[
    , .N, by = .(start_day = as.Date(as.character(start_day), format = '%Y%m%d'))
  ]
  y <- xts(y[['N']], order.by = y[['start_day']])

  # Interactive chart of the daily counts with a 7-period roller.
  # A range selector step is kept here, disabled, as in the original chain:
  #   dyRangeSelector( dateWindow = c(Sys.Date() - 61, Sys.Date() - 1), height = 30, strokeColor = 'black' )
  dygraph(y) %>%
    dyLegend(width = 700, show = "always", hideOnMouseOut = FALSE) %>%
    dyAxis('y', label = 'Total Number of hires', drawGrid = TRUE) %>%
    dyHighlight(
      highlightCircleSize = 4,
      highlightSeriesBackgroundAlpha = 0.4,
      hideOnMouseOut = TRUE,
      highlightSeriesOpts = list(strokeWidth = 2)
    ) %>%
    dyRoller(rollPeriod = 7)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement