celestialgod

take mean of previous 7 values with rollapply

Dec 5th, 2016
262
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 3.03 KB | None | 0 0
  1. library(data.table)
  2. library(pipeR)
  3. library(zoo)
  4. library(plyr)
  5.  
  6. # generate data
  7. numDays <- 20
  8. numID <- 3000
  9. numRecords <- numDays * numID * 0.5
  10. set.seed(5)
  11. DT <- data.table(matrix(rnorm(numDays * numID), numID)) %>>% `[`(j = ID := 1:numID) %>>%
  12.   melt.data.table(id.vars = numDays + 1, variable.name = "Day", value.name = "X") %>>%
  13.   `[`(j = Day := as.integer(mapvalues(Day, paste0("V", 1:numDays), 1:numDays))) %>>%
  14.   `[`(i = sample(nrow(.), numRecords)) %>>% setorder(ID, Day)
  15.  
  16. DT[ , Day2 := factor(Day, levels = seq(min(DT$Day), max(DT$Day)))]
  17. setkey(DT, ID, Day)
  18.  
  19. # check the number of observation is greater than 1
  20. # DT[ , .N, by = .(ID)] %>>% `[[`("N") %>>% `!=`(1) %>>% all # TRUE
  21.  
  22. mean2 <- function(x) {
  23.   if (length(x) == 1 || all(is.na(x)))
  24.     return(NA)
  25.   mean(head(x, length(x) - 1), na.rm = TRUE)
  26. }
  27.  
  28. f1 <- function(DT) {
  29.   DT2 <- dcast.data.table(DT, ID ~ Day2, sum, fill = NA, drop = FALSE, value.var = "X")
  30.   meanDT <- DT2[ , 2:ncol(DT2)] %>>% as.matrix %>>% t %>>%
  31.     rollapply(8, mean2, partial = TRUE, align = "right") %>>% data.table %>>%
  32.     melt.data.table(measure.var = 1:ncol(.), variable.name = "ID", value.name = "x_mean",
  33.                     variable.factor = FALSE) %>>%
  34.     `[`(j = Day := 1:(ncol(DT2)-1), by = .(ID)) %>>%
  35.     `[`(j = ID := as.integer(mapvalues(ID, paste0("V", 1:nrow(DT2)), DT2$ID)))
  36.   return(merge(DT, meanDT, by = c("ID", "Day")))
  37. }
  38.  
  39. f2 <- function(DT) {
  40.   DT[CJ(unique(ID), seq(min(Day), max(Day)))] %>>%
  41.     `[`(j = .(Day = rollapply(Day, 8, max, partial = TRUE, align = "right"),
  42.               x_mean = rollapply(X, 8, mean2, partial = TRUE, align = "right")),
  43.         by = .(ID)) %>>%
  44.     merge(DT, by = c("ID", "Day"), all.y = TRUE)
  45. }
  46.  
  47. mvAvg_f <- function(val, day, span = 7) {
  48.   sapply(day, function(y){
  49.     tmp <- day - y
  50.     mean(val[which(tmp < 0 & tmp >= -span)])
  51.   })
  52. }
  53. f3 <- function(DT) {
  54.   DT[ , x_mean_2 := mvAvg_f(X, Day), by = .(ID)]
  55. }
  56.  
  57. f4 <- function(DT) {
  58.   DT[ , x_mean_3 := sapply(Day, function(s) mean(X[between(Day, s-7, s-1)])), by = .(ID)]
  59. }
  60.  
  61. f5 <- function(DT) {
  62.   DT[ , x_mean_4 := sapply(Day, function(s) mean(X[Day >= s-7 & Day < s])), by = .(ID)]
  63. }
  64.  
  65. resDT1 <- f1(DT)
  66. resDT2 <- f2(DT)
  67. all.equal(resDT1, resDT2, ignore.col.order = TRUE) # TRUE
  68. f3(DT)
  69. f4(DT)
  70. f5(DT)
  71. all.equal(resDT1$x_mean, DT$x_mean_2, ignore.col.order = TRUE) # TRUE
  72. all.equal(resDT1$x_mean, DT$x_mean_3, ignore.col.order = TRUE) # TRUE
  73. all.equal(resDT1$x_mean, DT$x_mean_4, ignore.col.order = TRUE) # TRUE
  74.  
  75. library(microbenchmark)
  76. microbenchmark(f1(DT), f2(DT), f3(DT), f4(DT), f5(DT), times = 10L)
  77. # Unit: milliseconds
  78. #    expr       min        lq      mean    median        uq       max neval
  79. #  f1(DT) 4879.9619 4909.4480 4961.4962 4938.1009 4997.4011 5152.2432    10
  80. #  f2(DT) 4051.7383 4095.4616 4133.2843 4133.9591 4162.8146 4245.0343    10
  81. #  f3(DT)  321.6416  322.3145  324.9348  323.4118  325.8534  331.5509    10
  82. #  f4(DT) 1942.0352 1956.5268 1992.7289 1979.8526 2014.8336 2096.0038    10
  83. #  f5(DT)  149.6435  151.4261  155.0831  152.9530  161.5350  162.9220    10
Advertisement
Add Comment
Please, Sign In to add comment