celestialgod

data.table::foverlaps + foreach + iterators

Apr 12th, 2017
252
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 2.38 KB | None | 0 0
  1. library(foreach)
  2. library(iterators)
  3. library(data.table)
  4. library(pipeR)
  5.  
  6. set.seed(10)
  7. k <- 1
  8. outList <- foreach(v = iter(matrix(sample(3:29, 6000, TRUE), 1000), by = "row")) %:% when(k <= 4) %do%
  9.   {
  10.     if (all(diff(sort(v)) > 2)) {
  11.       k <- k + 1
  12.       return(data.table(studentID = k, matrix(c(1, sort(v), 31), 4, 2, TRUE, list(NULL, c("Start", "End")))))
  13.     } else return(NULL)
  14.   }
  15. outDT <- rbindlist(outList) %>>% `[`(j = `:=`(studentID = match(studentID, sort(unique(studentID))),
  16.                                               avgScore = abs(rnorm(nrow(.)))))
  17. #     studentID Start End  avgScore
  18. #  1:         1     1   3 0.4605151
  19. #  2:         1     6  10 0.2350253
  20. #  3:         1    19  22 0.6432573
  21. #  4:         1    25  31 0.9131981
  22. #  5:         2     1   4 0.9882860
  23. #  6:         2     7  11 0.1127413
  24. #  7:         2    16  20 1.4900499
  25. #  8:         2    26  31 0.4432356
  26. #  9:         3     1   5 1.3623441
  27. # 10:         3    10  14 1.0452357
  28. # 11:         3    21  25 0.2339315
  29. # 12:         3    28  31 2.5524180
  30. # 13:         4     1   4 1.7687187
  31. # 14:         4     7  10 0.6595706
  32. # 15:         4    19  23 0.3707332
  33. # 16:         4    26  31 0.5928033
  34.  
  35. iter <- isplit(outDT, outDT$studentID)
  36. resDT <- copy(iter$nextElem()$value) %>>% `[`(j = `:=`(studentID = NULL))
  37. setkey(resDT, Start, End)
  38. while (TRUE) {
  39.   v <- tryCatch(iter$nextElem(), error = function(e) e)
  40.   if (any(class(v) == "error"))
  41.     break
  42.   resDT <- foverlaps(v$value, resDT, type = "any", nomatch = 0) %>>%
  43.     `[`(j = `:=`(Start = pmax(Start, i.Start), End = pmin(End, i.End))) %>>%
  44.     `[`(j = .(Start, End))
  45.   setkey(resDT, Start, End)
  46. }
  47. #    Start End
  48. # 1:     1   3
  49. # 2:    10  10
  50. # 3:    28  31
  51.  
  52. finalResDT <- foreach(it = isplit(outDT, outDT$studentID), .final = rbindlist) %do%
  53.   {
  54.     foverlaps(it$value, resDT, type = "any", nomatch = 0) %>>%
  55.       `[`(j = avgScore := (i.End - End + 1) / (Start - i.Start + 1) * avgScore) %>>%
  56.       `[`(j = .(Start, End, studentID, avgScore))
  57.   } %>>% dcast(Start + End ~ studentID, val.var = "avgScore") %>>%
  58.   setnames(as.character(1:(ncol(.)-2)), paste0("studentID-", 1:(ncol(.)-2)))
  59. #    Start End studentID-1 studentID-2 studentID-3 studentID-4
  60. # 1:     1   3  0.46051506  1.97657201    4.087032   3.5374375
  61. # 2:    10  10  0.04700506  0.05637067    5.226179   0.1648927
  62. # 3:    28  31  0.22829953  0.14774520    2.552418   0.1976011
Advertisement
Add Comment
Please, Sign In to add comment