celestialgod

data.table non-equi joins

Dec 25th, 2016
302
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 2.71 KB | None | 0 0
  1. library(data.table)
  2. # data generation
  3. numBus <- 20L
  4. departTimeSpan <- "20 mins"
  5. data1 <- data.table(busNo = 1L:numBus,
  6.                     departTime = seq.POSIXt(as.POSIXct("1990-01-01 05:50:00"),
  7.                                             by = departTimeSpan, length.out = numBus))
  8. ## data1
  9. #     busNo          departTime
  10. #  1:     1 1990-01-01 05:50:00
  11. #  2:     2 1990-01-01 06:10:00
  12. #  3:     3 1990-01-01 06:30:00
  13. #  4:     4 1990-01-01 06:50:00
  14. #  5:     5 1990-01-01 07:10:00
  15. #  6:     6 1990-01-01 07:30:00
  16. #  7:     7 1990-01-01 07:50:00
  17. #  8:     8 1990-01-01 08:10:00
  18. #  9:     9 1990-01-01 08:30:00
  19. # 10:    10 1990-01-01 08:50:00
  20. # 11:    11 1990-01-01 09:10:00
  21. # 12:    12 1990-01-01 09:30:00
  22. # 13:    13 1990-01-01 09:50:00
  23. # 14:    14 1990-01-01 10:10:00
  24. # 15:    15 1990-01-01 10:30:00
  25. # 16:    16 1990-01-01 10:50:00
  26. # 17:    17 1990-01-01 11:10:00
  27. # 18:    18 1990-01-01 11:30:00
  28. # 19:    19 1990-01-01 11:50:00
  29. # 20:    20 1990-01-01 12:10:00
  30.  
  31. # 產生2015/10/01~2015/12/31中前後五分鐘的班次
  32. dataCnt <- 5L
  33. data2 <- data.table(departTime = sample(data1$departTime, dataCnt, TRUE) +
  34.                       as.difftime(sample(9404:9495, dataCnt, TRUE), units = "days") +
  35.                       as.difftime(sample(-300:300, dataCnt, TRUE), units = "secs"))
  36. #             departTime
  37. # 1: 2015-11-25 08:49:53
  38. # 2: 2015-10-27 09:47:25
  39. # 3: 2015-11-02 10:27:39
  40. # 4: 2015-10-18 08:11:40
  41. # 5: 2015-12-02 09:11:13
  42.  
  43. ## method 1
  44. # find the departure time range
  45. diffTime5mins <- as.difftime(5, units = "mins")
  46. data1[ , start := departTime - diffTime5mins]
  47. data1[ , end := departTime + diffTime5mins]
  48. setkey(data1, start, end)
  49.  
  50. # align time to 1990/01/01
  51. data2[ , start := departTime - floor(departTime - as.POSIXct("1990-01-01 00:00:00"))]
  52. data2[ , end := start]
  53.  
  54. # use foverlaps to find the busNo
  55. foverlaps(data2, data1)[ , .(i.departTime, busNo)]
  56. #           i.departTime busNo
  57. # 1: 2015-11-25 08:49:53    10
  58. # 2: 2015-10-27 09:47:25    13
  59. # 3: 2015-11-02 10:27:39    15
  60. # 4: 2015-10-18 08:11:40     8
  61. # 5: 2015-12-02 09:11:13    11
  62.  
  63. ## method 2
  64. # find the departure time range
  65. diffTime5mins <- as.difftime(5, units = "mins")
  66. data1[ , start := departTime - diffTime5mins]
  67. data1[ , end := departTime + diffTime5mins]
  68. setkey(data1, start, end)
  69. # align time to 1990/01/01
  70. data2[ , departTimeAlign := departTime - floor(departTime - as.POSIXct("1990-01-01 00:00:00"))]
  71. # use Non-equi joins
  72. data2[data1, on = .(departTimeAlign >= start, departTimeAlign <= end), nomatch = 0][ , .(departTime, busNo)]
  73. #             departTime busNo
  74. # 1: 2015-10-18 08:11:40     8
  75. # 2: 2015-11-25 08:49:53    10
  76. # 3: 2015-12-02 09:11:13    11
  77. # 4: 2015-10-27 09:47:25    13
  78. # 5: 2015-11-02 10:27:39    15
Advertisement
Add Comment
Please, Sign In to add comment