celestialgod

fast data.table string detection merge

Oct 17th, 2016
249
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 1.05 KB | None | 0 0
  # Packages used by this script:
  #   data.table - data.table() and set()
  #   stringr    - str_c() and str_detect()
  #   pipeR      - the %>>% pipe
  #   zoo        - rollapply()
  library(data.table)
  library(stringr)
  library(pipeR)
  library(zoo)

  # Generate the test data: numRows strings, each made of numDigits random
  # digits drawn from 1-9, plus an empty character column `value` that the
  # mapping step fills in later.
  numDigits <- 6
  numRows <- 1000
  DT2 <- data.table(
    str = rollapply(
      # One long stream of random digits, sampled with replacement.
      data = sample(9, size = numRows * numDigits, replace = TRUE),
      # Window width equals the step (`by`), so the windows do not overlap and
      # each window becomes exactly one string.
      width = numDigits,
      FUN = function(digits) str_c(digits, collapse = ""),
      by = numDigits
    ),
    value = NA_character_
  )
  12.  
  # Build the mapping table: one row per distinct 3-character prefix found in
  # DT2$str. `pattern` is a regex anchored at the start of the string
  # ("^" + prefix + ".*") and `value` is a sequential label ("A001", "A002",
  # ...) assigned in sorted-prefix order.
  allPatterns <- substring(DT2$str, 1, 3) %>>% unique %>>% sort
  # seq_along() rather than 1:length(): with no patterns, 1:0 would produce
  # c(1, 0) and two bogus labels instead of an empty column.
  DT <- data.table(pattern = str_c("^", allPatterns, ".*"),
                   value = sprintf("A%03i", seq_along(allPatterns)))
  17.  
  # Run the mapping: for every pattern in DT, find the rows of DT2 whose `str`
  # matches it and write the corresponding label into DT2$value. set() assigns
  # by reference, so DT2 is updated in place. The loop is timed with proc.time().
  st <- proc.time()
  # seq_len() rather than 1:nrow(): an empty DT would otherwise iterate over
  # c(1, 0) and index rows that do not exist.
  for (i in seq_len(nrow(DT))) {
    # The target column is passed by name; looking up its position on every
    # iteration is unnecessary.
    set(DT2, i = which(str_detect(DT2$str, DT$pattern[i])),
        j = "value", value = DT$value[i])
  }
  proc.time() - st
  # Timing recorded from one run:
  #    user  system elapsed
  #    0.11    0.00    0.11

  print(DT2)
  # Sample output from one run. The data comes from sample() and the script
  # sets no seed, so the actual values differ between runs.
  #          str value
  #    1: 588847  A297
  #    2: 472447  A225
  #    3: 181823  A048
  #    4: 928228  A495
  #    5: 331838  A139
  #   ---
  #  996: 172326  A042
  #  997: 522373  A253
  #  998: 828978  A437
  #  999: 617415  A311
  # 1000: 877184  A470
Advertisement
Add Comment
Please, Sign In to add comment