Advertisement
Guest User

Untitled

a guest
Aug 5th, 2020
503
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.99 KB | None | 0 0
  # Find which of the known district names occur in each address string.
  #
  # The two locale switches are described in the original script as being
  # "for windows OS", so they are only executed on Windows. On other platforms
  # the first call would move the session into the C locale and the second one
  # ("cht" is a Windows locale name) would presumably fail to move it back.
  is_windows <- .Platform$OS.type == "windows"
  if (is_windows) {
    Sys.setlocale(locale = "C")
  }
  library(stringi)
  library(magrittr)

  # List every district stem here; the common suffix is appended by paste0().
  patn <- c("中", "西", "西屯") %>% paste0("區")

  adrs <- c(
    "407台中市西屯區中港路12345號中區", # expect 2 hits (needs extra handling)
    "407台中市西屯區中港路12345號", # expect 1 hit
    "407台中市西門太狂區中港路12345號", # expect 0 hits
    "12345號中港路西屯區台中市407" # expect 1 hit
  )

  if (is_windows) {
    Sys.setlocale(locale = "cht")
  }

  # For every address keep the district names it contains, in `patn` order.
  # lapply() always produces a list (one element per address), whereas the
  # sapply() + apply() combination collapses to a vector or matrix whenever all
  # addresses happen to have the same number of hits, and fails on empty input.
  # which() drops NA detections so a missing address yields character(0).
  res <- lapply(adrs, function(adr) {
    patn[which(stri_detect_fixed(adr, patn))]
  })
  names(res) <- adrs
  res
  # $`407台中市西屯區中港路12345號中區`
  # [1] "中區"   "西屯區"
  #
  # $`407台中市西屯區中港路12345號`
  # [1] "西屯區"
  #
  # $`407台中市西門太狂區中港路12345號`
  # character(0)
  #
  # $`12345號中港路西屯區台中市407`
  # [1] "西屯區"

  # Afterwards, inspect the entries where lengths(res) != 1.
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement