Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Find which known district names occur in each address string.
#
# `patn` holds the district names to look for, `adrs` the addresses to scan,
# and `res` ends up as a list named by address in which every element is the
# subset of `patn` found in that address.

# The locale switches below are a Windows-only workaround (the original
# script marks them "for windows OS"), so they are skipped elsewhere: on other
# platforms the "cht" locale name is not recognised and the session would be
# left in the "C" locale.
# NOTE(review): presumably the switch to "C" is there so the non-ASCII
# literals below are read correctly on Windows -- confirm.
on_windows <- .Platform$OS.type == "windows"
if (on_windows) {
  Sys.setlocale(locale = "C")
}

library(stringi)
library(magrittr)

# List every district here; the suffix "區" is appended to each stem.
patn <- c("中", "西", "西屯") %>% paste0("區")

adrs <- c(
  "407台中市西屯區中港路12345號中區", # should match 2 (needs extra handling)
  "407台中市西屯區中港路12345號", # should match 1
  "407台中市西門太狂區中港路12345號", # should match 0
  "12345號中港路西屯區台中市407" # should match 1
)

if (on_windows) {
  Sys.setlocale(locale = "cht")
}

# One list element per address, regardless of how many districts matched.
# lapply() keeps the result a list even when every address yields the same
# number of matches, and which() drops NA so a missing address gives
# character(0) rather than NA entries.
res <- lapply(adrs, function(adr) {
  patn[which(stri_detect_fixed(adr, patn))]
})
names(res) <- adrs
res
# Expected output:
# $`407台中市西屯區中港路12345號中區`
# [1] "中區" "西屯區"
#
# $`407台中市西屯區中港路12345號`
# [1] "西屯區"
#
# $`407台中市西門太狂區中港路12345號`
# character(0)
#
# $`12345號中港路西屯區台中市407`
# [1] "西屯區"
# Afterwards, inspect the addresses for which sapply(res, length) != 1.
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement