Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Sample Taiwanese street addresses used as the input of this script.
# Floors are written with Chinese numerals (e.g. 十九樓); the other numbers
# are written with digits.
address <- c(
  "宜蘭縣數學鎮數學里10鄰數學路100巷16之2號",
  "基隆市太陽區太陽里17鄰太陽三街223之2號十九樓",
  "基隆市白雲區白雲里20鄰白雲三街59號十樓之1",
  "新竹市海洋區海洋里13鄰海洋路29號六樓",
  "臺北市小明區小名里20鄰小名路222號二十樓",
  "新北市語文區語文里17鄰語文路221號二十九樓之5",
  "宜蘭縣飛機鎮飛機里3鄰飛機路73號",
  "新北市紅色區紅色里15鄰紅色路四段15號之4十七樓"
)
# Third-party packages attached for the rest of the script.
library(magrittr)
library(plyr)
library(stringr)
library(stringi)
# Replace fullwidth digits (U+FF10..U+FF19) in every address with the ASCII
# digits 0-9; all other characters are left untouched.
#
# The translation is done on characters rather than on raw bytes so that it
# does not depend on the encoding the strings happen to be stored in. A byte
# scan for the lead byte of a double-byte digit only works in that one
# encoding and can also hit the trailing byte of an unrelated character.
# intToUtf8() builds the ten fullwidth digits without non-ASCII literals.
address_converted <- unname(
  chartr(intToUtf8(0xFF10:0xFF19), "0123456789", address)
)
# [1] "宜蘭縣數學鎮數學里10鄰數學路100巷16之2號"
# [2] "基隆市太陽區太陽里17鄰太陽三街223之2號十九樓"
# [3] "基隆市白雲區白雲里20鄰白雲三街59號十樓之1"
# [4] "新竹市海洋區海洋里13鄰海洋路29號六樓"
# [5] "臺北市小明區小名里20鄰小名路222號二十樓"
# [6] "新北市語文區語文里17鄰語文路221號二十九樓之5"
# [7] "宜蘭縣飛機鎮飛機里3鄰飛機路73號"
# [8] "新北市紅色區紅色里15鄰紅色路四段15號之4十七樓"
# Convert a Chinese numeral string to its numeric value.
#
# Args:
#   x: a single string made only of the digits 零一二三四五六七八九, the small
#      units 十百千 and the large units 萬億.
#
# Returns:
#   A length-one numeric value. The empty string gives 0.
#
# Raises:
#   An error when `x` is not a single non-NA string, or when it contains a
#   character outside the supported set.
chinese2digits <- function(x) {
  stopifnot(is.character(x), length(x) == 1L, !is.na(x))

  symbols <- c("零", "一", "二", "三", "四", "五", "六", "七", "八", "九",
               "十", "百", "千", "萬", "億")
  values <- c(0:9, 10, 100, 1000, 1e4, 1e8)

  chars <- strsplit(x, "")[[1]]
  vals <- values[match(chars, symbols)]
  if (anyNA(vals)) {
    stop("unsupported character(s) in Chinese numeral: ",
         paste(unique(chars[is.na(vals)]), collapse = ""),
         call. = FALSE)
  }

  total <- 0           # sections already closed by 萬 / 億
  section <- 0         # value accumulated below the next 萬 / 億
  pending <- NA_real_  # digit waiting for its unit; NA when none was read

  for (v in vals) {
    if (v < 10) {
      pending <- v
    } else if (v < 1e4) {
      # A unit with no digit in front (the 十 of 十九) counts once.
      section <- section + (if (is.na(pending)) 1 else pending) * v
      pending <- NA_real_
    } else {
      if (!is.na(pending)) {
        section <- section + pending
      } else if (total == 0 && section == 0) {
        # A large unit with nothing at all in front of it counts once.
        section <- 1
      }
      # 億 scales everything read so far (so 一萬億 is 1e12); 萬 only scales
      # the current section.
      total <- if (v == 1e8) (total + section) * v else total + section * v
      section <- 0
      pending <- NA_real_
    }
  }

  total + section + (if (is.na(pending)) 0 else pending)
}
## test
# chinese2digits("一百五十二") # 152
# chinese2digits("一億零八萬零三百二十三") # 100080323
# chinese2digits("十九") # 19
# Rewrite the first Chinese-numeral floor of each address (e.g. 十七樓) with
# digits (17樓). Addresses without such a floor, and NA entries, are returned
# unchanged.
#
# vapply() guarantees a character vector whatever the input length, and the
# stringr calls are namespace-qualified so this step does not rely on the
# package being attached.
address_converted2 <- vapply(address_converted, function(x) {
  floor_pattern <- "[零一二三四五六七八九十百千萬億]+樓"
  floor_text <- stringr::str_extract(x, floor_pattern)
  if (is.na(floor_text)) {
    return(x)
  }

  # When the floor directly follows a digit (...之4十七樓), put ", " between
  # them so the two numbers do not merge once the floor is written in digits.
  adjacent_pattern <- stringr::str_c("(\\d+)(", floor_pattern, ")")
  x <- stringr::str_replace(x, adjacent_pattern, "\\1, \\2")

  floor_number <- chinese2digits(stringr::str_replace(floor_text, "樓", ""))
  # The extracted text is replaced as a literal, not re-read as a regex.
  stringr::str_replace(
    x,
    stringr::fixed(floor_text),
    stringr::str_c(floor_number, "樓")
  )
}, character(1), USE.NAMES = FALSE)
# [1] "宜蘭縣數學鎮數學里10鄰數學路100巷16之2號" "基隆市太陽區太陽里17鄰太陽三街223之2號19樓"
# [3] "基隆市白雲區白雲里20鄰白雲三街59號10樓之1" "新竹市海洋區海洋里13鄰海洋路29號6樓"
# [5] "臺北市小明區小名里20鄰小名路222號20樓" "新北市語文區語文里17鄰語文路221號29樓之5"
# [7] "宜蘭縣飛機鎮飛機里3鄰飛機路73號" "新北市紅色區紅色里15鄰紅色路四段15號之4, 17樓"
# Extract every run of digits from each converted address. The result is a
# list with one character vector per address, named by the address itself.
# str_extract_all() is already vectorised, so no apply loop is needed.
stats::setNames(
  stringr::str_extract_all(address_converted2, "\\d+"),
  address_converted2
)
# $`宜蘭縣數學鎮數學里10鄰數學路100巷16之2號`
# [1] "10" "100" "16" "2"
#
# $基隆市太陽區太陽里17鄰太陽三街223之2號19樓
# [1] "17" "223" "2" "19"
#
# $基隆市白雲區白雲里20鄰白雲三街59號10樓之1
# [1] "20" "59" "10" "1"
#
# $新竹市海洋區海洋里13鄰海洋路29號6樓
# [1] "13" "29" "6"
#
# $臺北市小明區小名里20鄰小名路222號20樓
# [1] "20" "222" "20"
#
# $新北市語文區語文里17鄰語文路221號29樓之5
# [1] "17" "221" "29" "5"
#
# $宜蘭縣飛機鎮飛機里3鄰飛機路73號
# [1] "3" "73"
#
# $`新北市紅色區紅色里15鄰紅色路四段15號之4, 17樓`
# [1] "15" "15" "4" "17"
Add Comment
Please, Sign In to add comment