Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
library(httr)
library(parallel)

# Open-data search endpoint and the fixed query parameters sent with every
# request. The "門牌.行政區" = "板橋區" pair filters on the address district
# (door plate . administrative district = Banqiao District).
queryUrl <- "http://building-apply.publicwork.ntpc.gov.tw/opendata/OpenDataSearchUrl.do"
query <- list(d = "OPENDATA", c = "BUILDLIC", "門牌.行政區" = "板橋區")

# Launch several Rscript workers, one per detected core. detectCores() is
# documented to return NA when the count cannot be determined, so fall back
# to a single worker in that case.
nCores <- detectCores()
if (is.na(nCores)) {
  nCores <- 1L
}
cl <- makeCluster(nCores)

# Everything that uses the cluster runs inside tryCatch(..., finally = ) so the
# workers are always shut down, even when a request or a parse step fails.
dataList <- tryCatch({
  # Export the variables the worker function reads.
  clusterExport(cl, c("query", "queryUrl"))
  # Attach the required packages in each worker.
  invisible(clusterEvalQ(cl, library(httr)))
  invisible(clusterEvalQ(cl, library(jsonlite)))

  st <- proc.time()
  # Fetch the JSON pages in parallel: 100 requests with Start = 1, 101, ...,
  # 9901 (presumably 100 records per page -- TODO confirm against the API).
  pages <- parLapplyLB(cl, seq(1, by = 100, length.out = 100), function(i) {
    # Request one page and fail loudly on an HTTP error status.
    resp <- GET(queryUrl, user_agent("R"), query = c(query, Start = i))
    stop_for_status(resp)
    # Decode explicitly as UTF-8 instead of letting httr guess the encoding.
    jsonFile <- content(resp, "text", encoding = "UTF-8")

    # Parse the JSON; the records live under `data`.
    tmp <- fromJSON(jsonFile)$data
    # A page without tabular records (e.g. past the last record) contributes
    # nothing; rbind() later drops the NULL placeholders.
    if (!is.data.frame(tmp) || nrow(tmp) == 0L) {
      return(list(NULL, NULL))
    }

    # Record ids.
    id <- tmp[["_id"]][["$oid"]]
    # Each record carries its own address data.frame under "門牌"; tag every
    # one with the record id and stack them.
    address <- do.call(
      rbind,
      Map(function(x, y) cbind(id = x, y), id, tmp[["門牌"]])
    )
    # Remaining record fields plus the id.
    # NOTE(review): columns 2:26 are selected by position -- confirm this
    # still matches the API's column order.
    info <- cbind(id = id, tmp[, 2:26])
    list(address, info)
  })
  print(proc.time() - st)
  pages
}, finally = stopCluster(cl))
# Recorded timing from the original run:
# user system elapsed
# 0.06 0.00 94.97
# Method 1: base R only -- pull the address and info parts out of every page
# with lapply(), then stack each set with do.call(rbind, ...).
st <- proc.time()
addressAll <- do.call(
  rbind,
  lapply(dataList, function(page) page[[1]])
)
infoAll <- do.call(
  rbind,
  lapply(dataList, function(page) page[[2]])
)
# NOTE(review): quote = FALSE writes fields unquoted, so a value containing a
# comma would spill into extra columns -- confirm the data never has one.
write.csv(x = addressAll, file = "address.csv", row.names = FALSE, quote = FALSE)
write.csv(x = infoAll, file = "info.csv", row.names = FALSE, quote = FALSE)
proc.time() - st
# Recorded timing from the original run:
# user system elapsed
# 0.52 0.03 0.54
# Method 2: use purrr's transpose() to turn the list of (address, info) pairs
# into a pair of lists, then row-bind each one. purrr has a lot of
# dependencies and can be a hassle to install, so use it only if you want to.
library(purrr)
st <- proc.time()
datas <- lapply(transpose(dataList), function(x) do.call(rbind, x))
# Spell out FALSE: the shorthand F is an ordinary variable that can be
# reassigned.
# NOTE(review): quote = FALSE writes fields unquoted, so a value containing a
# comma would spill into extra columns -- confirm the data never has one.
write.csv(datas[[1]], "address.csv", row.names = FALSE, quote = FALSE)
write.csv(datas[[2]], "info.csv", row.names = FALSE, quote = FALSE)
proc.time() - st
# Recorded timing from the original run:
# user system elapsed
# 0.50 0.01 0.51
Advertisement
Add Comment
Please, Sign In to add comment