# Open data: building use permits (使用執照)

library(httr)
library(parallel)

# Search endpoint that every page request is sent to.
queryUrl <- "http://building-apply.publicwork.ntpc.gov.tw/opendata/OpenDataSearchUrl.do"
# Fixed query-string parameters shared by all requests. The last entry
# presumably restricts results to the district named in its value -- confirm
# against the service's API description.
query <- setNames(
  list("OPENDATA", "BUILDLIC", "板橋區"),
  c("d", "c", "門牌.行政區")
)

# Launch one worker R process per detected core. detectCores() may return NA
# when the count cannot be determined, so fall back to a single worker.
cl <- makeCluster(max(1L, detectCores(), na.rm = TRUE))
# Fetch and parse the pages in parallel. The cluster is stopped in `finally`
# so the worker processes are released even when a request or parse fails.
dataList <- tryCatch({
  # Export the request settings to every worker.
  clusterExport(cl, c("query", "queryUrl"))
  # Attach the packages the page function needs inside every worker.
  invisible(clusterEvalQ(cl, library(httr)))
  invisible(clusterEvalQ(cl, library(jsonlite)))
  st <- proc.time()
  # One request per `Start` value: 1, 101, 201, ..., 9901.
  parLapplyLB(cl, seq(1, by = 100, length.out = 100), function(i) {
    # Request one page of JSON.
    resp <- GET(queryUrl, user_agent("R"), query = c(query, Start = i))
    # Fail with the HTTP status rather than an obscure JSON parse error.
    stop_for_status(resp)
    jsonFile <- content(resp, "text")
    # Parse the JSON and keep its `data` element.
    tmp <- fromJSON(jsonFile)$data
    # A page without records has nothing to index; return empty parts, which
    # rbind() skips when the pages are combined later.
    if (NROW(tmp) == 0L) {
      return(list(NULL, NULL))
    }
    # Record identifiers.
    id <- tmp[["_id"]][["$oid"]]
    # Attach the matching id to each record's address table, then stack them.
    address <- do.call(
      rbind,
      Map(function(x, y) cbind(id = x, y), id, tmp[["門牌"]])
    )
    # Remaining fields plus the id.
    # NOTE(review): columns 2:26 are selected by position -- confirm this still
    # matches the service's schema.
    info <- cbind(id = id, tmp[, 2:26])
    list(address, info)
  })
}, finally = stopCluster(cl))
# Elapsed time of the download (now also includes shutting down the cluster).
proc.time() - st
# Timing from a previous run:
# user  system elapsed
# 0.06    0.00   94.97

# Method 1: combine the pages with base R only (lapply + do.call + rbind).
st <- proc.time()
# Each element of dataList is list(address, info); stack each part across pages.
addressAll <- do.call(rbind, lapply(dataList, `[[`, 1))
infoAll <- do.call(rbind, lapply(dataList, `[[`, 2))
# Keep write.csv's default quoting: with quote = FALSE any field containing a
# comma, quote or newline would break the column layout of the file.
write.csv(addressAll, "address.csv", row.names = FALSE)
write.csv(infoAll, "info.csv", row.names = FALSE)
proc.time() - st
# Timing from a previous run:
# user  system elapsed
# 0.52    0.03    0.54

# Method 2: use purrr's transpose(). purrr pulls in many dependencies and can
# be tedious to install, so use this variant only if that is acceptable.
library(purrr)
st <- proc.time()
# transpose() regroups the per-page list(address, info) pairs into one list of
# address parts and one list of info parts; each group is then stacked.
datas <- lapply(transpose(dataList), function(x) do.call(rbind, x))
# Keep write.csv's default quoting: with quote = FALSE any field containing a
# comma, quote or newline would break the column layout of the file.
write.csv(datas[[1]], "address.csv", row.names = FALSE)
write.csv(datas[[2]], "info.csv", row.names = FALSE)
proc.time() - st
# Timing from a previous run:
# user  system elapsed
# 0.50    0.01    0.51