celestialgod

opendata 使用執照

Feb 18th, 2017
301
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 1.92 KB | None | 0 0
  1. library(httr)
  2. library(parallel)
  3.  
  # Open-data search endpoint that every page request is sent to.
  queryUrl <-
    "http://building-apply.publicwork.ntpc.gov.tw/opendata/OpenDataSearchUrl.do"

  # Fixed query-string parameters shared by all requests. The last entry
  # filters on "門牌.行政區" (address / administrative district) = "板橋區"
  # (Banqiao District).
  query <- list(
    d = "OPENDATA",
    c = "BUILDLIC",
    "門牌.行政區" = "板橋區"
  )
  6.  
  # Launch several Rscript workers, one per detected core. detectCores() can
  # return NA when the core count is unknown, so fall back to a single worker.
  cl <- makeCluster(max(1L, detectCores(), na.rm = TRUE))
  st <- proc.time()
  # Everything that talks to the workers runs inside tryCatch() so that the
  # cluster is always shut down, even when a request or a parse step fails.
  dataList <- tryCatch({
    # Export the request settings to every worker.
    clusterExport(cl, c("query", "queryUrl"))
    # Attach the required packages in every worker.
    invisible(clusterEvalQ(cl, library(httr)))
    invisible(clusterEvalQ(cl, library(jsonlite)))
    # Start timing only once the workers are ready.
    st <- proc.time()
    # Fetch the JSON pages in parallel: 100 requests whose Start offsets are
    # 1, 101, ..., 9901. Each call returns list(address, info) for one page.
    parLapplyLB(cl, seq(1, by = 100, length.out = 100), function(i) {
      # Request one page; `query` and `queryUrl` are the exported globals.
      resp <- GET(queryUrl, user_agent("R"), query = c(query, Start = i))
      # Fail with a clear HTTP error instead of trying to parse an error page.
      stop_for_status(resp)
      jsonFile <- content(resp, "text")
      # Parse the JSON and keep its `data` element.
      tmp <- fromJSON(jsonFile)$data
      # A page with no records contributes nothing; rbind() later ignores NULL.
      if (length(tmp) == 0L || NROW(tmp) == 0L) {
        return(list(NULL, NULL))
      }
      # Record ids.
      id <- tmp[["_id"]][["$oid"]]
      # Each element of the address ("門牌") column is bound to its record id,
      # then all of them are stacked into one data.frame.
      address <- do.call(
        rbind,
        Map(function(x, y) cbind(id = x, y), id, tmp[["門牌"]])
      )
      # Remaining fields, keyed by id.
      # NOTE(review): columns are selected by position (2:26); confirm this
      # still matches the service's response layout.
      info <- cbind(id = id, tmp[, 2:26])
      list(address, info)
    })
  }, finally = stopCluster(cl))
  proc.time() - st
  # Timing recorded by the original author (user / system / elapsed seconds):
  #   0.06    0.00   94.97
  33.  
  # Method 1: combine the pages with base R only (lapply + do.call + rbind).
  st <- proc.time()
  # Element 1 of every page result is the address table, element 2 the info
  # table; stack each kind across all pages.
  addressAll <- do.call(rbind, lapply(dataList, `[[`, 1))
  infoAll <- do.call(rbind, lapply(dataList, `[[`, 2))
  # Keep write.csv()'s default quoting: with quote = FALSE any field holding a
  # comma, quote or newline would shift columns and corrupt the CSV.
  write.csv(addressAll, "address.csv", row.names = FALSE)
  write.csv(infoAll, "info.csv", row.names = FALSE)
  proc.time() - st
  # Timing recorded by the original author (user / system / elapsed seconds):
  #   0.52    0.03    0.54
  43.  
  # Method 2: use purrr's transpose(). purrr pulls in many dependencies and can
  # be tedious to install, so decide for yourself whether it is worth using.
  library(purrr)
  st <- proc.time()
  # transpose() turns the list of per-page (address, info) pairs into a pair of
  # per-kind lists; each of those is then stacked into one table.
  datas <- lapply(transpose(dataList), function(x) do.call(rbind, x))
  # Spell out FALSE (F can be reassigned) and keep write.csv()'s default
  # quoting so fields holding commas, quotes or newlines stay intact.
  write.csv(datas[[1]], "address.csv", row.names = FALSE)
  write.csv(datas[[2]], "info.csv", row.names = FALSE)
  proc.time() - st
  # Timing recorded by the original author (user / system / elapsed seconds):
  #   0.50    0.01    0.51
Advertisement
Add Comment
Please, Sign In to add comment