celestialgod

Crawl Reservoir

Jun 7th, 2017
545
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 1.49 KB | None | 0 0
  library(data.table)
  library(stringi)
  library(lubridate)
  library(foreach)
  library(httr)
  library(xml2)
  library(rvest)
  library(pipeR)

  # Crawl the daily reservoir storage table from the WRA site for every date in
  # `timeVec`, write one CSV per day into `resultDir`, then merge all CSVs found
  # there into "merge.csv".

  # Only Windows needs the locale switched to C and the scraped text converted
  # to Big5 with stringi::stri_conv; other platforms keep the text as parsed.
  isWindows <- .Platform$OS.type == "windows"

  timeVec <- seq(ymd("2003/01/01"), ymd("2003/01/11"), 1)
  reservoirUrl <-
    "http://fhy.wra.gov.tw/ReservoirPage_2011/StorageCapacity.aspx"
  resultDir <- "result"

  # Fetch the reservoir table for a single date.
  #
  # time: a length-one Date.
  # Returns a data.table parsed from the result table, or NULL when the
  # response does not contain the table "ctl00_cphMain_gvList".
  # Stops on an HTTP error status or when the __VIEWSTATE field is missing.
  fetchReservoirTable <- function(time) {
    # The form is posted back together with the page's __VIEWSTATE value, so
    # fetch a fresh one before every query.
    viewState <- GET(reservoirUrl) %>>%
      stop_for_status %>>%
      content %>>%
      xml_find_all("//input[@name='__VIEWSTATE']") %>>%
      xml_attr("value")
    if (length(viewState) == 0L || anyNA(viewState)) {
      stop("No __VIEWSTATE field found at ", reservoirUrl, call. = FALSE)
    }

    postBody <- list(
      "ctl00$cphMain$cboSearch" = "所有水庫",
      "ctl00$cphMain$ucDate$cboYear" = year(time),
      "ctl00$cphMain$ucDate$cboMonth" = month(time),
      "ctl00$cphMain$ucDate$cboDay" = day(time),
      "__VIEWSTATE" = viewState
    )

    tableNode <- POST(reservoirUrl, body = postBody) %>>%
      stop_for_status %>>%
      content %>>%
      xml_find_first("//table[@id='ctl00_cphMain_gvList']")
    if (inherits(tableNode, "xml_missing")) {
      return(NULL)
    }

    outTbl <- html_table(tableNode, fill = TRUE)
    setDT(outTbl)
    outTbl
  }

  # fwrite does not create missing directories.
  dir.create(resultDir, showWarnings = FALSE, recursive = TRUE)

  if (isWindows) {
    # Save the full locale string: the value of Sys.getlocale() can be handed
    # back to Sys.setlocale("LC_ALL", ...) on the same machine.
    backupLocale <- Sys.getlocale()
    invisible(Sys.setlocale("LC_ALL", "C"))
  }

  tryCatch({
    # Index the vector instead of iterating it directly: `for (d in dates)`
    # would strip the Date class.
    for (i in seq_along(timeVec)) {
      time <- timeVec[i]
      outTbl <- fetchReservoirTable(time)
      if (is.null(outTbl)) {
        warning("No reservoir table returned for ", as.character(time),
                call. = FALSE)
        next
      }
      if (isWindows) {
        # Convert both the cell contents and the header from UTF-8 to Big5.
        outTbl <- outTbl[, lapply(.SD, stri_conv, from = "UTF-8", to = "Big5")]
        setnames(outTbl, stri_conv(names(outTbl), "UTF-8", "Big5"))
      }
      fwrite(outTbl, file.path(resultDir, sprintf("%s.csv", as.character(time))))
    }
  }, finally = {
    # Restore the locale even when a request fails part-way through.
    if (isWindows) {
      invisible(Sys.setlocale("LC_ALL", backupLocale))
    }
  })

  # full.names = TRUE is required: without it list.files returns bare file
  # names and fread would look for them in the working directory.
  csvFiles <- list.files(resultDir, "\\.csv$", full.names = TRUE)
  fwrite(rbindlist(lapply(csvFiles, fread)), "merge.csv")
Advertisement
Add Comment
Please, Sign In to add comment