Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- library(httr)
- library(xml2)
- library(rvest)
- library(pipeR)
- library(data.table)
- library(stringr)
- library(lubridate)
# Search URL for the typhoon database CGI on sharaku.eorc.jaxa.jp. The query
# string asks for lang=e (presumably the English page) in search mode with the
# GPM, GW1AM2, TRMM, P1AME and A2AMS flags switched ON, and bounds the search
# with sy=1997/sm=12 and ey=2018/em=09 (presumably start/end year and month;
# confirm against the site). `area` and `tname` are left empty.
url <- "https://sharaku.eorc.jaxa.jp/cgi-bin/typ_db/typ_db.cgi?lang=e&mode=search&GPM=ON&GW1AM2=ON&TRMM=ON&P1AME=ON&A2AMS=ON&area=&sy=1997&sm=12&ey=2018&em=09&tname="

# Download the result page and parse it. stop_for_status() converts an HTTP
# error response into an R error at this point, instead of letting an error
# page be parsed and yield empty or misleading results further down.
typhoonInfoWeb <- GET(url) %>>%
  stop_for_status %>>%
  content
# Field labels: the text of every <th class="c1"> cell found in the page.
cols <- xml_text(xml_find_all(typhoonInfoWeb, "//th[@class='c1']"))

# Field values: the text of every <td class="c2"> cell found in the page, with
# leading and trailing whitespace removed.
values <- str_trim(
  xml_text(xml_find_all(typhoonInfoWeb, "//td[@class='c2']"))
)
# Typhoon names: the text of the <h2> headings located inside the
# <table id="searchresult"> element.
#
# The lookup is written as a single XPath on purpose. An XPath that starts
# with "//" is evaluated from the document root even when it is applied to a
# node set, so selecting the table first and then calling
# xml_find_all("//h2") would return every <h2> on the page rather than only
# those inside the table.
#
# The last step removes one leading character from each heading when that
# character is neither an ASCII letter nor a space.
typhoneNames <- typhoonInfoWeb %>>%
  xml_find_all("//table[@id='searchresult']//h2") %>>%
  xml_text %>>%
  str_replace_all("^[^A-Za-z ]", "")
# Reshape the flat label/value vectors into a character matrix with one row
# per record and one column per distinct label.
#
# Row index: every occurrence of the first label is treated as the start of a
# new record, so the running count of that label numbers the records.
# Column index: position of each label among the distinct labels.
recordIdx <- cumsum(cols == cols[1])
fieldIdx <- match(cols, unique(cols))

# Labels and values are paired by position; a length mismatch would otherwise
# be recycled or misaligned during the matrix-index assignment below.
if (length(values) != length(cols)) {
  stop(
    "Scraped ", length(cols), " field labels but ", length(values),
    " field values; the page layout may have changed.",
    call. = FALSE
  )
}

# The matrix has one row per scraped typhoon name, so the records must fit.
if (length(recordIdx) > 0 && max(recordIdx) > length(typhoneNames)) {
  stop(
    "Found ", max(recordIdx), " records but only ", length(typhoneNames),
    " typhoon names; the page layout may have changed.",
    call. = FALSE
  )
}

# Cells with no scraped value stay NA.
typhoonMat <- matrix(
  NA_character_,
  nrow = length(typhoneNames),
  ncol = uniqueN(cols)
)
typhoonMat[cbind(recordIdx, fieldIdx)] <- values
# Turn the matrix into a data.table and name its columns after the distinct
# field labels, with any trailing run of spaces and colons stripped.
typhoonDT <- data.table(typhoonMat)
setnames(typhoonDT, str_replace_all(unique(cols), "[ :]+$", ""))

# Add two date-time columns by reference, both derived from the `Period`
# column: `start_dt` from the date at the beginning of the string and
# `end_dt` from the date at its end. A date is matched as three letters, a
# space, two digits, a comma and four digits, and parsed in month-day-year
# order.
typhoonDT[
  ,
  `:=`(
    start_dt = parse_date_time(
      str_extract(Period, "^[A-Za-z]{3} \\d{2},\\d{4}"),
      "mdy"
    ),
    end_dt = parse_date_time(
      str_extract(Period, "[A-Za-z]{3} \\d{2},\\d{4}$"),
      "mdy"
    )
  )
]

# Persist the result as an RDS file, relative to the working directory.
saveRDS(typhoonDT, "typhoonInfoDT.rds")
Advertisement
Add Comment
Please, Sign In to add comment