Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- library(httr)
- library(xml2)
- library(rvest)
- library(pipeR)
- library(data.table)
- library(stringr)
- library(lubridate)
- getActiveTyphoonFunc <- function() {
- url <- "https://sharaku.eorc.jaxa.jp/cgi-bin/typhoon_rt/main.cgi?lang=en"
- tmp <- GET(url) %>>% content
- areas <- c("asiaarea", "americaarea", "oceaniaarea")
- activeTyphoonDT <- lapply(areas, function(s){
- t <- xml_find_first(tmp, sprintf("//div[@id='%s']//ul[@class='act_typ']", s))
- if (xml_length(t) == 0L)
- return(NULL)
- else {
- urls <- t %>>% xml_children %>>% xml_children
- data.table(area = s,
- typhoonNum = str_extract(xml_text(urls), "[^(]+"),
- typhoonName = str_replace(xml_text(urls), "[^(]+\\(([A-Z\\-]+)\\)", "\\1"),
- url = xml_attr(urls, "href") %>>%
- str_replace("\\./", str_replace(url, "[^/]+$", "")))
- }
- }) %>>% rbindlist
- return(activeTyphoonDT)
- }
- getActiveTyphoonDetailFunc <- function(url) {
- outDT <- GET(url) %>>% content %>>%
- xml_find_all("//table[@class='typinfoS']") %>>%
- html_table(header = FALSE) %>>%
- lapply(function(df){
- data.table(satellite = df[1, 2], obs_date = parse_date_time(df[2, 2], "ymd HM"),
- r1 = str_extract(df[3, 2], "^\\d+.\\d+[NS]"), r2 = str_extract(df[3, 2], "\\d+.\\d+[NS]$"),
- r3 = str_extract(df[4, 2], "^\\d+.\\d+[EW]"), r4 = str_extract(df[4, 2], "\\d+.\\d+[EW]$"),
- pressure = df[7, 2], winds = df[8, 2])
- }) %>>% rbindlist
- return(outDT)
- }
- activeTyphoonDT <- getActiveTyphoonFunc()
- getActiveTyphoonDetailFunc(activeTyphoonDT$url[1])
- getActiveTyphoonDetailFunc(activeTyphoonDT$url[2])
- getNasaDataFunc <- function(year, typhoonNames) {
- url <- "http://rammb.cira.colostate.edu/products/tc_realtime/season.asp?storm_season=%i"
- infoUrl <- GET(sprintf(url, year)) %>>%
- content %>>%
- xml_find_all(sprintf("//a[contains(.,'%s')]", typhoonNames)) %>>%
- xml_attr("href") %>>%
- (str_c(str_replace(url, "[^/]+$", ""), .))
- tmp <- GET(infoUrl) %>>% content
- outList <- html_table(tmp, header = 1L) %>>%
- setNames(c("forecast_track", "track_history")) %>>%
- lapply(as.data.table)
- forecastTime <- xml_find_all(tmp, "//h4[contains(.,'Latest Forecast')]") %>>%
- xml_text %>>% str_extract("\\d{12}") %>>% parse_date_time("ymdHM")
- outList$forecast_track[ , forecastTime := forecastTime + `Forecast Hour` * 60]
- imageUrl <- "http://rammb.cira.colostate.edu/products/tc_realtime/archive.asp?product=4kmirimg&storm_identifier=%s" %>>%
- sprintf(str_extract(infoUrl, "[^=]+$"))
- outList$imageDT <- GET(imageUrl) %>>% content %>>% xml_find_all("//table") %>>% {
- header <- xml_find_all(., "//tr/th") %>>% sapply(xml_text)
- tmp2 <- xml_find_all(., "//tr/td")
- idx <- which(sapply(tmp2, xml_text) == "Image")
- data.table(urlTime = tmp2[idx-4L] %>>% sapply(xml_text) %>>% parse_date_time("ymd HM"),
- imageUrls = tmp2[idx] %>>% xml_children %>>% sapply(xml_attr, attr = "href")) %>>%
- setnames(c(header[1], tail(header, 1)))
- }
- return(outList)
- }
- x <- getNasaDataFunc(2018, activeTyphoonDT$typhoonName[1])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement