Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Scrape the Globe and Mail politics index page, download every linked
# article, and attach a short LexRank summary to each headline.

library(tidyverse)
library(rvest) # read_html(), html_nodes(), html_attr(), html_text()
library(lexRankr)
library(purrr)

# Pull the body paragraphs out of one parsed article page.
#
# page: a document returned by read_html().
# Returns a character vector with one element per matched
# ".c-article-body__text" node (length 0 when nothing matches).
extract_paragraphs <- function(page) {
  page %>%
    html_nodes(".c-article-body__text") %>%
    html_text()
}

# Collapse the top-ranked LexRank sentences of one article into one string.
#
# paragraphs: character vector of article paragraphs.
# n: number of top sentences to request from lexRank().
# Returns a length-1 character vector; NA when the article has no text, so
# the result can always be stored in a plain character column.
summarise_article <- function(paragraphs, n = 2) {
  paragraphs <- paragraphs[nzchar(trimws(paragraphs))]
  if (length(paragraphs) == 0) {
    return(NA_character_)
  }
  top <- lexRank(paragraphs, n = n, sentencesAsDocs = FALSE)
  paste(top$sentence, collapse = " ")
}

# NOTE: this variable shares its name with base::source(); calls to source()
# still resolve to the function, but keep that in mind when reading on.
source <- read_html("https://beta.theglobeandmail.com/politics/")

# Card links may be relative, so resolve them against the site root.
links <- source %>%
  html_nodes(".o-card__link") %>%
  html_attr("href") %>%
  xml2::url_absolute("https://beta.theglobeandmail.com")

# One HTTP request per link; a single failing request aborts the whole run.
pages <- links %>% map(read_html)

# List with one character vector of paragraphs per article.
articles <- map(pages, extract_paragraphs)

headlines <- source %>%
  html_nodes(".o-card__content-text") %>%
  html_text()

# Headlines and links come from two different selectors, so nothing
# guarantees that they pair up one-to-one; fail with a clear message here
# instead of building a misaligned table.
stopifnot(length(headlines) == length(links))

# `articles` is stored as a list column (one character vector per row).
headlines_df <- tibble(headlines, links, articles)

# lexRank() works on a single character vector, so summarise row by row
# rather than handing it the entire list column at once.
headlines_df <- headlines_df %>%
  mutate(summary = map_chr(articles, summarise_article))

# Single-article example: rank the sentences of one specific story.
art <- read_html("https://www.theglobeandmail.com/news/world/worried-about-naftas-fate-wynne-calls-for-more-trade-with-china/article37071055/")
article <- extract_paragraphs(art)
test <- lexRank(article, n = 1, sentencesAsDocs = FALSE)
Add Comment
Please, Sign In to add comment