Guest User

Untitled

a guest
Nov 24th, 2017
86
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.96 KB | None | 0 0
  # Scrape the Globe and Mail politics front page, download every linked
  # article, and attach a short extractive (LexRank) summary to each headline.

  library(tidyverse)
  library(rvest) # read_html(), html_nodes(), html_attr(), html_text();
                 # library(tidyverse) does not attach rvest
  library(lexRankr)
  library(purrr)

  # Extract the body paragraphs of a parsed article page as a character vector
  # (one element per node matching ".c-article-body__text").
  extract_article_text <- function(page) {
    page %>%
      html_nodes(".c-article-body__text") %>%
      html_text()
  }

  # Return the top-ranked sentence(s) of one article as a character vector.
  #
  # `text` is a character vector of paragraphs for a single article.
  # `n` is forwarded to lexRank(); NOTE(review): with lexRank()'s default
  # tie handling more than `n` sentences may come back -- confirm against the
  # lexRankr documentation if an exact count matters.
  # Articles with no usable text yield character(0) instead of calling
  # lexRank(); a failure on one article is downgraded to a warning so that a
  # single bad page does not abort the whole batch.
  summarise_article <- function(text, n = 2) {
    text <- text[!is.na(text) & nzchar(trimws(text))]
    if (length(text) == 0) {
      return(character(0))
    }
    tryCatch(
      lexRank(text, n = n, sentencesAsDocs = FALSE)$sentence,
      error = function(e) {
        warning(
          "lexRank failed for an article: ", conditionMessage(e),
          call. = FALSE
        )
        character(0)
      }
    )
  }

  source <- read_html("https://beta.theglobeandmail.com/politics/")

  # Card links may be relative, so resolve them against the site root.
  links <- source %>%
    html_nodes(".o-card__link") %>%
    html_attr("href") %>%
    xml2::url_absolute("https://beta.theglobeandmail.com")

  pages <- links %>% map(read_html)

  # List with one character vector of paragraphs per article.
  articles <- pages %>% map(extract_article_text)

  headlines <- source %>%
    html_nodes(".o-card__content-text") %>%
    html_text()

  # The two selectors are queried independently, so verify that they line up
  # before binding them column-wise.
  if (length(headlines) != length(links)) {
    stop(
      "Found ", length(headlines), " headlines but ", length(links),
      " links; the page selectors no longer pair up one-to-one.",
      call. = FALSE
    )
  }

  # `articles` is kept as a list-column (one character vector per row).
  headlines_df <- tibble(headlines, links, articles)

  # lexRank() expects a character vector, so it is applied to each article
  # separately rather than to the list-column as a whole. `summary` is a
  # list-column holding the top sentences of each article.
  headline_summaries <- headlines_df %>%
    mutate(summary = map(articles, summarise_article, n = 2))

  headline_summaries

  # Single-article check of the same approach.
  art <- read_html("https://www.theglobeandmail.com/news/world/worried-about-naftas-fate-wynne-calls-for-more-trade-with-china/article37071055/")

  article <- art %>%
    html_nodes(".c-article-body__text") %>%
    html_text()

  test <- lexRank(article, n = 1, sentencesAsDocs = FALSE)
Add Comment
Please, Sign In to add comment