Advertisement
Guest User

scrape20

a guest
Aug 13th, 2021
179
0
Never
Not a member of Pastebin yet? Sign up; it unlocks many cool features!
R 1.18 KB | None | 0 0
  # Scrape Mountain Project route statistics ----
  #
  # Collects the route links shown on the Mountain Project home page, visits
  # the stats page of each route, pulls the user IDs linked from one table on
  # that page, and tallies how often each user ID appears across all routes.

  library(tidyverse)
  library(rvest)

  hp_link <- "https://www.mountainproject.com/"

  # Every href found on the home page; anchors without an href yield NA.
  hp_links <- read_html(hp_link) |>
    html_nodes("a") |>
    html_attr("href")

  # Distinct route URLs. str_subset() drops NA hrefs on its own.
  # NOTE(review): head(-1) discards the last route link, exactly as the
  # original script did; the reason is not visible in this file -- confirm.
  route_links <- hp_links |>
    str_subset("/route/") |>
    unique() |>
    head(-1)

  # A stats page URL is the route URL with "stats/" inserted after the first
  # "/route/". Rewriting each link directly is vectorised, so an empty
  # `route_links` gives an empty `stat_pages` instead of a bogus one-element
  # URL (paste0() recycles zero-length input to "").
  stat_pages <- str_replace(route_links, fixed("/route/"), "/route/stats/")

  #' Extract the user IDs linked from one table of a route stats page
  #'
  #' @param link URL (or any other input accepted by `rvest::read_html()`) of
  #'   a route stats page.
  #' @param table_index Position of the `<table>` to read, counted among all
  #'   tables on the page. Defaults to 4, the table this script has always
  #'   read.
  #' @return A character vector with the digits of every `user/<digits>` link
  #'   in that table, in page order. `character(0)` when the page has fewer
  #'   than `table_index` tables or the table links to no user.
  get_users <- function(link, table_index = 4) {
    tables <- link |>
      read_html() |>
      html_nodes("table")

    # Without this guard a page with too few tables would hand NULL to
    # html_nodes() and abort the whole scrape.
    if (length(tables) < table_index) {
      return(character(0))
    }

    ids <- tables[[table_index]] |>
      html_nodes("a") |>
      html_attr("href") |>
      str_extract("(?<=user/)\\d+")

    # Links that do not point at a user extract as NA; they are not users and
    # must not be counted.
    ids[!is.na(ids)]
  }

  users <- map(stat_pages, get_users)

  # Route slug: whatever follows the numeric route ID in the URL.
  route_names <- route_links |>
    str_extract("\\d+/.+") |>
    str_remove("\\d+/")

  # One row per (route, user) occurrence; routes without users contribute no
  # rows.
  route_users <- tibble(route = route_names, user = users) |>
    unnest(user)

  # Occurrences per user ID. IDs stay character: they are labels, and a round
  # trip through double can print round IDs in scientific notation. Rows come
  # out sorted by `user` (count() ordering).
  user_counts <- route_users |>
    count(user, name = "count")

  # View() needs an interactive session; fall back to printing under Rscript.
  if (interactive()) {
    View(user_counts)
  } else {
    print(user_counts)
  }
Advertisement
Add Comment
Please sign in to add a comment
Advertisement