Guest User

Grabbing GB5 results

a guest
Nov 15th, 2020
174
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 3.06 KB | None | 0 0
  1. library(assertthat)
  2. library(tidyverse)
  3. library(xml2)
  4. library(rlang)
  5. library(parallel)
  6.  
  7.  
  # Download one result page and return the text of the second cell of the
  # first table row whose text contains "Size" (judging by the function name
  # this is presumably the RAM size -- confirm against the page layout).
  #
  # link: URL of the result page.
  # Returns: the cell text as a character scalar. Errors if no such row (or
  #   no second cell) exists.
  extract.ram <- function(link) {
    page <- read_html(url(link))
    size_row <- xml_find_first(page, ".//tr[contains(., 'Size')]")
    cells <- xml_children(size_row)

    xml_text(cells[[2]])
  }
  15.  
  16.  
  # Convert one search-result entry into a one-row data frame.
  #
  # row: xml node of a single result entry (extract.page() passes the
  #   'list-col-inner' divs).
  # Returns: a data frame with `System`, `Description` and one column per
  #   metric cell found in the entry. Metric columns are named after the
  #   cell's subtitle and hold the cell text as character (no conversion).
  # Errors if the first child of the system column is not the "System" label.
  extract.row <- function(row) {
    col <- xml_find_first(row, ".//div[contains(@class, 'col-12 col-lg-4')]")
    parts <- xml_children(col)

    # Guard against layout changes: the first child must be the "System" label.
    assert_that(xml_text(parts[[1]]) == "\nSystem\n")
    system <- str_squish(xml_text(parts[[2]]))
    # The result link is only needed for the (disabled) per-result RAM lookup:
    # url <- xml_attr(parts[[2]], "href")
    # RAM <- extract.ram(paste0("https://browser.geekbench.com", url))
    description <- str_squish(xml_text(parts[[3]]))

    info <- data.frame(System = system, Description = description, stringsAsFactors = FALSE)

    cols <- xml_find_all(row, ".//div[contains(@class, 'col-6 col-md-3 col-lg-2')]")

    # seq_along() instead of 1:length(): with no metric cells the latter
    # iterates over c(1, 0) and fails with an out-of-range subscript.
    metrics <- map(seq_along(cols), function(i) {
      title <- str_squish(xml_text(xml_find_first(cols[[i]], ".//span[contains(@class, 'list-col-subtitle')]")))
      text <- str_squish(xml_text(xml_find_first(cols[[i]], ".//span[contains(@class, 'list-col-text')]")))

      X <- data.frame(text, stringsAsFactors = FALSE)
      names(X) <- title

      X
    })

    # Nothing to attach: return the identifying columns alone rather than
    # binding against an empty frame.
    if (length(metrics) == 0) {
      return(info)
    }

    bind_cols(info, bind_cols(metrics))
  }
  42.  
  # Fetch one page of Geekbench 5 CPU search results and parse every entry.
  #
  # page:  page number to request.
  # query: search string; it is percent-encoded before being placed in the URL
  #   (strings that already look encoded are left untouched by URLencode()).
  # Returns: a data frame with one row per result entry (see extract.row());
  #   an empty tibble when the page lists no results.
  extract.page <- function(page, query) {
    # Encode reserved characters (spaces, '&', ',', ...) so the query cannot
    # break or alter the request URL.
    encoded <- utils::URLencode(as.character(query), reserved = TRUE)
    doc <- read_html(url(glue::glue("https://browser.geekbench.com/v5/cpu/search?page={page}&q={encoded}")))

    rows <- xml_find_all(doc, ".//div[contains(@class, 'list-col-inner')]")

    # seq_along() instead of 1:length(): a page without results must yield an
    # empty frame, not a subscript error that callers would keep retrying.
    map(seq_along(rows), function(i) extract.row(rows[[i]])) %>% bind_rows()
  }
  50.  
  # Download every result page for a Geekbench 5 CPU search and combine them.
  #
  # query:       search string; percent-encoded before use in the first request
  #   and passed unchanged to extract.page() for the per-page requests.
  # max.retries: maximum number of attempts per page (finite, >= 1). The
  #   original code retried forever, which hangs on a persistent failure.
  # Returns: a tibble with one row per search result across all pages. Pages
  #   that still fail after `max.retries` attempts are left out and reported
  #   with a warning.
  grab.data <- function(query, max.retries = 10) {
    stopifnot(is.numeric(max.retries), length(max.retries) == 1,
              is.finite(max.retries), max.retries >= 1)

    encoded <- utils::URLencode(as.character(query), reserved = TRUE)
    geekbench.url <- glue::glue("https://browser.geekbench.com/v5/cpu/search?&q={encoded}")

    # get number of pages: the largest numeric pagination label, or 1 when the
    # result list has no pagination links at all
    doc <- read_html(url(geekbench.url))
    page.count <-
      map_chr(xml_find_all(doc, ".//a[contains(@class, 'page-link')]"), xml_text) %>%
      { suppressWarnings(as.numeric(.)) } %>%
      na.omit() %>%
      max(., 1)

    # grab all pages in parallel (worker count comes from options(mc.cores))
    pages <- mclapply(seq_len(page.count), function(page) {
      cat(glue::glue("Getting page {page} out of {page.count}..."), "\n")
      for (attempt in seq_len(max.retries)) {
        result <- try(extract.page(page, query))
        if (is.data.frame(result)) return(result)
        if (attempt < max.retries) {
          cat(glue::glue("Retrying page {page} out of {page.count}..."), "\n")
          # Back off (1s, 2s, 4s, ... capped at 30s) instead of hammering the
          # server in a tight loop.
          Sys.sleep(min(2^(attempt - 1), 30))
        }
      }
      # Give up on this page; the parent process reports it below.
      NULL
    })

    # Anything that is not a data frame (NULL after exhausted retries, or an
    # error object from a crashed worker) counts as a failed page. The check
    # runs in the parent because conditions raised in forked workers are not
    # relayed to the caller.
    ok <- vapply(pages, is.data.frame, logical(1))
    if (!all(ok)) {
      warning(
        "Failed to fetch page(s) ", paste(which(!ok), collapse = ", "),
        " for query '", query, "'",
        call. = FALSE
      )
    }

    pages[ok] %>%
      bind_rows() %>%
      as_tibble()
  }
  80.  
  81.  
  # Default worker count picked up by mclapply() inside grab.data().
  options(mc.cores = 5)

  # Search terms and the CPU label attached to each term's results.
  searches <- c("4800U", "MacBookAir10,1", "1165G7")
  cpu_labels <- c("Ryzen 4800U", "Apple M1 (MacBook Air)", "Intel i7-1165G7")

  # Scrape each search in turn, tag its rows with the CPU label, stack the
  # results and convert the two score columns from text to integer.
  scraped <- map2(searches, cpu_labels, function(term, label) {
    mutate(grab.data(term), CPU = label)
  })

  data <- scraped %>%
    bind_rows() %>%
    as_tibble() %>%
    mutate(
      `Single-Core Score` = as.integer(`Single-Core Score`),
      `Multi-Core Score` = as.integer(`Multi-Core Score`)
    )


  # Persist the combined table both as RDS and as CSV.
  saveRDS(data, "geekbench-data.rds")
  write_csv(data, "gb5-m1-4800U-1165G7.csv")


  # One density curve per CPU for each score type.
  p_single <- ggplot(data, aes(`Single-Core Score`, color = CPU)) + geom_density()
  p_multi <- ggplot(data, aes(`Multi-Core Score`, color = CPU)) + geom_density()

  ggsave("gb5_m1_single.jpg", plot = p_single)
  ggsave("gb5_m1_multi.jpg", plot = p_multi)
Advertisement
Add Comment
Please, Sign In to add comment