celestialgod

get all imported R packages

Mar 23rd, 2018
151
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 1.65 KB | None | 0 0
  #---------------------------
  # get all imported R packages
  #
  # Crawls the CRAN "available packages by date" index and, for every package
  # listed there, parses the "Imports" field of its landing page.
  #
  # Result: `package_crawler`, a list named by package. Each element is the
  # character matrix returned by stringr::str_match_all():
  #   column 1 - full match, e.g. "Rcpp (>= 0.12.0)"
  #   column 2 - imported package name
  #   column 3 - version requirement including parentheses, or NA
  # A package without an Imports field yields a zero-row matrix; a page that
  # could not be fetched yields NULL.
  #
  # The workspace is deliberately NOT cleared here: rm(list = ls()) would
  # destroy unrelated objects of whoever sources this script.
  #---------------------------

  #---------------------------
  # installation of all required packages
  #---------------------------
  required_pkgs <- c("httr", "xml2", "pipeR", "stringr")
  # scan the library once and install only what is actually missing
  missing_pkgs <- setdiff(required_pkgs, rownames(installed.packages()))
  if (length(missing_pkgs) > 0L) {
    install.packages(missing_pkgs)
  }

  #---------------------------
  # library all required packages
  #---------------------------
  for (pkg in required_pkgs) {
    library(pkg, character.only = TRUE)
  }
  library(parallel)

  #---------------------------
  # capture names of all packages
  #---------------------------
  cran <- "https://cran.r-project.org/web/packages/available_packages_by_date.html"
  index_resp <- GET(cran)
  # fail loudly on an HTTP error instead of parsing an error page
  stop_for_status(index_resp)
  all_pkg_a <- index_resp %>>% content %>>% xml_find_all("//td/a")
  pkg_names <- xml_text(all_pkg_a)

  #---------------------------
  # create urls for searching each package
  #---------------------------
  # resolve the relative hrefs against the index page so that the resulting
  # urls contain no "../.." segments
  urls <- url_absolute(xml_attr(all_pkg_a, "href"), cran)

  #---------------------------
  # get all imports info and split the package name of imported packages
  #---------------------------
  # Worker function. It is executed on the cluster nodes, so it is
  # self-contained: every call is namespace-qualified and nothing from the
  # master session has to be exported or attached on the workers.
  fetch_pkg_imports <- function(url) {
    # package names may contain dots (data.table, R.utils, ...); the optional
    # second group captures a version requirement such as " (>= 1.0)"
    pattern <- "([A-Za-z][A-Za-z0-9.]*)(\\s+\\([^)]*\\))?"
    tryCatch({
      resp <- httr::GET(url)
      httr::stop_for_status(resp)
      # value cell that follows the "Imports:" label cell; xml_find_first()
      # yields a missing node (not an error) when the package has no Imports
      imports_td <- xml2::xml_find_first(
        httr::content(resp),
        "//td[starts-with(normalize-space(.), 'Imports')]/following-sibling::td[1]"
      )
      imports_txt <- xml2::xml_text(imports_td, trim = TRUE)
      if (is.na(imports_txt)) {
        # no Imports field: parse an empty string so that the result is a
        # zero-row matrix with the same columns as every other result
        imports_txt <- ""
      }
      stringr::str_match_all(imports_txt, pattern)[[1]]
    }, error = function(e) {
      # one unreachable page must not abort the whole crawl; failures are
      # counted and reported once the crawl has finished
      NULL
    })
  }

  n_workers <- 4L
  cl <- makeCluster(n_workers)
  t1 <- proc.time()
  package_crawler <- tryCatch(
    clusterApplyLB(cl, urls, fetch_pkg_imports),
    # always release the worker processes, even when the crawl fails
    finally = stopCluster(cl)
  )
  # explicit print so that the timing is also shown under source()
  print(proc.time() - t1)

  # label every result with the package it belongs to
  names(package_crawler) <- pkg_names

  failed <- vapply(package_crawler, is.null, logical(1L))
  if (any(failed)) {
    warning(
      sum(failed), " package page(s) could not be fetched; ",
      "their entries in `package_crawler` are NULL",
      call. = FALSE
    )
  }
Advertisement
Add Comment
Please, Sign In to add comment