Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#---------------------------
# CRAN "Imports" crawler
#
# Reads the CRAN "available packages by date" index, builds the URL of every
# package page, and scrapes the "Imports" field of each page in parallel.
#
# Result: `package_crawler`, a list with one element per entry of `urls`.
# Each element is the character matrix returned by stringr::str_match_all():
#   column 1 = full match, column 2 = imported package name,
#   column 3 = version constraint such as " (>= 1.0)" (NA when absent).
# Packages without an "Imports" field yield a 0-row matrix.
#
# NOTE: run this in a fresh R session. The script deliberately does not call
# rm(list = ls()); that would wipe the caller's workspace without detaching
# packages or resetting options.
#---------------------------

#---------------------------
# installation of all required packages
#---------------------------
required_pkgs <- c("httr", "xml2", "pipeR", "stringr")
missing_pkgs <- setdiff(required_pkgs, rownames(installed.packages()))
if (length(missing_pkgs) > 0L) {
  install.packages(missing_pkgs)
}

#---------------------------
# library all required packages
#---------------------------
for (pkg in required_pkgs) {
  library(pkg, character.only = TRUE)
}
library(parallel)

#---------------------------
# capture names of all packages
#---------------------------
cran <- "https://cran.r-project.org/web/packages/available_packages_by_date.html"
all_pkg_a <- GET(cran) %>>% content %>>% xml_find_all("//td/a")
pkg_names <- xml_text(all_pkg_a)

#---------------------------
# create urls for searching each package
#---------------------------
# hrefs on the index page are relative to the directory that holds it
urls <- paste(dirname(cran), xml_attr(all_pkg_a, "href"), sep = "/")

#---------------------------
# get all imports info and split the package name of imported packages
#---------------------------
# Scrape one package page and return the str_match_all() matrix of its imports.
# Runs on the workers, so it must only rely on the attached packages and on
# values defined inside its own body.
fetch_imports <- function(url) {
  import_label <- GET(url) %>>%
    content %>>%
    xml_find_all("//td[contains(., 'Imports')]")

  pkg_imports <- ""
  if (length(import_label) > 0L) {
    # the value sits in the second cell of the row that carries the label
    row_cells <- import_label %>>% xml_parent %>>% xml_find_all("td")
    if (length(row_cells) >= 2L) {
      pkg_imports <- xml_text(row_cells[[2L]])
    }
  }

  # Package names may contain dots (e.g. "data.table") and start with a
  # letter; the optional second group captures a trailing version constraint.
  # Matching against "" (no Imports field) gives a 0-row, 3-column matrix.
  str_match_all(
    pkg_imports,
    "([a-zA-Z][a-zA-Z0-9.]*)(\\s+\\([^,]+\\))?"
  )[[1L]]
}

n_workers <- 4L
cl <- makeCluster(n_workers)
clusterExport(cl, "required_pkgs")
invisible(clusterEvalQ(cl, {
  for (pkg in required_pkgs) {
    library(pkg, character.only = TRUE)
  }
}))

t1 <- proc.time()
# `finally` shuts the workers down even when a request fails mid-crawl
package_crawler <- tryCatch(
  clusterApplyLB(cl, urls, fetch_imports),
  finally = stopCluster(cl)
)
proc.time() - t1
Advertisement
Add Comment
Please, Sign In to add comment