Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- library(fulltext)
- # Search
- ## Search arXiv for papers matching the query "ecology"
- res <- ft_search(query = "ecology", from = "arxiv")
- ## Collect the arXiv ids of the hits
- ids <- res$arxiv$data$id
- ## Stop early when the search returned nothing, so the steps below never index an empty result
- stopifnot(length(ids) > 0)
- ## Fetch the PDFs for those ids
- out <- ft_get(ids, from = "arxiv", cache = TRUE, backend = "rds", path = "mypdfs")
- ## See your folder mypdfs (the downloaded PDFs should be in there)
- ## Paths to all the PDF files
- out$arxiv$data$path
- ## Path to a single file: index by position rather than by a hard-coded id, because the ids returned by a live search change over time
- out$arxiv$data$path[[1]]
- # Extract text
- ## Extract the text of every fetched article
- texts <- ft_extract(out)
- ## Text from a single article
- texts$arxiv$data$data[[1]]
- ## Or do it manually with pdftools on the same single file
- pdftools::pdf_text(out$arxiv$data$path[[1]])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement