Advertisement
Guest User

Untitled

a guest
Feb 23rd, 2017
66
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.60 KB | None | 0 0
  1. library(fulltext)
  2.  
  3. # Search
  4. ## Search arXiv for papers matching the query 'ecology'
  5. res <- ft_search(query='ecology', from='arxiv')
  6.  
  7. ## Pull the arXiv ids out of the search result
  8. ids <- res$arxiv$data$id
  9.  
  10. ## Fetch the PDFs for those ids (caching on, path set to "mypdfs")
  11. out <- ft_get(ids, from = "arxiv", cache=TRUE, backend="rds", path = "mypdfs")
  12.  
  13. ## Check your folder mypdfs (the downloaded PDFs should be in there)
  14.  
  15. ## Paths to the PDF files
  16. out$arxiv$data$path
  17.  
  18. ## Path to a single file, by arXiv id (NOTE(review): the id is hard-coded; confirm it is among `ids`)
  19. out$arxiv$data$path$`hep-ph/9303206v1`
  20.  
  21. # Extract text
  22. ## Extract the text of every fetched article
  23. texts <- ft_extract(out)
  24.  
  25. ## Text of the first article in the result
  26. texts$arxiv$data$data[[1]]
  27.  
  28. ## Or extract the text of one PDF directly with pdftools
  29. pdftools::pdf_text(out$arxiv$data$path$`hep-ph/9303206v1`)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement