Advertisement
Guest User

Untitled

a guest
Apr 29th, 2016
49
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.36 KB | None | 0 0
  1. library("reutils")
  2.  
  3. # NB: to grab FASTA of accession number x, do this:
  4. #
  5. # f <- content(efetch(x, db="nuccore", rettype="fasta", retmode="text"))
  6. #
  7. # Result is a string, with \n separators.
  8.  
  9. grab.results <- function (term) {
  10. # Search for the given term on nuccore. This gives us a list of
  11. # record IDs.
  12. ids <- esearch(term, db="nuccore")
  13.  
  14. # Grab summaries for the given record IDs, as a sort-of data frame.
  15. sum <- esummary(ids, db="nuccore")
  16. data <- content(sum, as="parsed")
  17.  
  18. # For some reason, this parser gives us lists of lists instead of a
  19. # proper data frame (which should be lists of vectors). Return a
  20. # fixed-up version. Also turn Slen into an integer column.
  21. f <- data.frame(lapply(data, as.character), stringsAsFactors=FALSE)
  22. f$Slen <- as.integer(f$Slen)
  23. f
  24. }
  25.  
  26. first.of.type <- function (results, type) {
  27.  
  28. filtered <- results
  29.  
  30. # Sort by Slen (biggest first)
  31. filtered <- filtered[order(filtered$Slen, decreasing=TRUE),]
  32.  
  33. # Return the first accession number
  34. filtered$OSLT[1]
  35. }
  36.  
  37. scrape.genbank <- function (species, genes) {
  38. # Create dataset skeleton
  39. data <- data.frame(Species=species)
  40. for (i in genes) {
  41. data[,i] <- rep(NA, length(species))
  42. }
  43.  
  44. # Look up data for each species
  45. for (i in 1:length(species)) {
  46. for (g in genes) {
  47. n <- species[i]
  48. print(sprintf("Looking up %s (%s) (%d/%d)...",
  49. n, g, i, length(species)))
  50.  
  51.  
  52. query <- sprintf("%s AND %s", n, g)
  53.  
  54. tryCatch({
  55. r <- grab.results(query)
  56. data[i,g] <- first.of.type(r, g)
  57. }, error = function(e) {
  58.  
  59. })
  60. }
  61. }
  62.  
  63. data
  64. }
  65.  
  66.  
  67. rebuild <- function () {
  68. data <- read.csv("highlightedSkates.csv", stringsAsFactors=FALSE)
  69. data <- scrape.genbank(unique(data$Host), c("NADH", "COI", "CO1", "RAG1"))
  70. write.csv(data, "Skate_allmarkers_accessions.csv")
  71.  
  72.  
  73. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement