Advertisement
Guest User

Untitled

a guest
May 24th, 2017
56
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.76 KB | None | 0 0
  1. library(taxize)
  2. library(reshape2)
  3.  
  4. ### stuff starts here
  5.  
  6. #### remove the rank filter from the clustering method
  7.  
  8. class2tree_helper_no_filter <- function(x){
  9. #x <- x[!x$rank == "no rank", ]
  10. df <- x[-nrow(x), 'id']
  11. names(df) <- x[-nrow(x), 'rank']
  12. df <- data.frame(t(data.frame(df)), stringsAsFactors = FALSE)
  13. data.frame(tip = x[nrow(x), "name"], df, stringsAsFactors = FALSE)
  14. }
  15.  
  16.  
  17. #### get classification & convert into dataframe
  18.  
  19. spnames <- c('Klattia flava', 'Trollius sibiricus', 'Arachis paraguariensis',
  20. 'Tanacetum boreale', 'Gentiana yakushimensis','Sesamum schinzianum',
  21. 'Pilea verrucosa','Tibouchina striphnocalyx','Lycium dasystemum',
  22. 'Berkheya echinacea','Androcymbium villosum',
  23. 'Helianthus annuus','Madia elegans','Lupinus albicaulis',
  24. 'Pinus lambertiana')
  25. out <- classification(spnames, db='ncbi')
  26. df <- rbind.fill(lapply(out, class2tree_helper_no_filter))
  27.  
  28. # initialize empty df for calculated distances
  29.  
  30. df_dist <- data.frame(x = character(nrow(df)*nrow(df)), y = character(nrow(df)*nrow(df)), dist = numeric(nrow(df)*nrow(df)), stringsAsFactors = FALSE)
  31.  
  32. # iterate over each row of classification DF & fill distance
  33. # this is probably the most un-R thing ever but my tired brain didn't find any better solution
  34. c = 1
  35.  
  36. for (r1 in 1:nrow(df)) {
  37. n1 = df[r1,1]
  38. x = as.vector(t(df[r1,-1]))
  39. for (r2 in 1:nrow(df)) {
  40. n2 = df[r2,1]
  41. y = as.vector(t(df[r2,-1]))
  42. distance = length(union(x,y))-length(intersect(x,y))
  43. df_dist$x[c] = n1
  44. df_dist$y[c] = n2
  45. df_dist$dist[c] = distance
  46. c = c +1
  47. }
  48. }
  49.  
  50. # convert long dataframe to distance object & plot hclust result
  51. dist_matrix <- as.dist(acast(df_dist, x ~ y, value.var='dist', fun.aggregate = sum, margins=FALSE))
  52. plot(hclust(dist_matrix))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement