Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- library(taxize)
- library(reshape2)
- ### stuff starts here
#### Flatten one classification table into a single wide row, keeping
#### every rank (the "no rank" filter is intentionally left out here).
# x: a data frame with columns `name`, `rank` and `id`, one row per taxon,
#    whose last row is the tip itself.
# Returns a one-row data frame: column `tip` holds the name from the last
# row of x; the remaining columns hold the ids of all preceding rows,
# labelled by their rank.
class2tree_helper_no_filter <- function(x) {
  tip_row <- nrow(x)
  # ids of everything above the tip, labelled with the matching rank
  ancestor_ids <- stats::setNames(x[-tip_row, "id"], x[-tip_row, "rank"])
  # one-column frame -> transpose -> one-row frame with one column per ancestor
  wide <- data.frame(t(data.frame(df = ancestor_ids)), stringsAsFactors = FALSE)
  data.frame(tip = x[tip_row, "name"], wide, stringsAsFactors = FALSE)
}
#### Get the NCBI classification of every species and flatten the results
#### into one data frame (one row per species, one column per lineage level).
spnames <- c(
  "Klattia flava", "Trollius sibiricus", "Arachis paraguariensis",
  "Tanacetum boreale", "Gentiana yakushimensis", "Sesamum schinzianum",
  "Pilea verrucosa", "Tibouchina striphnocalyx", "Lycium dasystemum",
  "Berkheya echinacea", "Androcymbium villosum",
  "Helianthus annuus", "Madia elegans", "Lupinus albicaulis",
  "Pinus lambertiana"
)
out <- classification(spnames, db = "ncbi")

# A name that could not be resolved does not come back as a data frame
# (presumably an NA placeholder -- confirm against the taxize docs); the
# helper would fail on such an entry, so skip it and say which names were lost.
is_resolved <- vapply(unclass(out), is.data.frame, logical(1))
if (!all(is_resolved)) {
  warning(
    "No classification found for: ",
    paste(names(out)[!is_resolved], collapse = ", "),
    call. = FALSE
  )
}

# rbind.fill() is exported by plyr, which reshape2 imports but does not
# attach, so the call has to be namespace-qualified to be found.
df <- plyr::rbind.fill(
  lapply(unclass(out)[is_resolved], class2tree_helper_no_filter)
)
# Build the long-format table of pairwise taxonomic distances between tips.
# Columns: x and y are tip names, dist is the number of taxon ids that occur
# in exactly one of the two lineages (size of the symmetric difference).
# Rows are ordered by x first, then y, covering every ordered pair of tips
# (including each tip with itself, at distance 0).
df_dist <- local({
  tip_names <- as.character(df[[1]])
  id_matrix <- as.matrix(df[, -1, drop = FALSE])

  # Shorter lineages are padded with NA when the rows are bound together.
  # NA is not a taxon: keeping it would add one spurious unit of distance
  # whenever exactly one of the two lineages is padded, so drop it up front.
  lineages <- lapply(seq_len(nrow(id_matrix)), function(i) {
    ids <- id_matrix[i, ]
    ids[!is.na(ids)]
  })

  # expand.grid() varies its first factor fastest, so `second` acts as the
  # inner index and `first` as the outer one.
  tip_pairs <- expand.grid(
    second = seq_along(lineages),
    first = seq_along(lineages)
  )

  data.frame(
    x = tip_names[tip_pairs$first],
    y = tip_names[tip_pairs$second],
    dist = vapply(
      seq_len(nrow(tip_pairs)),
      function(k) {
        a <- lineages[[tip_pairs$first[k]]]
        b <- lineages[[tip_pairs$second[k]]]
        length(union(a, b)) - length(intersect(a, b))
      },
      numeric(1)
    ),
    stringsAsFactors = FALSE
  )
})
# Pivot the long distance table into a square tip-by-tip matrix, coerce it
# to a `dist` object, then cluster hierarchically and draw the dendrogram.
dist_matrix <- as.dist(
  acast(
    df_dist,
    x ~ y,
    fun.aggregate = sum,
    margins = FALSE,
    value.var = "dist"
  )
)
plot(hclust(d = dist_matrix))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement