Guest User

Untitled

a guest
Oct 19th, 2017
63
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 12.46 KB | None | 0 0
  1. #' Get tree nodes
  2. #'
  3. #' Get the nodes required for a clustering tree
  4. #'
  5. #' @param clusterings data.frame describing clusterings at different
  6. #' resolutions. Columns must be named "res.X" where X is numeric and indicates
  7. #' the clustering resolution.
  8. #' @param tSNE data.frame giving the t-SNE positions of each cell
  9. #'
  10. #' @return data.frame contining the nodes of a clustering tree
  11. getTreeNodes <- function(clusterings, tSNE = NULL) {
  12.  
  13. nodes <- lapply(colnames(clusterings), function(res) {
  14. clustering <- clusterings[, res]
  15. clusters <- sort(unique(clustering))
  16.  
  17. node <- sapply(clusters, function(cluster) {
  18. is.cluster <- clustering == cluster
  19. size <- sum(is.cluster)
  20. res.clean <- stringr::str_replace(res, "res.", "")
  21. node.name <- paste0("R", res.clean, "C", cluster)
  22.  
  23. node.data <- c(Node = node.name,
  24. Res = res.clean,
  25. Cluster = cluster,
  26. Size = size)
  27.  
  28. if (!is.null(tSNE)) {
  29. node.data["tSNE1"] <- mean(tSNE[is.cluster, 1])
  30. node.data["tSNE2"] <- mean(tSNE[is.cluster, 2])
  31. }
  32.  
  33. return(node.data)
  34. })
  35.  
  36. return(t(node))
  37. })
  38.  
  39. nodes <- do.call("rbind", nodes) %>%
  40. dplyr::as_tibble() %>%
  41. dplyr::mutate_at(c("Res", "Cluster", "Size"), as.numeric)
  42.  
  43. if (!is.null(tSNE)) {
  44. nodes <- nodes %>%
  45. dplyr::mutate_at(c("tSNE1", "tSNE2"), as.numeric)
  46. }
  47.  
  48. return(nodes)
  49. }
  50.  
  51. #' Get tree edges
  52. #'
  53. #' Get the edges required for a clustering tree
  54. #'
  55. #' @param clusterings data.frame describing clusterings at different
  56. #' resolutions. Columns must be named "res.X" where X is numeric and indicates
  57. #' the clustering resolution.
  58. #' @param nodes data.frame describing tree nodes
  59. #' @param tSNE logical. Whether to add tSNE coordinates.
  60. #'
  61. #' @return data.frame containing the edges of a clustering tree
  62. getTreeEdges <- function(clusterings, nodes, tSNE = FALSE) {
  63.  
  64. res.values <- colnames(clusterings)
  65. res.clean <- stringr::str_replace(res.values, "res.", "")
  66. res.values <- res.values[order(as.numeric(res.clean))]
  67.  
  68. edges <- lapply(1:(ncol(clusterings) - 1), function(i) {
  69.  
  70. from.res <- res.values[i]
  71. to.res <- res.values[i + 1]
  72.  
  73. from.clusters <- sort(unique(clusterings[, from.res]))
  74. to.clusters <- sort(unique(clusterings[, to.res]))
  75.  
  76. trans.df <- expand.grid(FromClust = from.clusters,
  77. ToClust = to.clusters, stringsAsFactors = FALSE)
  78.  
  79. trans <- apply(trans.df, 1, function(x) {
  80. from.clust <- x[1]
  81. to.clust <- x[2]
  82.  
  83. is.from <- clusterings[, from.res] == from.clust
  84. is.to <- clusterings[, to.res] == to.clust
  85.  
  86. trans.count <- sum(is.from & is.to)
  87.  
  88. from.size <- sum(is.from)
  89. to.size <- sum(is.to)
  90. trans.prop.from <- trans.count / from.size
  91. trans.prop.to <- trans.count / to.size
  92.  
  93. return(c(trans.count, trans.prop.from, trans.prop.to))
  94. })
  95.  
  96. trans.df$FromRes <- as.numeric(gsub("res.", "", from.res))
  97. trans.df$ToRes <- as.numeric(gsub("res.", "", to.res))
  98. trans.df$TransCount <- trans[1, ]
  99. trans.df$TransPropFrom <- trans[2, ]
  100. trans.df$TransPropTo <- trans[3, ]
  101.  
  102. return(trans.df)
  103. })
  104.  
  105. edges <- dplyr::bind_rows(edges)
  106.  
  107. levs <- sort(unique(edges$ToClust))
  108. edges <- edges %>%
  109. dplyr::mutate(FromClust = factor(FromClust, levels = levs)) %>%
  110. dplyr::mutate(ToClust = factor(ToClust, levels = levs)) %>%
  111. dplyr::mutate(FromNode = paste0("R", FromRes, "C", FromClust)) %>%
  112. dplyr::mutate(ToNode = paste0("R", ToRes, "C", ToClust)) %>%
  113. dplyr::left_join(nodes, by = c(FromNode = "Node"))
  114.  
  115. if (tSNE) {
  116. edges <- edges %>%
  117. dplyr::rename(tSNE1From = tSNE1, tSNE2From = tSNE2)
  118. }
  119.  
  120. edges <- edges %>%
  121. dplyr::left_join(nodes, by = c(ToNode = "Node"))
  122.  
  123. if (tSNE) {
  124. edges <- edges %>%
  125. dplyr::rename(tSNE1To = tSNE1, tSNE2To = tSNE2)
  126. }
  127.  
  128. edges <- edges %>%
  129. dplyr::select(matches("From"), matches("To"), TransCount)
  130.  
  131. return(edges)
  132. }
  133.  
  134. #' Plot clustering tree
  135. #'
  136. #' Plot a tree showing the relationshop between clusterings at different
  137. #' resolutions
  138. #'
  139. #' @param clusterings data.frame describing clusterings at different
  140. #' resolutions. Columns must be named "res.X" where X is numeric and indicates
  141. #' the clustering resolution.
  142. #' @param count.filter Threshold for the number of cells associated with an edge
  143. #' @param prop.filter Threhold for the to cluster proportion associated with an
  144. #' edge
  145. #'
  146. #' @return ggplot of the clustering tree
  147. plotClusteringTree <- function(clusterings, count.filter = 0,
  148. prop.filter = 0.1) {
  149.  
  150. library("ggraph")
  151.  
  152. nodes <- getTreeNodes(clusterings)
  153. edges <- getTreeEdges(clusterings, nodes)
  154.  
  155. edges %>%
  156. dplyr::filter(TransCount > count.filter) %>%
  157. dplyr::filter(TransPropTo > prop.filter) %>%
  158. dplyr::select(FromNode, ToNode, everything()) %>%
  159. igraph::graph_from_data_frame(vertices = nodes) %>%
  160. ggraph(layout = "tree") +
  161. geom_edge_link(arrow = arrow(length = unit(1, 'mm')),
  162. end_cap = circle(3.5, "mm"), edge_width = 1,
  163. aes(colour = log(TransCount), alpha = TransPropTo)) +
  164. geom_node_point(aes(colour = factor(Res),
  165. size = Size)) +
  166. geom_node_text(aes(label = Cluster), size = 3) +
  167. scale_size(range = c(4, 15)) +
  168. scale_edge_colour_gradientn(colours = viridis::viridis(100)) +
  169. guides(size = guide_legend(title = "Cluster Size",
  170. title.position = "top"),
  171. colour = guide_legend(title = "Clustering Resolution",
  172. title.position = "top"),
  173. edge_colour = guide_edge_colorbar(title = "Cell Count (log)",
  174. title.position = "top"),
  175. edge_alpha = guide_legend(title = "Cluster Prop",
  176. title.position = "top", nrow = 2)) +
  177. cowplot::theme_nothing() +
  178. theme(legend.position = "bottom")
  179. }
  180.  
  181. #' Plot clustering tree
  182. #'
  183. #' Plot a tree showing the relationshop between clusterings at different
  184. #' resolutions
  185. #'
  186. #' @param seurat Seurat object that has been clustered at different resolutions
  187. #' @param count.filter Threshold for the number of cells associated with an edge
  188. #' @param prop.filter Threhold for the to cluster proportion associated with an
  189. #' edge
  190. #' @param levels Resolutions to included. If NULL all are used.
  191. #'
  192. #' @return ggplot of the clustering tree
  193. plotClusteringTreeSeurat <- function(seurat, count.filter = 0,
  194. prop.filter = 0.1, levels = NULL) {
  195.  
  196. clusterings <- seurat@meta.data %>% dplyr::select(dplyr::contains("res."))
  197.  
  198. if (!is.null(levels)) {
  199. clusterings <- clusterings[, paste0("res.", levels)]
  200. }
  201.  
  202. plotClusteringTree(clusterings, count.filter = count.filter,
  203. prop.filter = prop.filter)
  204. }
  205.  
  206. #' Plot clustering tree-SNE
  207. #'
  208. #' Plot a clustering tree projected onto t-SNE coordinates
  209. #'
  210. #' @param clusterings data.frame describing clusterings at different
  211. #' resolutions. Columns must be named "res.X" where X is numeric and indicates
  212. #' the clustering resolution.
  213. #' @param tSNE data.frame giving the t-SNE positions of each cell
  214. #' @param count.filter Threshold for the number of cells associated with an edge
  215. #' @param prop.filter Threhold for the to cluster proportion associated with an
  216. #' edge
  217. #' @param plot.sides Whether to add plots from the point of view of the t-SNE
  218. #' dimensions
  219. #'
  220. #' @return ggplot of the clustering tree-SNE
  221. plotClusteringTreeSNE <- function(clusterings, tSNE, count.filter = 0,
  222. prop.filter = 0.1, plot.sides = FALSE) {
  223.  
  224. library("ggplot2")
  225.  
  226. nodes <- getTreeNodes(clusterings, tSNE)
  227. edges <- getTreeEdges(clusterings, nodes, tSNE = TRUE) %>%
  228. dplyr::filter(TransCount > count.filter) %>%
  229. dplyr::filter(TransPropTo > prop.filter)
  230.  
  231. top <- ggplot(as_tibble(tSNE), aes(x = tSNE_1, y = tSNE_2)) +
  232. geom_point(alpha = 0.1) +
  233. geom_point(data = nodes,aes(x = tSNE1, y = tSNE2,
  234. colour = factor(Res), size = Size)) +
  235. geom_segment(data = edges,
  236. aes(x = tSNE1From, y = tSNE2From,
  237. xend = tSNE1To, yend = tSNE2To,
  238. alpha = TransPropTo, colour = factor(FromRes)),
  239. arrow = arrow(length = unit(0.02, "npc")),
  240. size = 1) +
  241. scale_size(range = c(3, 15)) +
  242. viridis::scale_colour_viridis(discrete = TRUE) +
  243. guides(size = guide_legend(title = "Size",
  244. title.position = "top"),
  245. colour = guide_legend(title = "Resolution",
  246. title.position = "top"),
  247. alpha = guide_legend(title = "Proportion",
  248. title.position = "top", nrow = 2)) +
  249. cowplot::theme_cowplot()
  250.  
  251. if (plot.sides) {
  252. side1 <- plotTreeSNESide(tSNE, nodes, edges, side = 1)
  253. side2 <- plotTreeSNESide(tSNE, nodes, edges, side = 2)
  254. sides <- cowplot::plot_grid(side1, side2, ncol = 1)
  255. legend <- cowplot::get_legend(top + theme(legend.position = "bottom"))
  256. treeSNE <- cowplot::plot_grid(top + theme(legend.position = "none"),
  257. sides, ncol = 2, rel_widths = c(2, 1))
  258. treeSNE <- cowplot::plot_grid(treeSNE, legend, ncol = 1,
  259. rel_heights = c(4, 1))
  260. } else {
  261. treeSNE <- top
  262. }
  263.  
  264. return(treeSNE)
  265. }
  266.  
  267. #' Plot clustering tree-SNE
  268. #'
  269. #' Plot a clustering tree projected onto t-SNE coordinates
  270. #'
  271. #' @param seurat Seurat object that has been clustered at different resolutions
  272. #' @param count.filter Threshold for the number of cells associated with an edge
  273. #' @param prop.filter Threhold for the to cluster proportion associated with an
  274. #' edge
  275. #' @param plot.sides Whether to add plots from the point of view of the t-SNE
  276. #' dimensions
  277. #' @param levels Resolutions to included. If NULL all are used.
  278. #'
  279. #' @return ggplot of the clustering tree-SNE
  280. plotClusteringTreeSNESeurat <- function(seurat, count.filter = 0,
  281. prop.filter = 0.1, plot.sides = FALSE,
  282. levels = NULL) {
  283.  
  284. clusterings <- seurat@meta.data %>% dplyr::select(dplyr::contains("res."))
  285. tSNE <- seurat@dr$tsne@cell.embeddings
  286.  
  287. if (!is.null(levels)) {
  288. clusterings <- clusterings[, paste0("res.", levels)]
  289. }
  290.  
  291. plotClusteringTreeSNE(clusterings, tSNE, count.filter = count.filter,
  292. prop.filter = prop.filter, plot.sides = plot.sides)
  293. }
  294.  
  295. #' Plot clustering tree-SNE side
  296. #'
  297. #' Plot a clustering tree-SNE where the x-axis is a t-SNE dimension and the
  298. #' y-axis is clustering resolution
  299. #'
  300. #' @param tSNE data.frame giving the t-SNE coordinates of each
  301. #' @param nodes data.frame describing the nodes of the clustering tree
  302. #' @param edges data.frame describing the edges of the clustering tree
  303. #' @param side t-SNE dimension to use as the x-axis
  304. #'
  305. #' @return ggplot of the side view of a clustering tree-SNE
  306. plotTreeSNESide <- function(tSNE, nodes, edges, side = 1) {
  307.  
  308. nodes$ResFactor <- factor(nodes$Res)
  309. edges$FromResFactor <- factor(edges$FromRes)
  310.  
  311. ggplot(as_tibble(tSNE), aes_string(x = paste0("tSNE_", side), y = 1)) +
  312. geom_point(alpha = 0.1) +
  313. geom_point(data = nodes,
  314. aes_string(x = paste0("tSNE", side), y = "Res",
  315. colour = "ResFactor", size = "Size")) +
  316. geom_segment(data = edges,
  317. aes_string(x = paste0("tSNE", side, "From"),
  318. y = "FromRes",
  319. xend = paste0("tSNE", side, "To"),
  320. yend = "ToRes",
  321. alpha = "TransPropTo",
  322. colour = "FromResFactor"),
  323. arrow = arrow(length = unit(0.02, "npc")),
  324. size = 1) +
  325. scale_size(range = c(1, 8)) +
  326. viridis::scale_colour_viridis(discrete = TRUE) +
  327. scale_y_reverse() +
  328. ylab("Resolution") +
  329. cowplot::theme_cowplot() +
  330. theme(legend.position = "none")
  331. }
Add Comment
Please, Sign In to add comment