Advertisement
Guest User

Untitled

a guest
Jan 24th, 2017
93
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.77 KB | None | 0 0
  1. # Create a "balloon plot" as alternative to a heatmap with ggplot2
  2. #
  3. # January 2017
  4. # Author: Markus Konrad <markus.konrad@wzb.eu>, WZB Berlin Social Science Center
  5.  
  6. library(dplyr)
  7. library(tidyr)
  8. library(ggplot2)
  9.  
  # Variables that will be displayed as the columns of the balloon plot.
  vars <- c("awake", "sleep_total", "sleep_rem")

  # Prepare the data from the "msleep" dataset, which comes with ggplot2:
  #   1. keep only rows with a known "vore" and only the columns we need,
  #   2. draw 5 random rows from each "vore" group (no seed is set, so the
  #      subset differs between runs; a group with fewer than 5 rows is
  #      kept whole instead of raising an error),
  #   3. reshape to long format, one row per (name, vore, variable), and
  #      drop NA values -- those become empty spots in the plot,
  #   4. order by vore and name.
  df <- msleep %>%
    filter(!is.na(vore)) %>%
    select(name, vore, all_of(vars)) %>%
    group_by(vore) %>%
    slice_sample(n = 5) %>%
    ungroup() %>%
    pivot_longer(
      cols = all_of(vars),
      names_to = "variable",
      values_to = "value",
      values_drop_na = TRUE
    ) %>%
    arrange(vore, name)
  19.  
  # Add a "row" column, the y position in the plot: every (vore, name)
  # group gets one integer index. mutate() keeps the existing row order.
  df <- df %>%
    group_by(vore, name) %>%
    mutate(row = cur_group_id()) %>%
    ungroup()

  # Add a "col" column, the x position in the plot: every variable gets one
  # integer index. The index follows dplyr's group ordering of the variable
  # names, not the order in which the variables were listed.
  df <- df %>%
    group_by(variable) %>%
    mutate(col = cur_group_id()) %>%
    ungroup()

  # Character vector of variable names for the x axis, ordered by "col" so
  # that label i belongs to x position i.
  vars_x_axis <- df %>%
    distinct(col, variable) %>%
    arrange(col) %>%
    pull(variable)

  # Character vector of observation names for the y axis, ordered explicitly
  # by "row" so that label i belongs to y position i, independent of how
  # "df" happens to be sorted.
  names_y_axis <- df %>%
    distinct(row, name) %>%
    arrange(row) %>%
    pull(name)
  29.  
  # Draw the balloon plot: one point ("balloon") per observation/variable
  # pair. Color depends on vore; size and alpha depend on the value.
  # x and y are wrapped in factor(), otherwise scale_x/y_discrete() won't
  # work.
  ggplot(
    df,
    aes(
      x = factor(col), y = factor(row),
      color = vore, size = value, alpha = value
    )
  ) +
    # the balloons themselves
    geom_point() +
    # print the value next to each balloon: shifted right by 0.25 units,
    # fully opaque and with a fixed text size
    geom_text(aes(label = value, x = col + 0.25), alpha = 1.0, size = 3) +
    scale_alpha_continuous(range = c(0.3, 0.7)) +
    scale_size_area(max_size = 5) +
    # label the X axis with the variable names and move it to the top;
    # seq_along() stays correct even for an empty label vector
    scale_x_discrete(
      breaks = seq_along(vars_x_axis),
      labels = vars_x_axis,
      position = "top"
    ) +
    # label the Y axis with the observation names
    scale_y_discrete(
      breaks = seq_along(names_y_axis),
      labels = names_y_axis
    ) +
    theme_bw() +
    theme(
      axis.line = element_blank(),          # disable axis lines
      axis.title = element_blank(),         # disable axis titles
      panel.border = element_blank(),       # disable panel border
      panel.grid.major.x = element_blank(), # disable major grid lines on X
      panel.grid.minor.x = element_blank()  # disable minor grid lines on X
    )
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement