brianhaas19

Tate Art Museum (Tidy Tuesday 2021-01-12)

Jan 17th, 2021 (edited)
1,278
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. ###################################################################################################
  2. # Tate Art Museum - The aspect ratio of an art collection.
  3. # Created by: reddit.com/user/brianhaas19
  4.  
  5. # Link to data
  6. # https://github.com/rfordatascience/tidytuesday/tree/master/data/2021/2021-01-12
  7.  
  8. # Setup
  9. ### Libraries:
  10. library(tidyverse)
  11. library(ggExtra)
  12. theme_set(theme_bw())
  13.  
  14. ### Colors:
  15. blue <- "steelblue"
  16.  
  17. # Load data
  18. ### Raw data:
  19. artwork <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-01-12/artwork.csv')
  20.  
  21. # Clean data
  22. ### Mainly just removing variables with too much missing data:
  23.  
  24. artwork <- artwork %>%
  25.   select(-thumbnailCopyright, -depth, -inscription, -thumbnailUrl) # too much missing data
  26.  
  27. # Fix an incorrect height which causes an outlier when calculating aspect ratio:
  28. artwork[(artwork$id == 93796), "width"] <- 2400
  29. artwork[(artwork$id == 93796), "height"] <- 3600
  30.  
  31. # Visualize
  32. # Suggested parameters for R notebook: fig.height=9, fig.width=12
  33.  
  34. gr <- (1 + sqrt(5))/2 # Golden ratio
  35. p1 <- artwork %>%
  36.   filter(!is.na(width), !is.na(height), !is.na(year)) %>%
  37.   mutate(aspect_ratio = height/width) %>%
  38.   filter(year >= 1775, aspect_ratio < 2, aspect_ratio > 0.5) %>%
  39.   ggplot(aes(year, aspect_ratio)) +
  40.   geom_point(color = blue, size = 0.5, alpha = 0.2) +
  41.   scale_x_continuous(breaks = seq(1700, 2000, 50)) +
  42.   scale_y_continuous(breaks = round(c(1/gr, 2/3, 5/7, 3/4, 4/5, 5/6, 1, 6/5, 5/4, 4/3, 7/5, 3/2, gr), 2),
  43.                      labels = c(expression(1/phi), '2/3', '5/7', '3/4', '4/5', '5/6', 1, '6/5', '5/4', '4/3', '7/5', '3/2', expression("Golden ratio: "*phi)),
  44.                      expand = c(0, 0),
  45.                      sec.axis = sec_axis(trans = ~.*1, name = NULL,
  46.                                          breaks = round(c(1/gr, 2/3, 5/7, 3/4, 4/5, 5/6, 1, 6/5, 5/4, 4/3, 7/5, 3/2, gr), 2),
  47.                                          labels = c(expression(1/phi), '2/3', '5/7', '3/4', '4/5', '5/6', 1, '6/5', '5/4', '4/3', '7/5', '3/2', expression(phi)))) +
  48.   labs(x = "Year", y = "Aspect Ratio",
  49.        title = expression(paste(bold("Tate Art Museum"), " - The aspect ratio of an art collection.")),
  50.        subtitle = "The aspect ratio - calculated as height/width - of 58,992 works of art in the Tate Art Museum plotted over time.",
  51.        caption = expression(paste("Created by: ", italic("reddit.com/user/brianhaas19"), "\tData source: ", italic("https://github.com/rfordatascience/tidytuesday/tree/master/data/2021/2021-01-12")))) +
  52.   theme(plot.title = element_text(vjust = -1), # shift it down a bit
  53.         plot.subtitle = element_text(vjust = -1), # shift it down a bit
  54.         plot.caption = element_text(hjust = 0),
  55.         panel.grid.minor = element_blank())
  56.  
  57. P <- ggMarginal(p1, type = "histogram", size = 8,
  58.                 xparams = list(binwidth = 5), yparams = list(binwidth = 0.05),
  59.                 fill = blue, color = "white")
  60. P
  61.  
  62. ### Uncomment to save to disk:
  63. # ggsave(plot = P,
  64. #        filename = "tate_aspect_ratio.png",
  65. #        width = 12, height = 9)
  66. ###################################################################################################
RAW Paste Data