brianhaas19

Movie Jump Scares From CSV

Oct 31st, 2020 (edited)
278
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 5.18 KB | None | 0 0
  1. ###################################################################################################
  2. # Visualizing jump scares in 666 scary movies rated by wheresthejump.com
  3. # The average 'jumpiness' of scary movies increases as the movie goes on
  4. # before dropping off steeply at the end.
  5. # Created by: reddit.com/user/brianhaas19
  6.  
  7. # Link to original data:
  8. # https://wheresthejump.com/full-movie-list/
  9.  
  10. ### Setup
  11. library(tidyverse)
  12. library(rvest)
  13. library(lubridate)
  14.  
  15. ### Read data
  16. jumps <- read_csv("https://pastebin.com/raw/k01iufdS") # cleaned data saved as a CSV here
  17. num_movies <- length(unique(jumps$id))
  18.  
  19. ### Visualise the data
  20.  
  21. # First, construct a `timeline` data frame which consists of 100 time `segment`s for each movie.
  22. # Plot each movie as a straight line, with the movies ordered by "scariness" on the y-axis (scariest at the top).
  23. # Then, whenever a jump occurs in the movie, add a jump to the line for that movie. If it's a major jump
  24. # add a bigger jump to the line:
  25.  
  26. # Order the movies by scariness and save the order of the `id`s:
  27. ids <- jumps %>%
  28.   mutate(jump_scare_rating_adj = jump_scare_rating * jump_count) %>%
  29.   arrange(jump_scare_rating_adj, jump_scare_rating, jump_count) %>%
  30.   select(id) %>%
  31.   unlist() %>%
  32.   unique()
  33.  
  34. # Add a trend line for the average amount of jump scares over time:
  35. n <- 8 # number of cuts
  36. a = 100/n # parameter for getting sequence of midpoints
  37. jumps_cut <- jumps %>% # table of average amount of jump scares at each point
  38.   mutate(jump_location_cut = cut_interval(jump_location, n)) %>%
  39.   group_by(jump_location_cut) %>%
  40.   summarise(average_jump_count = sum(jump_count)/num_movies) %>%
  41.   mutate(x = seq(a/2, 100 - a/2, by = 100/n),
  42.          y = scale(average_jump_count, center = TRUE)*100 + length(unique(jumps$id))/2)
  43.  
  44. # Build the timeline for each movie and add the jump scares:
  45. timeline <- tibble(
  46.   id = ids
  47. ) %>%
  48.   mutate(count = 101, # 100 + 1, as we start at zero
  49.          scare_rank = row_number()) %>%
  50.   uncount(count) %>%  # Ref: https://stackoverflow.com/a/55492365
  51.   mutate(segment = rep(0:100, length(unique(jumps$id)))) %>%
  52.   left_join(jumps, by = c("id", "segment" = "jump_location")) %>%
  53.   select(id, scare_rank, segment, jump_scare_rating, jump_number, major_jump) %>%
  54.   mutate(jump_intensity = ifelse(is.na(major_jump), scare_rank, ifelse(major_jump, scare_rank + 20, scare_rank + 10))) # +20 for major_jump, +10 for !major_jump, 0 for NA
  55.  
  56. # Plot:
  57. # Recommended figure size if using R Notebook: fig.width=12, fig.height = 8
  58. g <- ggplot(timeline) +
  59.   geom_line(aes(segment, jump_intensity, group = scare_rank), # movie timelines with jumps where appropriate
  60.             size = 0.5, alpha = 0.2, color = "white",
  61.             show.legend = FALSE) +
  62.   geom_point(data = jumps_cut, # points for the trend line
  63.              aes(x, y, color = "Average 'jumpiness' over time")) +
  64.   geom_line(data = jumps_cut, # trend line for average number of jump scares over time
  65.             aes(x, y, color = "Average 'jumpiness' over time"),
  66.             size = 2,
  67.             alpha = 0.5) +
  68.   labs(title = expression(paste("Visualizing jump scares in 666 scary movies rated by ",
  69.                                 italic("wheresthejump.com"))),
  70.        subtitle = "The average 'jumpiness' of scary movies increases as the movie goes on before dropping off steeply at the end.",
  71.        caption = expression(paste("Created by: ",
  72.                                   italic("reddit.com/user/brianhaas19"), "\tData source: ",
  73.                                   italic("https://wheresthejump.com/full-movie-list/")))) +
  74.   scale_x_continuous(name = NULL,
  75.                      breaks = c(2, 50, 98),
  76.                      labels = c("Start", "Middle of Movie", "End"),
  77.                      expand = c(0, 0)) +
  78.   scale_y_continuous(name = "Movie Scariness",
  79.                      breaks = c(10, 325, 640), # positioning of the labels
  80.                      labels = c("Not Scary", "Scary", "Very Scary!"),
  81.                      expand = c(0, 0)) +
  82.   scale_color_manual(name = NULL, values = c("Average 'jumpiness' over time" = "white")) +
  83.   theme_bw() +
  84.   theme(plot.background = element_rect(fill = "#222222"), # grey taken from wheresthejump.com
  85.         panel.background = element_rect(fill = "#410000"), # red taken from wheresthejump.com
  86.         panel.grid = element_blank(),
  87.         plot.title = element_text(color = "white", size = 14),
  88.         plot.subtitle = element_text(color = "white", size = 12),
  89.         plot.caption = element_text(color = "white", size = 10, hjust = 0),
  90.         axis.title = element_text(color = "white", size = 12),
  91.         axis.text = element_text(color = "white", size = 10),
  92.         axis.line = element_line(color = "white"),
  93.         legend.position = "bottom",
  94.         legend.background = element_rect(fill = "#410000"),
  95.         legend.key = element_rect(fill = "#410000"),
  96.         legend.text = element_text(color = "white", size = 12),
  97.         legend.key.width = unit(2, "cm"))
  98. g
  99. ggsave(file = str_c(getwd(), "/movie_jump_scares2.png"), plot = g, width = 12, height = 8) # uncomment to save to disk in current working directory
  100. ###################################################################################################
Add Comment
Please, Sign In to add comment