Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
###################################################################################################
# Visualizing jump scares in 666 scary movies rated by wheresthejump.com
# The average 'jumpiness' of scary movies increases as the movie goes on
# before dropping off steeply at the end.
# Created by: reddit.com/user/brianhaas19
# Link to original data:
# https://wheresthejump.com/full-movie-list/
### Setup
# tidyverse supplies every function called in the visible part of this script
# (readr, dplyr, tidyr, ggplot2, stringr). rvest and lubridate are attached as
# well, although no code visible here calls them.
library(tidyverse)
library(rvest)
library(lubridate)

### Read data
# The cleaned data set is published as a raw CSV paste and is read straight
# from that URL, so this step needs network access.
jumps <- read_csv("https://pastebin.com/raw/k01iufdS")

# Number of distinct movies in the data; used later to average the jump counts
# and to centre the trend line vertically.
num_movies <- n_distinct(jumps$id)
### Visualise the data
# Plan for the figure:
#   * build a `timeline` data frame with 101 time `segment`s (0-100) per movie;
#   * draw each movie as a flat line, stacked by "scariness" on the y-axis
#     (scariest at the top);
#   * wherever a jump scare occurs, bump that movie's line upwards, with a
#     bigger bump for a major jump.

# Movie ids ordered from least to most scary. The primary sort key is
# rating * jump count; the rating and then the jump count break ties.
# `unique()` keeps the first occurrence of each id, so the sorted order is
# preserved.
ids <- jumps %>%
  arrange(jump_scare_rating * jump_count, jump_scare_rating, jump_count) %>%
  pull(id) %>%
  unique()
# Add a trend line for the average amount of jump scares over time.
# The 0-100 timeline is split into `n` bins; each bin contributes one point,
# placed at the bin's midpoint on the x-axis.
n <- 8 # number of cuts
a <- 100 / n # bin width on the 0-100 scale; midpoints are a/2, 3a/2, ...

# NOTE(review): `cut_interval()` spans the observed range of `jump_location`,
# while `x` below assumes the bins tile 0-100 -- confirm the data covers the
# full range.
# NOTE(review): `sum(jump_count)` adds up the `jump_count` column of every row
# in the bin; if that column holds a per-movie total rather than 1 per row,
# `n()` may be what was intended -- confirm against the data.
jumps_cut <- jumps %>% # table of average amount of jump scares at each point
  # Rows without a location would otherwise form an extra NA bin.
  filter(!is.na(jump_location)) %>%
  mutate(jump_location_cut = cut_interval(jump_location, n)) %>%
  group_by(jump_location_cut) %>%
  summarise(average_jump_count = sum(jump_count) / num_movies) %>%
  mutate(
    # Derive the midpoint from the bin's own index, so an empty (and therefore
    # missing) bin cannot shift or break the x positions of the others.
    x = (as.integer(jump_location_cut) - 0.5) * a,
    # Standardise, stretch by 100 and centre on the middle of the rank axis.
    # `scale()` returns a one-column matrix; flatten it to a plain vector.
    y = as.numeric(scale(average_jump_count, center = TRUE)) * 100 +
      num_movies / 2
  )
# Build the timeline for each movie and add the jump scares.
# Each movie contributes 101 rows (segments 0 through 100), and its position
# in `ids` becomes its `scare_rank`. Jump scares are attached by matching a
# segment to a `jump_location`; segments without a jump get NA in the joined
# columns.
timeline <- tibble(
  id = rep(ids, each = 101),
  scare_rank = rep(seq_along(ids), each = 101),
  segment = rep(0:100, times = length(ids))
) %>%
  left_join(jumps, by = c("id", "segment" = "jump_location")) %>%
  select(
    id, scare_rank, segment,
    jump_scare_rating, jump_number, major_jump
  ) %>%
  mutate(
    # Height of the movie's line at this segment: its rank, raised by 20 for a
    # major jump, by 10 for a minor jump, and by 0 where there is no jump.
    jump_intensity = scare_rank +
      ifelse(is.na(major_jump), 0, ifelse(major_jump, 20, 10))
  )
# Plot:
# Recommended figure size if using R Notebook: fig.width=12, fig.height = 8
# The figure is assembled in stages: layers, text, scales, then theme.

# Layers, drawn bottom to top: one faint white line per movie (bumped where a
# jump occurs), then the points and the line of the trend. Mapping colour to a
# constant string gives the trend a single legend entry.
g <- ggplot(timeline) +
  geom_line(
    aes(segment, jump_intensity, group = scare_rank),
    size = 0.5, alpha = 0.2, color = "white",
    show.legend = FALSE
  ) +
  geom_point(
    data = jumps_cut,
    aes(x, y, color = "Average 'jumpiness' over time")
  ) +
  geom_line(
    data = jumps_cut,
    aes(x, y, color = "Average 'jumpiness' over time"),
    size = 2,
    alpha = 0.5
  )

# Title, subtitle and caption; plotmath expressions italicise the site names.
g <- g +
  labs(
    title = expression(paste(
      "Visualizing jump scares in 666 scary movies rated by ",
      italic("wheresthejump.com")
    )),
    subtitle = "The average 'jumpiness' of scary movies increases as the movie goes on before dropping off steeply at the end.",
    caption = expression(paste(
      "Created by: ",
      italic("reddit.com/user/brianhaas19"), "\tData source: ",
      italic("https://wheresthejump.com/full-movie-list/")
    ))
  )

# Axes carry descriptive labels instead of numbers. The y breaks only position
# the three labels (presumably chosen for the 666-movie data set).
g <- g +
  scale_x_continuous(
    name = NULL,
    breaks = c(2, 50, 98),
    labels = c("Start", "Middle of Movie", "End"),
    expand = c(0, 0)
  ) +
  scale_y_continuous(
    name = "Movie Scariness",
    breaks = c(10, 325, 640),
    labels = c("Not Scary", "Scary", "Very Scary!"),
    expand = c(0, 0)
  ) +
  scale_color_manual(
    name = NULL,
    values = c("Average 'jumpiness' over time" = "white")
  )

# Dark theme: white text and axis lines on the grey and red backgrounds taken
# from wheresthejump.com, with the legend at the bottom.
g <- g +
  theme_bw() +
  theme(
    # backgrounds
    plot.background = element_rect(fill = "#222222"), # grey from the site
    panel.background = element_rect(fill = "#410000"), # red from the site
    panel.grid = element_blank(),
    # text
    plot.title = element_text(color = "white", size = 14),
    plot.subtitle = element_text(color = "white", size = 12),
    plot.caption = element_text(color = "white", size = 10, hjust = 0),
    axis.title = element_text(color = "white", size = 12),
    axis.text = element_text(color = "white", size = 10),
    axis.line = element_line(color = "white"),
    # legend
    legend.position = "bottom",
    legend.background = element_rect(fill = "#410000"),
    legend.key = element_rect(fill = "#410000"),
    legend.text = element_text(color = "white", size = 12),
    legend.key.width = unit(2, "cm")
  )

# Display the finished plot.
g
# Save the figure as a 12 x 8 PNG in the current working directory.
# Comment this call out to skip writing to disk.
# The argument is spelled `filename` in full rather than relying on partial
# matching of `file`, and the path is built with `file.path()`.
ggsave(
  filename = file.path(getwd(), "movie_jump_scares2.png"),
  plot = g,
  width = 12,
  height = 8
)
###################################################################################################
Add Comment
Please, Sign In to add comment