Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#### Setup ####
# Attach the packages the rest of this script calls without a namespace
# prefix, so the script can run in a fresh R session.
library(dplyr)    # %>%, filter, mutate, select, group_by, summarise, top_n, ...
library(tidyr)    # gather, spread, unnest
library(ggplot2)  # all plots
library(stringr)  # str_replace_all
# NOTE(review): `sentiments` (joined on its `word` column further down) is
# assumed to be the lexicon shipped with tidytext -- confirm.
library(tidytext)

#### Load data ####
# Tab-delimited playlist summary. read.delim2() uses "," as the decimal mark.
# NOTE(review): switch to read.delim() if the export uses "." decimals.
playlist_summary_external <- read.delim2(
  "~/Desktop/playlist_summary_external.txt",
  stringsAsFactors = FALSE
)
#### Calculate success metrics ####
# Keep playlists owned by 'spotify' with at least 5,000 monthly 30s streams,
# then derive the ratio metrics that every later section reads.
spotify_playlists <- playlist_summary_external %>%
  filter(owner == "spotify") %>%
  filter(monthly_stream30s >= 5000) %>%
  mutate(
    pct_skippers = skippers / users,
    dau_mau_ratio = dau / mau,
    wau_mau_ratio = wau / mau,
    streams30s_per_mau = monthly_stream30s / mau,
    streams30s_per_dau = stream30s / dau,
    # Streams that are not counted in stream30s.
    skips = streams - stream30s,
    # Skip rate is left NA for playlists with fewer than 50 DAU.
    skip_rate = ifelse(dau >= 50, skips / streams, NA),
    mau_growth_mom = mau / mau_previous_month - 1,
    # Retention is only reported when the previous month had more than 50 MAU.
    mom_retention = ifelse(
      mau_previous_month > 50,
      mau_both_months / mau_previous_month,
      NA
    )
  )
#### How does artist diversity correlate with success metrics? ####
# Playlists with more than two artists, with a diversity score and
# display-named copies of the three success metrics used for plotting.
artist_diversity <- spotify_playlists %>%
  filter(n_artists > 2) %>%
  mutate(
    tracks_per_artist = n_tracks / n_artists,
    # Score is 1 minus tracks-per-artist scaled by sqrt(n_tracks) / 2.
    artist_diversity = 1 - (tracks_per_artist / (sqrt(n_tracks) / 2)),
    `MoM Retention` = mom_retention,
    `N 30s Streams Per MAU` = streams30s_per_mau,
    `Skip Rate` = skip_rate
  )
# Scatter plus smoother of each success metric against artist diversity,
# one facet per metric with its own y scale.
artist_diversity %>%
  select(
    artist_diversity,
    `Skip Rate`, `MoM Retention`, `N 30s Streams Per MAU`
  ) %>%
  gather(key, value, `Skip Rate`, `MoM Retention`, `N 30s Streams Per MAU`) %>%
  ggplot(aes(x = artist_diversity, y = value)) +
  geom_point(alpha = 0.6) +
  geom_smooth(span = 0.999) +
  facet_wrap(~key, scales = "free_y") +
  scale_x_continuous(breaks = seq(0.2, 0.8, 0.2), limits = c(0.1, 0.9)) +
  theme_classic() +
  theme(strip.text.x = element_text(size = 10.5)) +
  labs(
    x = "Artist Diversity",
    y = "",
    title = "Playlist artist diversity vs. Success metrics",
    subtitle = "Spotify-owned playlists, > 5,000 30s streams"
  )
#### Does the number of tracks in a playlist influence success metrics? ####
# Playlists with at most 1,000 tracks: each success metric against track
# count on a log10 x axis, one facet per metric.
spotify_playlists %>%
  filter(n_tracks <= 1000) %>%
  select(
    n_tracks,
    `Skip Rate` = skip_rate,
    `MoM Retention` = mom_retention,
    `N 30s Streams Per MAU` = streams30s_per_mau
  ) %>%
  gather(key, value, -n_tracks) %>%
  ggplot(aes(x = n_tracks, y = value)) +
  geom_point() +
  geom_smooth(span = 0.999) +
  scale_x_log10() +
  facet_wrap(~key, scales = "free_y") +
  labs(
    x = "Number of tracks on playlist",
    y = "",
    title = "Playlist length vs. Success metrics",
    subtitle = "Spotify-owned playlists, > 5,000 30s streams"
  ) +
  theme_classic()
#### Do particular moods correlate with success metrics? ####
# Stack mood_1..mood_3 into a single `mood` column (one row per playlist and
# mood slot), drop the '-' and 'Other' placeholders, and order the mood
# factor by descending mean 30s streams per MAU.
spotify_playlists_mood <- spotify_playlists %>%
  select(
    mood_1, mood_2, mood_3,
    streams, stream30s, dau, mau,
    dau_mau_ratio, streams30s_per_mau, skip_rate, mom_retention
  ) %>%
  gather(key = col, value = mood, mood_1, mood_2, mood_3) %>%
  select(-col) %>%
  filter(!(mood %in% c("-", "Other"))) %>%
  mutate(mood = reorder(mood, -streams30s_per_mau, FUN = mean, na.rm = TRUE))
# Per-mood mean and median of 30s streams per MAU; the mean is printed as a
# label in each facet of the density plot below.
spotify_playlists_mood_avg <- spotify_playlists_mood %>%
  group_by(mood) %>%
  summarise(
    avg = mean(streams30s_per_mau, na.rm = TRUE),
    median = median(streams30s_per_mau, na.rm = TRUE)
  ) %>%
  ungroup()

# Density of 30s streams per MAU for each mood. The vertical line marks the
# mean over all rows of spotify_playlists.
ggplot(spotify_playlists_mood, aes(x = streams30s_per_mau, fill = mood)) +
  geom_density() +
  geom_vline(
    xintercept = mean(spotify_playlists$streams30s_per_mau, na.rm = TRUE)
  ) +
  geom_text(
    mapping = aes(x = 5, y = .135, label = scales::comma(round(avg, 1))),
    data = spotify_playlists_mood_avg,
    size = 3.25,
    fontface = "bold"
  ) +
  facet_wrap(~mood) +
  scale_x_continuous(labels = scales::comma) +
  theme_minimal() +
  theme(strip.text.x = element_text(size = 10.5)) +
  labs(
    x = "30s streams per MAU",
    y = "",
    title = "Distribution of 30s streams per MAU, by mood"
  )
###
# Rank moods on mean retention (higher ranks first), mean skip rate (lower
# ranks first) and mean 30s streams per MAU (higher ranks first), average
# the three ranks, and list moods by descending average rank.
spotify_playlists_mood %>%
  group_by(mood) %>%
  summarise(
    avg_retention = mean(mom_retention, na.rm = TRUE),
    avg_skip_rate = mean(skip_rate, na.rm = TRUE),
    avg_streams30s_per_mau = mean(streams30s_per_mau, na.rm = TRUE)
  ) %>%
  ungroup() %>%
  mutate(
    retention_rk = min_rank(desc(avg_retention)),
    skip_rk = min_rank(avg_skip_rate),
    streams30s_per_mau_rk = min_rank(desc(avg_streams30s_per_mau)),
    avg_rk = (retention_rk + skip_rk + streams30s_per_mau_rk) / 3
  ) %>%
  arrange(desc(avg_rk))
# Open in the data viewer: playlists tagged 'Sentimental' in any mood slot
# with retention > 0.16, more than 10 30s streams per DAU and skip rate < 0.16.
View(
  spotify_playlists %>%
    filter(
      mood_1 == "Sentimental" | mood_2 == "Sentimental" | mood_3 == "Sentimental"
    ) %>%
    filter(mom_retention > 0.16, streams30s_per_dau > 10, skip_rate < 0.16)
)

# Open in the data viewer: playlists tagged 'Urgent' in any mood slot with
# retention < 0.14, fewer than 9 30s streams per DAU and skip rate > 0.3.
View(
  spotify_playlists %>%
    filter(mood_1 == "Urgent" | mood_2 == "Urgent" | mood_3 == "Urgent") %>%
    filter(mom_retention < 0.14, streams30s_per_dau < 9, skip_rate > 0.3)
)
# One-hot encode mood (one 0/1 column per mood) for a linear model of skip
# rate. `id` keeps every row distinct so spread() does not see duplicate
# keys. It comes from row_number() on the piped data, so it cannot drift
# from the frame being transformed and does not error on a zero-row input
# the way seq(1, 0, 1) does.
mood_for_lm <- spotify_playlists_mood %>%
  select(mood, skip_rate) %>%
  mutate(val = 1, id = row_number()) %>%
  spread(key = mood, value = val, fill = 0) %>%
  select(-id)

# Regress skip rate on all mood indicator columns.
summary(lm(skip_rate ~ ., data = mood_for_lm))
#### Do particular genres correlate with success metrics? ####
# Top 50 playlists by MAU, with genre_1..genre_3 stacked into a single
# `genre` column, the '-' and 'Other' placeholders removed, and the genre
# factor ordered by descending mean MAU.
spotify_playlists_genre <- spotify_playlists %>%
  top_n(50, mau) %>%
  select(
    genre_1, genre_2, genre_3,
    streams, stream30s, dau, mau,
    dau_mau_ratio, streams30s_per_dau, skip_rate, mom_retention
  ) %>%
  gather(key = col, value = genre, genre_1, genre_2, genre_3) %>%
  select(-col) %>%
  filter(!(genre %in% c("-", "Other"))) %>%
  mutate(genre = reorder(genre, -mau, FUN = mean, na.rm = TRUE))
# Horizontal bars: number of top-50 playlists per genre, filled by the
# genre's average MAU in thousands. The colour midpoint is the mean MAU
# (in thousands) over all rows of spotify_playlists_genre.
spotify_playlists_genre %>%
  group_by(genre) %>%
  summarise(
    n = n(),
    `Average MAU Per Playlist (000s)` = mean(mau, na.rm = TRUE) / 1000
  ) %>%
  ungroup() %>%
  ggplot(
    aes(x = reorder(genre, n), y = n, fill = `Average MAU Per Playlist (000s)`)
  ) +
  geom_col() +
  scale_fill_gradient2(
    low = "#CD5C5C",
    mid = "#F0E68C",
    high = "#32CD32",
    midpoint = mean(spotify_playlists_genre$mau) / 1000
  ) +
  coord_flip() +
  labs(
    x = "",
    y = "Number of playlists",
    title = "Top 50 Playlists by MAU: Most Popular Genres",
    subtitle = "Spotify-owned playlists"
  ) +
  theme_minimal() +
  theme(legend.position = "bottom")
# Open in the data viewer: top-50-by-MAU playlists tagged 'Rap' in any
# genre slot.
View(
  spotify_playlists %>%
    top_n(50, mau) %>%
    filter(genre_1 == "Rap" | genre_2 == "Rap" | genre_3 == "Rap")
)
# One-hot encode genre across all Spotify playlists for a linear model of
# MAU. The three genre slots are stacked first, so a playlist contributes
# one row per non-placeholder genre.
genre_for_lm <- spotify_playlists %>%
  select(genre_1, genre_2, genre_3,
         streams, stream30s, dau, mau,
         dau_mau_ratio, streams30s_per_dau, skip_rate, mom_retention) %>%
  gather(key = col, value = genre,
         -streams, -stream30s, -dau, -mau, -dau_mau_ratio,
         -streams30s_per_dau, -skip_rate, -mom_retention) %>%
  select(-col) %>%
  filter(!genre %in% c('-', 'Other')) %>%
  # The factor order here fixes the order of the indicator columns below.
  mutate(genre = reorder(genre, -mau, mean, na.rm = TRUE)) %>%
  select(genre, mau) %>%
  # `id` keeps rows distinct for spread(). row_number() is used instead of
  # seq(1, nrow(.), 1), which errors when the frame has zero rows.
  mutate(val = 1, id = row_number()) %>%
  spread(key = genre, value = val, fill = 0) %>%
  select(-id)

# Regress MAU on all genre indicator columns.
summary(lm(mau ~ ., data = genre_for_lm))
####
# Total monthly 30s streams per genre. A playlist's streams are counted once
# for each of its non-placeholder genre slots.
spotify_playlists_genre2 <- spotify_playlists %>%
  select(genre_1, genre_2, genre_3, monthly_stream30s) %>%
  gather(key = col, value = genre, genre_1, genre_2, genre_3) %>%
  select(-col) %>%
  filter(!(genre %in% c("-", "Other"))) %>%
  group_by(genre) %>%
  summarise(monthly_stream30s = sum(monthly_stream30s, na.rm = TRUE))

# Horizontal bars of those totals, ordered by total.
ggplot(
  spotify_playlists_genre2,
  aes(x = reorder(genre, monthly_stream30s), y = monthly_stream30s)
) +
  geom_col(fill = "steelblue4") +
  scale_y_continuous(labels = scales::comma) +
  coord_flip() +
  labs(
    x = "",
    y = "Monthly 30s Streams",
    title = "Monthly 30s Streams by Genre",
    subtitle = "Spotify-owned playlists, > 5,000 30s streams"
  ) +
  theme_minimal()
#### Do particular keywords in a playlist title influence success metrics? ####
# One row per (playlist, token): strip the square brackets from the `tokens`
# string, split it on ", ", and unnest the resulting list column.
tokens <- spotify_playlists %>%
  select(mau, tokens) %>%
  mutate(
    tokens = strsplit(
      as.character(str_replace_all(tokens, "\\[|\\]", "")),
      ", "
    )
  ) %>%
  unnest(tokens)
# Mean MAU and row count for each token, keeping tokens that occur more than
# twice, sorted from highest to lowest mean MAU.
average_mau_by_token <- tokens %>%
  group_by(tokens) %>%
  summarise(avg_mau = mean(mau), n = n()) %>%
  ungroup() %>%
  filter(n > 2) %>%
  arrange(desc(avg_mau))
# Horizontal bars for the 15 tokens with the highest average MAU.
# The y axis is fixed to 0..525,000.
average_mau_by_token %>%
  top_n(15, avg_mau) %>%
  ggplot(aes(x = reorder(tokens, avg_mau), y = avg_mau)) +
  geom_col(fill = "steelblue4") +
  scale_y_continuous(labels = scales::comma, limits = c(0, 525000)) +
  coord_flip() +
  labs(
    x = "",
    y = "Average MAU per playlist",
    title = "Playlist titles: Average MAU per playlist by token",
    subtitle = "Spotify-owned playlists, > 5,000 30s streams"
  ) +
  theme_minimal()
####
# Attach a sentiment to each token by matching it against the `word` column
# of `sentiments` (unmatched tokens get NA), then report mean MAU and row
# count per sentiment for sentiments with more than two rows.
tokens %>%
  left_join(sentiments, by = c("tokens" = "word")) %>%
  group_by(sentiment) %>%
  summarise(avg_mau = mean(mau), n = n()) %>%
  ungroup() %>%
  filter(n > 2) %>%
  arrange(desc(avg_mau))
####
# Share of skipping users against skip rate, for playlists with more than
# 1,000 30s streams; both axes are shown as percentages.
spotify_playlists %>%
  filter(stream30s > 1000) %>%
  ggplot(aes(x = pct_skippers, y = skip_rate)) +
  geom_point() +
  geom_smooth() +
  scale_y_continuous(labels = scales::percent) +
  scale_x_continuous(labels = scales::percent) +
  labs(
    title = "% Skippers vs Skip Rate",
    x = "skippers/users",
    y = "(streams - stream30s)/streams"
  ) +
  theme_minimal()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement