Advertisement
Guest User

rsmith_spotify

a guest
Sep 23rd, 2018
103
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 9.14 KB | None | 0 0
  #### Load packages ####

  # The rest of this script calls dplyr verbs (filter, mutate, summarise, top_n),
  # tidyr reshaping (gather, spread, unnest), ggplot2 plotting and
  # stringr::str_replace_all(). Attach them explicitly so the script does not
  # depend on whatever happens to be loaded in the interactive session.
  library(dplyr)
  library(tidyr)
  library(ggplot2)
  library(stringr)
  # NOTE(review): the `sentiments` table joined against title tokens later on is
  # presumably the lexicon shipped with tidytext -- confirm this is the source.
  library(tidytext)

  #### Load data ####

  # Playlist summary export. read.delim2() reads tab-delimited text and treats
  # "," as the decimal mark.
  playlist_summary_external <- read.delim2(
    "~/Desktop/playlist_summary_external.txt",
    stringsAsFactors = FALSE
  )
  4.  
  #### Calculate success metrics ####

  # Keep playlists owned by 'spotify' with at least 5,000 monthly 30s streams,
  # then derive the ratio metrics that the analyses below read.
  spotify_playlists <- playlist_summary_external %>%
    filter(owner == 'spotify') %>%
    filter(monthly_stream30s >= 5000) %>%
    mutate(
      pct_skippers = skippers / users,
      dau_mau_ratio = dau / mau,
      wau_mau_ratio = wau / mau,
      streams30s_per_mau = monthly_stream30s / mau,
      streams30s_per_dau = stream30s / dau,
      # A "skip" is any stream that did not reach the 30s mark.
      skips = streams - stream30s,
      # Skip rate is set to NA for playlists with fewer than 50 DAU.
      skip_rate = ifelse(dau >= 50, skips / streams, NA),
      mau_growth_mom = mau / mau_previous_month - 1,
      # Retention is only reported when last month had more than 50 MAU.
      mom_retention = ifelse(
        mau_previous_month > 50,
        mau_both_months / mau_previous_month,
        NA
      )
    )
  20.  
  #### How does artist diversity correlate with success metrics? ####

  # Playlists with more than two artists. The diversity score is one minus
  # tracks-per-artist scaled by sqrt(n_tracks) / 2. The three success metrics
  # are copied under display names so they can serve directly as facet labels.
  artist_diversity <- spotify_playlists %>%
    filter(n_artists > 2) %>%
    mutate(
      tracks_per_artist = n_tracks / n_artists,
      artist_diversity = 1 - (tracks_per_artist / (sqrt(n_tracks) / 2)),
      `MoM Retention` = mom_retention,
      `N 30s Streams Per MAU` = streams30s_per_mau,
      `Skip Rate` = skip_rate
    )

  # One facet per success metric, each on its own y scale; points outside the
  # 0.1-0.9 diversity window are dropped by the x-axis limits.
  artist_diversity %>%
    select(
      artist_diversity,
      `Skip Rate`, `MoM Retention`, `N 30s Streams Per MAU`
    ) %>%
    gather(key, value, `Skip Rate`, `MoM Retention`, `N 30s Streams Per MAU`) %>%
    ggplot(aes(x = artist_diversity, y = value)) +
    geom_point(alpha = 0.6) +
    geom_smooth(span = 0.999) +
    facet_wrap(~key, scales = 'free_y') +
    scale_x_continuous(limits = c(0.1, 0.9), breaks = seq(0.2, 0.8, 0.2)) +
    theme_classic() +
    theme(strip.text.x = element_text(size = 10.5)) +
    labs(
      x = 'Artist Diversity',
      y = '',
      title = 'Playlist artist diversity vs. Success metrics',
      subtitle = 'Spotify-owned playlists, > 5,000 30s streams'
    )
  46.  
  #### Does the number of tracks in a playlist influence success metrics? ####

  # Same three success metrics as above, plotted against playlist length on a
  # log10 x axis. Playlists with more than 1,000 tracks are excluded.
  spotify_playlists %>%
    filter(n_tracks <= 1000) %>%
    mutate(
      `MoM Retention` = mom_retention,
      `N 30s Streams Per MAU` = streams30s_per_mau,
      `Skip Rate` = skip_rate
    ) %>%
    select(n_tracks, `Skip Rate`, `MoM Retention`, `N 30s Streams Per MAU`) %>%
    gather(key, value, `Skip Rate`, `MoM Retention`, `N 30s Streams Per MAU`) %>%
    ggplot(aes(x = n_tracks, y = value)) +
    geom_point() +
    geom_smooth(span = 0.999) +
    scale_x_log10() +
    facet_wrap(~key, scales = 'free_y') +
    labs(
      x = 'Number of tracks on playlist',
      y = '',
      title = 'Playlist length vs. Success metrics',
      subtitle = 'Spotify-owned playlists, > 5,000 30s streams'
    ) +
    theme_classic()
  66.  
  #### Do particular moods correlate with success metrics? ####

  # Long table with one row per (playlist, mood slot). The three mood columns are
  # stacked into a single `mood` column, placeholder moods are removed, and the
  # factor levels are ordered by descending mean 30s streams per MAU.
  spotify_playlists_mood <- spotify_playlists %>%
    select(
      mood_1, mood_2, mood_3,
      streams, stream30s, dau, mau,
      dau_mau_ratio, streams30s_per_mau, skip_rate, mom_retention
    ) %>%
    gather(key = col, value = mood, mood_1, mood_2, mood_3) %>%
    select(-col) %>%
    filter(!(mood %in% c('-', 'Other'))) %>%
    mutate(mood = reorder(mood, -streams30s_per_mau, mean, na.rm = TRUE))

  # Per-mood mean and median of 30s streams per MAU (the mean labels each facet).
  spotify_playlists_mood_avg <- spotify_playlists_mood %>%
    group_by(mood) %>%
    summarise(
      avg = mean(streams30s_per_mau, na.rm = TRUE),
      median = median(streams30s_per_mau, na.rm = TRUE)
    ) %>%
    ungroup()

  # Density per mood. The vertical line marks the mean across all playlists in
  # spotify_playlists, and the bold label is that mood's own mean.
  ggplot(spotify_playlists_mood, aes(x = streams30s_per_mau, fill = mood)) +
    geom_density() +
    geom_vline(
      xintercept = mean(spotify_playlists$streams30s_per_mau, na.rm = TRUE)
    ) +
    geom_text(
      data = spotify_playlists_mood_avg,
      mapping = aes(x = 5, y = .135, label = scales::comma(round(avg, 1))),
      size = 3.25,
      fontface = 'bold'
    ) +
    facet_wrap(~mood) +
    scale_x_continuous(labels = scales::comma) +
    labs(
      x = '30s streams per MAU',
      y = '',
      title = 'Distribution of 30s streams per MAU, by mood'
    ) +
    theme_minimal() +
    theme(strip.text.x = element_text(size = 10.5))
  94.  
  ###

  # Rank moods on each success metric (rank 1 = highest retention, lowest skip
  # rate, most 30s streams per MAU) and average the three ranks. Rows are sorted
  # by descending average rank, so the largest average rank is listed first.
  spotify_playlists_mood %>%
    group_by(mood) %>%
    summarise(
      avg_retention = mean(mom_retention, na.rm = TRUE),
      avg_skip_rate = mean(skip_rate, na.rm = TRUE),
      avg_streams30s_per_mau = mean(streams30s_per_mau, na.rm = TRUE)
    ) %>%
    ungroup() %>%
    mutate(
      retention_rk = min_rank(-avg_retention),
      skip_rk = min_rank(avg_skip_rate),
      streams30s_per_mau_rk = min_rank(-avg_streams30s_per_mau),
      avg_rk = (retention_rk + skip_rk + streams30s_per_mau_rk) / 3
    ) %>%
    arrange(desc(avg_rk))
  112.  
  # Inspect 'Sentimental' playlists with retention above 0.16, more than ten 30s
  # streams per DAU and a skip rate below 0.16.
  View(
    spotify_playlists %>%
      filter(
        mood_1 == 'Sentimental' | mood_2 == 'Sentimental' | mood_3 == 'Sentimental'
      ) %>%
      filter(mom_retention > 0.16, streams30s_per_dau > 10, skip_rate < 0.16)
  )

  # Inspect 'Urgent' playlists with retention below 0.14, fewer than nine 30s
  # streams per DAU and a skip rate above 0.3.
  View(
    spotify_playlists %>%
      filter(mood_1 == 'Urgent' | mood_2 == 'Urgent' | mood_3 == 'Urgent') %>%
      filter(mom_retention < 0.14, streams30s_per_dau < 9, skip_rate > 0.3)
  )
  126.  
  # One-hot encode mood: every row of spotify_playlists_mood gets a unique id and
  # a 1 in its own mood column, with 0 in all others, so skip_rate can be
  # regressed on the mood indicators. seq_len(n()) is used for the id because
  # seq(1, nrow(x), 1) errors when the input has zero rows.
  mood_for_lm <- spotify_playlists_mood %>%
    select(mood, skip_rate) %>%
    mutate(val = 1, id = seq_len(n())) %>%
    spread(key = mood, value = val, fill = 0) %>%
    select(-id)

  # NOTE(review): each row has exactly one indicator set, so the indicators sum
  # to the intercept column; expect lm() to report one coefficient as NA.
  summary(lm(skip_rate ~ ., data = mood_for_lm))
  134.  
  #### Do particular genres correlate with success metrics? ####

  # Long table of genres for the 50 playlists with the highest MAU. The three
  # genre columns are stacked into `genre`, placeholder genres are removed, and
  # the factor levels are ordered by descending mean MAU.
  spotify_playlists_genre <- spotify_playlists %>%
    top_n(50, mau) %>%
    select(
      genre_1, genre_2, genre_3,
      streams, stream30s, dau, mau,
      dau_mau_ratio, streams30s_per_dau, skip_rate, mom_retention
    ) %>%
    gather(key = col, value = genre, genre_1, genre_2, genre_3) %>%
    select(-col) %>%
    filter(!(genre %in% c('-', 'Other'))) %>%
    mutate(genre = reorder(genre, -mau, mean, na.rm = TRUE))

  # Horizontal bars: how many top-50 rows carry each genre, coloured by that
  # genre's average MAU in thousands. The colour midpoint is the mean MAU over
  # all rows of spotify_playlists_genre, also in thousands.
  spotify_playlists_genre %>%
    group_by(genre) %>%
    summarise(
      n = n(),
      `Average MAU Per Playlist (000s)` = mean(mau, na.rm = TRUE) / 1000
    ) %>%
    ungroup() %>%
    ggplot(
      aes(
        x = reorder(genre, n),
        y = n,
        fill = `Average MAU Per Playlist (000s)`
      )
    ) +
    geom_bar(stat = 'identity') +
    scale_fill_gradient2(
      low = '#CD5C5C',
      mid = '#F0E68C',
      high = '#32CD32',
      midpoint = mean(spotify_playlists_genre$mau) / 1000
    ) +
    coord_flip() +
    labs(
      x = '',
      y = 'Number of playlists',
      title = 'Top 50 Playlists by MAU: Most Popular Genres',
      subtitle = 'Spotify-owned playlists'
    ) +
    theme_minimal() +
    theme(legend.position = 'bottom')

  # Inspect the top-50 playlists that list 'Rap' in any genre slot.
  View(
    spotify_playlists %>%
      top_n(50, mau) %>%
      filter(genre_1 == 'Rap' | genre_2 == 'Rap' | genre_3 == 'Rap')
  )
  168.  
  # One-hot encode genre over all playlists in spotify_playlists: the three genre
  # columns are stacked, placeholder genres are dropped, and every remaining row
  # gets a 1 in its own genre column and 0 elsewhere, so mau can be regressed on
  # the genre indicators. seq_len(n()) is used for the row id because
  # seq(1, nrow(.), 1) errors when the input has zero rows.
  genre_for_lm <- spotify_playlists %>%
    select(
      genre_1, genre_2, genre_3,
      streams, stream30s, dau, mau,
      dau_mau_ratio, streams30s_per_dau, skip_rate, mom_retention
    ) %>%
    gather(
      key = col, value = genre,
      -streams, -stream30s, -dau, -mau,
      -dau_mau_ratio, -streams30s_per_dau, -skip_rate, -mom_retention
    ) %>%
    select(-col) %>%
    filter(!genre %in% c('-', 'Other')) %>%
    mutate(genre = reorder(genre, -mau, mean, na.rm = TRUE)) %>%
    select(genre, mau) %>%
    mutate(val = 1, id = seq_len(n())) %>%
    spread(key = genre, value = val, fill = 0) %>%
    select(-id)

  summary(lm(mau ~ ., data = genre_for_lm))
  183.  
  ####

  # Total monthly 30s streams per genre. A playlist contributes its full stream
  # count to each non-placeholder genre slot it lists.
  spotify_playlists_genre2 <- spotify_playlists %>%
    select(genre_1, genre_2, genre_3, monthly_stream30s) %>%
    gather(key = col, value = genre, genre_1, genre_2, genre_3) %>%
    select(-col) %>%
    filter(!(genre %in% c('-', 'Other'))) %>%
    group_by(genre) %>%
    summarise(monthly_stream30s = sum(monthly_stream30s, na.rm = TRUE))

  # Horizontal bar chart, genres ordered by total streams.
  ggplot(
    spotify_playlists_genre2,
    aes(x = reorder(genre, monthly_stream30s), y = monthly_stream30s)
  ) +
    geom_bar(stat = 'identity', fill = 'steelblue4') +
    scale_y_continuous(labels = scales::comma) +
    coord_flip() +
    labs(
      x = '',
      y = 'Monthly 30s Streams',
      title = 'Monthly 30s Streams by Genre',
      subtitle = 'Spotify-owned playlists, > 5,000 30s streams'
    ) +
    theme_minimal()
  202.  
  #### Do particular keywords in a playlist title influence success metrics? ####

  # One row per (playlist, title token): strip the square brackets from the
  # `tokens` string, split it on ", ", and unnest the resulting list column.
  tokens <- spotify_playlists %>%
    select(mau, tokens) %>%
    mutate(
      tokens = strsplit(
        as.character(str_replace_all(tokens, "\\[|\\]", "")),
        ", "
      )
    ) %>%
    unnest(tokens)

  # Mean MAU per token, keeping tokens that appear in more than two rows,
  # highest average first.
  average_mau_by_token <- tokens %>%
    group_by(tokens) %>%
    summarise(
      avg_mau = mean(mau),
      n = n()
    ) %>%
    ungroup() %>%
    filter(n > 2) %>%
    arrange(desc(avg_mau))

  # Horizontal bar chart of the 15 tokens with the highest average MAU.
  average_mau_by_token %>%
    top_n(15, avg_mau) %>%
    ggplot(aes(x = reorder(tokens, avg_mau), y = avg_mau)) +
    geom_bar(stat = 'identity', fill = 'steelblue4') +
    scale_y_continuous(labels = scales::comma, limits = c(0, 525000)) +
    coord_flip() +
    labs(
      x = '',
      y = 'Average MAU per playlist',
      title = 'Playlist titles: Average MAU per playlist by token',
      subtitle = 'Spotify-owned playlists, > 5,000 30s streams'
    ) +
    theme_minimal()
  233.  
  ####

  # Attach a sentiment label to each title token by matching it against the
  # `word` column of `sentiments`, then average MAU per sentiment. Sentiment
  # groups with two or fewer rows are dropped; highest average MAU comes first.
  tokens %>%
    left_join(sentiments, by = c('tokens' = 'word')) %>%
    group_by(sentiment) %>%
    summarise(
      avg_mau = mean(mau),
      n = n()
    ) %>%
    ungroup() %>%
    filter(n > 2) %>%
    arrange(desc(avg_mau))
  246.  
  ####

  # Compare the two skip measures for playlists with more than 1,000 30s streams:
  # share of users who skipped (x) against share of streams that were skipped (y).
  spotify_playlists %>%
    filter(stream30s > 1000) %>%
    ggplot(aes(x = pct_skippers, y = skip_rate)) +
    geom_point() +
    geom_smooth() +
    scale_x_continuous(labels = scales::percent) +
    scale_y_continuous(labels = scales::percent) +
    labs(
      x = 'skippers/users',
      y = '(streams - stream30s)/streams',
      title = '% Skippers vs Skip Rate'
    ) +
    theme_minimal()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement