Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- library("RMySQL")
- library("ggplot2")
- library("dplyr")
- library("ggthemes")
- dbDisconnect(mydb)
- all_cons <- dbListConnections(MySQL())
- for (con in all_cons)
- dbDisconnect(con)
- mydb = dbConnect(MySQL(), user='threader', password='<PASSWORD>', dbname='threads', host='localhost')
- query <- "
- SELECT
- retention_curve.week_active,
- retention_curve.total_retained,
- max_possible.max_possible_users_active,
- retention_curve.org_name,
- retention_curve.total_retained / max_possible.max_possible_users_active pct_retained
- FROM
- (SELECT
- week_active,
- org_name,
- COUNT(1) total_retained
- FROM
- (SELECT
- user_id,
- week_active,
- org_name
- FROM
- (SELECT
- visits.user_id,
- first_joined.join_date,
- visits.ds date_visited,
- orgs.uid org_name,
- FLOOR(TIMESTAMPDIFF(SECOND, first_joined.join_date, visits.ds)/(7*24*60*60)) week_active
- FROM
- (SELECT
- MIN(ds) join_date,
- user_id
- FROM
- analytics.page_view_events
- WHERE
- user_id REGEXP '^[0-9]+$'
- AND NOT page_type IN ('email_digest')
- AND ds > '2017-12-31'
- GROUP BY
- user_id
- ) first_joined
- LEFT OUTER JOIN
- (SELECT
- ds,
- user_id
- FROM
- analytics.page_view_events
- WHERE
- user_id REGEXP '^[0-9]+$'
- AND NOT page_type IN ('email_digest')
- AND ds > '2017-12-31'
- GROUP BY
- ds,
- user_id) visits
- ON
- first_joined.user_id = visits.user_id
- JOIN
- analytics.dim_users users
- ON
- users.user_id = visits.user_id
- JOIN
- threads.people people
- ON
- people.uid = visits.user_id
- JOIN
- threads.orgs orgs
- ON
- orgs.uid = people.org_id
- WHERE
- NOT users.org_id IN ('3797', '34360205099', '34360227175', '34360287846', '34360262442', '34360266575', '34360233513', '34360376764')
- AND NOT users.is_employee_owned) sub
- GROUP BY
- user_id,
- week_active,
- org_name) retention
- GROUP BY
- week_active,
- org_name) retention_curve
- JOIN
- (SELECT
- weeks_active.max_week_active,
- orgs.name org_name,
- COUNT(1) max_possible_users_active
- FROM
- (SELECT
- MAX(FLOOR(TIMESTAMPDIFF(SECOND, ds, '2018-03-15')/(7*24*60*60))) max_possible_week_active,
- user_id
- FROM
- analytics.page_view_events
- WHERE
- user_id REGEXP '^[0-9]+$'
- AND NOT page_type IN ('email_digest')
- AND ds > '2017-12-31'
- GROUP BY
- user_id) max_week_active
- CROSS JOIN
- (
- SELECT 0 AS max_week_active UNION ALL
- SELECT 1 AS max_week_active UNION ALL
- SELECT 2 AS max_week_active UNION ALL
- SELECT 3 AS max_week_active UNION ALL
- SELECT 4 AS max_week_active UNION ALL
- SELECT 5 AS max_week_active UNION ALL
- SELECT 6 AS max_week_active UNION ALL
- SELECT 7 AS max_week_active UNION ALL
- SELECT 8 AS max_week_active UNION ALL
- SELECT 9 AS max_week_active UNION ALL
- SELECT 10 AS max_week_active
- ) weeks_active
- JOIN
- analytics.dim_users users
- ON
- users.user_id = max_week_active.user_id
- JOIN
- threads.people people
- ON
- people.uid = max_week_active.user_id
- JOIN
- threads.orgs orgs
- ON
- orgs.uid = people.org_id
- WHERE
- NOT users.org_id IN ('3797', '34360205099', '34360227175', '34360287846', '34360262442', '34360266575', '34360233513', '34360376764')
- AND NOT users.is_employee_owned
- AND weeks_active.max_week_active <= max_week_active.max_possible_week_active
- GROUP BY
- weeks_active.max_week_active,
- orgs.name) max_possible
- ON
- max_possible.max_week_active = retention_curve.week_active
- AND max_possible.org_name = retention_curve.org_name
- "
- rs = dbSendQuery(mydb, query)
- data = fetch(rs, n=-1)
- ggplot(data, aes(x = week_active, y = pct_retained)) +
- geom_line(size = 1.5) +
- scale_fill_manual(values=c("#CC6666")) +
- ggtitle("% of cohort retained") +
- theme(plot.title = element_text(size=16, hjust=0.5)) +
- xlab("weeks since joined") +
- ylab("% of cohort still active") +
- theme(legend.position="bottom", legend.direction="horizontal",
- legend.title = element_blank()) +
- theme(axis.line = element_line(size=1, colour = "black"),
- panel.grid.major = element_line(colour = "#d3d3d3"), panel.grid.minor = element_blank(),
- panel.border = element_blank(), panel.background = element_blank()) +
- theme(plot.title = element_text(size = 14, family = "Tahoma", face = "bold"),
- text=element_text(family="Tahoma"),
- axis.text.x=element_text(colour="black", size = 10),
- axis.text.y=element_text(colour="black", size = 10),
- legend.key=element_rect(fill="white", colour="white")) +
- theme(axis.title.x = element_text(size = 20, family = "Tahoma")) +
- theme(axis.text.x = element_text(size = 20, family = "Tahoma") ) +
- theme(axis.title.y = element_text(size = 20, family = "Tahoma")) +
- theme(axis.text.y = element_text(size = 20, family = "Tahoma") ) +
- scale_x_continuous(breaks=c(1,2, 3,4, 5, 6, 7, 8, 9, 10, 11)) +
- scale_y_continuous(labels = scales::percent, limits = c(0,1))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement