Advertisement
Guest User

Untitled

a guest
Mar 19th, 2018
109
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.35 KB | None | 0 0
  1. library("RMySQL")
  2. library("ggplot2")
  3. library("dplyr")
  4. library("ggthemes")
  5. dbDisconnect(mydb)
  6. all_cons <- dbListConnections(MySQL())
  7. for (con in all_cons)
  8. dbDisconnect(con)
  9. mydb = dbConnect(MySQL(), user='threader', password='<PASSWORD>', dbname='threads', host='localhost')
  10.  
  11. query <- "
  12. SELECT
  13. retention_curve.week_active,
  14. retention_curve.total_retained,
  15. max_possible.max_possible_users_active,
  16. retention_curve.org_name,
  17. retention_curve.total_retained / max_possible.max_possible_users_active pct_retained
  18. FROM
  19. (SELECT
  20. week_active,
  21. org_name,
  22. COUNT(1) total_retained
  23. FROM
  24. (SELECT
  25. user_id,
  26. week_active,
  27. org_name
  28. FROM
  29. (SELECT
  30. visits.user_id,
  31. first_joined.join_date,
  32. visits.ds date_visited,
  33. orgs.uid org_name,
  34. FLOOR(TIMESTAMPDIFF(SECOND, first_joined.join_date, visits.ds)/(7*24*60*60)) week_active
  35. FROM
  36. (SELECT
  37. MIN(ds) join_date,
  38. user_id
  39. FROM
  40. analytics.page_view_events
  41. WHERE
  42. user_id REGEXP '^[0-9]+$'
  43. AND NOT page_type IN ('email_digest')
  44. AND ds > '2017-12-31'
  45. GROUP BY
  46. user_id
  47. ) first_joined
  48. LEFT OUTER JOIN
  49. (SELECT
  50. ds,
  51. user_id
  52. FROM
  53. analytics.page_view_events
  54. WHERE
  55. user_id REGEXP '^[0-9]+$'
  56. AND NOT page_type IN ('email_digest')
  57. AND ds > '2017-12-31'
  58. GROUP BY
  59. ds,
  60. user_id) visits
  61. ON
  62. first_joined.user_id = visits.user_id
  63. JOIN
  64. analytics.dim_users users
  65. ON
  66. users.user_id = visits.user_id
  67. JOIN
  68. threads.people people
  69. ON
  70. people.uid = visits.user_id
  71. JOIN
  72. threads.orgs orgs
  73. ON
  74. orgs.uid = people.org_id
  75. WHERE
  76. NOT users.org_id IN ('3797', '34360205099', '34360227175', '34360287846', '34360262442', '34360266575', '34360233513', '34360376764')
  77. AND NOT users.is_employee_owned) sub
  78. GROUP BY
  79. user_id,
  80. week_active,
  81. org_name) retention
  82. GROUP BY
  83. week_active,
  84. org_name) retention_curve
  85. JOIN
  86. (SELECT
  87. weeks_active.max_week_active,
  88. orgs.name org_name,
  89. COUNT(1) max_possible_users_active
  90. FROM
  91. (SELECT
  92. MAX(FLOOR(TIMESTAMPDIFF(SECOND, ds, '2018-03-15')/(7*24*60*60))) max_possible_week_active,
  93. user_id
  94. FROM
  95. analytics.page_view_events
  96. WHERE
  97. user_id REGEXP '^[0-9]+$'
  98. AND NOT page_type IN ('email_digest')
  99. AND ds > '2017-12-31'
  100. GROUP BY
  101. user_id) max_week_active
  102. CROSS JOIN
  103. (
  104. SELECT 0 AS max_week_active UNION ALL
  105. SELECT 1 AS max_week_active UNION ALL
  106. SELECT 2 AS max_week_active UNION ALL
  107. SELECT 3 AS max_week_active UNION ALL
  108. SELECT 4 AS max_week_active UNION ALL
  109. SELECT 5 AS max_week_active UNION ALL
  110. SELECT 6 AS max_week_active UNION ALL
  111. SELECT 7 AS max_week_active UNION ALL
  112. SELECT 8 AS max_week_active UNION ALL
  113. SELECT 9 AS max_week_active UNION ALL
  114. SELECT 10 AS max_week_active
  115. ) weeks_active
  116. JOIN
  117. analytics.dim_users users
  118. ON
  119. users.user_id = max_week_active.user_id
  120. JOIN
  121. threads.people people
  122. ON
  123. people.uid = max_week_active.user_id
  124. JOIN
  125. threads.orgs orgs
  126. ON
  127. orgs.uid = people.org_id
  128. WHERE
  129. NOT users.org_id IN ('3797', '34360205099', '34360227175', '34360287846', '34360262442', '34360266575', '34360233513', '34360376764')
  130. AND NOT users.is_employee_owned
  131. AND weeks_active.max_week_active <= max_week_active.max_possible_week_active
  132. GROUP BY
  133. weeks_active.max_week_active,
  134. orgs.name) max_possible
  135. ON
  136. max_possible.max_week_active = retention_curve.week_active
  137. AND max_possible.org_name = retention_curve.org_name
  138. "
  139.  
  140. rs = dbSendQuery(mydb, query)
  141. data = fetch(rs, n=-1)
  142.  
  143. ggplot(data, aes(x = week_active, y = pct_retained)) +
  144. geom_line(size = 1.5) +
  145. scale_fill_manual(values=c("#CC6666")) +
  146. ggtitle("% of cohort retained") +
  147. theme(plot.title = element_text(size=16, hjust=0.5)) +
  148. xlab("weeks since joined") +
  149. ylab("% of cohort still active") +
  150. theme(legend.position="bottom", legend.direction="horizontal",
  151. legend.title = element_blank()) +
  152. theme(axis.line = element_line(size=1, colour = "black"),
  153. panel.grid.major = element_line(colour = "#d3d3d3"), panel.grid.minor = element_blank(),
  154. panel.border = element_blank(), panel.background = element_blank()) +
  155. theme(plot.title = element_text(size = 14, family = "Tahoma", face = "bold"),
  156. text=element_text(family="Tahoma"),
  157. axis.text.x=element_text(colour="black", size = 10),
  158. axis.text.y=element_text(colour="black", size = 10),
  159. legend.key=element_rect(fill="white", colour="white")) +
  160. theme(axis.title.x = element_text(size = 20, family = "Tahoma")) +
  161. theme(axis.text.x = element_text(size = 20, family = "Tahoma") ) +
  162. theme(axis.title.y = element_text(size = 20, family = "Tahoma")) +
  163. theme(axis.text.y = element_text(size = 20, family = "Tahoma") ) +
  164. scale_x_continuous(breaks=c(1,2, 3,4, 5, 6, 7, 8, 9, 10, 11)) +
  165. scale_y_continuous(labels = scales::percent, limits = c(0,1))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement