Advertisement
johnbacon

Growth Book "Import Past Experiments" SQL (2021-08-12)

Aug 12th, 2021
3,521
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
SQL 1.94 KB | None | 0 0
  1. -- Past Experiments
  2. WITH __experiments AS (
  3.   SELECT
  4.     user_id,
  5.     anonymous_id,
  6.     received_at AS TIMESTAMP,
  7.     experiment_id,
  8.     variation_id,
  9.     context_page_path AS url,
  10.     context_user_agent AS user_agent
  11.   FROM
  12.     experiment_viewed
  13. ),
  14. __experimentDates AS (
  15.   SELECT
  16.     experiment_id,
  17.     variation_id,
  18.     date_trunc('day', TIMESTAMP) AS DATE,
  19.     COUNT(DISTINCT anonymous_id) AS users
  20.   FROM
  21.     __experiments
  22.   WHERE
  23.     TIMESTAMP > toDateTime('2020-08-12 11:51:08')
  24.   GROUP BY
  25.     experiment_id,
  26.     variation_id,
  27.     date_trunc('day', TIMESTAMP)
  28. ),
  29. __userThresholds AS (
  30.   SELECT
  31.     experiment_id,
  32.     variation_id,
  33.     -- It's common for a small number of tracking events to continue coming in
  34.     -- long after an experiment ends, so limit to days with enough traffic
  35.     MAX(users) * 0.05 AS threshold
  36.   FROM
  37.     __experimentDates
  38.   WHERE
  39.     -- Skip days where a variation got 5 or fewer visitors since it's probably not real traffic
  40.     users > 5
  41.   GROUP BY
  42.     experiment_id,
  43.     variation_id
  44. ),
  45. __variations AS (
  46.   SELECT
  47.     d.experiment_id,
  48.     d.variation_id,
  49.     MIN(d.DATE) AS start_date,
  50.     MAX(d.DATE) AS end_date,
  51.     SUM(d.users) AS users
  52.   FROM
  53.     __experimentDates d
  54.     JOIN __userThresholds u ON (
  55.       d.users > u.threshold
  56.       AND d.experiment_id = u.experiment_id
  57.       AND d.variation_id = u.variation_id
  58.     )
  59.   GROUP BY
  60.     d.experiment_id,
  61.     d.variation_id
  62. )
  63. SELECT
  64.   *
  65. FROM
  66.   __variations
  67. WHERE
  68.   -- Skip experiments with fewer than 200 users since they don't have enough data
  69.   users > 200 -- Skip experiments that are 5 days or shorter (most likely means it was stopped early)
  70.   AND date_diff('day', start_date, end_date) > 5 -- Skip experiments that start of the very first day since we're likely missing data
  71.   AND date_diff(
  72.     'day',
  73.     toDateTime('2020-08-12 11:51:08'),
  74.     start_date
  75.   ) > 2
  76. ORDER BY
  77.   experiment_id ASC,
  78.   variation_id ASC
  79.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement