Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- -- Past Experiments
- WITH __experiments AS (
- SELECT
- user_id,
- anonymous_id,
- received_at AS TIMESTAMP,
- experiment_id,
- variation_id,
- context_page_path AS url,
- context_user_agent AS user_agent
- FROM
- experiment_viewed
- ),
- __experimentDates AS (
- SELECT
- experiment_id,
- variation_id,
- date_trunc('day', TIMESTAMP) AS DATE,
- COUNT(DISTINCT anonymous_id) AS users
- FROM
- __experiments
- WHERE
- TIMESTAMP > toDateTime('2020-08-12 11:51:08')
- GROUP BY
- experiment_id,
- variation_id,
- date_trunc('day', TIMESTAMP)
- ),
- __userThresholds AS (
- SELECT
- experiment_id,
- variation_id,
- -- It's common for a small number of tracking events to continue coming in
- -- long after an experiment ends, so limit to days with enough traffic
- MAX(users) * 0.05 AS threshold
- FROM
- __experimentDates
- WHERE
- -- Skip days where a variation got 5 or fewer visitors since it's probably not real traffic
- users > 5
- GROUP BY
- experiment_id,
- variation_id
- ),
- __variations AS (
- SELECT
- d.experiment_id,
- d.variation_id,
- MIN(d.DATE) AS start_date,
- MAX(d.DATE) AS end_date,
- SUM(d.users) AS users
- FROM
- __experimentDates d
- JOIN __userThresholds u ON (
- d.users > u.threshold
- AND d.experiment_id = u.experiment_id
- AND d.variation_id = u.variation_id
- )
- GROUP BY
- d.experiment_id,
- d.variation_id
- )
- SELECT
- *
- FROM
- __variations
- WHERE
- -- Skip experiments with fewer than 200 users since they don't have enough data
- users > 200 -- Skip experiments that are 5 days or shorter (most likely means it was stopped early)
- AND date_diff('day', start_date, end_date) > 5 -- Skip experiments that start of the very first day since we're likely missing data
- AND date_diff(
- 'day',
- toDateTime('2020-08-12 11:51:08'),
- start_date
- ) > 2
- ORDER BY
- experiment_id ASC,
- variation_id ASC
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement