-- Past Experiments WITH __experiments as ( SELECT user_id, anonymous_id, received_at as timestamp, experiment_id, variation_id, context_page_path as url, context_user_agent as user_agent FROM experiment_viewed ), __experimentDates as ( SELECT experiment_id, variation_id, date_trunc('day', timestamp) as date, count(distinct anonymous_id) as users FROM __experiments WHERE timestamp > toDateTime('2020-08-12 11:51:08') GROUP BY experiment_id, variation_id, date_trunc('day', timestamp) ), __userThresholds as ( SELECT experiment_id, variation_id, -- It's common for a small number of tracking events to continue coming in -- long after an experiment ends, so limit to days with enough traffic max(users) * 0.05 as threshold FROM __experimentDates WHERE -- Skip days where a variation got 5 or fewer visitors since it's probably not real traffic users > 5 GROUP BY experiment_id, variation_id ), __variations as ( SELECT d.experiment_id, d.variation_id, MIN(d.date) as start_date, MAX(d.date) as end_date, SUM(d.users) as users FROM __experimentDates d JOIN __userThresholds u ON ( d.users > u.threshold AND d.experiment_id = u.experiment_id AND d.variation_id = u.variation_id ) GROUP BY d.experiment_id, d.variation_id ) SELECT * FROM __variations WHERE -- Skip experiments with fewer than 200 users since they don't have enough data users > 200 -- Skip experiments that are 5 days or shorter (most likely means it was stopped early) AND date_diff('day', start_date, end_date) > 5 -- Skip experiments that start of the very first day since we're likely missing data AND date_diff( 'day', toDateTime('2020-08-12 11:51:08'), start_date ) > 2 ORDER BY experiment_id ASC, variation_id ASC