Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- with first_edits as (
- select
- event_user_text,
- trunc(event_timestamp, 'MONTH') as date,
- min(event_timestamp) as first_edit_time
- from wmf.mediawiki_history mh
- where
- event_timestamp >= '2019-03-01' and
- event_timestamp < '2019-11-01' and
- page_namespace_historical % 2 == 1 and
- event_user_revision_count < 100 and
- not event_user_is_anonymous and
- event_entity = 'revision' and
- event_type = 'create' and
- mh.snapshot = '2019-10'
- group by event_user_text, trunc(event_timestamp, 'MONTH')
- )
- SELECT
- (count(30_days.user_name) / count(*)) as retention_rate,
- date
- FROM
- (
- SELECT distinct(fe.event_user_text) as user_name,
- fe.date as date,
- fe.first_edit_time
- FROM first_edits fe
- ) 1st_edit
- LEFT JOIN
- (
- SELECT distinct(event_user_text) as user_name
- FROM wmf.mediawiki_history
- WHERE
- event_entity = 'revision' and
- event_type = 'create' and
- event_user_revision_count < 100 and
- event_user_id IS NOT NULL and
- snapshot = '2019-10' and
- page_namespace_historical % 2 == 1 and
- event_timestamp >= '2019-03-01' and
- event_timestamp < '2019-11-01' and
- -- second since previous revision is between two and 30 days
- unix_timestamp(event_timestamp, 'YYYYMMddHHmmss') >=
- (unix_timestamp(first_edit_time, 'YYYYMMddHHmmss') + (2*24*60)) and
- unix_timestamp(event_timestamp, 'YYYYMMddHHmmss') <=
- (unix_timestamp(first_edit_time, 'YYYYMMddHHmmss') + (30*24*60))
- ) 30_days
- on 1st_edit.user_name = 30_days.user_name
- Group by date
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement