Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #standardsql
- WITH all_spams_ml AS (
- SELECT DISTINCT requestId, req_rca_request_category_id,
- FIRST_VALUE(vrd.score) OVER (PARTITION BY requestId ORDER BY vrd.score DESC) as score,
- IF(req_ghosted_time IS NOT NULL, true, false) as is_ghosted,
- req_flags
- FROM mi.fides_events n, UNNEST(verifierData) as vrd
- INNER JOIN website.req_requests r ON r.req_request_id = requestId
- LEFT OUTER JOIN a.mts_requests mr ON r.req_request_id = mr.request_id
- WHERE n._PARTITIONTIME >= '2017-09-09' AND n._PARTITIONTIME <= '2017-09-14'
- AND time > 1505001600000
- AND req_create_time > 1505001600000
- AND vrd.name = "request.ml_spam_verifier"
- -- AND ((LOWER(request_delete_reason) NOT LIKE "%test%"
- -- AND LOWER(request_delete_reason) NOT LIKE "%duplicate%"
- -- AND request_delete_reason NOT LIKE "%Cascading%") OR request_delete_reason IS NULL)
- AND score > 0.378
- )
- SELECT COUNT(DISTINCT requestId) from all_spams_ml f WHERE f.is_ghosted IS TRUE OR f.req_flags & 4 <> 0
- -- WITH all_spams_fides AS (
- -- SELECT requestId, req_rca_request_category_id, request_delete_reason, request_delete_spam_tag,
- -- FIRST_VALUE(vrd.score) OVER (PARTITION BY requestId ORDER BY vrd.score DESC) as score,
- -- vrd.name,
- -- vrd.ruleData,
- -- IF(req_ghosted_time IS NOT NULL, true, false) as is_ghosted,
- -- req_flags
- -- FROM mi.fides_events n, UNNEST(verifierData) as vrd
- -- INNER JOIN website.req_requests r ON r.req_request_id = requestId
- -- LEFT OUTER JOIN a.mts_requests mr ON r.req_request_id = mr.request_id
- -- WHERE n._PARTITIONTIME >= '2017-09-09' AND n._PARTITIONTIME <= '2017-09-14'
- -- AND time > 1505001600000
- -- AND req_create_time > 1505001600000
- -- -- AND vrd.name = "request.ml_spam_verifier"
- -- AND vrd.name = "request.baseline"
- -- AND vrd.outcome = 2
- -- -- AND ((LOWER(request_delete_reason) NOT LIKE "%test%"
- -- -- AND LOWER(request_delete_reason) NOT LIKE "%duplicate%"
- -- -- AND request_delete_reason NOT LIKE "%Cascading%") OR request_delete_reason IS NULL)
- -- -- AND score > 0.378
- -- )
- -- SELECT COUNT(DISTINCT requestId) from all_spams_fides f WHERE f.is_ghosted IS TRUE --OR f.req_flags & 4 <> 0
- -- spams_fides_select_rules AS (
- -- SELECT DISTINCT requestId , s.name, s.req_rca_request_category_id, s.is_ghosted, s.req_flags
- -- FROM all_spams_fides s, UNNEST(ruleData) as k
- -- WHERE k.score = 1.0
- -- AND k.name IN ("has_caveo_blacklisted_ip", "is_category_not_auto_verifiable", "has_risky_attachment", "could_have_private_info", "too_many_requests_from_same_ip", "request_has_blacklisted_device_fingerprint", "has_blacklisted_contact")
- -- )
- -- sent_fides_not_model AS (
- -- SELECT f.requestId, f.is_ghosted, f.req_flags, f.name, f.req_rca_request_category_id, ml.score as ml_score --COUNT(DISTINCT f.requestId)
- -- FROM spams_fides_select_rules f
- -- LEFT OUTER JOIN all_spams_ml ml ON f.requestId = ml.requestId
- -- WHERE ml.score <= 0.613
- -- -- AND ( f.is_ghosted IS TRUE OR f.req_flags & 4 <> 0) -- 416 (56g + 360d)
- -- -- f.req_rca_request_category_id != 95
- -- )
- -- SELECT COUNT(DISTINCT f.requestId)
- -- FROM all_spams_fides f
- -- INNER JOIN all_spams_ml ml ON ml.requestId = f.requestId
- -- WHERE ml.score > 0.431
- -- AND ( ml.is_ghosted IS TRUE OR ml.req_flags & 4 <> 0 ) -- 484
- -- SELECT COUNT(DISTINCT requestId) FROM sent_fides_not_model -- 6553 total sent, 2570 with select rules (216 deleted requests: 33g, 182 deletes)
- -- SELECT DISTINCT requestId , s.name as verifier_name, k.name as rule_name, k.score, k.outcome, s.req_rca_request_category_id, s.ml_score
- -- FROM sent_fides_not_model s
- -- WHERE k.score = 1.0 AND
- -- (is_ghosted IS TRUE OR req_flags & 4 <> 0)
- -- AND k.name = "has_caveo_blacklisted_ip"
- -- -- WHERE k.key = 'feature_ipZipDist'
- -- ORDER BY 7
- -- SELECT DISTINCT request_delete_reason from all_spams_ml
- -- bucketed AS (
- -- SELECT
- -- CASE WHEN (is_ghosted IS TRUE OR req_flags & 4 <> 0) THEN 'deleted'
- -- ELSE 'good' END AS outcome,
- -- requestId,
- -- request_delete_reason,
- -- request_delete_spam_tag,
- -- FLOOR(score*10000)/10000 as buckets
- -- FROM all_spams_ml
- -- -- WHERE req_flags & 4 = 0 AND is_ghosted IS FALSE
- -- -- GROUP BY 1,2
- -- )
- -- SELECT
- -- *
- -- FROM bucketed b
- -- ORDER BY 5 DESC
- -- SELECT COUNT(DISTINCT requestId) from all_spams_fides f
- -- WHERE f.is_ghosted IS NOT TRUE AND f.req_flags & 4 = 0
- -- SELECT COUNT(DISTINCT requestId) from all_spams_ml ml
- -- WHERE ml.is_ghosted IS TRUE OR ml.req_flags & 4 <> 0
- -- SELECT CASE WHEN (ml.is_ghosted IS TRUE OR ml.req_flags & 4 <> 0) THEN 'deleted'
- -- ELSE 'good' END AS outcome,
- -- COUNT(DISTINCT requestId) from all_spams_ml ml
- -- GROUP BY 1
- -- -- sent by both fides and ML
- -- -- 16869
- -- SELECT COUNT(DISTINCT f.requestId)
- -- FROM all_spams_ml ml
- -- INNER JOIN all_spams_fides f ON f.requestId = ml.requestId
- --WHERE ( ml.is_ghosted IS TRUE OR ml.req_flags & 4 <> 0 ) -- 2225 (269g + 1956d)
- -- -- 6901 sent by ML and not sent by fides
- -- SELECT COUNT(DISTINCT ml.requestId)
- -- FROM all_spams_ml ml
- -- LEFT OUTER JOIN spams_fides_select_rules f ON ml.requestId = f.requestId
- -- WHERE f.requestId IS NULL AND (ml.is_ghosted IS TRUE OR ml.req_flags & 4 <> 0) --388 (238g + 150d)
- -- AND ml.score > 0.378
- -- ORDER BY 1 DESC
- -- -- 6573 sent by fides and not sent by ML
- -- SELECT COUNT(DISTINCT f.requestId)
- -- -- f.requestId, f.req_rca_request_category_id, ml.score
- -- FROM all_spams_fides f
- -- LEFT OUTER JOIN all_spams_ml ml ON f.requestId = ml.requestId
- -- WHERE ml.score <= 0.431 AND (f.is_ghosted IS TRUE OR f.req_flags & 4 <> 0) -- 416 (56g + 360d)
- -- AND f.req_rca_request_category_id != 95
- -- ORDER BY 3 DESC
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement