Advertisement
Guest User

Untitled

a guest
Sep 19th, 2017
58
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.70 KB | None | 0 0
  1. #standardsql
  2.  
  3. WITH all_spams_ml AS (
  4. SELECT DISTINCT requestId, req_rca_request_category_id,
  5. FIRST_VALUE(vrd.score) OVER (PARTITION BY requestId ORDER BY vrd.score DESC) as score,
  6. IF(req_ghosted_time IS NOT NULL, true, false) as is_ghosted,
  7. req_flags
  8. FROM mi.fides_events n, UNNEST(verifierData) as vrd
  9. INNER JOIN website.req_requests r ON r.req_request_id = requestId
  10. LEFT OUTER JOIN a.mts_requests mr ON r.req_request_id = mr.request_id
  11. WHERE n._PARTITIONTIME >= '2017-09-09' AND n._PARTITIONTIME <= '2017-09-14'
  12. AND time > 1505001600000
  13. AND req_create_time > 1505001600000
  14. AND vrd.name = "request.ml_spam_verifier"
  15. -- AND ((LOWER(request_delete_reason) NOT LIKE "%test%"
  16. -- AND LOWER(request_delete_reason) NOT LIKE "%duplicate%"
  17. -- AND request_delete_reason NOT LIKE "%Cascading%") OR request_delete_reason IS NULL)
  18. AND score > 0.378
  19. )
  20.  
  21. SELECT COUNT(DISTINCT requestId) from all_spams_ml f WHERE f.is_ghosted IS TRUE OR f.req_flags & 4 <> 0
  22.  
  23. -- WITH all_spams_fides AS (
  24. -- SELECT requestId, req_rca_request_category_id, request_delete_reason, request_delete_spam_tag,
  25. -- FIRST_VALUE(vrd.score) OVER (PARTITION BY requestId ORDER BY vrd.score DESC) as score,
  26. -- vrd.name,
  27. -- vrd.ruleData,
  28. -- IF(req_ghosted_time IS NOT NULL, true, false) as is_ghosted,
  29. -- req_flags
  30. -- FROM mi.fides_events n, UNNEST(verifierData) as vrd
  31. -- INNER JOIN website.req_requests r ON r.req_request_id = requestId
  32. -- LEFT OUTER JOIN a.mts_requests mr ON r.req_request_id = mr.request_id
  33. -- WHERE n._PARTITIONTIME >= '2017-09-09' AND n._PARTITIONTIME <= '2017-09-14'
  34. -- AND time > 1505001600000
  35. -- AND req_create_time > 1505001600000
  36. -- -- AND vrd.name = "request.ml_spam_verifier"
  37. -- AND vrd.name = "request.baseline"
  38. -- AND vrd.outcome = 2
  39. -- -- AND ((LOWER(request_delete_reason) NOT LIKE "%test%"
  40. -- -- AND LOWER(request_delete_reason) NOT LIKE "%duplicate%"
  41. -- -- AND request_delete_reason NOT LIKE "%Cascading%") OR request_delete_reason IS NULL)
  42. -- -- AND score > 0.378
  43. -- )
  44.  
  45. -- SELECT COUNT(DISTINCT requestId) from all_spams_fides f WHERE f.is_ghosted IS TRUE --OR f.req_flags & 4 <> 0
  46.  
  47. -- spams_fides_select_rules AS (
  48. -- SELECT DISTINCT requestId , s.name, s.req_rca_request_category_id, s.is_ghosted, s.req_flags
  49. -- FROM all_spams_fides s, UNNEST(ruleData) as k
  50. -- WHERE k.score = 1.0
  51. -- AND k.name IN ("has_caveo_blacklisted_ip", "is_category_not_auto_verifiable", "has_risky_attachment", "could_have_private_info", "too_many_requests_from_same_ip", "request_has_blacklisted_device_fingerprint", "has_blacklisted_contact")
  52. -- )
  53.  
  54. -- sent_fides_not_model AS (
  55. -- SELECT f.requestId, f.is_ghosted, f.req_flags, f.name, f.req_rca_request_category_id, ml.score as ml_score --COUNT(DISTINCT f.requestId)
  56. -- FROM spams_fides_select_rules f
  57. -- LEFT OUTER JOIN all_spams_ml ml ON f.requestId = ml.requestId
  58. -- WHERE ml.score <= 0.613
  59. -- -- AND ( f.is_ghosted IS TRUE OR f.req_flags & 4 <> 0) -- 416 (56g + 360d)
  60. -- -- f.req_rca_request_category_id != 95
  61. -- )
  62.  
  63. -- SELECT COUNT(DISTINCT f.requestId)
  64. -- FROM all_spams_fides f
  65. -- INNER JOIN all_spams_ml ml ON ml.requestId = f.requestId
  66. -- WHERE ml.score > 0.431
  67. -- AND ( ml.is_ghosted IS TRUE OR ml.req_flags & 4 <> 0 ) -- 484
  68.  
  69. -- SELECT COUNT(DISTINCT requestId) FROM sent_fides_not_model -- 6553 total sent, 2570 with select rules (216 deleted requests: 33g, 182 deletes)
  70.  
  71. -- SELECT DISTINCT requestId , s.name as verifier_name, k.name as rule_name, k.score, k.outcome, s.req_rca_request_category_id, s.ml_score
  72. -- FROM sent_fides_not_model s
  73. -- WHERE k.score = 1.0 AND
  74. -- (is_ghosted IS TRUE OR req_flags & 4 <> 0)
  75. -- AND k.name = "has_caveo_blacklisted_ip"
  76. -- -- WHERE k.key = 'feature_ipZipDist'
  77. -- ORDER BY 7
  78.  
  79. -- SELECT DISTINCT request_delete_reason from all_spams_ml
  80.  
  81. -- bucketed AS (
  82. -- SELECT
  83. -- CASE WHEN (is_ghosted IS TRUE OR req_flags & 4 <> 0) THEN 'deleted'
  84. -- ELSE 'good' END AS outcome,
  85. -- requestId,
  86. -- request_delete_reason,
  87. -- request_delete_spam_tag,
  88. -- FLOOR(score*10000)/10000 as buckets
  89. -- FROM all_spams_ml
  90. -- -- WHERE req_flags & 4 = 0 AND is_ghosted IS FALSE
  91. -- -- GROUP BY 1,2
  92. -- )
  93.  
  94. -- SELECT
  95. -- *
  96. -- FROM bucketed b
  97. -- ORDER BY 5 DESC
  98.  
  99. -- SELECT COUNT(DISTINCT requestId) from all_spams_fides f
  100. -- WHERE f.is_ghosted IS NOT TRUE AND f.req_flags & 4 = 0
  101.  
  102. -- SELECT COUNT(DISTINCT requestId) from all_spams_ml ml
  103. -- WHERE ml.is_ghosted IS TRUE OR ml.req_flags & 4 <> 0
  104.  
  105. -- SELECT CASE WHEN (ml.is_ghosted IS TRUE OR ml.req_flags & 4 <> 0) THEN 'deleted'
  106. -- ELSE 'good' END AS outcome,
  107. -- COUNT(DISTINCT requestId) from all_spams_ml ml
  108. -- GROUP BY 1
  109.  
  110.  
  111. -- -- sent by both fides and ML
  112. -- -- 16869
  113. -- SELECT COUNT(DISTINCT f.requestId)
  114. -- FROM all_spams_ml ml
  115. -- INNER JOIN all_spams_fides f ON f.requestId = ml.requestId
  116. --WHERE ( ml.is_ghosted IS TRUE OR ml.req_flags & 4 <> 0 ) -- 2225 (269g + 1956d)
  117.  
  118. -- -- 6901 sent by ML and not sent by fides
  119. -- SELECT COUNT(DISTINCT ml.requestId)
  120. -- FROM all_spams_ml ml
  121. -- LEFT OUTER JOIN spams_fides_select_rules f ON ml.requestId = f.requestId
  122. -- WHERE f.requestId IS NULL AND (ml.is_ghosted IS TRUE OR ml.req_flags & 4 <> 0) --388 (238g + 150d)
  123. -- AND ml.score > 0.378
  124. -- ORDER BY 1 DESC
  125.  
  126. -- -- 6573 sent by fides and not sent by ML
  127. -- SELECT COUNT(DISTINCT f.requestId)
  128. -- -- f.requestId, f.req_rca_request_category_id, ml.score
  129. -- FROM all_spams_fides f
  130. -- LEFT OUTER JOIN all_spams_ml ml ON f.requestId = ml.requestId
  131. -- WHERE ml.score <= 0.431 AND (f.is_ghosted IS TRUE OR f.req_flags & 4 <> 0) -- 416 (56g + 360d)
  132. -- AND f.req_rca_request_category_id != 95
  133. -- ORDER BY 3 DESC
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement