Advertisement
fhoffa

Untitled

Jul 24th, 2015
185
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
SQL 1.20 KB | None | 0 0
  1. SELECT a.word, b.word, c, ratio
  2. FROM(
  3.   SELECT a.word, b.word, c, ratio, RANK() OVER(PARTITION BY a.word ORDER BY c DESC) rank
  4.   FROM (
  5.     SELECT a.word, b.word, COUNT(*) c, RATIO_TO_REPORT(c) OVER(PARTITION BY b.word) ratio
  6.     FROM (
  7.       SELECT word, id
  8.       FROM [fh-bigquery:reddit_comments.2015_06] a
  9.       CROSS JOIN (SELECT word FROM (SELECT 'love' word)  # ***** REPLACE 'WORD' here!!!! ****
  10.                  ,(SELECT 'common' word),(SELECT 'but' word)) b
  11.       WHERE author NOT IN ('AutoModerator')
  12.       AND LOWER(body) CONTAINS word
  13.       AND subreddit NOT IN ('leagueoflegends')
  14.     ) a JOIN EACH (
  15.       SELECT word, id FROM (
  16.         SELECT SPLIT(LOWER(REGEXP_REPLACE(body, r'[\-/!\?\.\",*:()\[\]|\n]', ' ')), ' ') word, id
  17.         FROM [fh-bigquery:reddit_comments.2015_06]
  18.         WHERE REGEXP_MATCH(LOWER(body), 'but|common|when')
  19.         HAVING LENGTH(word)>2
  20.         AND NOT word IN ('but','and','that')
  21.       )
  22.       GROUP EACH BY 1,2
  23.     ) b
  24.     ON a.id=b.id
  25.     WHERE a.word!=b.word
  26.     GROUP EACH BY 1,2
  27.     HAVING c>60
  28.   )
  29.   WHERE ratio BETWEEN 0.15 AND 0.95
  30.   AND a.word NOT IN ('common','but') AND b.word NOT IN ('common','but')
  31. )
  32. WHERE rank<30
  33. ORDER BY a.word, c DESC
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement