Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Words that co-occur with a seed word in Reddit comments (table
# fh-bigquery:reddit_comments.2015_06). Dialect: BigQuery legacy SQL.
#
# Output columns:
#   a.word - the seed word
#   b.word - a token found in the same comment as the seed
#   c      - number of joined rows for the (seed, token) pair; this is the
#            number of comments containing both, assuming id is unique per
#            comment
#   ratio  - the pair's share of c among all seeds for that token
#
# To analyse another word, change the seed in BOTH marked places: the seed
# list in the CROSS JOIN and the REGEXP_MATCH pre-filter. The two sides are
# inner-joined on id, so a seed missing from the pre-filter silently loses
# every comment that does not also contain one of the other seeds.
#
# 'common' and 'but' are extra seeds whose rows are removed from the final
# output; they only contribute to the denominator of ratio.
SELECT a.word, b.word, c, ratio
FROM (
  # Rank each seed's tokens by co-occurrence count, highest first.
  SELECT a.word, b.word, c, ratio,
    RANK() OVER(PARTITION BY a.word ORDER BY c DESC) rank
  FROM (
    SELECT a.word, b.word, COUNT(*) c,
      RATIO_TO_REPORT(c) OVER(PARTITION BY b.word) ratio
    FROM (
      # Side a: one row per (seed, comment) where the comment body contains
      # the seed. CONTAINS is a substring test, so a seed also matches when
      # it appears inside a longer word.
      SELECT word, id
      FROM [fh-bigquery:reddit_comments.2015_06] a
      CROSS JOIN (
        SELECT word FROM
          (SELECT 'love' word),  # ***** seed word: REPLACE here AND in REGEXP_MATCH below ****
          (SELECT 'common' word),
          (SELECT 'but' word)
      ) b
      WHERE author NOT IN ('AutoModerator')
        AND LOWER(body) CONTAINS word
        AND subreddit NOT IN ('leagueoflegends')
    ) a
    JOIN EACH (
      # Side b: the distinct tokens of each candidate comment.
      SELECT word, id
      FROM (
        # Replace punctuation with spaces, lowercase, then split on spaces.
        SELECT SPLIT(LOWER(REGEXP_REPLACE(body, r'[\-/!\?\.\",*:()\[\]|\n]', ' ')), ' ') word, id
        FROM [fh-bigquery:reddit_comments.2015_06]
        # Pre-filter: only tokenize comments that can match a seed on side a.
        # Must list exactly the seeds from the CROSS JOIN above.
        WHERE REGEXP_MATCH(LOWER(body), 'love|common|but')
        # HAVING rather than WHERE: the filter refers to the word alias
        # computed in this SELECT. Drops short tokens and a few stop words.
        HAVING LENGTH(word)>2
          AND NOT word IN ('but','and','that')
      )
      # Group by (word, id) so a token repeated in one comment counts once.
      GROUP EACH BY 1,2
    ) b
    ON a.id=b.id
    # Do not pair a seed with itself.
    WHERE a.word!=b.word
    # Group by (a.word, b.word); keep only pairs with more than 60 rows.
    GROUP EACH BY 1,2
    HAVING c>60
  )
  WHERE ratio BETWEEN 0.15 AND 0.95
    # Hide the helper seeds, both as seed and as co-occurring token.
    AND a.word NOT IN ('common','but') AND b.word NOT IN ('common','but')
)
# RANK() starts at 1, so this keeps ranks 1 to 29 for each seed.
WHERE rank<30
ORDER BY a.word, c DESC
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement