Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import time
from collections import Counter

import ipdb
import praw
import spacy
def load_submission_ids(path='out.txt'):
    """Return the submission ids listed in *path*, one per line.

    Lines starting with 'TS CHANGE' are markers rather than ids and are
    dropped, as are blank lines. The file is closed before returning.
    """
    with open(path) as handle:
        entries = (raw.rstrip('\n') for raw in handle)
        return [
            entry for entry in entries
            if entry.strip() and not entry.startswith('TS CHANGE')
        ]


def fetch_submission(reddit, submission_id, attempts=5, delay=2.0):
    """Return the submission with its comment tree loaded.

    Failures are retried up to *attempts* times, sleeping ``delay * attempt``
    seconds between tries; the last error is re-raised once the attempts are
    exhausted instead of spinning forever.
    """
    attempts = max(1, attempts)
    for attempt in range(1, attempts + 1):
        try:
            submission = reddit.submission(submission_id)
            # PRAW builds submissions lazily, so the request that can
            # actually fail is issued here; it has to sit inside the retry.
            submission.comments.replace_more(limit=0)
            return submission
        except Exception:
            # Deliberately not a bare ``except``: Ctrl-C must still stop the run.
            if attempt == attempts:
                raise
            time.sleep(delay * attempt)


def count_place_mentions(texts, nlp):
    """Count how often each 'GPE' entity text occurs across *texts*.

    *nlp* is called on every text and must return an object whose ``ents``
    expose ``label_`` and ``text``.
    """
    mentions = Counter()
    for text in texts:
        mentions.update(
            ent.text for ent in nlp(text).ents if ent.label_ == 'GPE'
        )
    return mentions


def mention_threshold(counts):
    """Return half the mean count of the entries whose count is not 1.

    Returns 0.0 when no entry qualifies (empty input, or every place seen
    exactly once); the unguarded division used to raise ZeroDivisionError
    in that case.
    """
    repeated = [n for n in counts.values() if n != 1]
    if not repeated:
        return 0.0
    return sum(repeated) / len(repeated) / 2


def frequent_places(counts):
    """Return ``(count, place)`` pairs above the threshold, largest first."""
    cutoff = mention_threshold(counts)
    return sorted(
        ((n, place) for place, n in counts.items() if n > cutoff),
        reverse=True,
    )


def main():
    """Print, for every submission in out.txt, its title and frequent places."""
    # NOTE(review): the 'en' shortcut is believed to be rejected by spaCy 3.x,
    # which expects a full package name such as 'en_core_web_sm' - confirm
    # against the installed version before changing it.
    nlp = spacy.load('en')
    # NOTE(review): credentials are hard-coded in the source; consider moving
    # them to praw.ini or environment variables.
    reddit = praw.Reddit(client_id='',
                         client_secret='_iQ',
                         password='',
                         user_agent='testscript by /u/fakebot3',
                         username='')
    for submission_id in load_submission_ids('out.txt'):
        submission = fetch_submission(reddit, submission_id)
        bodies = (comment.body for comment in submission.comments.list())
        countries = count_place_mentions(bodies, nlp)
        print(submission.title)
        print(frequent_places(countries))


if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement