Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import praw
- import sys
- import datetime
- from shutil import copyfile
- from django.utils.encoding import smart_str
print("Starting File 1")

# Keyword arguments for the PRAW client (all values are redacted in this paste).
credentials = {
    "client_id": "redacted",
    "client_secret": "redacted",
    "user_agent": "redacted",
    "password": "redacted",
    "username": "redacted",
}
r = praw.Reddit(**credentials)

# Build the timestamp prefix shared by both output files, e.g. "2017-9-3 Hour5".
# The components are plain integers converted to text, so they are NOT zero-padded.
now = datetime.datetime.now()
date_part = "-".join(smart_str(piece) for piece in (now.year, now.month, now.day))
fileDate = date_part + " Hour" + smart_str(now.hour)

# First output file: one matching author name per line.
fileName = fileDate + ' CFB.txt'
# cfbmeta has no flairs
mainSmallSubs = ["redacted", "redacted", "redacted", "redacted", "redacted", "redacted"]

# Each entry is (subreddit name, max number of newest submissions, flair prefix).
# The first two entries scan with the "Wisconsin Badgers" prefix, once with no
# submission limit and once limited to 500; the small subs are limited to 30
# submissions each and use their own flair prefix.
scrape_plan = [
    ("redacted", None, "Wisconsin Badgers"),
    ("redacted", 500, "Wisconsin Badgers"),
]
scrape_plan += [(small_sub, 30, "redacted") for small_sub in mainSmallSubs]

# The context manager guarantees the file is closed even if a Reddit request
# raises part-way through the scrape.
with open(fileName, "w") as author_file:
    for sub_name, post_limit, flair_prefix in scrape_plan:
        for submission in r.subreddit(sub_name).new(limit=post_limit):
            # Resolve "load more comments" placeholders so .list() yields
            # only Comment objects.
            submission.comments.replace_more()
            for comment in submission.comments.list():
                if not smart_str(comment.author_flair_text).startswith(flair_prefix):
                    continue
                author = comment.author
                if author is None:
                    # No author object to take a name from; skip instead of
                    # crashing on `.name`.
                    continue
                author_file.write(smart_str(author.name) + '\n')
                # Flush per match so names found so far survive a crash.
                author_file.flush()
print("End File 1")
print("Start De-Duplication File 1")
# De-duplicate the first output file, keeping the first occurrence of every
# line and preserving the original order. The unique lines are staged in
# "out.txt" and then copied back over the original file.
lines_seen = set()  # holds lines already seen
# Both handles are managed by `with` so neither is left open, including the
# read handle that was previously never closed.
with open(fileName, "r") as infile, open("out.txt", "w") as outfile:
    for line in infile:
        if line not in lines_seen:  # not a duplicate
            outfile.write(line)
            lines_seen.add(line)
copyfile("out.txt", fileName)
print("End De-Duplication File 1")
- #########################################################################
- #########################################################################
- #########################################################################
print("Starting File 2")
# Second output file: one matching author name per line.
fileName2 = fileDate + ' NOcfb.txt'
sideSubs = ["redacted", "redacted", "redacted"]

# Each entry is (subreddit name, max number of newest submissions, flair prefix).
# The first subreddit is scanned without a submission limit using the
# "Wisconsin" prefix; the side subs are limited to 50 submissions each.
scrape_plan_2 = [("redacted", None, "Wisconsin")]
scrape_plan_2 += [(side_sub, 50, "redacted") for side_sub in sideSubs]

# The context manager guarantees the file is closed even if a Reddit request
# raises part-way through the scrape.
with open(fileName2, "w") as author_file:
    for sub_name, post_limit, flair_prefix in scrape_plan_2:
        for submission in r.subreddit(sub_name).new(limit=post_limit):
            # Resolve "load more comments" placeholders so .list() yields
            # only Comment objects.
            submission.comments.replace_more()
            for comment in submission.comments.list():
                if not smart_str(comment.author_flair_text).startswith(flair_prefix):
                    continue
                author = comment.author
                if author is None:
                    # No author object to take a name from; skip instead of
                    # crashing on `.name`.
                    continue
                author_file.write(author.name + '\n')
                # Flush per match so names found so far survive a crash.
                author_file.flush()
print("End File 2")
print("Start De-Duplication File 2")
# De-duplicate the second output file, keeping the first occurrence of every
# line and preserving the original order. The unique lines are staged in
# "out.txt" and then copied back over the original file.
lines_seen = set()  # holds lines already seen
# Both handles are managed by `with` so neither is left open, including the
# read handle that was previously never closed.
with open(fileName2, "r") as infile, open("out.txt", "w") as outfile:
    for line in infile:
        if line not in lines_seen:  # not a duplicate
            outfile.write(line)
            lines_seen.add(line)
copyfile("out.txt", fileName2)
print("End De-Duplication File 2")
print("Scrape Complete")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement