Advertisement
Guest User

flair

a guest
Jul 29th, 2018
95
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.35 KB | None | 0 0
  1. import praw
  2. import sys
  3. import datetime
  4. from shutil import copyfile
  5. from django.utils.encoding import smart_str
  6. print("Starting File 1")
  7. r = praw.Reddit(client_id="redacted",
  8. client_secret="redacted",
  9. user_agent="redacted",
  10. password="redacted",
  11. username="redacted")
  12.  
  13. now = datetime.datetime.now()
  14. fileDate = smart_str(now.year) + "-" + smart_str(now.month) + "-" + smart_str(now.day) + " Hour" + smart_str(now.hour)
  15. fileName = fileDate + ' CFB.txt'
  16. file = open(fileName, "w")
  17.  
  18. for submission in r.subreddit("redacted").new(limit=None):
  19. submission.comments.replace_more()
  20. for comment in submission.comments.list():
  21. if smart_str(comment.author_flair_text).startswith("Wisconsin Badgers"):
  22. file.write(smart_str(comment.author.name) + '\n')
  23. file.flush()
  24.  
  25. for submission in r.subreddit("redacted").new(limit=500):
  26. submission.comments.replace_more()
  27. for comment in submission.comments.list():
  28. if smart_str(comment.author_flair_text).startswith("Wisconsin Badgers"):
  29. file.write(smart_str(comment.author.name) + '\n')
  30. file.flush()
  31.  
  32. # cfbmeta has no flairs
  33.  
  34. mainSmallSubs = ["redacted", "redacted", "redacted", "redacted", "redacted", "redacted"]
  35. for sub in mainSmallSubs:
  36. for submission in r.subreddit(sub).new(limit=30):
  37. submission.comments.replace_more()
  38. for comment in submission.comments.list():
  39. if smart_str(comment.author_flair_text).startswith("redacted"):
  40. file.write(comment.author.name + '\n')
  41. file.flush()
  42. file.close()
  43. print("End File 1")
  44. print("Start De-Duplication File 1")
  45. # deduplicate
  46. lines_seen = set() # holds lines already seen
  47. outfile = open("out.txt", "w")
  48. for line in open(fileName, "r"):
  49. if line not in lines_seen: # not a duplicate
  50. outfile.write(line)
  51. lines_seen.add(line)
  52. outfile.close()
  53. copyfile("out.txt", fileName)
  54. print("End De-Duplication File 1")
  55. #########################################################################
  56. #########################################################################
  57. #########################################################################
  58.  
  59. print("Starting File 2")
  60. fileName2 = fileDate + ' NOcfb.txt'
  61. file = open(fileName2, "w")
  62.  
  63. for submission in r.subreddit("redacted").new(limit=None):
  64. submission.comments.replace_more()
  65. for comment in submission.comments.list():
  66. if smart_str(comment.author_flair_text).startswith("Wisconsin"):
  67. file.write(comment.author.name + '\n')
  68. file.flush()
  69.  
  70.  
  71. sideSubs = ["redacted", "redacted", "redacted"]
  72. for sub in sideSubs:
  73. for submission in r.subreddit(sub).new(limit=50):
  74. submission.comments.replace_more()
  75. for comment in submission.comments.list():
  76. if smart_str(comment.author_flair_text).startswith("redacted"):
  77. file.write(comment.author.name + '\n')
  78. file.flush()
  79. file.close()
  80.  
  81. print("End File 2")
  82. print("Start De-Duplication File 2")
  83. # deduplicate
  84. lines_seen = set() # holds lines already seen
  85. outfile = open("out.txt", "w")
  86. for line in open(fileName2, "r"):
  87. if line not in lines_seen: # not a duplicate
  88. outfile.write(line)
  89. lines_seen.add(line)
  90. outfile.close()
  91. copyfile("out.txt", fileName2)
  92. print("End De-Duplication File 2")
  93.  
  94. print("Scrape Complete")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement