Guest User

Untitled

a guest
Apr 10th, 2019
157
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
import datetime
import json
import time
import urllib
import urllib.error
import urllib.request

import praw
import psaw

  3. class RedditConnectionHandler():
  4. def __init__(self, username, password, useragent, clientid, secret):
  5. self.username = username
  6. self.password = password
  7. self.useragent = useragent
  8. self.clientid = clientid
  9. self.secret = secret
  10. self.connection = self.login()
  11.  
  12. def login(self):
  13. while(1):
  14. try:
  15. return praw.Reddit(client_id=self.clientid,
  16. client_secret=self.secret,
  17. password=self.password,
  18. username=self.username,
  19. user_agent=self.useragent)
  20. except urllib.error.HTTPError as e:
  21. if e.code in [429, 500, 502, 503, 504]:
  22. print("Reddit is down (error %s), sleeping..." % e.code)
  23. time.sleep(60)
  24. pass
  25. except Exception as e:
  26. print("Error in login", e)
  27. pass
  28.  
  29.  
  30. def convertdatetimetoutc(self, datetimestamp):
  31. return (datetimestamp.toordinal()-datetime.datetime(1970, 1, 1).toordinal())*24*60*60
  32.  
  33. def addMinuteToTimestamp(self, timestamp):
  34. return timestamp + datetime.timedelta(minutes=1)
  35.  
  36. def addDayToTimestamp(self, timestamp):
  37. return timestamp + datetime.timedelta(days=1)
  38.  
  39. # get's submissions from all of reddit and also lists which boards need to be visited
  40. def getTopSubmissions(self, subreddit_name, begindate, enddate):
  41. urls = []
  42. begindateutc = self.convertdatetimetoutc(begindate)
  43. enddateutc = self.convertdatetimetoutc(enddate)
  44. url_string = "https://api.pushshift.io/reddit/submission/search/?after="+str(begindateutc)+"&before="+str(enddateutc)+"&limit=1000&filter=full_link"
  45. going_by_minute = False
  46. while True:
  47. if (enddateutc == self.convertdatetimetoutc(self.addDayToTimestamp(enddate))):
  48. break
  49.  
  50. tmp_urls = []
  51. with urllib.request.urlopen(url_string) as url:
  52. data = json.loads(url.read().decode())
  53. for reddit_url in data['data']:
  54. tmp_urls.append(reddit_url["full_link"])
  55. if (len(tmp_urls) >= 1000 or going_by_minute):
  56. print("in here")
  57. begindate = self.addMinuteToTimestamp(begindate)
  58. enddate = self.addMinuteToTimestamp(begindate)
  59. begindateutc = self.convertdatetimetoutc(begindate)
  60. enddateutc = self.convertdatetimetoutc(enddate)
  61. url_string = "https://api.pushshift.io/reddit/submission/search/?after="+str(begindateutc)+"&before="+str(enddateutc)+"&limit=1000&filter=full_link"
  62. going_by_minute = True
  63. else:
  64. for tmp_url in tmp_urls:
  65. urls.append(tmp_url)
  66. print(begindateutc, enddateutc)
  67.  
  68.  
  69. print(urls)
RAW Paste Data

Adblocker detected! Please consider disabling it...

We've detected AdBlock Plus or some other adblocking software preventing Pastebin.com from fully loading.

We don't have any obnoxious sound, or popup ads, we actively block these annoying types of ads!

Please add Pastebin.com to your ad blocker whitelist or disable your adblocking software.

×