Advertisement
Guest User

Untitled

a guest
Apr 10th, 2019
193
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.00 KB | None | 0 0
  import datetime
  import json
  import time
  import urllib
  import urllib.error
  import urllib.parse
  import urllib.request

  import praw
  import psaw
  2.  
  class RedditConnectionHandler():
      """Hold Reddit credentials, a praw connection, and Pushshift helpers.

      Constructing an instance stores the credentials and immediately calls
      ``login()``; the resulting ``praw.Reddit`` object is kept in
      ``self.connection``.
      """

      # HTTP status codes treated as "Reddit is temporarily unavailable".
      TRANSIENT_HTTP_CODES = (429, 500, 502, 503, 504)
      # Seconds to wait between login attempts.
      LOGIN_RETRY_DELAY = 60
      # Pushshift submission search endpoint and the page size requested.
      PUSHSHIFT_SEARCH_URL = "https://api.pushshift.io/reddit/submission/search/"
      PUSHSHIFT_PAGE_LIMIT = 1000
      # Width, in seconds, of the windows used when a range must be split.
      WINDOW_SECONDS = 60

      def __init__(self, username, password, useragent, clientid, secret):
          """Store the credentials and open the connection via ``login()``."""
          self.username = username
          self.password = password
          self.useragent = useragent
          self.clientid = clientid
          self.secret = secret
          self.connection = self.login()

      def login(self):
          """Return a ``praw.Reddit`` instance, retrying until one is created.

          Every failure is reported on stdout and followed by a pause of
          ``LOGIN_RETRY_DELAY`` seconds, so a persistent error (for example a
          bad configuration) no longer spins in a tight loop. As in the
          original code, this method never raises; it keeps retrying.
          """
          while True:
              try:
                  return praw.Reddit(client_id=self.clientid,
                                     client_secret=self.secret,
                                     password=self.password,
                                     username=self.username,
                                     user_agent=self.useragent)
              except urllib.error.HTTPError as e:
                  if e.code in self.TRANSIENT_HTTP_CODES:
                      print("Reddit is down (error %s), sleeping..." % e.code)
                  else:
                      print("Error in login", e)
              except Exception as e:
                  print("Error in login", e)
              time.sleep(self.LOGIN_RETRY_DELAY)

      def convertdatetimetoutc(self, datetimestamp):
          """Return *datetimestamp* as whole seconds since 1970-01-01 00:00.

          Naive datetimes are taken at face value (no timezone shift), aware
          datetimes are converted to UTC first, and plain ``date`` objects
          count as midnight. Sub-second precision is discarded.

          The previous implementation used only ``toordinal()``, which drops
          the time of day, so stepping a timestamp by one minute never changed
          the converted value. For midnight values the result is unchanged.
          """
          if not isinstance(datetimestamp, datetime.datetime):
              # A plain date has no time component; day arithmetic is exact.
              epoch_ordinal = datetime.date(1970, 1, 1).toordinal()
              return (datetimestamp.toordinal() - epoch_ordinal) * 24 * 60 * 60
          if datetimestamp.utcoffset() is not None:
              # Normalise aware values to naive UTC before subtracting.
              datetimestamp = datetimestamp.astimezone(
                  datetime.timezone.utc).replace(tzinfo=None)
          elapsed = datetimestamp - datetime.datetime(1970, 1, 1)
          return elapsed.days * 24 * 60 * 60 + elapsed.seconds

      def addMinuteToTimestamp(self, timestamp):
          """Return *timestamp* moved forward by one minute."""
          return timestamp + datetime.timedelta(minutes=1)

      def addDayToTimestamp(self, timestamp):
          """Return *timestamp* moved forward by one day."""
          return timestamp + datetime.timedelta(days=1)

      def _fetch_links(self, after_utc, before_utc):
          """Fetch one Pushshift page and return its ``full_link`` values."""
          query = urllib.parse.urlencode({
              "after": after_utc,
              "before": before_utc,
              "limit": self.PUSHSHIFT_PAGE_LIMIT,
              "filter": "full_link",
          })
          with urllib.request.urlopen(self.PUSHSHIFT_SEARCH_URL + "?" + query) as response:
              payload = json.loads(response.read().decode())
          return [entry["full_link"]
                  for entry in payload.get("data", [])
                  if "full_link" in entry]

      # Gets submissions from all of reddit between two points in time.
      def getTopSubmissions(self, subreddit_name, begindate, enddate):
          """Collect submission links posted between *begindate* and *enddate*.

          The whole range is requested first. If that response is as large as
          the page limit (so it may have been truncated), the range is walked
          again in ``WINDOW_SECONDS``-wide windows and the links from every
          window are gathered, skipping duplicates.

          Args:
              subreddit_name: Accepted for interface compatibility; the query
                  is not restricted by subreddit, as in the original code.
              begindate: Start of the range (``datetime`` or ``date``).
              enddate: End of the range (``datetime`` or ``date``).

          Returns:
              The list of ``full_link`` URLs, which is also printed.

          Raises:
              urllib.error.URLError: If a Pushshift request fails.
          """
          range_start = self.convertdatetimetoutc(begindate)
          range_end = self.convertdatetimetoutc(enddate)

          urls = self._fetch_links(range_start, range_end)
          if len(urls) >= self.PUSHSHIFT_PAGE_LIMIT:
              # NOTE(review): whether Pushshift treats after/before as
              # inclusive or exclusive is not visible here; adjacent windows
              # share a boundary and duplicates are dropped - confirm that no
              # boundary-second submissions are missed.
              urls = []
              seen = set()
              window_start = range_start
              # Integer stepping guarantees progress and termination.
              while window_start < range_end:
                  window_end = min(window_start + self.WINDOW_SECONDS, range_end)
                  for link in self._fetch_links(window_start, window_end):
                      if link not in seen:
                          seen.add(link)
                          urls.append(link)
                  window_start = window_end

          # The original reported its result only on stdout; keep that output.
          print(urls)
          return urls
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement