Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import datetime
import json
import time
import urllib
import urllib.error
import urllib.request

import praw
import psaw
class RedditConnectionHandler:
    """Hold Reddit credentials, a PRAW connection, and Pushshift search helpers.

    The constructor stores the credentials it is given and immediately calls
    ``login()``; the resulting ``praw.Reddit`` object is kept in
    ``self.connection``.
    """

    # Base endpoint of the Pushshift submission search.
    SEARCH_ENDPOINT = "https://api.pushshift.io/reddit/submission/search/"
    # Result cap requested per query; a reply of this size is treated as
    # "window saturated, narrow it down".
    PAGE_LIMIT = 1000
    # Seconds to wait between login attempts.
    RETRY_DELAY_SECONDS = 60
    # HTTP status codes reported as "Reddit is down".
    TRANSIENT_HTTP_CODES = (429, 500, 502, 503, 504)
    # Width, in seconds, of the narrow windows used once a query saturates.
    MINUTE_SECONDS = 60

    def __init__(self, username, password, useragent, clientid, secret):
        """Store the credentials and open the connection via ``login()``."""
        self.username = username
        self.password = password
        self.useragent = useragent
        self.clientid = clientid
        self.secret = secret
        self.connection = self.login()

    def login(self):
        """Create the ``praw.Reddit`` client, retrying until it succeeds.

        Every failed attempt is reported on stdout and followed by a pause of
        ``RETRY_DELAY_SECONDS`` so that a persistent failure does not turn
        into a busy loop.

        Returns:
            The ``praw.Reddit`` instance built from the stored credentials.
        """
        # NOTE(review): constructing praw.Reddit may not contact Reddit at
        # all, in which case the HTTP handler below never fires -- confirm.
        while True:
            try:
                return praw.Reddit(client_id=self.clientid,
                                   client_secret=self.secret,
                                   password=self.password,
                                   username=self.username,
                                   user_agent=self.useragent)
            except urllib.error.HTTPError as e:
                if e.code in self.TRANSIENT_HTTP_CODES:
                    print("Reddit is down (error %s), sleeping..." % e.code)
                else:
                    # Previously swallowed silently and retried at once.
                    print("Error in login", e)
            except Exception as e:
                # Deliberate best-effort: report and keep retrying.
                print("Error in login", e)
            time.sleep(self.RETRY_DELAY_SECONDS)

    def convertdatetimetoutc(self, datetimestamp):
        """Return *datetimestamp* as whole seconds since 1970-01-01 00:00.

        The time of day is included, so two values one minute apart differ by
        60. Plain ``datetime.date`` values are taken as midnight of that day.
        Timezone-aware values are converted to UTC first; naive values are
        used as they are.
        """
        if not isinstance(datetimestamp, datetime.datetime):
            # A plain date carries no time component: use midnight.
            datetimestamp = datetime.datetime.combine(datetimestamp,
                                                      datetime.time())
        elif datetimestamp.utcoffset() is not None:
            # Aware value: normalise to UTC so it can be compared with the
            # naive epoch below.
            datetimestamp = datetimestamp.astimezone(
                datetime.timezone.utc).replace(tzinfo=None)
        epoch = datetime.datetime(1970, 1, 1)
        # Floor division by one second yields an int and drops microseconds.
        return (datetimestamp - epoch) // datetime.timedelta(seconds=1)

    def addMinuteToTimestamp(self, timestamp):
        """Return *timestamp* shifted forward by one minute."""
        return timestamp + datetime.timedelta(minutes=1)

    def addDayToTimestamp(self, timestamp):
        """Return *timestamp* shifted forward by one day."""
        return timestamp + datetime.timedelta(days=1)

    def _buildSearchUrl(self, after_utc, before_utc):
        """Return the Pushshift search URL for the given epoch-second bounds."""
        return (self.SEARCH_ENDPOINT
                + "?after=" + str(after_utc)
                + "&before=" + str(before_utc)
                + "&limit=" + str(self.PAGE_LIMIT)
                + "&filter=full_link")

    def _fetchLinks(self, after_utc, before_utc):
        """Query Pushshift once and return the ``full_link`` of every hit."""
        search_url = self._buildSearchUrl(after_utc, before_utc)
        with urllib.request.urlopen(search_url) as response:
            payload = json.loads(response.read().decode())
        return [entry["full_link"] for entry in payload["data"]]

    # Gets submissions from all of reddit between two points in time.
    def getTopSubmissions(self, subreddit_name, begindate, enddate):
        """Collect submission links created between *begindate* and *enddate*.

        The whole range is queried once. If that reply is as large as
        ``PAGE_LIMIT`` it is assumed to be truncated, so the range is queried
        again in consecutive one-minute windows and those results are used
        instead.

        Args:
            subreddit_name: Currently unused; the query is not restricted to
                a subreddit.
            begindate: Start of the range (``datetime`` or ``date``).
            enddate: End of the range (``datetime`` or ``date``).

        Returns:
            A list of ``full_link`` strings. The list is also printed.

        Raises:
            urllib.error.URLError: If a Pushshift request fails.
        """
        begindateutc = self.convertdatetimetoutc(begindate)
        enddateutc = self.convertdatetimetoutc(enddate)

        urls = self._fetchLinks(begindateutc, enddateutc)
        print(begindateutc, enddateutc)

        if len(urls) >= self.PAGE_LIMIT:
            print("in here")
            urls = []
            # Walk in epoch seconds so the loop always advances, whatever
            # type the caller passed in.
            # NOTE(review): Pushshift's after/before bounds may be exclusive,
            # in which case a submission created exactly on a window boundary
            # is missed -- confirm against the API documentation.
            # A single minute that itself saturates cannot be narrowed
            # further; its (possibly truncated) reply is taken as is.
            for window_start in range(begindateutc, enddateutc,
                                      self.MINUTE_SECONDS):
                window_end = min(window_start + self.MINUTE_SECONDS,
                                 enddateutc)
                urls.extend(self._fetchLinks(window_start, window_end))
                print(window_start, window_end)

        print(urls)
        return urls
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement