Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from __future__ import unicode_literals
- import json
- import os
- import sys
- import time
- import urlparse
- import feedparser
- import pid
- import praw
- from BeautifulSoup import BeautifulSoup
- from praw.exceptions import APIException
class Source(object):
    """Base class for anything that can produce articles to post.

    Each source carries a ``state`` dict that the caller may persist between
    runs; subclasses set ``name`` to the key that state is stored under.
    """

    # Identifier for this source; concrete subclasses override it.
    name = None

    def __init__(self, state=None):
        """Keep the given state dict, or start with an empty one if it is falsy."""
        self.state = state if state else {}

    def iter_articles(self):
        """Produce the articles to post; concrete sources must implement this."""
        raise NotImplementedError()
class RSSSource(Source):
    """Source fed by RSS entries that stops at the entry seen first on the last run.

    Subclasses provide ``iter_rss_items`` and may override ``filter_article``
    to drop or rewrite individual entries.
    """

    def iter_rss_items(self):
        """Yield raw feed entries; concrete feeds must implement this."""
        raise NotImplementedError()

    def filter_article(self, article):
        """Return the article to post, or a falsy value to skip it.

        The default keeps every article unchanged.
        """
        return article

    def iter_articles(self):
        """Yield filtered entries until the previously recorded entry id is reached.

        The id of the first entry seen in this pass is written to
        ``self.state['last_article_id']`` so the next run knows where to stop.
        """
        stop_at = self.state.get('last_article_id')
        newest_id = None
        for entry in self.iter_rss_items():
            if newest_id is None:
                # Remember the first entry of this pass as the next stop marker.
                newest_id = entry['id']
                self.state['last_article_id'] = newest_id
            if entry['id'] == stop_at:
                # Everything from here on was already handled by an earlier run.
                break
            kept = self.filter_article(entry)
            if not kept:
                continue
            yield kept
class SooToday(RSSSource):
    """RSS source for sootoday.com, limited to local news and the police beat."""

    name = 'sootoday'

    def iter_rss_items(self):
        """Yield entries from pages 1 through 20 of the sootoday.com RSS feed.

        Pages are fetched lazily, one request per page, as the consumer
        advances through the entries.
        """
        for page in range(1, 21):
            feed = feedparser.parse('https://www.sootoday.com/rss?page={}'.format(page))
            for entry in feed['entries']:
                yield entry

    def filter_article(self, article):
        """Keep articles by the first path segment of their link.

        Segments containing ``local`` pass through unchanged; the
        ``city-police-beat`` segment passes with a title prefix; anything
        else yields ``None`` so the article is skipped.
        """
        path = urlparse.urlparse(article['link']).path
        category = path.strip('/').split('/')[0]
        if 'local' in category:
            return article
        if category != 'city-police-beat':
            return None
        article['title'] = 'City Police Beat: {}'.format(article['title'])
        return article
class CitySite(RSSSource):
    """RSS source for the saultstemarie.ca newsroom feed."""

    name = 'city'

    def iter_rss_items(self):
        """Yield every entry of the city newsroom RSS feed."""
        feed = feedparser.parse('http://saultstemarie.ca/Newsroom.aspx?rss=news')
        for entry in feed['entries']:
            yield entry

    def filter_article(self, article):
        """Strip a trailing ``?feed=news`` from the article link, then keep the article."""
        link = article['link']
        if link.endswith('?feed=news'):
            # '?feed=news' is ten characters long.
            article['link'] = link[:-10]
        return article
class Bot(object):
    """Collect new articles from the configured sources and submit them to reddit.

    Per-source state is loaded from and saved to ``state.json`` inside
    ``STATE_DIR``; a pid file in the same directory guards ``run``.

    Args:
        credentials: dict with ``client_id``, ``client_secret``, ``username``
            and ``password`` used to build the reddit client.
        source_classes: iterable of ``Source`` subclasses to instantiate on
            each run.
        fake: when true, articles are only printed, never submitted.
    """

    # Directory holding the pid file and state.json ('~' is expanded at call time).
    STATE_DIR = '~/.saultbot'
    # Wait, in seconds, used when the rate-limit message cannot be parsed.
    DEFAULT_RATELIMIT_WAIT = 600

    def __init__(self, credentials, source_classes, fake=False):
        self.fake = fake
        self.credentials = credentials
        self.source_classes = source_classes
        # Populated by run(); defined here so the attribute always exists.
        self.sources = []
        self.subreddit = None

    def _state_path(self):
        """Return the expanded path of the JSON state file."""
        return os.path.join(os.path.expanduser(self.STATE_DIR), 'state.json')

    def get_subreddit(self):
        """Return the target subreddit, creating the reddit client on first use."""
        if self.subreddit is None:
            reddit = praw.Reddit(client_id=self.credentials['client_id'],
                                 client_secret=self.credentials['client_secret'],
                                 password=self.credentials['password'],
                                 user_agent='SaultBot by /u/metalhedd',
                                 username=self.credentials['username'])
            self.subreddit = reddit.subreddit('saultstemarie')
        return self.subreddit

    def run(self):
        """Post every new article from every source, then persist the state.

        The whole pass runs while holding a pid file in ``STATE_DIR``.
        """
        with pid.PidFile(piddir=os.path.expanduser(self.STATE_DIR)):
            state = self.load_state()
            self.sources = [source(state.get(source.name)) for source in self.source_classes]
            for source in self.sources:
                for article in source.iter_articles():
                    self.post_article(article)
                state[source.name] = source.state
            self.save_state(state)

    def strip_tags(self, text):
        """Return ``text`` with HTML markup removed, text nodes joined by spaces."""
        return BeautifulSoup(text).getText(separator=' ')

    @staticmethod
    def _ratelimit_delay(message):
        """Return how many seconds to wait for a rate-limit error message.

        Parses "try again in <N> <unit>" out of ``message``. Minute counts are
        rounded up by one extra minute; any other unit is taken as seconds.
        Missing or unparseable messages fall back to
        ``DEFAULT_RATELIMIT_WAIT`` instead of raising.
        """
        fallback = Bot.DEFAULT_RATELIMIT_WAIT
        if not message or 'try again in' not in message:
            return fallback
        words = message.split('try again in')[1].split()
        try:
            amount = int(words[0])
            unit = words[1]
        except (IndexError, ValueError):
            # Message did not have the "<number> <unit>" shape we expect.
            return fallback
        if unit.startswith('minute'):
            return (amount + 1) * 60
        return amount

    def post_article(self, article):
        """Submit one article as a link post, retrying while rate limited.

        The article title is replaced in place by its tag-stripped form. In
        fake mode the article is only printed. Any API error other than a
        rate limit is printed and the article is given up on.
        """
        article['title'] = self.strip_tags(article['title'])
        print('posting {} as "{}"'.format(article['link'], article['title']))
        if self.fake:
            return
        while True:
            try:
                submission = self.get_subreddit().submit(
                    article['title'],
                    url=article['link'],
                    send_replies=False,
                    resubmit=False,
                )
            except APIException as e:
                print(e)
                if e.field != 'ratelimit':
                    return
                seconds = self._ratelimit_delay(e.message)
                print('waiting {} seconds to post'.format(seconds))
                time.sleep(seconds)
                # Small extra margin before the next attempt.
                time.sleep(1)
            else:
                print('submitted {}'.format(submission))
                return

    def load_state(self):
        """Return the saved state dict, or ``{}`` if it is missing or unusable.

        A missing or unreadable file, invalid JSON, or JSON that is not an
        object all count as "no state".
        """
        try:
            with open(self._state_path()) as sf:
                state = json.load(sf)
        except (IOError, OSError, ValueError):
            return {}
        # run() calls state.get(), so anything but a dict is unusable.
        return state if isinstance(state, dict) else {}

    def save_state(self, state):
        """Write ``state`` as JSON to the state file, creating the directory if needed."""
        state_dir = os.path.expanduser(self.STATE_DIR)
        if not os.path.exists(state_dir):
            os.makedirs(state_dir)
        with open(self._state_path(), 'w') as sf:
            json.dump(state, sf)
def get_credentials():
    """Load the reddit API credentials from ~/.saultbot/credentials.json.

    Returns:
        The parsed JSON object; it is guaranteed to contain ``client_id``,
        ``client_secret``, ``username`` and ``password``.

    Raises:
        RuntimeError: if the file does not exist, is not a JSON object, or
            lacks any required key. The message names the missing keys only
            and never includes the credential values themselves.
    """
    path = os.path.expanduser('~/.saultbot/credentials.json')
    if not os.path.exists(path):
        raise RuntimeError("No credentials.json found")
    with open(path, 'r') as cf:
        creds = json.load(cf)
    if not isinstance(creds, dict):
        raise RuntimeError("Invalid Credentials: expected a JSON object")
    required = ('client_id', 'client_secret', 'username', 'password')
    missing = [key for key in required if key not in creds]
    if missing:
        # Report key names only; echoing creds would leak secrets into logs.
        raise RuntimeError("Invalid Credentials: missing {}".format(', '.join(missing)))
    return creds
if __name__ == '__main__':
    # Passing the word "fake" anywhere on the command line turns on dry-run mode.
    dry_run = 'fake' in sys.argv
    sources = [SooToday, CitySite]
    bot = Bot(credentials=get_credentials(), source_classes=sources, fake=dry_run)
    bot.run()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement