Advertisement
Guest User

Untitled

a guest
Sep 28th, 2017
88
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. from __future__ import unicode_literals
  2.  
  3. import json
  4. import os
  5. import sys
  6. import time
  7. import urlparse
  8.  
  9. import feedparser
  10. import pid
  11. import praw
  12. from BeautifulSoup import BeautifulSoup
  13. from praw.exceptions import APIException
  14.  
  15.  
  16. class Source(object):
  17.     name = None
  18.  
  19.     def __init__(self, state=None):
  20.         self.state = state or {}
  21.  
  22.     def iter_articles(self):
  23.         raise NotImplementedError()
  24.  
  25.  
  26. class RSSSource(Source):
  27.  
  28.     def iter_rss_items(self):
  29.         raise NotImplementedError()
  30.  
  31.     def filter_article(self, article):
  32.         return article
  33.  
  34.     def iter_articles(self):
  35.         last_id = self.state.get('last_article_id')
  36.         next_last_id = None
  37.  
  38.         for article in self.iter_rss_items():
  39.             if next_last_id is None:
  40.                 self.state['last_article_id'] = next_last_id = article['id']
  41.  
  42.             if article['id'] == last_id:
  43.                 return
  44.  
  45.             article = self.filter_article(article)
  46.             if article:
  47.                 yield article
  48.  
  49.  
  50. class SooToday(RSSSource):
  51.  
  52.     name = 'sootoday'
  53.  
  54.     def iter_rss_items(self):
  55.         for i in range(20):
  56.             page = i + 1
  57.             feed = feedparser.parse('https://www.sootoday.com/rss?page={}'.format(page))
  58.             for item in feed['entries']:
  59.                 yield item
  60.  
  61.     def filter_article(self, article):
  62.         url = urlparse.urlparse(article['link'])
  63.         category = url.path.strip('/').split('/')[0]
  64.         if 'local' in category:
  65.             return article
  66.         elif category == 'city-police-beat':
  67.             article['title'] = 'City Police Beat: {}'.format(article['title'])
  68.             return article
  69.  
  70.  
  71. class CitySite(RSSSource):
  72.     name = 'city'
  73.  
  74.     def iter_rss_items(self):
  75.         for item in feedparser.parse('http://saultstemarie.ca/Newsroom.aspx?rss=news')['entries']:
  76.             yield item
  77.  
  78.     def filter_article(self, article):
  79.         if article['link'].endswith('?feed=news'):
  80.             article['link'] = article['link'][:-10]
  81.         return article
  82.  
  83.  
  84. class Bot(object):
  85.  
  86.     def __init__(self, credentials, source_classes, fake=False):
  87.         self.fake = fake
  88.         self.credentials = credentials
  89.         self.source_classes = source_classes
  90.         self.subreddit = None
  91.  
  92.     def get_subreddit(self):
  93.         if self.subreddit is None:
  94.             reddit = praw.Reddit(client_id=self.credentials['client_id'],
  95.                                  client_secret=self.credentials['client_secret'],
  96.                                  password=self.credentials['password'],
  97.                                  user_agent='SaultBot by /u/metalhedd',
  98.                                  username=self.credentials['username'])
  99.             self.subreddit = reddit.subreddit('saultstemarie')
  100.         return self.subreddit
  101.  
  102.     def run(self):
  103.         with pid.PidFile(piddir=os.path.expanduser("~/.saultbot")):
  104.             state = self.load_state()
  105.             self.sources = [source(state.get(source.name)) for source in self.source_classes]
  106.             for source in self.sources:
  107.                 for article in source.iter_articles():
  108.                     self.post_article(article)
  109.                 state[source.name] = source.state
  110.             self.save_state(state)
  111.  
  112.     def strip_tags(self, text):
  113.         return BeautifulSoup(text).getText(separator=' ')
  114.  
  115.     def post_article(self, article):
  116.         article['title'] = self.strip_tags(article['title'])
  117.         print 'posting {} as "{}"'.format(article['link'], article['title'])
  118.         if not self.fake:
  119.             while True:
  120.                 try:
  121.                     submission = self.get_subreddit().submit(
  122.                         article['title'],
  123.                         url=article['link'],
  124.                         send_replies=False,
  125.                         resubmit=False,
  126.                     )
  127.                     print 'submitted {}'.format(submission)
  128.                 except APIException as e:
  129.                     print e
  130.                     if e.field == 'ratelimit':
  131.                         seconds = 600
  132.                         if e.message and 'try again in' in e.message:
  133.                             count, unit = e.message.split('try again in')[1].strip().split(' ')
  134.                             seconds = int(count)
  135.                             if unit.startswith('minute'):
  136.                                 seconds += 1
  137.                                 seconds *= 60
  138.                         print 'waiting {} seconds to post'.format(seconds)
  139.                         time.sleep(seconds)
  140.                     else:
  141.                         return
  142.                 else:
  143.                     return
  144.  
  145.             time.sleep(1)
  146.  
  147.     def load_state(self):
  148.         try:
  149.             return json.load(open(os.path.expanduser('~/.saultbot/state.json')))
  150.         except:
  151.             return {}
  152.  
  153.     def save_state(self, state):
  154.         if not os.path.exists(os.path.expanduser('~/.saultbot')):
  155.             os.makedirs(os.path.expanduser("~/.saultbot"))
  156.  
  157.         with open(os.path.expanduser("~/.saultbot/state.json"), 'w') as sf:
  158.             json.dump(state, sf)
  159.  
  160.  
  161. def get_credentials():
  162.     if not os.path.exists(os.path.expanduser('~/.saultbot/credentials.json')):
  163.         raise RuntimeError("No credentials.json found")
  164.  
  165.     with open(os.path.expanduser('~/.saultbot/credentials.json'), 'r') as cf:
  166.         creds = json.load(cf)
  167.  
  168.     if not ('client_id' in creds and 'client_secret' in creds and 'username' in creds and 'password' in creds):
  169.         raise RuntimeError("Invalid Credentials: {}".format(creds))
  170.  
  171.     return creds
  172.  
  173.  
  174. if __name__ == '__main__':
  175.     bot = Bot(credentials=get_credentials(), source_classes=[SooToday, CitySite], fake='fake' in sys.argv)
  176.     bot.run()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement