# View difference between Paste ID: yrg24PRT and DbbMNwvV
# SHOW: | | - or go back to the newest paste.
1
from __future__ import unicode_literals
2
3
import json
4
import os
5
import sys
6
import time
7
import urlparse
8
9
import feedparser
10
import pid
11
import praw
12
from BeautifulSoup import BeautifulSoup
13
from praw.exceptions import APIException
14
15
16
class Source(object):
    """Base class for an article source.

    Subclasses set ``name`` (used by ``Bot.run`` as the key under which the
    source's state is stored) and implement ``iter_articles``.
    """

    # Identifier of the source; overridden by concrete subclasses.
    name = None

    def __init__(self, state=None):
        """Create the source.

        :param state: dict of state carried over from a previous run, or
            ``None``. Any falsy value is replaced by a fresh empty dict.
        """
        self.state = state or {}

    def iter_articles(self):
        """Iterate over the articles to post. Must be overridden.

        :raises NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()
24
25
26
class RSSSource(Source):
    """Source that walks a feed and remembers the newest item it has seen.

    Subclasses implement ``iter_rss_items`` and may override
    ``filter_article`` to drop or rewrite individual items.
    """

    def iter_rss_items(self):
        """Iterate over raw feed items. Must be overridden.

        Each item is indexed with ``['id']`` by ``iter_articles``.

        :raises NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()

    def filter_article(self, article):
        """Return the article to post, or a falsy value to skip it.

        The default implementation keeps every article unchanged.
        """
        return article

    def iter_articles(self):
        """Yield items from ``iter_rss_items`` until the bookmarked one is hit.

        The id of the first item produced by this run is stored in
        ``self.state['last_article_id']``. Iteration stops, without yielding
        it, at the item whose id equals the bookmark that was stored when
        iteration began. Items for which ``filter_article`` returns a falsy
        value are skipped.
        """
        last_id = self.state.get('last_article_id')
        next_last_id = None

        for article in self.iter_rss_items():
            # The bookmark is taken from the very first item, before any
            # filtering, so a filtered-out item can still become the bookmark.
            if next_last_id is None:
                self.state['last_article_id'] = next_last_id = article['id']

            # Reached the item recorded by the previous run: stop here.
            if article['id'] == last_id:
                return

            article = self.filter_article(article)
            if article:
                yield article
48
49
50
class SooToday(RSSSource):
    """Articles from the sootoday.com paginated RSS feed.

    Only articles whose first URL path segment contains ``local`` or equals
    ``city-police-beat`` are kept.
    """

    name = 'sootoday'

    # Feed endpoint; ``{}`` is replaced by the 1-based page number.
    feed_url = 'https://www.sootoday.com/rss?page={}'
    # Maximum number of feed pages requested in one pass.
    max_pages = 20

    def iter_rss_items(self):
        """Yield the entries of feed pages ``1..max_pages`` in order.

        This is a generator, so a page is only fetched once the consumer
        has exhausted the entries of the previous one.
        """
        for page in range(1, self.max_pages + 1):
            feed = feedparser.parse(self.feed_url.format(page))
            for item in feed['entries']:
                yield item

    def filter_article(self, article):
        """Keep local and police-beat articles; return ``None`` for the rest.

        Police-beat articles get their title prefixed with
        ``City Police Beat: `` (the article is modified in place).
        """
        url = urlparse.urlparse(article['link'])
        # First path segment of the article URL.
        category = url.path.strip('/').split('/')[0]
        if 'local' in category:
            return article
        if category == 'city-police-beat':
            article['title'] = 'City Police Beat: {}'.format(article['title'])
            return article
        return None
69
70
71
class CitySite(RSSSource):
    """Articles from the saultstemarie.ca newsroom RSS feed."""

    name = 'city'

    # Address of the feed.
    feed_url = 'http://saultstemarie.ca/Newsroom.aspx?rss=news'
    # Suffix removed from the end of article links when present.
    link_suffix = '?feed=news'

    def iter_rss_items(self):
        """Yield every entry of the feed."""
        for item in feedparser.parse(self.feed_url)['entries']:
            yield item

    def filter_article(self, article):
        """Strip ``link_suffix`` from the article link and keep the article.

        The article is modified in place and always returned.
        """
        link = article['link']
        if link.endswith(self.link_suffix):
            # Slice by the suffix length rather than a hard-coded count.
            article['link'] = link[:-len(self.link_suffix)]
        return article
82
83
84
class Bot(object):
    """Collect new articles from the configured sources and submit them.

    Per-source state is kept as JSON in ``~/.saultbot/state.json``, keyed by
    each source's ``name``.
    """

    # Directory holding the PID file and the state file.
    STATE_DIR = '~/.saultbot'
    # JSON file mapping source name -> that source's state dict.
    STATE_FILE = '~/.saultbot/state.json'
    # Seconds to wait on a rate-limit error whose message cannot be parsed.
    DEFAULT_RATELIMIT_WAIT = 600

    def __init__(self, credentials, source_classes, fake=False):
        """Create the bot.

        :param credentials: mapping with ``client_id``, ``client_secret``,
            ``username`` and ``password``.
        :param source_classes: classes instantiated by ``run`` with their
            saved state; each needs a ``name`` and ``iter_articles``.
        :param fake: when true, articles are printed but never submitted.
        """
        self.fake = fake
        self.credentials = credentials
        self.source_classes = source_classes
        self.subreddit = None
        self.sources = []

    def get_subreddit(self):
        """Return the subreddit handle, creating it on first use."""
        if self.subreddit is None:
            reddit = praw.Reddit(client_id=self.credentials['client_id'],
                                 client_secret=self.credentials['client_secret'],
                                 password=self.credentials['password'],
                                 user_agent='SaultBot by /u/metalhedd',
                                 username=self.credentials['username'])
            self.subreddit = reddit.subreddit('saultstemarie')
        return self.subreddit

    def run(self):
        """Post every new article of every source, then save the state.

        The whole pass runs inside a ``pid.PidFile`` context rooted at the
        state directory (presumably to prevent concurrent runs - confirm
        against the ``pid`` package). State is written once, after all
        sources have been processed.
        """
        with pid.PidFile(piddir=os.path.expanduser(self.STATE_DIR)):
            state = self.load_state()
            self.sources = [source(state.get(source.name)) for source in self.source_classes]
            for source in self.sources:
                for article in source.iter_articles():
                    self.post_article(article)
                state[source.name] = source.state
            self.save_state(state)

    def strip_tags(self, text):
        """Return the text of ``text`` as extracted by BeautifulSoup,
        with the pieces joined by single spaces."""
        return BeautifulSoup(text).getText(separator=' ')

    @staticmethod
    def _ratelimit_delay(message, default=600):
        """Return how many seconds to wait for a rate-limit error message.

        Looks for ``try again in <count> <unit>``. A unit starting with
        ``minute`` is rounded up by one minute and converted to seconds; any
        other unit is taken as seconds. ``default`` is returned when the
        message is empty or does not have that shape.
        """
        if not message or 'try again in' not in message:
            return default
        words = message.split('try again in', 1)[1].split()
        if len(words) < 2 or not words[0].isdigit():
            return default
        seconds = int(words[0])
        if words[1].startswith('minute'):
            # One extra minute of slack before converting to seconds.
            seconds = (seconds + 1) * 60
        return seconds

    def post_article(self, article):
        """Submit one article as a link post.

        The title has its markup stripped in place first. In fake mode the
        article is only printed. A rate-limit error is retried after the
        wait derived from its message; any other API error abandons the
        article. A successful submission is followed by a one-second pause.
        """
        article['title'] = self.strip_tags(article['title'])
        print('posting {} as "{}"'.format(article['link'], article['title']))
        if self.fake:
            return

        while True:
            try:
                submission = self.get_subreddit().submit(
                    article['title'],
                    url=article['link'],
                    send_replies=False,
                    resubmit=False,
                )
            except APIException as e:
                print(e)
                if e.field != 'ratelimit':
                    # Not a rate limit: give up on this article.
                    return
                seconds = self._ratelimit_delay(e.message, self.DEFAULT_RATELIMIT_WAIT)
                print('waiting {} seconds to post'.format(seconds))
                time.sleep(seconds)
            else:
                print('submitted {}'.format(submission))
                break

        # Short pause between consecutive submissions.
        time.sleep(1)

    def load_state(self):
        """Return the saved state dict, or ``{}`` if it cannot be read.

        A missing, unreadable or malformed state file is treated as "no
        state yet" rather than as an error.
        """
        try:
            with open(os.path.expanduser(self.STATE_FILE)) as sf:
                return json.load(sf)
        except (IOError, OSError, ValueError):
            return {}

    def save_state(self, state):
        """Write ``state`` as JSON, creating the state directory if needed."""
        state_dir = os.path.expanduser(self.STATE_DIR)
        if not os.path.exists(state_dir):
            os.makedirs(state_dir)

        with open(os.path.expanduser(self.STATE_FILE), 'w') as sf:
            json.dump(state, sf)
159
160
161
def get_credentials(path='~/.saultbot/credentials.json'):
    """Load and validate the credentials file.

    :param path: location of the JSON credentials file; ``~`` is expanded.
    :returns: the parsed credentials mapping.
    :raises RuntimeError: if the file does not exist, or if any of
        ``client_id``, ``client_secret``, ``username`` or ``password`` is
        missing from it.
    """
    required = ('client_id', 'client_secret', 'username', 'password')
    path = os.path.expanduser(path)

    if not os.path.exists(path):
        raise RuntimeError("No credentials.json found")

    with open(path, 'r') as cf:
        creds = json.load(cf)

    missing = [key for key in required if key not in creds]
    if missing:
        # Name only the absent keys: echoing the loaded values would put the
        # secret and password into tracebacks and logs.
        raise RuntimeError("Invalid Credentials: missing {}".format(', '.join(missing)))

    return creds
172
173
174
if __name__ == '__main__':
    # Passing the literal argument ``fake`` anywhere on the command line
    # makes the bot print what it would post without submitting anything.
    bot = Bot(
        credentials=get_credentials(),
        source_classes=[SooToday, CitySite],
        fake='fake' in sys.argv,
    )
    bot.run()