SHOW:
|
|
- or go back to the newest paste.
1 | from __future__ import unicode_literals | |
2 | ||
3 | import json | |
4 | import os | |
5 | import sys | |
6 | import time | |
7 | import urlparse | |
8 | ||
9 | import feedparser | |
10 | import pid | |
11 | import praw | |
12 | from BeautifulSoup import BeautifulSoup | |
13 | from praw.exceptions import APIException | |
14 | ||
15 | ||
class Source(object):
    """Base class for article sources the bot can poll."""

    # Short identifier used as the key into the persisted state dict;
    # subclasses override this.
    name = None

    def __init__(self, state=None):
        """Keep this source's persistent state dict (empty when absent)."""
        self.state = state if state else {}

    def iter_articles(self):
        """Yield new articles since the last run; subclasses must implement."""
        raise NotImplementedError()
25 | ||
class RSSSource(Source):
    """Source backed by an RSS feed, tracking the newest seen entry id."""

    def iter_rss_items(self):
        """Yield raw feed entries, newest first; subclasses must implement."""
        raise NotImplementedError()

    def filter_article(self, article):
        """Hook: return the (possibly adjusted) article, or a falsy value to skip it."""
        return article

    def iter_articles(self):
        """Yield filtered articles newer than the id recorded in state.

        The newest entry's id is written into state immediately, so the
        next run stops as soon as it reaches that entry again.
        """
        previous_id = self.state.get('last_article_id')
        newest_id = None

        for item in self.iter_rss_items():
            if newest_id is None:
                newest_id = item['id']
                self.state['last_article_id'] = newest_id

            # Reached the entry we stopped at last time: nothing newer left.
            if item['id'] == previous_id:
                break

            item = self.filter_article(item)
            if item:
                yield item
48 | ||
49 | ||
class SooToday(RSSSource):
    """SooToday.com feed: keeps local news and the city police beat."""

    name = 'sootoday'

    def iter_rss_items(self):
        """Yield entries from the first 20 pages of the paginated site feed."""
        for page in range(1, 21):
            feed = feedparser.parse('https://www.sootoday.com/rss?page={}'.format(page))
            for entry in feed['entries']:
                yield entry

    def filter_article(self, article):
        """Keep local-news items; prefix police-beat titles; drop everything else."""
        parsed = urlparse.urlparse(article['link'])
        category = parsed.path.strip('/').split('/')[0]
        if 'local' in category:
            return article
        if category == 'city-police-beat':
            article['title'] = 'City Police Beat: {}'.format(article['title'])
            return article
        # Implicit None: article is filtered out.
69 | ||
70 | ||
class CitySite(RSSSource):
    """City of Sault Ste. Marie newsroom feed."""

    name = 'city'

    def iter_rss_items(self):
        """Yield every entry from the city newsroom RSS feed."""
        feed = feedparser.parse('http://saultstemarie.ca/Newsroom.aspx?rss=news')
        for entry in feed['entries']:
            yield entry

    def filter_article(self, article):
        """Strip the trailing '?feed=news' tracking suffix from article links."""
        suffix = '?feed=news'
        if article['link'].endswith(suffix):
            article['link'] = article['link'][:-len(suffix)]
        return article
82 | ||
83 | ||
class Bot(object):
    """Posts new articles from the configured sources to /r/saultstemarie."""

    def __init__(self, credentials, source_classes, fake=False):
        """
        credentials: dict with client_id, client_secret, username, password.
        source_classes: iterable of Source subclasses to poll.
        fake: when True, only log what would be posted; never submit.
        """
        self.fake = fake
        self.credentials = credentials
        self.source_classes = source_classes
        self.subreddit = None  # lazily-initialised praw subreddit handle

    def get_subreddit(self):
        """Return the cached praw subreddit handle, logging in on first use."""
        if self.subreddit is None:
            reddit = praw.Reddit(client_id=self.credentials['client_id'],
                                 client_secret=self.credentials['client_secret'],
                                 password=self.credentials['password'],
                                 user_agent='SaultBot by /u/metalhedd',
                                 username=self.credentials['username'])
            self.subreddit = reddit.subreddit('saultstemarie')
        return self.subreddit

    def run(self):
        """Poll each source once and post anything new; guarded by a pidfile."""
        with pid.PidFile(piddir=os.path.expanduser("~/.saultbot")):
            state = self.load_state()
            self.sources = [source(state.get(source.name)) for source in self.source_classes]
            for source in self.sources:
                for article in source.iter_articles():
                    self.post_article(article)
                # Persist after each source so progress survives a crash
                # in a later source.
                state[source.name] = source.state
                self.save_state(state)

    def strip_tags(self, text):
        """Return *text* with HTML tags removed, words separated by spaces."""
        return BeautifulSoup(text).getText(separator=' ')

    def _ratelimit_delay(self, exc):
        """Return how long (seconds) to wait for a ratelimit APIException.

        Parses reddit's "try again in N minutes/seconds" hint; a partial
        minute is rounded up by one. Falls back to 10 minutes when the hint
        is absent or not in the expected format (the original code crashed
        on unexpected wording).
        """
        seconds = 600
        message = getattr(exc, 'message', None)
        if message and 'try again in' in message:
            try:
                count, unit = message.split('try again in')[1].strip().split(' ')
                seconds = int(count)
                if unit.startswith('minute'):
                    seconds += 1
                    seconds *= 60
            except ValueError:
                # Unparseable hint: keep the conservative default.
                seconds = 600
        return seconds

    def post_article(self, article):
        """Submit one article as a link post, retrying after ratelimits.

        Other API errors (e.g. resubmit of an already-posted link) abort
        the post without raising.
        """
        article['title'] = self.strip_tags(article['title'])
        # print(...) with a single argument behaves identically under
        # Python 2's print statement and Python 3's function.
        print('posting {} as "{}"'.format(article['link'], article['title']))
        if not self.fake:
            while True:
                try:
                    submission = self.get_subreddit().submit(
                        article['title'],
                        url=article['link'],
                        send_replies=False,
                        resubmit=False,
                    )
                    print('submitted {}'.format(submission))
                except APIException as e:
                    print(e)
                    if e.field != 'ratelimit':
                        return
                    delay = self._ratelimit_delay(e)
                    print('waiting {} seconds to post'.format(delay))
                    time.sleep(delay)
                else:
                    return

                # Small extra pause before retrying the submission.
                time.sleep(1)

    def load_state(self):
        """Load persisted per-source state; a missing or corrupt file means {}."""
        path = os.path.expanduser('~/.saultbot/state.json')
        try:
            with open(path) as sf:
                return json.load(sf)
        except (EnvironmentError, ValueError):
            # No state file yet, unreadable file, or invalid JSON: start fresh.
            # (Replaces a bare `except:` that also swallowed KeyboardInterrupt
            # and leaked the file handle.)
            return {}

    def save_state(self, state):
        """Write per-source state to ~/.saultbot/state.json, creating the dir."""
        directory = os.path.expanduser('~/.saultbot')
        if not os.path.exists(directory):
            os.makedirs(directory)

        with open(os.path.join(directory, 'state.json'), 'w') as sf:
            json.dump(state, sf)
159 | ||
160 | ||
def get_credentials():
    """Load reddit API credentials from ~/.saultbot/credentials.json.

    Raises RuntimeError when the file is missing or any required key
    is absent.
    """
    path = os.path.expanduser('~/.saultbot/credentials.json')
    if not os.path.exists(path):
        raise RuntimeError("No credentials.json found")

    with open(path, 'r') as cf:
        creds = json.load(cf)

    required = ('client_id', 'client_secret', 'username', 'password')
    if not all(key in creds for key in required):
        raise RuntimeError("Invalid Credentials: {}".format(creds))

    return creds
172 | ||
173 | ||
if __name__ == '__main__':
    # Passing 'fake' anywhere on the command line enables dry-run mode.
    dry_run = 'fake' in sys.argv
    bot = Bot(credentials=get_credentials(),
              source_classes=[SooToday, CitySite],
              fake=dry_run)
    bot.run()