Advertisement
Guest User

Untitled

a guest
Feb 18th, 2017
124
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 9.04 KB | None | 0 0
  1. """
  2. Pocket Calibre Recipe v1.4
  3. """
  4. from calibre import strftime
  5. from calibre.web.feeds.news import BasicNewsRecipe
  6. from string import Template
  7. import json
  8. import operator
  9. import re
  10. import tempfile
  11. import urllib
  12. import urllib2
  13.  
  14.  
  15. __license__ = 'GPL v3'
  16. __copyright__ = '''
  17. 2010, Darko Miletic <darko.miletic at gmail.com>
  18. 2011, Przemyslaw Kryger <pkryger at gmail.com>
  19. 2012-2013, tBunnyMan <Wag That Tail At Me dot com>
  20. '''
  21.  
  22.  
  23. class Pocket(BasicNewsRecipe):
  24. title = 'Pocket'
  25. __author__ = 'Darko Miletic, Przemyslaw Kryger, Keith Callenberg, tBunnyMan'
  26. description = '''Personalized news feeds. Go to getpocket.com to setup up
  27. your news. This version displays pages of articles from
  28. oldest to newest, with max & minimum counts, and marks
  29. articles read after downloading.'''
  30. publisher = 'getpocket.com'
  31. category = 'news, custom'
  32.  
  33. #Settings people change
  34. oldest_article = 70.0
  35. max_articles_per_feed = 250
  36. minimum_articles = 10
  37. mark_as_read_after_dl = False # Set this to False for testing
  38. sort_method = 'newest' # MUST be either 'oldest' or 'newest'
  39. # To filter by tag this needs to be a single tag in quotes; IE 'calibre'
  40. only_pull_tag = None
  41.  
  42. #You don't want to change anything under
  43. no_stylesheets = True
  44. use_embedded_content = False
  45. needs_subscription = True
  46. articles_are_obfuscated = True
  47. apikey = '19eg0e47pbT32z4793Tf021k99Afl889'
  48. index_url = u'https://getpocket.com'
  49. read_api_url = index_url + u'/v3/get'
  50. modify_api_url = index_url + u'/v3/send'
  51. legacy_login_url = index_url + u'/l' # We use this to cheat oAuth
  52. articles = []
  53.  
  54. def get_browser(self, *args, **kwargs):
  55. """
  56. We need to pretend to be a recent version of safari for the mac to
  57. prevent User-Agent checks Pocket api requires username and password so
  58. fail loudly if it's missing from the config.
  59. """
  60. br = BasicNewsRecipe.get_browser(self,
  61. user_agent='Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_4; \
  62. en-us) AppleWebKit/533.19.4 (KHTML, like Gecko) \
  63. Version/5.0.3 Safari/533.19.4')
  64. if self.username is not None and self.password is not None:
  65. br.open(self.legacy_login_url)
  66. br.select_form(nr=0)
  67. br['feed_id'] = self.username
  68. br['password'] = self.password
  69. br.submit()
  70. else:
  71. self.user_error("This Recipe requires authentication")
  72. return br
  73.  
  74. def get_auth_uri(self):
  75. """Quick function to return the authentication part of the url"""
  76. uri = ""
  77. uri = u'{0}&apikey={1!s}'.format(uri, self.apikey)
  78. if self.username is None or self.password is None:
  79. self.user_error("Username or password is blank.")
  80. else:
  81. uri = u'{0}&username={1!s}'.format(uri, self.username)
  82. uri = u'{0}&password={1!s}'.format(uri, self.password)
  83. return uri
  84.  
  85. def get_pull_articles_uri(self):
  86. uri = ""
  87. uri = u'{0}&state={1}'.format(uri, u'unread')
  88. uri = u'{0}&contentType={1}'.format(uri, u'article')
  89. uri = u'{0}&sort={1}'.format(uri, self.sort_method)
  90. uri = u'{0}&count={1!s}'.format(uri, self.max_articles_per_feed)
  91. if self.only_pull_tag is not None:
  92. uri = u'{0}&tag={1}'.format(uri, self.only_pull_tag)
  93. return uri
  94.  
  95. def parse_index(self):
  96. pocket_feed = []
  97. fetch_url = u"{0}?{1}{2}".format(
  98. self.read_api_url,
  99. self.get_auth_uri(),
  100. self.get_pull_articles_uri()
  101. )
  102. try:
  103. request = urllib2.Request(fetch_url)
  104. response = urllib2.urlopen(request)
  105. pocket_feed = json.load(response)['list']
  106. except urllib2.HTTPError as e:
  107. self.log.exception("Pocket returned an error: {0}".format(e.info()))
  108. return []
  109. except urllib2.URLError as e:
  110. self.log.exception("Unable to connect to getpocket.com's api: {0}\nurl: {1}".format(e, fetch_url))
  111. return []
  112.  
  113. if len(pocket_feed) < self.minimum_articles:
  114. self.mark_as_read_after_dl = False
  115. self.user_error("Only {0} articles retrieved, minimum_articles not reached".format(len(pocket_feed)))
  116.  
  117. for pocket_article in pocket_feed.iteritems():
  118. self.articles.append({
  119. 'item_id': pocket_article[0],
  120. 'title': pocket_article[1]['resolved_title'],
  121. 'date': pocket_article[1]['time_updated'],
  122. 'url': u'{0}/a/read/{1}'.format(self.index_url, pocket_article[0]),
  123. 'real_url': pocket_article[1]['resolved_url'],
  124. 'description': pocket_article[1]['excerpt'],
  125. 'sort': pocket_article[1]['sort_id']
  126. })
  127. self.articles = sorted(self.articles, key=operator.itemgetter('sort'))
  128. return [("My Pocket Articles for {0}".format(strftime('[%I:%M %p]')), self.articles)]
  129.  
  130. def get_textview(self, url):
  131. """
  132. Since Pocket's v3 API they removed access to textview. They also
  133. redesigned their page to make it much harder to scrape their textview.
  134. We need to pull the article, retrieve the formcheck id, then use it
  135. to querty for the json version
  136. This function will break when pocket hates us
  137. """
  138. ajax_url = self.index_url + u'/a/x/getArticle.php'
  139. soup = self.index_to_soup(url)
  140. fc_tag = soup.find('script', text=re.compile("formCheck"))
  141. fc_id = re.search(r"formCheck = \'([\d\w]+)\';", fc_tag).group(1)
  142. article_id = url.split("/")[-1]
  143. data = urllib.urlencode({'itemId': article_id, 'formCheck': fc_id})
  144. try:
  145. response = self.browser.open(ajax_url, data)
  146. except urllib2.HTTPError as e:
  147. self.log.exception("unable to get textview {0}".format(e.info()))
  148. raise e
  149. return json.load(response)['article']
  150.  
  151. def get_obfuscated_article(self, url):
  152. """
  153. Our get_textview returns parsed json so prettify it to something well
  154. parsed by calibre.
  155. """
  156. article = self.get_textview(url)
  157. template = Template('<h1>$title</h1><div class="body">$body</div>')
  158. with tempfile.NamedTemporaryFile(delete=False) as tf:
  159. tmpbody = article['article']
  160. for img in article['images']:
  161. imgdiv = '<div id="RIL_IMG_{0}" class="RIL_IMG"></div>'.format(article['images'][img]['image_id'])
  162. imgtag = '<img src="{0}" \>'.format(article['images'][img]['src'])
  163. tmpbody = tmpbody.replace(imgdiv, imgtag)
  164.  
  165. tf.write(template.safe_substitute(
  166. title=article['title'],
  167. body=tmpbody
  168. ))
  169. return tf.name
  170.  
  171. def mark_as_read(self, mark_list):
  172. actions_list = []
  173. for article_id in mark_list:
  174. actions_list.append({
  175. 'action': 'archive',
  176. 'item_id': article_id
  177. })
  178. mark_read_url = u'{0}?actions={1}{2}'.format(
  179. self.modify_api_url,
  180. json.dumps(actions_list, separators=(',', ':')),
  181. self.get_auth_uri()
  182. )
  183. try:
  184. request = urllib2.Request(mark_read_url)
  185. response = urllib2.urlopen(request)
  186. except urllib2.HTTPError as e:
  187. self.log.exception('Pocket returned an error while archiving articles: {0}'.format(e))
  188. return []
  189. except urllib2.URLError as e:
  190. self.log.exception("Unable to connect to getpocket.com's modify api: {0}".format(e))
  191. return []
  192.  
  193. def cleanup(self):
  194. if self.mark_as_read_after_dl:
  195. self.mark_as_read([x['item_id'] for x in self.articles])
  196. else:
  197. pass
  198.  
  199. def default_cover(self, cover_file):
  200. """
  201. Create a generic cover for recipes that don't have a cover
  202. This override adds time to the cover
  203. """
  204. try:
  205. from calibre.ebooks import calibre_cover
  206. title = self.title if isinstance(self.title, unicode) else \
  207. self.title.decode('utf-8', 'replace')
  208. date = strftime(self.timefmt)
  209. time = strftime('[%I:%M %p]')
  210. img_data = calibre_cover(title, date, time)
  211. cover_file.write(img_data)
  212. cover_file.flush()
  213. except:
  214. self.log.exception('Failed to generate default cover')
  215. return False
  216. return True
  217.  
  218. def user_error(self, error_message):
  219. if hasattr(self, 'abort_recipe_processing'):
  220. self.abort_recipe_processing(error_message)
  221. else:
  222. self.log.exception(error_message)
  223. raise RuntimeError(error_message)
  224.  
  225. # vim:ft=python tabstop=8 expandtab shiftwidth=4 softtabstop=4
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement