Guest User

Untitled

a guest
Dec 21st, 2011
90
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.17 KB | None | 0 0
  1. #!/usr/bin/env python
  2. __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
  3.  
  4. __license__ = 'GPL v3'
  5.  
  6. '''
  7. calibre recipe for prospectmagazine.co.uk
  8. '''
  9.  
  10. import re
  11. from calibre.web.feeds.recipes import BasicNewsRecipe
  12.  
  class ProspectMagUK(BasicNewsRecipe):
      '''
      Recipe that builds one feed from the Prospect Magazine current-issue page.

      The feed title is taken from the "Issue N" heading on the index page and
      the articles are collected from the ``div`` elements whose class matches
      ``post``. When a username and password are set, ``get_browser`` logs in
      through the site's WordPress login form before anything is fetched.
      '''

      description = 'A general-interest publication offering analysis and commentary about politics, news and culture.'
      __author__ = 'barty1'
      timefmt = ' [%d %B %Y]'
      no_stylesheets = True
      publication_type = 'newspaper'
      masthead_url = 'http://www.prospectmagazine.co.uk/wp-content/themes/prospect/images/titleMain.jpg'
      category = 'news, UK'
      language = 'en'
      title = u'Prospect Magazine'
      # Page that lists the articles of the current issue; read by parse_index().
      INDEX = 'http://www.prospectmagazine.co.uk/current-issue'
      max_articles_per_feed = 100
      auto_cleanup = True
      needs_subscription = True

      def _absolute_url(self, url):
          '''Return *url* prefixed with the site root when it is root-relative.'''
          if url.startswith('/'):
              return 'http://www.prospectmagazine.co.uk' + url
          return url

      def get_browser(self):
          '''
          Return the browser used for downloads, logged in when possible.

          The login form is only submitted when both ``self.username`` and
          ``self.password`` are set; otherwise the plain browser is returned.
          '''
          # The base implementation is an instance method, so ``self`` has to
          # be passed explicitly when it is called through the class.
          br = BasicNewsRecipe.get_browser(self)
          if self.username is not None and self.password is not None:
              br.open('http://www.prospectmagazine.co.uk/wp-login.php')
              br.select_form(name='loginform')
              br['log'] = self.username
              br['pwd'] = self.password
              br.submit()
          return br

      def parse_index(self):
          '''
          Parse the current-issue page into a single feed.

          Returns a one-element list ``[(feed_title, articles)]`` where each
          article is a dict with the keys ``title``, ``description`` and
          ``url``. As a side effect ``self.cover_url`` is set when the page
          contains an image inside ``div#cover_image``.
          '''
          soup = self.index_to_soup(self.INDEX)

          # Feed title: the "Issue N" heading, or a generic fallback.
          heading = soup.find('h1', text=re.compile(r'Issue \d+'))
          fname = self.tag_to_string(heading) if heading is not None else 'Current Issue'

          cover_div = soup.find('div', id='cover_image')
          if cover_div is not None:
              img = cover_div.find('img', src=True)
              if img is not None:
                  self.cover_url = self._absolute_url(img['src'])

          articles = []
          seen = set()  # URLs already collected; the same post may be listed twice
          for item in soup.findAll('div', attrs={'class': re.compile(r'post\b')}):
              header = item.find('h3')
              if header is None:
                  continue
              a = header.find('a', href=True)
              if a is None:
                  continue
              # Normalise root-relative links the same way as the cover image.
              url = self._absolute_url(a['href'])
              if url in seen:
                  continue
              seen.add(url)
              title = self.tag_to_string(a)
              summary = item.find('p')
              desc = self.tag_to_string(summary) if summary is not None else ''
              art = {'title': title, 'description': desc, 'url': url}
              self.log('\tFound article:', title, '::', url)
              articles.append(art)

          return [(fname, articles)]
Advertisement
Add Comment
Please, Sign In to add comment