Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python
- __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
- __license__ = 'GPL v3'
- '''
- calibre recipe for prospectmagazine.co.uk
- '''
- import re
- from calibre.web.feeds.recipes import BasicNewsRecipe
class ProspectMagUK(BasicNewsRecipe):
    """calibre recipe that downloads the current issue of Prospect Magazine.

    The recipe optionally logs in with the user's credentials, reads the
    "current issue" index page, picks up the cover image when one is present
    and returns every article linked from a ``post`` block as a single feed.
    """

    description = 'A general-interest publication offering analysis and commentary about politics, news and culture.'
    __author__ = 'barty1'
    timefmt = ' [%d %B %Y]'
    no_stylesheets = True
    publication_type = 'newspaper'
    masthead_url = 'http://www.prospectmagazine.co.uk/wp-content/themes/prospect/images/titleMain.jpg'
    category = 'news, UK'
    language = 'en'
    title = u'Prospect Magazine'
    # Page listing the articles of the issue currently on sale.
    INDEX = 'http://www.prospectmagazine.co.uk/current-issue'
    max_articles_per_feed = 100
    auto_cleanup = True
    needs_subscription = True

    def get_browser(self):
        """Return a browser, logged in when credentials are configured.

        The login form on ``wp-login.php`` is only submitted when both
        ``self.username`` and ``self.password`` are set; otherwise the
        plain browser from the base class is returned unchanged.
        """
        # get_browser is an instance method on BasicNewsRecipe, so calling it
        # through the class requires passing ``self`` explicitly; without it
        # the call raises TypeError before any login is attempted.
        br = BasicNewsRecipe.get_browser(self)
        if self.username is not None and self.password is not None:
            br.open('http://www.prospectmagazine.co.uk/wp-login.php')
            br.select_form(name='loginform')
            br['log'] = self.username
            br['pwd'] = self.password
            br.submit()
        return br

    def parse_index(self):
        """Build the feed list for the current issue.

        Returns a one-element list ``[(feed_title, articles)]`` where
        ``articles`` is a list of dicts with the keys ``title``,
        ``description`` and ``url``. As a side effect ``self.cover_url`` is
        set when the index page contains a cover image.
        """
        soup = self.index_to_soup(self.INDEX)

        # Use the "Issue NNN" heading as the feed title when it exists.
        heading = soup.find('h1', text=re.compile(r'Issue \d+'))
        if heading is not None:
            fname = self.tag_to_string(heading)
        else:
            fname = 'Current Issue'

        cover_div = soup.find('div', id='cover_image')
        if cover_div is not None:
            img = cover_div.find('img', src=True)
            if img is not None:
                src = img['src']
                # Site-relative paths need the host prepended to be fetchable.
                if src.startswith('/'):
                    src = 'http://www.prospectmagazine.co.uk' + src
                self.cover_url = src

        articles = []
        seen = set()  # URLs already collected, to drop duplicate listings
        for item in soup.findAll('div', attrs={'class': re.compile(r'post\b')}):
            headline = item.find('h3')
            if headline is None:
                continue
            a = headline.find('a', href=True)
            if a is None:
                continue
            url = a['href']
            if url in seen:
                continue
            seen.add(url)
            title = self.tag_to_string(a)
            # The first paragraph of the post block serves as the summary.
            summary = item.find('p')
            desc = self.tag_to_string(summary) if summary is not None else ''
            art = {'title': title, 'description': desc, 'url': url}
            self.log('\tFound article:', title, '::', url)
            articles.append(art)
        return [(fname, articles)]
Advertisement
Add Comment
Please, Sign In to add comment