Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/env python
# Calibre recipe module header. The list markers ("- ") left over from the
# web paste have been stripped so the shebang and the assignments below parse
# as Python again; the values themselves are unchanged.

__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
# Bare string naming the site this recipe targets (kept as in the original;
# it is not the module docstring because it follows the assignments).
'''
en.wikipedia.org
'''
class Wikipedia(BasicNewsRecipe):
    """Recipe that packages a fixed list of English Wikipedia pages.

    The pages to fetch are listed in ``ARTICLE_NAMES``; ``parse_index``
    turns that list into a single feed called ``'Articles'``.
    ``print_version`` rewrites each article URL to the ``printable=yes``
    rendering, and ``preprocess_html`` strips nested ``div`` blocks and
    inline styling from the downloaded page.

    ``BasicNewsRecipe`` and ``_`` are not imported here; they are expected
    to be supplied by the environment that loads the recipe.
    """

    title = 'Wikipedia'
    __author__ = 'Darko Miletic'
    description = 'Wikipedia articles'
    category = 'data, world'
    oldest_article = 7
    max_articles_per_feed = 100
    publisher = 'Wiki'
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'
    remove_javascript = True
    language = _('English')

    # Base URL; article and printable URLs are built by appending to it.
    INDEX = 'http://en.wikipedia.org/'

    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
    ]
    html2epub_options = (
        'publisher="' + publisher + '"\ncomments="' + description
        + '"\ntags="' + category + '"'
    )

    keep_only_tags = [
        dict(name='h1', attrs={'id': 'firstHeading'}),
        dict(name='div', attrs={'id': 'bodyContent'}),
    ]
    remove_tags = [
        dict(name=['link', 'sup']),
        dict(name='div', attrs={'id': ['printfooter', 'catlinks', 'footer']}),
        dict(name='div', attrs={'class': ['thumb left', 'thumb right']}),
    ]

    # Add entries in here. Each name is the page name exactly as it appears
    # after "/wiki/" in the article URL (for example 'Tree' or 'Tiger'); it
    # is used as the article title, the description and the URL suffix.
    ARTICLE_NAMES = (
        'Viking',
        'Viking_Age',
        'History_of_Scandinavia',
        'History_of_Denmark',
        'History_of_Sweden',
        'History_of_Norway',
        'History_of_Iceland',
        'History_of_Greenland',
        'Viking_Age_arms_and_armour',
        'Viking_ship',
        'Skuldelev_ships',
        'Gokstad_ship',
        'Oseberg_ship',
        'Tune_ship',
        'Norse_funeral',
        'Norse_pagan_worship',
        'Runestone',
        'Norse_mythology',
        'Borre_mound_cemetery',
        'Saga',
        "L'Anse_aux_Meadows",
        'Dorset_culture',
        'Danelaw',
        'Oland',
        'Jelling',
        'Gamla_Uppsala',
        'Adelso',
        'Alby',
        'Birka',
        'Gene_fornby',
        'Helgo',
        'Sigtuna',
        'Uppakra',
        'Valsgarde',
        'Vendel',
        'Roskilde',
        'Lindholm_Hoje',
        'Hedeby',
        'Ribe',
        'Aarhus',
        'Viborg,_Denmark',
        'Lund',
        'Kaupang',
        'Jorvik',
        'History_of_Dublin',
        'Serkland',
        'History_of_the_Faroe_Islands',
        'Vinland',
        'Markland',
        'Helluland',
        'Bjarmaland',
    )

    def parse_index(self):
        """Return the feed list: one feed, ``'Articles'``, with every page.

        Returns:
            A one-element list holding the tuple ``('Articles', articles)``,
            where ``articles`` is a list of dicts with the keys ``'title'``,
            ``'date'`` (always empty), ``'url'`` and ``'description'``, in
            the order of ``ARTICLE_NAMES``.
        """
        articles = [
            {
                'title': name,
                'date': '',
                'url': self.INDEX + 'wiki/' + name,
                'description': name,
            }
            for name in self.ARTICLE_NAMES
        ]
        return [('Articles', articles)]

    def print_version(self, url):
        """Map an article URL to its ``printable=yes`` URL.

        Args:
            url: Article URL, normally ``INDEX + 'wiki/' + page_name``.

        Returns:
            ``INDEX + 'w/index.php?title=' + page_name + '&printable=yes'``.
        """
        # Take everything after the first "/wiki/" so page names that
        # themselves contain "/" are kept whole.
        _prefix, found, article_id = url.partition('/wiki/')
        if not found:
            # No "/wiki/" segment: fall back to the last path component,
            # which is what the original implementation always used.
            article_id = url.rpartition('/')[2]
        return self.INDEX + 'w/index.php?title=' + article_id + '&printable=yes'

    def preprocess_html(self, soup):
        """Tidy the parsed page and return the same ``soup`` object.

        Inserts content-language and content-type ``<meta>`` markup at the
        start of ``<head>``, removes every ``div`` nested inside the
        ``bodyContent`` div, and deletes ``style`` and ``font`` attributes
        from all tags that carry them. The head and body steps are skipped
        when the corresponding element is absent instead of raising
        ``AttributeError``.

        Args:
            soup: Parsed document exposing the ``find``/``findAll`` API.

        Returns:
            The modified ``soup``.
        """
        mtag = '<meta http-equiv="Content-Language" content="en"/><meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
        head = soup.head
        if head is not None:
            head.insert(0, mtag)

        btag = soup.find('div', attrs={'id': 'bodyContent'})
        if btag is not None:
            for item in btag.findAll('div'):
                item.extract()

        for item in soup.findAll(style=True):
            del item['style']
        # Matches tags that have a ``font`` attribute (not <font> elements).
        for item in soup.findAll(font=True):
            del item['font']
        return soup
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement