Advertisement
Guest User

Recepta Calibre do Uważam Rze - konieczne jest hasło

a guest
Jul 18th, 2012
914
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.95 KB | None | 0 0
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3.  
  4. # recepta dla Calibre do Uważam Rze. Konieczne jest wykupienie dostępu i podanie w Calibre loginu i hasła
  5.  
  6. __license__   = 'WTFPL'
  7. __version__   = '0.1'
  8.  
  9.  
  10. from calibre.web.feeds.news import BasicNewsRecipe
  11. from calibre.constants import config_dir, CONFIG_DIR_MODE
  12. from datetime import datetime
  13. import os, os.path, urllib
  14.  
  15. class uwazamrze(BasicNewsRecipe):
  16.     title = u'Uważam Rze'
  17.     description = u'Tygodnik autorów niepokornych. Największy w Polsce.'
  18.     language = 'pl'
  19.     publisher = 'Presspublica sp. z o.o'
  20.     publication_type = 'magazine'
  21.     timefmt = ''
  22.  
  23.     needs_subscription = True
  24.    
  25.     conversion_options = {
  26.         'authors' : 'uwazamrze.pl'
  27.         ,'publisher' : publisher
  28.         ,'language' : language
  29.         ,'preserve_cover_aspect_ratio': True
  30.     }
  31.  
  32.     remove_javascript = True
  33.     recursion = 0
  34.  
  35.     keep_only_tags = [{'class': ['articleTitle', 'storyContent', 'authordate']}]
  36.     remove_tags = [dict(name='div', attrs={'class':'editorPicks'})]
  37.     extra_css      = '.authordate {font-size: small;}  .lead {font-weight: bold;}  .srodtytul {font-size: 120%; margin-top: 10px;} strong {font-weight: bold;} '
  38.  
  39.     def get_browser(self):
  40.         br = BasicNewsRecipe.get_browser()
  41.         if self.username is not None and self.password is not None:
  42.             br.open('http://www.uwazamrze.pl/temat/755797.html')
  43.             br.select_form(name='logowanie')
  44.             br['login']   = self.username
  45.             br['password'] = self.password
  46.             br.submit()
  47.         return br
  48.    
  49.     def postprocess_html(self, soup, first):
  50.         return self.adeify_images(soup)
  51.  
  52.     def get_cover_url(self):
  53.         soup = self.index_to_soup('http://www.uwazamrze.pl/temat/755797.html')
  54.         tr = soup.find('div', attrs={'id':'urzeIssueIndex_cover'})
  55.         img = tr.find('img')['src']
  56.         cover_url = img.replace(',145.jpg', ',9.jpg')
  57.         return cover_url
  58.  
  59.     def print_version(self,url):
  60.         segments = url.split(',')
  61.         segments = segments[1].split('-')
  62.  
  63.         newUrl = "http://www.uwazamrze.pl/artykul/" + segments[0] + ".html?print=tak&p=0"
  64.        
  65.         return newUrl
  66.    
  67.     def parse_index(self):
  68.         feedname = [u'Uważam Rze']
  69.         feedurl = ['http://www.uwazamrze.pl/temat/755797.html']
  70.         sections = {}
  71.         feeds = []
  72.         lista = []
  73.  
  74.         soup = self.index_to_soup(feedurl[0])
  75.         records = soup.findAll('div', attrs={'class':'urzeIssueIndex_element'})
  76.         for rec in records:
  77.             title = rec.find('div', attrs={'class':'urzeIssueIndex_title'})
  78.             url = title.a['href']
  79.             title = title.a.string.strip()
  80.            
  81.             section = rec.find('div', attrs={'class':'urzeIssueIndex_topic'})
  82.             section = section.a.string
  83.  
  84.             author = rec.find('div', attrs={'class':'urzeIssueIndex_author'})
  85.             author = author.string.strip()
  86.  
  87.             if not section in sections:
  88.                 sections[section] = []
  89.                 lista.append(section)
  90.            
  91.             sections[section].append( {
  92.                 'title' : title,
  93.                 'url'   : url,
  94.                 'date'  : '',
  95.                 'description' : '',
  96.                 'author' : author
  97.             })
  98.            
  99.            
  100.         for section in lista:
  101.             feeds.append((section, sections[section]))
  102.        
  103.         return feeds
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement