Advertisement
Guest User

uwazam-rze-recipe

a guest
Feb 20th, 2012
656
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.74 KB | None | 0 0
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3.  
  4. __license__   = 'WTFPL'
  5. __version__   = '0.1'
  6.  
  7.  
  8. from calibre.web.feeds.news import BasicNewsRecipe
  9. from calibre.constants import config_dir, CONFIG_DIR_MODE
  10. from datetime import datetime
  11. import os, os.path, urllib
  12.  
  13. class uwazamrze(BasicNewsRecipe):
  14.     title = u'Uważam Rze'
  15.     description = u'Tygodnik autorów niepokornych. Największy w Polsce.'
  16.     language = 'pl'
  17.     publisher = 'Presspublica sp. z o.o'
  18.     publication_type = 'magazine'
  19.     timefmt = ''
  20.  
  21.     needs_subscription = True
  22.    
  23.     conversion_options = {
  24.         'authors' : 'uwazamrze.pl'
  25.         ,'publisher' : publisher
  26.         ,'language' : language
  27.         ,'preserve_cover_aspect_ratio': True
  28.     }
  29.  
  30.     remove_javascript = True
  31.     recursion = 0
  32.  
  33.     keep_only_tags = [{'class': ['articleTitle', 'storyContent', 'authordate']}]
  34.     remove_tags = [dict(name='div', attrs={'class':'editorPicks'})]
  35.     extra_css      = '.authordate {font-size: small;} }'
  36.  
  37.     def get_browser(self):
  38.         br = BasicNewsRecipe.get_browser()
  39.         if self.username is not None and self.password is not None:
  40.             br.open('http://www.uwazamrze.pl/temat/755797.html')
  41.             br.select_form(name='logowanie')
  42.             br['login']   = self.username
  43.             br['password'] = self.password
  44.             br.submit()
  45.         return br
  46.    
  47.     def postprocess_html(self, soup, first):
  48.         return self.adeify_images(soup)
  49.  
  50.     def get_cover_url(self):
  51.         soup = self.index_to_soup('http://www.uwazamrze.pl/temat/755797.html')
  52.         tr = soup.find('div', attrs={'id':'urzeIssueIndex_cover'})
  53.         img = tr.find('img')['src']
  54.         cover_url = img.replace(',145.jpg', ',9.jpg')
  55.         return cover_url
  56.  
  57.     def print_version(self,url):
  58.         segments = url.split(',')
  59.         segments = segments[1].split('-')
  60.  
  61.         newUrl = "http://www.uwazamrze.pl/artykul/" + segments[0] + ".html?print=tak&p=0"
  62.        
  63.         return newUrl
  64.    
  65.     def parse_index(self):
  66.         feedname = [u'Uważam Rze']
  67.         feedurl = ['http://www.uwazamrze.pl/temat/755797.html']
  68.         sections = {}
  69.         feeds = []
  70.         lista = []
  71.  
  72.         soup = self.index_to_soup(feedurl[0])
  73.         records = soup.findAll('div', attrs={'class':'urzeIssueIndex_element'})
  74.         for rec in records:
  75.             title = rec.find('div', attrs={'class':'urzeIssueIndex_title'})
  76.             url = title.a['href']
  77.             title = title.a.string.strip()
  78.            
  79.             section = rec.find('div', attrs={'class':'urzeIssueIndex_topic'})
  80.             section = section.a.string
  81.  
  82.             author = rec.find('div', attrs={'class':'urzeIssueIndex_author'})
  83.             author = author.string.strip()
  84.  
  85.             if not section in sections:
  86.                 sections[section] = []
  87.                 lista.append(section)
  88.            
  89.             sections[section].append( {
  90.                 'title' : title,
  91.                 'url'   : url,
  92.                 'date'  : '',
  93.                 'description' : '',
  94.                 'author' : author
  95.             })
  96.            
  97.            
  98.         for section in lista:
  99.             feeds.append((section, sections[section]))
  100.        
  101.         return feeds
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement