Advertisement
Typhoon

Scrapy ZVJS Faktury v.2

Feb 1st, 2015
88
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.08 KB | None | 0 0
  1. # -*- coding: utf-8 -*-
  2. import scrapy
  3. from scrapy.spider import Spider
  4. from scrapy.selector import Selector
  5. from zvjs.items import Website
  6.  
  7. class TutorialSpiderSpider(scrapy.Spider):
  8.     name = "zvjs-spider"
  9.     allowed_domains = ["zvjs.sk"]
  10.  
  11.  
  12.     start_urls = [
  13.         "http://www.zvjs.sk/index.php?fa_obj&type=fa&id=98273",
  14.         "http://www.zvjs.sk/index.php?fa_obj&type=fa&id=98270",
  15.         "http://www.zvjs.sk/index.php?fa_obj&type=fa&id=10845",
  16.     ]
  17.  
  18.     def parse(self, response):
  19.         sel = Selector(response)
  20.         sites = sel.xpath('//strong')
  21.         items = []
  22.  
  23.         for site in sites:
  24.             item = Website()
  25.             item['evcislo'] = site.xpath('//strong/text()').extract()[0].encode("utf-8")
  26.             item['evcisloval'] = site.xpath("//td/text()").extract()[1].encode("utf-8")
  27.             item['prinalezi'] = site.xpath("//strong/text()").extract()[1].encode("utf-8")
  28.             item['prinalezival'] = site.xpath("//td/text()").extract()[2].encode("utf-8")
  29.             item['popisfa'] = sel.xpath("//strong/text()").extract()[2].encode("utf-8")
  30.             item['popisfaval'] = sel.xpath("//td/text()").extract()[3].encode("utf-8")
  31.             item['hodnotafa'] = sel.xpath("//strong/text()").extract()[3].encode("windows-1250")
  32.             item['hodnotafaval'] = sel.xpath("//td/text()").extract()[4].encode("windows-1250")
  33.             item['datumdoruceniaval'] = sel.xpath("//td/text()").extract()[5].encode("windows-1250")
  34.             item['datumzaplateniaval']= sel.xpath("//td/text()").extract()[6].encode("windows-1250")
  35.         items.append(item)
  36.  
  37.         return items
  38.  
  39. ####################################
  40. # items.py (separate module, imported above as zvjs.items)
  41. ####################################
  42. # -*- coding: utf-8 -*-
  43.  
  44. import scrapy
  45. from scrapy.item import Item, Field
  46.  
  47. class Website(Item):
  48.  
  49.      evcislo = Field()
  50.      evcisloval = Field()
  51.      prinalezi = Field()
  52.      prinalezival = Field()
  53.      popisfa = Field()
  54.      popisfaval = Field()
  55.      hodnotafa = Field()
  56.      hodnotafaval = Field()
  57.      datumdoruceniaval = Field()
  58.      datumzaplateniaval = Field()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement