Advertisement
Guest User

Untitled

a guest
Nov 20th, 2018
78
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.70 KB | None | 0 0
  1. from scrapy import Request
  2. from scrapy.spiders import CrawlSpider, Rule
  3. from scrapy.linkextractors import LinkExtractor
  4.  
  5. from etender.items import EtenderItem
  6.  
  7.  
  8. class EtenderSpider(CrawlSpider):
  9.     name = "etender"
  10.     start_urls = [
  11.         'https://etendering.ted.europa.eu/cft/cft-search.html?'
  12.         'caList=&_caList=1&status=PUBLISHED&startDateFrom=01%2F05%2F2017&startDateTo=19%2F11%2F2018&'
  13.         'closingDateFrom=&closingDateTo=&procedureType=&_procedureType=1&confirm=Search#'
  14.     ]
  15.  
  16.     rules = (
  17.         Rule(LinkExtractor(allow=(), restrict_xpaths=('//*[@id="centerContent"]/div/span[3]/a[last()-1]',)),
  18.              callback="parse_pages",
  19.              follow=True),)
  20.  
  21.     def parse_tender_item(self, response):
  22.         print('Processing tender: ' + response.url)
  23.  
  24.         tender = EtenderItem()
  25.         tender['url'] = response.url
  26.         tender['name'] = response.xpath('//*[@id="cft_titleautolinked"]/text()').get()
  27.         tender['reference'] = response.xpath('//*[@id="cft.data.internal_name"]/text()').get()
  28.         tender['description'] = response.xpath('//*[@id="cft_descriptionautolinked"]/text()').get()
  29.         tender['procedureType'] = response.xpath('//*[@id="cft.data.procedure_type"]/text()').get()
  30.         tender['status'] = response.xpath('//*[@id="cft.data.status"]/text()').get()
  31.         tender['contractType'] = response.xpath('//*[@id="cft.data.contract_type"]/text()').get()
  32.  
  33.         yield tender
  34.  
  35.     def parse_pages(self, response):
  36.         print('Processing...' + response.url)
  37.  
  38.         tender_links = response.css('tbody tr td:nth-child(2) a::attr(href)').extract()
  39.         for a in tender_links:
  40.             yield Request(a, callback=self.parse_tender_item)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement