Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import scrapy
class BlogSpider(scrapy.Spider):
    """Spider that follows every link of an index view and scrapes each linked page.

    The start URL points at a ``congreso.gob.pe`` view (``CLProLey2016.nsf``);
    presumably this is the 2016 bill index of the Peruvian Congress -- confirm
    against the live site. ``parse`` follows the links found in the index and
    ``parseLaw`` extracts the fields of each detail page.
    """

    name = 'blogspider'
    start_urls = ['http://www2.congreso.gob.pe/Sicr/TraDocEstProc/CLProLey2016.nsf/Local%20Por%20Numero%20Inverso?OpenView']

    # XPath of the table whose rows hold the fields read by ``parseLaw``.
    _DETAIL_TABLE = 'body/form/table/tr[1]/td/table'

    def parse(self, response):
        """Yield one request per link in the first column of the index table.

        Every followed page is handled by ``parseLaw``.
        """
        links = response.xpath('body/form/table[2]/tr/td[1]/font/a/@href')
        for next_page in links:
            yield response.follow(next_page, self.parseLaw)

    def getMail(self, s):
        """Return the part of ``s`` after its first colon (e.g. strip ``mailto:``).

        The segment between the first and second colon is returned, exactly as
        before. When ``s`` contains no colon it is returned unchanged instead
        of raising ``IndexError``, so a single non-``mailto:`` link no longer
        aborts the whole page.
        """
        parts = s.split(':')
        return parts[1] if len(parts) > 1 else s

    def _detail(self, response, path):
        """Select ``path`` relative to the detail table of ``response``."""
        return response.xpath(self._DETAIL_TABLE + '/' + path)

    def parseLaw(self, response):
        """Yield a dict with the fields scraped from one detail page.

        Keys keep the original Spanish variable names: ``fecha``, ``codigo``,
        ``titulo``, ``sumilla``, ``correos`` and ``autores``. Every value is
        the list of strings extracted from the corresponding table row;
        ``correos`` holds the author link targets passed through ``getMail``.
        """
        author_hrefs = self._detail(response, 'tr[9]/td[2]/font/a/@href').extract()
        # Yield the scraped data as an item so pipelines and feed exports can
        # use it; previously the fields were only printed and None was yielded.
        yield {
            'fecha': self._detail(response, 'tr[3]/td[2]/font[1]/text()').extract(),
            'codigo': self._detail(response, 'tr[4]/td[2]/font[1]/text()').extract(),
            'titulo': self._detail(response, 'tr[7]/td[2]/font/text()').extract(),
            'sumilla': self._detail(response, 'tr[8]/td[2]/font/text()').extract(),
            'correos': [self.getMail(href) for href in author_hrefs],
            'autores': self._detail(response, 'tr[9]/td[2]/font/a/text()').extract(),
        }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement