Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import scrapy
- from ..items import SolkeyItem
- from openpyxl import Workbook
class SolkeySpider(scrapy.Spider):
    """Scrape the SolKey listing and export each entry's coefficients to Excel.

    For every row of the listing table the spider follows the link in the
    second column to a detail page, reads the values labelled η0, b, a1, a2
    and a3, and appends one row per entry to an Excel worksheet.

    The workbook is owned by the spider instance and written to
    ``solkey_results.xlsx`` once, when the spider is closed. Detail pages are
    fetched asynchronously, so saving at the end of ``parse`` would write the
    file before any detail row had been appended.
    """

    name = 'solkey'
    allowed_domains = ['www.duurzaamloket.nl']
    start_urls = ['https://www.duurzaamloket.nl/SolKey_X014/index.php?SchemeNo=0&Offset=1&SearchText=&PageCnt=448']

    def __init__(self, *args, **kwargs):
        """Create the output workbook and write its header row."""
        super().__init__(*args, **kwargs)
        self.wb = Workbook()
        self.ws = self.wb.active
        self.ws.append(['ID', 'Nom', 'Eta', 'b', 'a1', 'a2', 'a3'])

    def parse(self, response):
        """Yield one detail-page request per data row of the listing table.

        Args:
            response: The listing page response.

        Yields:
            Requests for each row's detail page, carrying the partially
            filled ``SolkeyItem`` in ``meta['item']``.
        """
        # Every table row except the first one (the header row).
        rows = response.xpath('//table[@id="mytable"]/tr')[1:]
        for row in rows:
            # Link to the detail page holding η0, b, a1, a2 and a3.
            detail_page = row.xpath('.//td[2]/a/@href').get()
            if detail_page is None:
                # response.follow() cannot build a request without a URL;
                # skip the row instead of aborting the whole listing.
                self.logger.warning('Row without detail link skipped: %s', row.get())
                continue

            item = SolkeyItem()
            # ID and name of the entry, taken from the listing row itself.
            item['id'] = row.xpath('.//td[1]/text()').get()
            item['name'] = row.xpath('.//td[2]/text()').get()
            yield response.follow(detail_page, self.parse_detail, meta={'item': item})

    def parse_detail(self, response):
        """Complete the item from its detail page and append it to the sheet.

        Args:
            response: The detail page response; ``response.meta['item']`` is
                the item started in ``parse``.
        """
        item = response.meta['item']
        # Each value is the text of the first <td> following the <th> whose
        # text is the given label; a missing label yields None.
        item['eta'] = response.xpath('//th[text()="η0"]/following-sibling::td[1]/text()').get()
        item['b'] = response.xpath('//th[text()="b"]/following-sibling::td[1]/text()').get()
        item['a1'] = response.xpath('//th[text()="a1"]/following-sibling::td[1]/text()').get()
        item['a2'] = response.xpath('//th[text()="a2"]/following-sibling::td[1]/text()').get()
        item['a3'] = response.xpath('//th[text()="a3"]/following-sibling::td[1]/text()').get()
        self.ws.append([item['id'], item['name'], item['eta'], item['b'], item['a1'], item['a2'], item['a3']])

    def closed(self, reason):
        """Save the collected rows to the Excel file when the spider closes.

        Args:
            reason: The close reason string passed by Scrapy (unused).
        """
        self.wb.save("solkey_results.xlsx")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement