SHOW:
|
|
- or go back to the newest paste.
1 | import scrapy | |
2 | - | import jsonhttps://pastebin.com/ |
2 | + | import json |
3 | ||
class OlxHouses(scrapy.Spider):
    """Spider that extracts listings from OLX search result pages.

    It requests a fixed range of result pages under the
    ``eletronicos-e-celulares/estado-sp`` path and yields one item per entry
    found in the JSON document embedded in each page's ``__NEXT_DATA__``
    script tag.
    """

    name = 'olx'

    custom_settings = {
        # Browser-like user agent string sent by this spider.
        'USER_AGENT': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36',
        'AUTOTHROTTLE_ENABLED': True,
    }

    def start_requests(self):
        """Yield one request for each value of the ``o`` query parameter, 1-100."""
        for page in range(1, 101):
            yield scrapy.Request(f'https://www.olx.com.br/eletronicos-e-celulares/estado-sp?o={page}')

    def parse(self, response, **kwargs):
        """Yield ``title``/``price``/``locations`` dicts for the ads on a page.

        The ads are read from ``props.pageProps.ads`` inside the JSON held by
        the ``__NEXT_DATA__`` script tag. If the tag is missing, the JSON is
        malformed, or the expected keys are absent, a warning is logged and
        nothing is yielded for that page instead of raising.
        """
        raw = response.xpath('//script[@id="__NEXT_DATA__"]/text()').get()
        if not raw:
            # .get() returns None when the XPath matches nothing; json.loads(None)
            # would raise TypeError.
            self.logger.warning('No __NEXT_DATA__ script found on %s', response.url)
            return

        try:
            data = json.loads(raw)
        except json.JSONDecodeError:
            self.logger.warning('Invalid __NEXT_DATA__ JSON on %s', response.url)
            return

        # Walk the nested structure defensively: any level may be missing or null.
        props = data.get('props') if isinstance(data, dict) else None
        page_props = props.get('pageProps') if isinstance(props, dict) else None
        ads = page_props.get('ads') if isinstance(page_props, dict) else None
        if not isinstance(ads, list):
            self.logger.warning('No ads list in __NEXT_DATA__ on %s', response.url)
            return

        for ad in ads:
            if not isinstance(ad, dict):
                # Skip entries that cannot be queried with .get().
                continue
            yield {
                'title': ad.get('title'),
                'price': ad.get('price'),
                'locations': ad.get('location'),
            }
25 | ||
26 | ||
27 |