View difference between Paste ID: VQQkDDpR and r9FkJGTs
SHOW: | | - or go back to the newest paste.
1
import scrapy
2-
import jsonhttps://pastebin.com/
2+
import json
3
4
class OlxHouses(scrapy.Spider):
    """Spider that scrapes ad listings from OLX Brazil result pages.

    Each result page is expected to embed its data as JSON inside the
    ``<script id="__NEXT_DATA__">`` tag; the ads are read from
    ``props.pageProps.ads`` and one item (title, price, location) is
    yielded per ad.

    NOTE(review): despite the class name, the start URLs target the
    ``eletronicos-e-celulares`` category -- confirm which is intended.
    """

    name = 'olx'

    custom_settings = {
        'USER_AGENT': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36',
        'AUTOTHROTTLE_ENABLED': True,
    }

    def start_requests(self):
        """Yield one request for each of the result pages 1 through 100."""
        for page in range(1, 101):
            yield scrapy.Request(f'https://www.olx.com.br/eletronicos-e-celulares/estado-sp?o={page}')

    def parse(self, response, **kwargs):
        """Extract ads from the JSON embedded in a result page.

        Yields:
            dict: ``title``, ``price`` and ``locations`` for each ad.

        A page without the expected script tag, with unparsable JSON, or
        without an ``ads`` list is logged and skipped instead of raising,
        so one bad page does not produce a traceback for the whole crawl.
        """
        raw = response.xpath('//script[@id="__NEXT_DATA__"]/text()').get()
        if raw is None:
            # .get() returns None when nothing matches; json.loads(None)
            # would raise TypeError.
            self.logger.warning('No __NEXT_DATA__ script found on %s', response.url)
            return

        try:
            data = json.loads(raw)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            self.logger.warning('Invalid __NEXT_DATA__ JSON on %s', response.url)
            return

        # Walk props -> pageProps -> ads, tolerating a missing or
        # non-dict value at any level.
        node = data
        for key in ('props', 'pageProps'):
            node = node.get(key) if isinstance(node, dict) else None
        houses = node.get('ads') if isinstance(node, dict) else None
        if not houses:
            self.logger.warning('No ads found on %s', response.url)
            return

        for house in houses:
            if not isinstance(house, dict):
                continue
            yield {
                'title': house.get('title'),
                'price': house.get('price'),
                'locations': house.get('location'),
            }
25
26
27