Not a member of Pastebin yet?
                        Sign Up,
                        it unlocks many cool features!                    
                - import scrapy
 - import json
 - class OlxHouses(scrapy.Spider):
 - name = 'olx'
 - custom_settings = {
 - 'USER_AGENT' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36',
 - 'AUTOTHROTTLE_ENABLED': True,
 - }
 - def start_requests(self):
 - for page in range(1,101):
 - yield scrapy.Request(f'https://www.olx.com.br/eletronicos-e-celulares/estado-sp?o={page}')
 - def parse(self, response, **kwargs):
 - html = json.loads(response.xpath('//script[@id="__NEXT_DATA__"]/text()').get())
 - houses = html.get('props').get('pageProps').get('ads')
 - for house in houses:
 - yield{
 - 'title' : house.get('title'),
 - 'price' : house.get('price'),
 - 'locations' : house.get('location')
 - }
 
Advertisement
 
                    Add Comment                
                
                        Please, Sign In to add comment