Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import json

import scrapy
class ProductsSpider(scrapy.Spider):
    """Scrape product titles for the query "laptop" from an Algolia index.

    The spider POSTs a multi-query search request to the Algolia endpoint,
    yields one item per hit, and then requests the following result page
    until the page count reported by the response is exhausted.
    """

    name = 'products'

    # First result page to request. Algolia pages are zero-based, so 0 is the
    # first page. This mapping is only read, never mutated: the page to fetch
    # next is derived from each response instead of shared class state.
    page_num = {'page': 0}

    search_url = (
        'https://vtvkm5urpx-1.algolianet.com/1/indexes/*/queries'
        '?x-algolia-agent=Algolia%20for%20JavaScript%20(4.8.6)%3B%20Browser'
    )
    search_headers = {
        'content-type': 'application/x-www-form-urlencoded',
        'x-algolia-api-key': 'a0c0108d737ad5ab54a0e2da900bf040',
        'x-algolia-application-id': 'VTVKM5URPX',
    }
    index_name = 'shopify_products'

    # URL-encoded search parameters; ``{page}`` is the only placeholder.
    _params_template = (
        'highlightPreTag=%3Cais-highlight-0000000000%3E'
        '&highlightPostTag=%3C%2Fais-highlight-0000000000%3E'
        '&clickAnalytics=true'
        '&query=laptop'
        '&filters=(price%20%3E%200%20AND%20product_published%20%3D%201'
        '%20AND%20availability.displayProduct%20%3D%201)'
        '&hitsPerPage=36'
        '&distinct=true'
        '&maxValuesPerFacet=100'
        '&page={page}'
        '&facets=%5B%22facets.Brand%22%2C%22facets.Price%22'
        '%2C%22facets.Category%22%2C%22facets.Availability%22%5D'
        '&tagFilters='
    )

    # Request payload for the first page, kept as an attribute for code that
    # still reads ``spider.body``. Later pages are built by ``_build_body``.
    body = json.dumps({
        'requests': [{
            'indexName': index_name,
            'params': _params_template.format(page=page_num['page']),
        }],
    })

    def _build_body(self, page):
        """Return the JSON request payload asking for result page *page*."""
        return json.dumps({
            'requests': [{
                'indexName': self.index_name,
                'params': self._params_template.format(page=page),
            }],
        })

    def _search_request(self, page):
        """Return the POST request that fetches result page *page*."""
        return scrapy.Request(
            url=self.search_url,
            method='POST',
            headers=self.search_headers,
            # The body differs per page, so each request has its own
            # fingerprint and is not dropped by the duplicate filter.
            body=self._build_body(page),
            callback=self.parse,
        )

    def start_requests(self):
        """Yield the request for the first result page."""
        yield self._search_request(self.page_num['page'])

    def parse(self, response):
        """Yield one item per hit, then the request for the next page.

        Args:
            response: Response whose body is the JSON search result.

        Yields:
            A dict ``{'title': ...}`` for every hit, followed by at most one
            ``scrapy.Request`` for the next page.
        """
        payload = json.loads(response.body)
        results = payload.get('results') or []
        if not results:
            self.logger.warning('No "results" in response from %s', response.url)
            return
        result = results[0]

        for hit in result.get('hits') or []:
            yield {
                'title': hit.get('primary_title')
            }

        current_page = result.get('page')
        total_pages = result.get('nbPages')
        if current_page is None or total_pages is None:
            return
        # Pages are zero-based, so the last valid page is total_pages - 1.
        if current_page + 1 < total_pages:
            yield self._search_request(current_page + 1)
Advertisement
Add Comment
Please, Sign In to add comment