Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import scrapy
- from scrapy.exceptions import CloseSpider
- import json
class HtmshopeeSpider(scrapy.Spider):
    """Collect product names from shopee.com.my search results for 'chantiva'.

    Request flow:

    1. ``start_requests`` fetches the first search page (``newest=0``).
    2. ``parse_products`` schedules one item-detail request per search hit
       and then the next search page.
    3. ``parse_data`` yields ``{'product': <name>}`` for each item-detail
       response.
    """

    name = 'shopeeitem2'

    # Search endpoint template; ``newest`` is advanced by ``_page_size`` for
    # every page (presumably the result offset -- confirm against the API).
    _search_url_template = (
        'https://shopee.com.my/api/v4/search/search_items?by=relevancy'
        '&keyword=chantiva&limit=60&newest={newest}&order=desc'
        '&page_type=search&scenario=PAGE_GLOBAL_SEARCH&version=2'
    )
    # Step between consecutive ``newest`` values; matches ``limit=60`` above.
    _page_size = 60
    # Largest ``newest`` value that will still be requested.
    _max_newest = 240000

    # Headers sent with every request (search pages and item details alike).
    # NOTE(review): 'authority', 'method', 'path' and 'scheme' look like
    # HTTP/2 pseudo-headers copied without their leading colon, and 'path'
    # is fixed to the first search page -- confirm whether the API needs
    # them before removing. The cookie is a hard-coded session value.
    headers = {
        'authority': 'shopee.com.my',
        'method': 'GET',
        'path': '/api/v4/search/search_items?by=relevancy&keyword=chantiva&limit=60&newest=0&order=desc&page_type=search&scenario=PAGE_GLOBAL_SEARCH&version=2',
        'scheme': 'https',
        'accept': '*/*',
        'accept-encoding': 'gzip, deflate, br',
        'accept-language': 'en-US,en;q=0.9',
        'cache-control': 'no-cache',
        'cookie': 'private_content_version=75d921dc5d1fc85c97d8d9876d6e58b2; _fbp=fb.2.1626162049790.1893904607; _ga=GA1.3.518387377.1626162051; _gid=GA1.3.151467354.1626162051; _gcl_au=1.1.203553443.1626162051; x_axis_main=v_id:017a9ecfb7ba000a4be21b24a20803079001c0710093c$_sn:1$_ss:1$_pn:1%3Bexp-session$_st:1626163851002$ses_id:1626162051002%3Bexp-session',
        'if-none-match-': '55b03-676eb00af72df9e2b38a2976dd41d5ea',
        'pragma': 'no-cache',
        'referer': 'https://shopee.com.my/search?keyword=chantiva&page=0',
        'sec-ch-ua': '" Not;A Brand";v="99", "Google Chrome";v="91", "Chromium";v="91"',
        'sec-ch-ua-mobile': '?0',
        'sec-fetch-dest': 'empty',
        'sec-fetch-mode': 'cors',
        'sec-fetch-site': 'same-origin',
        'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36',
        'x-api-source': 'pc',
        'x-requested-with': 'XMLHttpRequest',
        'x-shopee-language': 'en',
    }

    # Not read anywhere in this class; kept so external users of the
    # attribute keep working.
    position = 0

    def _search_request(self, newest):
        """Build the search-page request for the given ``newest`` value."""
        return scrapy.Request(
            url=self._search_url_template.format(newest=newest),
            headers=self.headers,
            callback=self.parse_products,
            # Carried along so parse_products can compute the next page.
            meta={'newest': newest},
        )

    def start_requests(self):
        """Yield the request for the first search page."""
        yield self._search_request(0)

    def parse_products(self, response):
        """Schedule item-detail requests for one search page, then the next page.

        Yields one ``scrapy.Request`` per search hit that carries both an
        ``itemid`` and a ``shopid``, followed by the request for the next
        search page while the page was non-empty and the next ``newest``
        value does not exceed ``_max_newest``.
        """
        payload = json.loads(response.body)
        # 'items' may be missing or null; treat both as an empty page.
        products = payload.get('items') or []

        for product in products:
            basic = product.get('item_basic') or {}
            # No trailing comma here: a tuple would be rendered as
            # "(123,)" inside the URL below.
            item_id = basic.get('itemid')
            shop_id = basic.get('shopid')
            if item_id is None or shop_id is None:
                continue
            yield scrapy.Request(
                url=f"https://shopee.com.my/api/v2/item/get?itemid={item_id}&shopid={shop_id}",
                callback=self.parse_data,
                headers=self.headers,
            )

        if not products:
            # An empty page means there is nothing further to paginate over.
            return

        next_page = response.meta.get('newest', 0) + self._page_size
        if next_page <= self._max_newest:
            yield self._search_request(next_page)

    def parse_data(self, response):
        """Yield ``{'product': <name>}`` for the item in a detail response.

        The value under ``'item'`` is accepted either as a single dict or as
        a list of dicts; any other value (including a missing key) yields
        nothing.
        """
        payload = json.loads(response.body)
        item = payload.get('item')

        if isinstance(item, dict):
            # Iterating a dict directly would walk its keys (strings), on
            # which .get('name') fails -- wrap it instead.
            records = [item]
        elif isinstance(item, list):
            records = item
        else:
            records = []

        for record in records:
            if isinstance(record, dict):
                yield {
                    'product': record.get('name')
                }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement