Advertisement
Guest User

Product_page_json

a guest
Aug 9th, 2021
328
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.17 KB | None | 0 0
  1. import scrapy
  2. from scrapy.exceptions import CloseSpider
  3. import json
  4.  
  5. class HtmshopeeSpider(scrapy.Spider):
  6. name = 'shopeeitem2'
  7.  
  8. headers={
  9. 'authority': 'shopee.com.my',
  10. 'method': 'GET',
  11. 'path': '/api/v4/search/search_items?by=relevancy&keyword=chantiva&limit=60&newest=0&order=desc&page_type=search&scenario=PAGE_GLOBAL_SEARCH&version=2',
  12. 'scheme': 'https',
  13. 'accept': '*/*',
  14. 'accept-encoding': 'gzip, deflate, br',
  15. 'accept-language': 'en-US,en;q=0.9',
  16. 'cache-control': 'no-cache',
  17. 'cookie': 'private_content_version=75d921dc5d1fc85c97d8d9876d6e58b2; _fbp=fb.2.1626162049790.1893904607; _ga=GA1.3.518387377.1626162051; _gid=GA1.3.151467354.1626162051; _gcl_au=1.1.203553443.1626162051; x_axis_main=v_id:017a9ecfb7ba000a4be21b24a20803079001c0710093c$_sn:1$_ss:1$_pn:1%3Bexp-session$_st:1626163851002$ses_id:1626162051002%3Bexp-session',
  18. 'if-none-match-': '55b03-676eb00af72df9e2b38a2976dd41d5ea',
  19. 'pragma': 'no-cache',
  20. 'referer': 'https://shopee.com.my/search?keyword=chantiva&page=0',
  21. 'sec-ch-ua': '" Not;A Brand";v="99", "Google Chrome";v="91", "Chromium";v="91"',
  22. 'sec-ch-ua-mobile': '?0',
  23. 'sec-fetch-dest': 'empty',
  24. 'sec-fetch-mode': 'cors',
  25. 'sec-fetch-site': 'same-origin',
  26. 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36',
  27. 'x-api-source': 'pc',
  28. 'x-requested-with': 'XMLHttpRequest',
  29. 'x-shopee-language': 'en'
  30. }
  31.  
  32. position = 0
  33.  
  34. def start_requests(self):
  35. yield scrapy.Request(
  36. url= 'https://shopee.com.my/api/v4/search/search_items?by=relevancy&keyword=chantiva&limit=60&newest=0&order=desc&page_type=search&scenario=PAGE_GLOBAL_SEARCH&version=2',
  37. headers=self.headers,
  38. callback=self.parse_products,
  39. meta={
  40. 'newest':0
  41. }
  42. )
  43.  
  44.  
  45. def parse_products(self, response):
  46. json_resp = json.loads(response.body)
  47. products = json_resp.get('items')
  48.  
  49. for product in products:
  50. item_id = product.get('item_basic').get('itemid'),
  51. shop_id = product.get('item_basic').get('shopid')
  52.  
  53. yield scrapy.Request(
  54. url=f"https://shopee.com.my/api/v2/item/get?itemid={item_id}&shopid={shop_id}",
  55. callback=self.parse_data,
  56. headers=self.headers
  57. )
  58.  
  59. def parse_data(self, response):
  60. json_resp = json.loads(response.body)
  61. datas = json_resp.get('item')
  62.  
  63. for data in datas:
  64. yield {
  65. 'product': data.get('name')
  66. }
  67.  
  68.  
  69.  
  70. count= 240000
  71.  
  72. next_page = response.meta['newest'] + 60
  73.  
  74.  
  75. if next_page <= count:
  76. yield scrapy.Request(
  77. url=f"https://shopee.com.my/api/v4/search/search_items?by=relevancy&keyword=chantiva&limit=60&newest={next_page}&order=desc&page_type=search&scenario=PAGE_GLOBAL_SEARCH&version=2",
  78. headers=self.headers,
  79. meta={'newest': next_page}
  80. )
  81.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement