Fazlul

Untitled

Jun 6th, 2021
117
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.73 KB | None | 0 0
  1. import scrapy
  2. from scrapy import FormRequest
  3. from scrapy.exceptions import CloseSpider
  4. import json
  5.  
  6. class BaierlSpider(scrapy.Spider):
  7. name = 'rental'
  8.  
  9. def start_requests(self):
  10. yield scrapy.Request(
  11. url='https://rent.591.com.hk/?hl=en-us#list',
  12. method = 'GET',
  13. headers={'X-Requested-With':'XMLHttpRequest',
  14. 'Referer':'https://rent.591.com.hk/?hl=en-us'},
  15. meta={
  16. 'current_page': 1,
  17. 'total_page': 275},
  18. callback=self.parse
  19. )
  20.  
  21. def parse(self, response):
  22. resp = json.loads(response.body)
  23. hits = resp.get('items')
  24. for h in hits:
  25. yield {
  26. 'Title': h.get('title'),
  27. 'pricce':h.get('price')}
  28.  
  29.  
  30. total_page_num = response.meta['total_page']
  31. next_page = response.meta['current_page']+1
  32.  
  33. payload = {
  34. 'm':'home',
  35. 'c':'search',
  36. 'a':'rslist',
  37. 'v':'new',
  38. 'type':'1',
  39. 'region':'6',
  40. 'searchtype':'1',
  41. 'p':str(next_page),
  42. 'role':''
  43. }
  44.  
  45.  
  46. if next_page <= total_page_num:
  47.  
  48. yield scrapy.Request(
  49. url='https://rent.591.com.hk/?hl=en-us#list',
  50. meta={
  51. 'current_page': next_page
  52. },
  53. callback=self.parse,
  54. method = 'GET',
  55. headers={'X-Requested-With':'XMLHttpRequest',
  56. 'Referer':'https://rent.591.com.hk/?hl=en-us'},
  57. body = payload
  58. )
Advertisement
Add Comment
Please, Sign In to add comment