Advertisement
sarafg11

Untitled

Nov 25th, 2020 (edited)
59
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.13 KB | None | 0 0
  1. {
  2. "start_urls":
  3. [
  4. "https://www.amazon.ca/s?k=whitewave+silk&i=grocery&dc&ref=a9_asc_1",
  5. "https://www.amazon.ca/s?k=international+delight&i=grocery&ref=nb_sb_noss",
  6. "https://www.amazon.ca/s?k=so+delicious&i=grocery&ref=nb_sb_noss_1"
  7. ],
  8. "fields": {
  9. "name": "//h1[@id='title']/span[@id='productTitle']/text()",
  10. "price": "//span[@id='priceblock_ourprice']/text()",
  11. "price_old": "//span[@class='priceBlockStrikePriceString a-text-strike']/text()",
  12. "reference": "",
  13. "image": "(//script[contains(.,'ImageBlockATF')])[1]/text()",
  14. "description": "//div[@id='productDescription']//text()",
  15. "brand": "",
  16. "brand_text": "//*[@id='bylineInfo_feature_div']/div/a/text()",
  17. "categories": "",
  18. "availability": "//div[contains(@id, 'availabilityInsideBuyBox')]//div[@id='availability']/span/text() | //div[@id='availability_feature_div']/div[@id='availability']/span/text()",
  19. "rating_average": "(//div[@id='averageCustomerReviews_feature_div']/div[@id='averageCustomerReviews']/span[@class='a-declarative']/span[@id='acrPopover']/@title)[1]",
  20. "rating_count": "(//span[@id='acrCustomerReviewText']/text())[1]",
  21. "barcode": "",
  22. "stock": "",
  23. "seller": "//div[@id='merchant-info']/a[@id='sellerProfileTriggerId']/text()"
  24. },
  25. "links": {
  26. "detail": "//h2/a[contains(@class,'a-text-normal')]",
  27. "next_page": "(//ul[@class='a-pagination']//a[(contains(.,'Next')) and not(contains(@class,'carousel-goto-nextpage'))])[last()]"
  28. },
  29. "regex": {
  30. "rating_average": ["(\\d+(?:[.,]\\d+)?)", 1],
  31. "rating_count": ["(.*?)ratings?", 1],
  32. "price": ["(\\d.*)", 1],
  33. "price_old": ["(\\d.*)", 1],
  34. "brand_text": ["Brand:\\s(.*)", 1],
  35. "image": ["hiRes\\\":\\\"(http.*?\\.jpg)", 1],
  36. "availability": ["(Only \\d+ left )?(.*)", 2],
  37. "url": "^.*(?=\/ref=.*)|^.*"
  38. },
  39. "list": {
  40. "product": "//div[@class='s-item-container'] | //h2/a[contains(@class,'a-text-normal') and not(contains(@href,'pantry'))]/ancestor::div[@data-index]/div[@class='sg-col-inner']",
  41. "page": "//div[@id='pagn']/span[@class='pagnCur']/text() | //ul[@class='a-pagination']/li[@class='a-selected']/a/text()"
  42. }
  43. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement