Advertisement
sarafg11

Untitled

Nov 13th, 2020 (edited)
64
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.93 KB | None | 0 0
  {
    "start_urls": [
      "https://www.amazon.ca/s?k=whitewave+silk&i=grocery&dc&ref=a9_asc_1",
      "https://www.amazon.ca/s?k=international+delight&i=grocery&ref=nb_sb_noss",
      "https://www.amazon.ca/s?k=so+delicious&i=grocery&ref=nb_sb_noss_1"
    ],
    "fields": {
      "name": "//h1[@id='title']/span[@id='productTitle']/text()",
      "price": "//span[@id='priceblock_ourprice']/text()",
      "price_old": "//span[@class='priceBlockStrikePriceString a-text-strike']/text()",
      "reference": "",
      "image": "(//script[contains(.,'ImageBlockATF')])[1]/text()",
      "description": "//div[@id='productDescription']//text()",
      "brand": "",
      "brand_text": "//*[@id='bylineInfo_feature_div']/div/a/text()",
      "categories": "",
      "availability": "//div[contains(@id, 'availabilityInsideBuyBox')]//div[@id='availability']/span/text() | //div[@id='availability_feature_div']/div[@id='availability']/span/text()",
      "rating_average": "(//div[@id='averageCustomerReviews_feature_div']/div[@id='averageCustomerReviews']/span[@class='a-declarative']/span[@id='acrPopover']/@title)[1]",
      "rating_count": "(//span[@id='acrCustomerReviewText']/text())[1]",
      "barcode": "",
      "stock": "",
      "seller": "//div[@id='merchant-info']/a[@id='sellerProfileTriggerId']/text()"
    },
    "links": {
      "detail": "//h2/a[contains(@class,'a-text-normal') and not(contains(@href,'pantry'))]|//div[@class='s-item-container' and not(self::*//img[contains(@src,'pantry')])]//h2/parent::a[contains(@class,'a-text-normal')]",
      "next_page": "(//ul[@class='a-pagination']//a[(contains(.,'Next')) and not(contains(@class,'carousel-goto-nextpage'))])[last()]"
    },
    "regex": {
      "rating_average": ["(\\d+(?:\\.\\d+)?)", 1],
      "rating_count": ["(.*?)ratings?", 1],
      "price": ["(\\d.*)", 1],
      "price_old": ["(\\d.*)", 1],
      "brand_text": ["Brand:\\s(.*)", 1],
      "image": ["hiRes\\\":\\\"(http.*?\\.jpg)", 1],
      "availability": ["(Only \\d+ left )?(.*)", 2]
    }
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement