Advertisement
skip420

Amazon_info_scraper

Mar 29th, 2020
1,288
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.54 KB | None | 0 0
  1. import datetime
  2. from bs4 import BeautifulSoup
  3. import time
  4. from selenium import webdriver
  5. import re
  6. import csv
  7.  
  8.  
  9.  
  10.  
  11.  
  12. keyword = 'blue+skateboard'
  13. driver = webdriver.Chrome()
  14.  
  15. url = 'https://www.amazon.co.uk/s/ref=nb_sb_noss_2?url=search-alias%3Daps&field-keywords={}'
  16.  
  17. driver.get(url.format(keyword))
  18. soup = BeautifulSoup(driver.page_source, 'lxml')
  19. results = soup.select('.s-result-list [data-asin]')
  20.  
  21. for a, b in enumerate(results):
  22.     soup = b
  23.     header = soup.select_one('h5')
  24.     result = a + 1
  25.     title = header.text.strip()
  26.  
  27.     try:
  28.         link = soup.select_one('h5 > a')
  29.         url = link['href']
  30.         url = re.sub(r'/ref=.*', '', str(url))
  31.     except:
  32.         url = "None"
  33.  
  34.     if url !='/gp/slredirect/picassoRedirect.html':
  35.         ASIN = re.sub(r'.*/dp/', '', str(url))
  36.         #print(ASIN)
  37.  
  38.         try:
  39.             score = soup.select_one('.a-icon-alt')
  40.             score = score.text
  41.             score = score.strip('\n')
  42.             score = re.sub(r' .*', '', str(score))
  43.         except:
  44.             score = "None"
  45.  
  46.         try:
  47.             reviews = soup.select_one("href*='#customerReviews']")
  48.             reviews = reviews.text.strip()
  49.         except:
  50.             reviews = "None"
  51.  
  52.         try:
  53.             PRIME = soup.select_one('[aria-label="Amazon Prime"]')
  54.             PRIME = PRIME['aria-label']
  55.         except:
  56.             PRIME = "None"
  57.         data = {keyword:[keyword,str(result),title,ASIN,score,reviews,PRIME,datetime.datetime.today().strftime("%B %d, %Y")]}
  58.         print(data)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement