Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
"""Print one record per listing found on an Amazon UK search-results page.

The script opens the search page for ``keyword`` in Chrome via Selenium,
parses the rendered HTML with BeautifulSoup and prints, for every result
element, a dict of the form ``{keyword: [keyword, position, title, ASIN,
score, reviews, prime, date]}``.  Fields that cannot be found on the page
are reported as the string ``"None"``.
"""

import csv
import datetime
import re
import time

from bs4 import BeautifulSoup
from selenium import webdriver

# Placeholder written into a record when a field is absent from the page.
MISSING = "None"

# Results whose (ref-stripped) link equals this path carry no /dp/<ASIN>
# segment, so no ASIN can be derived from them.
# NOTE(review): presumably these are sponsored/ad redirects - confirm.
REDIRECT_PATH = '/gp/slredirect/picassoRedirect.html'


def _parse_result(item, position, search_keyword):
    """Build the record for a single search-result element.

    Args:
        item: BeautifulSoup tag for one ``[data-asin]`` result element.
        position: 1-based position of the element in the result list.
        search_keyword: the keyword the search was run with.

    Returns:
        A list ``[keyword, position, title, ASIN, score, reviews, prime,
        date]`` of strings, or ``None`` when the result links to
        ``REDIRECT_PATH`` and therefore has no ASIN to report.
    """
    # Title: a result without an <h5> gets the placeholder instead of
    # aborting the whole run with an AttributeError.
    header = item.select_one('h5')
    title = header.text.strip() if header is not None else MISSING

    # Product path: the link's href with any trailing "/ref=..." removed.
    link = item.select_one('h5 > a')
    href = link.get('href') if link is not None else None
    if href is None:
        product_path = MISSING
    else:
        product_path = re.sub(r'/ref=.*', '', str(href))

    if product_path == REDIRECT_PATH:
        return None

    # Whatever follows "/dp/" in the product path is taken as the ASIN.
    asin = re.sub(r'.*/dp/', '', product_path)

    # Score: keep only the text before the first space of the icon's
    # alt text (e.g. "4.5 out of 5 stars" -> "4.5").
    score_node = item.select_one('.a-icon-alt')
    if score_node is None:
        score = MISSING
    else:
        score = re.sub(r' .*', '', score_node.text.strip('\n'))

    # Review count: text of the link pointing at "#customerReviews".  The
    # attribute selector needs its opening "[" - without it the selector is
    # invalid and the field could never be populated.
    reviews_node = item.select_one("[href*='#customerReviews']")
    reviews = reviews_node.text.strip() if reviews_node is not None else MISSING

    # Prime flag: the aria-label of the Prime badge when one is present.
    prime_node = item.select_one('[aria-label="Amazon Prime"]')
    prime = prime_node['aria-label'] if prime_node is not None else MISSING

    scraped_on = datetime.datetime.today().strftime("%B %d, %Y")
    return [
        search_keyword,
        str(position),
        title,
        asin,
        score,
        reviews,
        prime,
        scraped_on,
    ]


keyword = 'blue+skateboard'
url = 'https://www.amazon.co.uk/s/ref=nb_sb_noss_2?url=search-alias%3Daps&field-keywords={}'

driver = webdriver.Chrome()
try:
    driver.get(url.format(keyword))
    soup = BeautifulSoup(driver.page_source, 'lxml')
finally:
    # Always shut the browser down, even if loading or parsing fails, so no
    # Chrome process is left behind.
    driver.quit()

results = soup.select('.s-result-list [data-asin]')
for index, item in enumerate(results):
    record = _parse_result(item, index + 1, keyword)
    if record is None:
        # No ASIN can be derived for redirect links; skip the listing.
        continue
    data = {keyword: record}
    print(data)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement