Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Scrape Amazon search-result pages with Selenium (through an HTTP proxy)
# and write one CSV row per product: title, price, review text and ASIN.
#
# The script targets the Selenium 3 API (find_element_by_* helpers and the
# executable_path / chrome_options keyword arguments of webdriver.Chrome).
import csv
import io

import requests
from bs4 import BeautifulSoup
from lxml import html
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException

# Search-result pages to scrape.
links = [
    'https://www.amazon.com/s/ref=sr_pg_1?fst=as%3Aoff&rh=n%3A1055398%2Cn%3A1063306%2Ck%3Aas&keywords=as&ie=UTF8&qid=1532070774'
]

# Proxy endpoint per URL scheme.
proxies = {
    'http': 'http://218.50.2.102:8080',
    'https': 'http://185.93.3.123:8080'
}

chrome_options = webdriver.ChromeOptions()
# Chrome expects "scheme=proxy;scheme=proxy". The value must NOT be wrapped
# in literal double quotes: no shell is involved, so the quotes would reach
# Chrome as part of the proxy specification.
proxy_spec = ';'.join('%s=%s' % (scheme, url) for scheme, url in proxies.items())
chrome_options.add_argument('--proxy-server=%s' % proxy_spec)

# Raw string so the backslashes are not treated as escapes ("\U" in a normal
# string is a syntax error in Python 3).
# NOTE(review): the path separators were missing in the original paste and
# have been reconstructed here -- confirm the real chromedriver location.
driver = webdriver.Chrome(
    executable_path=r"C:\Users\Andrei-PC\Downloads\webdriver\chromedriver.exe",
    chrome_options=chrome_options,
)

header = ['Product title', 'Product price', 'Review', 'ASIN']


def _text_or_default(xpath, default):
    """Return the text of the first element matching *xpath*.

    When no element matches, print *default* (as the original script did)
    and return it instead.
    """
    try:
        # .text is a property, not a method.
        return driver.find_element_by_xpath(xpath).text
    except NoSuchElementException:
        print(default)
        return default


try:
    # Open the output once: newline="" prevents blank lines between rows on
    # Windows and utf-8 keeps non-ASCII product titles intact.
    with io.open('csv/demo.csv', "w", newline="", encoding="utf-8") as output:
        writer = csv.writer(output)
        writer.writerow(header)

        for i, link in enumerate(links):
            driver.get(link)

            for position in range(0, 23):
                item_xpath = '//li[@id="result_{}"]'.format(position)

                title_links = driver.find_elements_by_xpath(
                    item_xpath + '/div/div[3]/div/a')
                if not title_links:
                    # No product at this position: skip it rather than
                    # writing a stale or undefined row.
                    print('no items v3 ')
                    continue
                title = title_links[0].text

                price = _text_or_default(
                    item_xpath + '/div/div[5]/div/a/span[2]', 'No price v2')
                review = _text_or_default(
                    item_xpath + '/div/div[6]/span', 'No review v1')

                try:
                    asin = driver.find_element_by_id(
                        'result_{}'.format(position)).get_attribute('data-asin')
                except NoSuchElementException:
                    asin = 'No asin'
                    print('No asin')

                writer.writerow([title, price, review, asin])

            print('I solved this link %s' % link)
            print('Number of product %s' % (i + 1))

    # Kept from the original script; the value is not used anywhere yet.
    # Guarded so a missing element does not crash the run at the very end.
    try:
        stars = driver.find_element_by_class_name('a-icon-alt').get_attribute('textContent')
    except NoSuchElementException:
        stars = None
finally:
    # Always shut the browser down, even if scraping fails midway.
    driver.quit()
Add Comment
Please, Sign In to add comment