Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import csv
from random import randint
from time import sleep

from bs4 import BeautifulSoup
from fake_useragent import UserAgent
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
# TODO: consider explicit waits for every element lookup, not only the title.
# See https://selenium-python.readthedocs.io/waits.html -- WebDriverWait blocks
# until an element is found and raises TimeoutException if it never appears.

CHROMEDRIVER_PATH = '/Users/kenny/Dropbox/Python/WebScrapping/Others/chromedriver'
SEARCH_URL = 'https://www.ebay.com/sch/i.html?_from=R40&_nkw=watches&_sacat=0&_pgn='
OUTPUT_CSV = 'ebay_watches.csv'

# NOTE(review): the page index starts at 0 as in the original script; eBay
# result pages look 1-based, so page 0 may duplicate page 1 -- confirm.
PAGE_NUMBERS = range(0, 999)

# Seconds to wait for the item title before giving up on the page load.
PAGE_LOAD_TIMEOUT = 5

TITLE_XPATH = '//*[@id="itemTitle"]'
# Tried in order; the first one that yields non-blank text is the price.
PRICE_XPATHS = ('//*[@id="prcIsum"]', '//*[@id="mm-saleDscPrc"]')


def first_non_empty(values):
    """Return the first value that is non-blank after stripping, else ''.

    ``None`` entries are treated as blank so callers can pass optional
    lookups straight through.
    """
    for value in values:
        text = (value or "").strip()
        if text:
            return text
    return ""


def unique_item_links(hrefs):
    """Return the hrefs containing 'itm', de-duplicated, in first-seen order.

    A search result page links to the same listing from several anchors
    (image, title, ...); visiting each copy would write duplicate CSV rows.
    """
    seen = set()
    links = []
    for href in hrefs:
        if 'itm' in href and href not in seen:
            seen.add(href)
            links.append(href)
    return links


def element_text(driver, xpath):
    """Return the stripped text of the element at *xpath*, or '' if absent."""
    try:
        return driver.find_element(By.XPATH, xpath).text.strip()
    except NoSuchElementException:
        return ""


def soup_text(soup, tag, css_class):
    """Return the text of the first *tag* with *css_class*, or '' if absent."""
    node = soup.find(tag, {'class': css_class})
    return node.text if node is not None else ""


def new_driver():
    """Start Chrome with a freshly randomized user-agent and return it."""
    options = Options()
    user_agent = UserAgent().random
    print(user_agent)
    options.add_argument(f'user-agent={user_agent}')
    # The options must be handed to the driver, otherwise the random
    # user-agent is silently ignored.
    # NOTE(review): the positional chromedriver path is kept from the original;
    # recent Selenium releases expect a Service object instead -- confirm
    # against the installed version.
    driver = webdriver.Chrome(CHROMEDRIVER_PATH, options=options)
    driver.maximize_window()
    return driver


def scrape_item(driver, url):
    """Open one listing and return its ``(title, price, num_sold)`` strings.

    Every field falls back to '' when the page does not contain it, so a
    single odd listing never aborts the whole run.
    """
    driver.get(url)
    try:
        WebDriverWait(driver, PAGE_LOAD_TIMEOUT).until(
            EC.presence_of_element_located((By.XPATH, TITLE_XPATH))
        )
    except TimeoutException:
        # Best effort: still try to read whatever did load.
        print("Timed out waiting for page to load")

    title = element_text(driver, TITLE_XPATH)
    price = first_non_empty(element_text(driver, xpath) for xpath in PRICE_XPATHS)

    soup = BeautifulSoup(driver.page_source, 'lxml')
    # The second selector was computed but never used originally; it is
    # presumably the alternative "sold" markup, so use it as a fallback.
    total_sold_price = first_non_empty((
        soup_text(soup, 'span', 'vi-qtyS-hot-red'),
        soup_text(soup, 'a', 'vi-txt-underline'),
    ))
    return title, price, total_sold_price


def main():
    """Scrape eBay watch listings page by page into ``ebay_watches.csv``."""
    # newline='' is what the csv module requires to avoid blank rows on
    # Windows; utf-8 keeps non-ASCII listing titles from crashing the write.
    with open(OUTPUT_CSV, 'w', newline='', encoding='utf-8') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(['title', 'price', 'numSold'])

        for page in PAGE_NUMBERS:
            # One browser per result page so each page gets a new user-agent.
            driver = new_driver()
            try:
                driver.get(SEARCH_URL + str(page))
                soup = BeautifulSoup(driver.page_source, 'lxml')
                item_links = unique_item_links(
                    anchor['href'] for anchor in soup.find_all('a', href=True)
                )

                for item_url in item_links:
                    print(item_url)
                    title, price, total_sold_price = scrape_item(driver, item_url)

                    print("title: ", title)
                    print("price: ", price)
                    print("total_sold_price: ", total_sold_price)
                    print("\n")
                    writer.writerow([title, price, total_sold_price])

                    # Random pause between listings to avoid hammering the site.
                    sleep(randint(1, 3))
            finally:
                # quit() ends the whole browser process, not just the window,
                # even when scraping the page raised.
                driver.quit()


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement