# Untitled

from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import TimeoutException
from bs4 import BeautifulSoup
from random import randint
from time import sleep
from fake_useragent import UserAgent
import csv

# TODO:
# - Read https://selenium-python.readthedocs.io/waits.html
# - Look into WebDriverWait, which waits until an element is found and raises
#   an exception if the element is not found in time.
# - Investigate why this script yields duplicate links.


# Path to the local chromedriver binary used to launch Chrome.
CHROMEDRIVER_PATH = '/Users/kenny/Dropbox/Python/WebScrapping/Others/chromedriver'
# Search-results URL; the result page number is appended to it.
SEARCH_URL = 'https://www.ebay.com/sch/i.html?_from=R40&_nkw=watches&_sacat=0&_pgn='
OUTPUT_CSV = 'ebay_watches.csv'
# NOTE(review): the range starts at page 0, as in the original script; eBay may
# serve the same results for _pgn=0 and _pgn=1 -- TODO confirm.
PAGE_NUMBERS = range(0, 999)
# Seconds to wait for the item title to appear before scraping anyway.
ITEM_LOAD_TIMEOUT = 5
TITLE_XPATHS = ('//*[@id="itemTitle"]',)
# Tried in order; the second is the fallback when the first is absent.
PRICE_XPATHS = ('//*[@id="prcIsum"]', '//*[@id="mm-saleDscPrc"]')


def search_url(page):
    """Return the search-results URL for result page number *page*."""
    return SEARCH_URL + str(page)


def item_links(hrefs):
    """Return the hrefs that contain 'itm', without duplicates.

    The order of first appearance is preserved.  The same href can appear in
    more than one anchor on a results page, which previously made the script
    visit (and record) the same item several times.
    """
    return list(dict.fromkeys(href for href in hrefs if 'itm' in href))


def normalize_text(text):
    """Collapse whitespace runs in *text* to single spaces and trim the ends."""
    return ' '.join(text.split())


def first_text(driver, xpaths):
    """Return the text of the first element matched by any of *xpaths*.

    The XPaths are tried in order.  Returns '' when none of them matches.
    ``find_elements`` is used because it returns an empty list for a missing
    element instead of raising, so no blanket ``except`` is needed.
    """
    for xpath in xpaths:
        matches = driver.find_elements(By.XPATH, xpath)
        if matches:
            return matches[0].text
    return ''


def sold_count_text(soup):
    """Return the text of the "sold" element in *soup*, or '' if absent.

    Looks for a ``span.vi-qtyS-hot-red`` first and falls back to an
    ``a.vi-txt-underline``.
    """
    for tag, css_class in (('span', 'vi-qtyS-hot-red'), ('a', 'vi-txt-underline')):
        node = soup.find(tag, {'class': css_class})
        if node is not None:
            return node.text
    return ''


def scrape_item(driver, link):
    """Load the item page at *link* and return its CSV row.

    The row is ``[title, price, num_sold, link]``; fields that cannot be
    found on the page are empty strings.
    """
    driver.get(link)
    try:
        title_present = EC.presence_of_element_located((By.XPATH, TITLE_XPATHS[0]))
        WebDriverWait(driver, ITEM_LOAD_TIMEOUT).until(title_present)
    except TimeoutException:
        # Best effort: still try to scrape whatever did load.
        print("Timed out waiting for page to load")

    title = first_text(driver, TITLE_XPATHS)
    # Always a plain string (previously a list on one path, a string on the other).
    price = normalize_text(first_text(driver, PRICE_XPATHS))
    num_sold = sold_count_text(BeautifulSoup(driver.page_source, 'lxml'))
    return [title, price, num_sold, link]


def scrape_page(page, user_agent, writer):
    """Scrape every item linked from results page *page* into *writer*.

    A fresh Chrome instance using *user_agent* is started for the page and is
    always shut down afterwards, so browsers no longer accumulate.
    """
    options = Options()
    options.add_argument(f'user-agent={user_agent}')
    # The options must be handed to the driver, otherwise the user agent set
    # above has no effect.
    driver = webdriver.Chrome(CHROMEDRIVER_PATH, options=options)
    try:
        driver.maximize_window()
        driver.get(search_url(page))
        soup = BeautifulSoup(driver.page_source, 'lxml')
        links = item_links(anchor['href'] for anchor in soup.find_all('a', href=True))
        for link in links:
            print(link)

        for link in links:
            title, price, num_sold, _ = row = scrape_item(driver, link)
            print("title: ", title)
            print("price: ", price)
            print("total_sold_price: ", num_sold)
            print(link)
            print("\n")
            writer.writerow(row)
            # Random pause between item pages.
            sleep(randint(1, 3))
    finally:
        driver.quit()


def main():
    """Scrape the eBay watch search results into ``ebay_watches.csv``."""
    ua = UserAgent()
    # newline='' is what the csv module requires for files it writes to; the
    # with-block guarantees the file is flushed and closed.
    with open(OUTPUT_CSV, 'w', newline='', encoding='utf-8') as out_file:
        writer = csv.writer(out_file)
        # Header now matches the four fields written for every item.
        writer.writerow(['title', 'price', 'numSold', 'link'])
        for page in PAGE_NUMBERS:
            user_agent = ua.random
            print(user_agent)
            scrape_page(page, user_agent, writer)


if __name__ == "__main__":
    main()