Advertisement
Guest User

Untitled

a guest
Dec 14th, 2019
349
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.85 KB | None | 0 0
  1. from selenium import webdriver
  2. from selenium.webdriver.support.ui import WebDriverWait
  3. from selenium.webdriver.support import expected_conditions as EC
  4. from selenium.webdriver.common.by import By
  5. from selenium.webdriver.chrome.options import Options
  6. from selenium.common.exceptions import TimeoutException
  7. from bs4 import BeautifulSoup
  8. from random import randint
  9. from time import sleep
  10. from fake_useragent import UserAgent
  11. import csv
  12.  
  13. #todo
  14. # https://selenium-python.readthedocs.io/waits.html
  15. # Also try looking at webdriverwait, which helps with waiting until an element is found, and throws an Exception if the element is not found
  16.  
  17. f = csv.writer(open('ebay_watches.csv', 'w'))
  18. f.writerow(['title', 'price', 'numSold'])
  19.  
  20.  
  21. pages = []
  22.  
  23. for i in range(0, 999):
  24.     options = Options()
  25.     ua = UserAgent()
  26.     a = ua.random
  27.     user_agent = ua.random
  28.     print(user_agent)
  29.     options.add_argument(f'user-agent={user_agent}')
  30.     driver = webdriver.Chrome('/Users/kenny/Dropbox/Python/WebScrapping/Others/chromedriver')
  31.     driver.get('https://www.ebay.com/sch/i.html?_from=R40&_nkw=watches&_sacat=0&_pgn=' + str(i))
  32.     soup = BeautifulSoup(driver.page_source, 'lxml')
  33.     driver.maximize_window()
  34.  
  35.     tempList = []
  36.  
  37.     for link in soup.find_all('a', href=True):
  38.         if 'itm' in link['href']:
  39.             print(link['href'])
  40.             tempList.append(link['href'])
  41.  
  42.     array_length = len(tempList)
  43.  
  44.     for i in range(array_length):
  45.         driver.get(tempList[i])
  46.         timeout = 5
  47.  
  48.         try:
  49.             WebDriverWait(driver, timeout).until(EC.presence_of_element_located((By.XPATH, '//*[@id="itemTitle"]')))
  50.         except TimeoutException:
  51.             print("Timed out waiting for page to load")
  52.         try:
  53.             title = driver.find_element_by_xpath('//*[@id="itemTitle"]').text
  54.         except Exception as e:
  55.             title = ""
  56.         item = driver.find_element_by_xpath('//*[@id="prcIsum"]').text.strip().split()
  57.         if len(item.text) > 0:
  58.             price = item.text
  59.         item = driver.find_element_by_xpath('//*[@id="mm-saleDscPrc"]')
  60.         if len(item.text) > 0:
  61.             price = item.text
  62.         else:
  63.             price = ""
  64.         soup = BeautifulSoup(driver.page_source, 'lxml')
  65.         try:
  66.             total_sold_price = soup.find('span', {'class': 'vi-qtyS-hot-red'}).text
  67.         except Exception as e:
  68.             total_sold_price = ""
  69.         try:
  70.             total_sold_price2 = soup.find('a', {'class': 'vi-txt-underline'}).text
  71.         except Exception as e:
  72.             total_sold_price2 = ""
  73.  
  74.  
  75.         print("title: ", title)
  76.         print("price: ", price)
  77.         print("total_sold_price: ", total_sold_price)
  78.         print("\n")
  79.  
  80.         f.writerow([title, price, total_sold_price])
  81.  
  82.         i += 1
  83.  
  84.         sleep(randint(1, 3))
  85.  
  86.     i+=1
  87.  
  88.  
  89.  
  90.  
  91.  
  92. driver.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement