Advertisement
alperiox

selenium

Nov 29th, 2022
1,042
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.03 KB | None | 0 0
  1. import time
  2.  
  3. from selenium import webdriver
  4. from selenium.webdriver.chrome.service import Service
  5. from selenium.webdriver.common.by import By
  6. from selenium.webdriver.support import expected_conditions as EC
  7. from selenium.webdriver.support.ui import WebDriverWait
  8. # web driver manager: https://github.com/SergeyPirogov/webdriver_manager
  9. # will help us automatically download the web driver binaries
  10. # then we can use `Service` to manage the web driver's state.
  11. from webdriver_manager.chrome import ChromeDriverManager
  12.  
  13. def extract(element):
  14.     title = element.find_element(By.CSS_SELECTOR, "div.preview-title").text
  15.     author = element.find_element(By.CSS_SELECTOR, "div.preview-author").text
  16.     rating = element.find_element(By.CSS_SELECTOR, "div.preview-details p.preview-rating").text
  17.     price = element.find_element(By.CSS_SELECTOR, "div.preview-details p.preview-price").text
  18.  
  19.     return {"title": title, 'author': author, 'rating': rating, 'price': price}        
  20.  
  21. # start the timer
  22. start = time.time()
  23.  
  24. options = webdriver.ChromeOptions()
  25. options.headless = True
  26. # this returns the path web driver downloaded
  27. chrome_path = ChromeDriverManager().install()
  28. # define the chrome service and pass it to the driver instance
  29. chrome_service = Service(chrome_path)
  30. driver = webdriver.Chrome(service=chrome_service, options=options)
  31.  
  32. url = "https://danube-webshop.herokuapp.com/"
  33.  
  34. driver.get(url)
  35. # get the first page and click to the its link
  36. # first element will be the Crime & Thrillers category
  37. time.sleep(1)
  38. crime_n_thrillers = driver.find_element(By.CSS_SELECTOR, "ul[class='sidebar-list'] > li")
  39. print(crime_n_thrillers)
  40. crime_n_thrillers.click()
  41. time.sleep(1)
  42. # get the data div and extract the data using beautifulsoup
  43. books = driver.find_elements(By.CSS_SELECTOR, "div.shop-content li.preview")
  44.  
  45. extracted_data = []
  46. print(books)
  47. for element in books:
  48.     data = extract(element)
  49.     extracted_data.append(data)
  50.     print(data)
  51.  
  52. end = time.time()
  53.  
  54. print(f"The whole script took: {end-start:.4f}")
  55.  
  56. driver.quit()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement