Guest User

Untitled

a guest
Jul 25th, 2020
105
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.99 KB | None | 0 0
  1. from selenium import webdriver
  2. from webdriver_manager.chrome import ChromeDriverManager
  3. from bs4 import BeautifulSoup
  4. from selenium.webdriver.common.by import By
  5. import time
  6.  
  7. URL = "https://www.morganlewis.com/our-people"
  8. HOST = "https://www.morganlewis.com"
  def get_driver():
      """Create and return a Chrome WebDriver instance.

      The chromedriver binary path is obtained from
      ``ChromeDriverManager().install()`` and handed to ``webdriver.Chrome``.
      """
      return webdriver.Chrome(ChromeDriverManager().install())
  12.  
  def _first_child_attributes(containers, tag, attribute):
      """Collect `attribute` from the first `tag` descendant of each container.

      Containers without a matching descendant are skipped instead of raising
      NoSuchElementException, so one incomplete profile does not abort the run.
      """
      values = []
      for container in containers:
          matches = container.find_elements(By.TAG_NAME, tag)
          if matches:
              values.append(matches[0].get_attribute(attribute))
      return values


  # Compound class names (containing a space) are rejected by class-name
  # locators, so these two targets are expressed as CSS selectors instead.
  ACCORDION_SELECTOR = '.collapse-expand-cont.bio-accordion-listing'
  PUBLICATION_SELECTOR = '.block.print-Publication'

  url_list = []
  bio_info = []

  driver = get_driver()
  try:
      driver.get(URL)
      # Fixed pause before reading the page source, presumably so that
      # client-side content has time to render.
      time.sleep(6)
      html = driver.page_source
      soup = BeautifulSoup(html, 'html.parser')
      items = soup.find_all('div', class_='c-content-team__card')

      # Build the absolute profile URL for every card that contains a link.
      for item in items:
          link = item.find('a', href=True)
          if link is None:
              continue
          url_list.append(HOST + link.get('href'))

      # Visit each profile and append its details to the flat bio_info list,
      # in this order: image, phone links, name, e-mail, departments,
      # accordion titles, publication links.
      for url in url_list:
          driver.get(url)

          thumbnails = driver.find_elements(By.CLASS_NAME, 'thumbnail')
          bio_info.extend(_first_child_attributes(thumbnails, 'img', 'src'))

          phone_numbers = driver.find_elements(By.CLASS_NAME, 'underline')
          bio_info.extend(_first_child_attributes(phone_numbers, 'a', 'href'))

          for heading in driver.find_elements(By.TAG_NAME, 'h1'):
              bio_info.append(heading.text)

          # The original called get_attribute() on a list of elements here,
          # which raises AttributeError; read the first anchor instead.
          email = driver.find_elements(By.ID, 'bio-mail-id')
          bio_info.extend(_first_child_attributes(email, 'a', 'href'))

          sectors = driver.find_elements(By.CLASS_NAME, 'person-depart-info')
          bio_info.extend(_first_child_attributes(sectors, 'a', 'title'))

          sector = driver.find_elements(By.CSS_SELECTOR, ACCORDION_SELECTOR)
          bio_info.extend(_first_child_attributes(sector, 'a', 'title'))

          # Every link inside each accordion listing of a publication block.
          for block in driver.find_elements(By.CSS_SELECTOR, PUBLICATION_SELECTOR):
              for listing in block.find_elements(By.CSS_SELECTOR, ACCORDION_SELECTOR):
                  for anchor in listing.find_elements(By.TAG_NAME, 'a'):
                      bio_info.append(anchor.get_attribute('href'))
  finally:
      # Always shut the browser down so no Chrome/chromedriver process is
      # left running, even when a page load or lookup fails.
      driver.quit()

  print(bio_info)
  53.  
  54.  
  55.  
  56.  
Advertisement
Add Comment
Please, Sign In to add comment