Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import sys
import time
import urllib.request
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common import exceptions
from selenium.common.exceptions import (
    ElementNotVisibleException,
    StaleElementReferenceException,
    TimeoutException,
)
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
# Location of the chromedriver binary and the news listing to scrape.
CHROMEDRIVER_PATH = 'C:/Users/User/webdriver/chromedriver.exe'
NEWS_URL = "https://paperpaper.ru/category/what/news/"

# Locators used on the listing page and on each article page.
LINK_CLASS = 'link-black'
TITLE_CLASS = 'text-title__title'
ARTICLE_SELECTOR = 'html body div div div article div div div.r-text-content'
MORE_BUTTON_SELECTOR = 'button.button-outline.r-container__more-button'

# Explicit-wait limits, in seconds.
LISTING_TIMEOUT = 30
ARTICLE_TIMEOUT = 20


def build_driver():
    """Start a Chrome session with the sandbox flags applied.

    Returns:
        The ``webdriver.Chrome`` instance; the caller is responsible for
        calling ``quit()`` on it.
    """
    chrome_options = Options()
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-setuid-sandbox")
    # The options object only takes effect when it is handed to the
    # constructor. The driver path stays positional, as this script has
    # always passed it.
    return webdriver.Chrome(CHROMEDRIVER_PATH, options=chrome_options)


def collect_article_urls(driver):
    """Return the ``href`` of every ``link-black`` element on the current page.

    Args:
        driver: WebDriver already showing the listing page.

    Returns:
        A list of href strings in page order. Elements without an href
        are left out.

    Raises:
        TimeoutException: if no matching element appears within
            ``LISTING_TIMEOUT`` seconds.
    """
    elements = WebDriverWait(driver, LISTING_TIMEOUT).until(
        EC.presence_of_all_elements_located((By.CLASS_NAME, LINK_CLASS))
    )
    # Read every href now: the element handles become stale as soon as the
    # browser navigates away, but plain strings stay usable.
    hrefs = (element.get_attribute('href') for element in elements)
    return [href for href in hrefs if href]


def read_article(driver, url):
    """Open *url* and return the article's ``(title, text)``.

    Raises:
        TimeoutException: if the title or the text container does not
            appear within ``ARTICLE_TIMEOUT`` seconds.
    """
    driver.get(url)
    title = WebDriverWait(driver, ARTICLE_TIMEOUT).until(
        EC.presence_of_element_located((By.CLASS_NAME, TITLE_CLASS))
    ).text
    text = WebDriverWait(driver, ARTICLE_TIMEOUT).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, ARTICLE_SELECTOR))
    ).text
    return title, text


def parse_news(driver):
    """Print every article linked from the news listing, then press "more".

    Each article is written to stdout as ``"<title>": <text>``. A link
    whose page lacks the expected title or text element is reported on
    stderr and skipped, so one odd page does not abort the run.

    Args:
        driver: WebDriver to drive; it is navigated to ``NEWS_URL`` first.
    """
    driver.get(NEWS_URL)

    for url in collect_article_urls(driver):
        try:
            title, article = read_article(driver, url)
        except TimeoutException:
            print('skipped (no article content): ' + url, file=sys.stderr)
            continue
        print('"' + title + '": ' + article)

    # The browser is on the last article now; reload the listing before
    # looking for its "more" button.
    driver.get(NEWS_URL)
    try:
        button = WebDriverWait(driver, LISTING_TIMEOUT).until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, MORE_BUTTON_SELECTOR))
        )
    except TimeoutException:
        print('"more" button not found on ' + NEWS_URL, file=sys.stderr)
        return
    # TODO: the items loaded by this click are not scraped yet.
    button.click()


def main():
    """Run the scraper and always shut the browser down afterwards."""
    driver = build_driver()
    try:
        parse_news(driver)
    finally:
        # quit() ends the browser and the chromedriver process even when
        # scraping fails part-way.
        driver.quit()


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement