Advertisement
dereksir

Untitled

Aug 23rd, 2023
72
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.15 KB | None | 0 0
  1. import scrapy
  2. from selenium import webdriver
  3. from selenium.webdriver.chrome.service import Service
  4. from selenium.webdriver.chrome.options import Options
  5.  
  6. class SeleniumSpider(scrapy.Spider):
  7.     name = 'selenium_spider'
  8.     start_urls = ['https://angular.io/docs']
  9.    
  10.     # Generate normal scrapy request
  11.     def start_requests(self):
  12.         for url in self.start_urls:
  13.             yield scrapy.Request(url, self.parse, meta={'url': url})
  14.  
  15.     def parse(self, response):
  16.         url = response.meta['url']
  17.  
  18.         # Create a Service object for chromedriver
  19.         service = Service()
  20.  
  21.         # Create Chrome options
  22.         options = Options()
  23.         options.add_argument("--headless")  # Set the browser to run in headless mode
  24.        
  25.         # Create a new Selenium webdriver instance with the specified service and options
  26.         driver = webdriver.Chrome(service=service, options=options)
  27.  
  28.         # Use the driver to navigate to the URL
  29.         driver.get(url)
  30.  
  31.         # Extract the page title using Selenium
  32.         title = driver.title
  33.  
  34.         # Close the driver after use
  35.         driver.quit()
  36.  
  37.         yield {'title': title}
  38.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement