Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import scrapy
- from selenium import webdriver
- from selenium.webdriver.chrome.service import Service
- from selenium.webdriver.chrome.options import Options
class SeleniumSpider(scrapy.Spider):
    """Spider that re-renders each start URL in headless Chrome via Selenium.

    Scrapy performs the normal HTTP download; ``parse`` then drives a real
    browser to the same URL and yields the page title as the browser sees it
    (useful for JavaScript-rendered sites such as the Angular docs).
    """

    name = 'selenium_spider'
    start_urls = ['https://angular.io/docs']

    def start_requests(self):
        """Generate a normal Scrapy request for each start URL.

        The URL is stashed in ``meta`` so ``parse`` can hand the exact same
        address to Selenium, independent of any redirects Scrapy followed.
        """
        for url in self.start_urls:
            yield scrapy.Request(url, self.parse, meta={'url': url})

    def parse(self, response):
        """Fetch ``meta['url']`` with headless Chrome and yield its title.

        Parameters
        ----------
        response : scrapy.http.Response
            Scrapy's response for the request; only ``meta['url']`` is read.

        Yields
        ------
        dict
            One item of the form ``{'title': <page title>}``.
        """
        url = response.meta['url']
        # Service() with no path relies on Selenium Manager / PATH to
        # locate a matching chromedriver binary.
        service = Service()
        options = Options()
        options.add_argument("--headless")  # run without a visible window
        driver = webdriver.Chrome(service=service, options=options)
        try:
            driver.get(url)
            title = driver.title
        finally:
            # BUG FIX: the original only called quit() on the success path,
            # so any exception from driver.get() (timeout, DNS failure,
            # WebDriverException) leaked a Chrome process per failed request.
            driver.quit()
        yield {'title': title}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement