Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import scrapy
- from selenium import webdriver
- from selenium.webdriver.chrome.options import Options
- from shutil import which
- from scrapy.selector import Selector
- from time import sleep
- from datetime import datetime
class ListSpider(scrapy.Spider):
    """Scrape balance-transfer credit card listings from moneyfacts.co.uk.

    The listing is opened in a Selenium-driven Chrome window, scrolled to the
    bottom so the JavaScript-generated product tiles are revealed, and then
    parsed from the browser's page source. The "Show More" control is clicked
    repeatedly until it can no longer be found or clicked.
    """

    name = 'list'
    allowed_domains = ['moneyfacts.co.uk']
    start_urls = ['https://moneyfacts.co.uk/credit-cards/balance-transfer-credit-cards/?fbclid=IwAR05-Sa1hIcYTRx8DXYYQd0UfDRjWF-jD2-u51jiLP-WKlkxSddKjzUcnWA']

    @staticmethod
    def _scroll_to_bottom(driver, screen_height, pause):
        """Scroll down one screen height at a time until the page end is passed.

        Args:
            driver: The Selenium WebDriver controlling the page.
            screen_height: Pixel distance to scroll on each step.
            pause: Seconds to sleep after each scroll step.
        """
        steps = 1
        while True:
            driver.execute_script(f"window.scrollBy(0, {screen_height});")
            sleep(pause)
            steps += 1
            # The document height is re-read on every step because it may
            # change while the page is being scrolled.
            scroll_height = driver.execute_script("return document.body.scrollHeight;")
            if screen_height * steps > scroll_height:
                break

    def parse(self, response):
        """Yield one item per product tile found in the rendered listing.

        Args:
            response: The Scrapy response for the start URL. Only its URL is
                used; the page itself is re-loaded in Selenium.

        Yields:
            dict: ``{'Name': str | None, 'Titrle': str | None}`` for every
            ``<li>`` under ``ul#finder-table``.
        """
        # Imported locally so this block does not depend on edits to the
        # file-level import list.
        from selenium.common.exceptions import WebDriverException
        from selenium.webdriver.common.by import By

        scroll_pause_time = 1
        heading_xpath = './/*[@class="table-item-heading-product-name"]/span'

        chrome_path = which("chromedriver")
        driver = webdriver.Chrome(executable_path=chrome_path)
        try:
            driver.maximize_window()
            driver.get(response.url)
            screen_height = driver.execute_script("return window.screen.height;")

            while True:
                self._scroll_to_bottom(driver, screen_height, scroll_pause_time)

                # NOTE(review): the whole page source is re-parsed after every
                # "Show More" click; if that control appends to the list rather
                # than replacing it, earlier tiles are yielded again - confirm.
                page = Selector(text=driver.page_source)
                for product in page.xpath('//ul[@id="finder-table"]/li'):
                    yield {
                        'Name': product.xpath(heading_xpath + '/strong/text()').get(),
                        # NOTE(review): 'Titrle' looks like a typo for 'Title';
                        # kept as-is because consumers may rely on the key.
                        'Titrle': product.xpath(heading_xpath + '/text()').get(),
                    }

                # Load the next batch; stop when the control is missing or
                # cannot be clicked.
                try:
                    driver.find_element(By.XPATH, '//*[text()="Show More"]').click()
                except WebDriverException as exc:
                    self.logger.debug("Stopping pagination: %s", exc)
                    break
        finally:
            # quit() (not close()) also shuts down the chromedriver process,
            # and the finally clause guarantees it runs on errors too.
            driver.quit()
Advertisement
Add Comment
Please, Sign In to add comment