Fazlul

Untitled

Jun 23rd, 2021
74
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.55 KB | None | 0 0
  1. import scrapy
  2. from selenium import webdriver
  3. from selenium.webdriver.chrome.options import Options
  4. from shutil import which
  5. from scrapy.selector import Selector
  6. from time import sleep
  7. from datetime import datetime
  8.  
  9.  
  10. class ListSpider(scrapy.Spider):
  11. name = 'list'
  12. allowed_domains = ['moneyfacts.co.uk']
  13. start_urls = ['https://moneyfacts.co.uk/credit-cards/balance-transfer-credit-cards/?fbclid=IwAR05-Sa1hIcYTRx8DXYYQd0UfDRjWF-jD2-u51jiLP-WKlkxSddKjzUcnWA']
  14.  
  15. def parse(self, response):
  16.  
  17. options = Options()
  18.  
  19. chrome_path = which("chromedriver")
  20. driver = webdriver.Chrome(executable_path=chrome_path) #, chrome_options=options)
  21. #driver.set_window_size(1920, 1080)
  22. print(dir(driver))
  23. driver.maximize_window()
  24.  
  25. scroll_pause_time = 1
  26.  
  27. # loading first page
  28. start_url = 'https://moneyfacts.co.uk/credit-cards/balance-transfer-credit-cards/?fbclid=IwAR05-Sa1hIcYTRx8DXYYQd0UfDRjWF-jD2-u51jiLP-WKlkxSddKjzUcnWA'
  29. driver.get(start_url)
  30.  
  31. screen_height = driver.execute_script("return window.screen.height;") # get the screen height of the web
  32.  
  33. while True: # all pages
  34. #for _ in range(5): # only 5 pages
  35.  
  36. #sleep(scroll_pause_time)
  37.  
  38. # scrolling page
  39. i = 1
  40. while True: #this is the infinite scoll thing which reveals all javascript generated product tiles
  41. driver.execute_script(f"window.scrollBy(0, {screen_height});")
  42. sleep(scroll_pause_time)
  43.  
  44. i += 1
  45.  
  46. scroll_height = driver.execute_script("return document.body.scrollHeight;")
  47. if screen_height * i > scroll_height:
  48. break
  49.  
  50. # after scrolling
  51. resp = Selector(text=driver.page_source)
  52.  
  53. for products in resp.xpath('//ul[@id="finder-table"]/li'):
  54. name = products.xpath('.//*[@class="table-item-heading-product-name"]/span/strong/text()').get(),
  55. title = products.xpath('.//*[@class="table-item-heading-product-name"]/span/text()').get()
  56. yield {
  57. 'Name':name,
  58. 'Titrle':title
  59.  
  60. }
  61.  
  62. # to load next page
  63. try:
  64. driver.find_element_by_xpath('//*[text()="Show More"]').click()
  65. except:
  66. break
  67. driver.close()
  68.  
  69.  
Advertisement
Add Comment
Please, Sign In to add comment