Guest User

set location to Norway

a guest
Aug 21st, 2020
65
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.33 KB | None | 0 0
  1. import scrapy
  2. from scrapy_selenium import SeleniumRequest
  3. import logging
  4. from shutil import which
  5.  
  6. class SigmaSpider(scrapy.Spider):
  7.     name = 'sigma_help'
  8.     allowed_domains = ['sigmaaldrich.com']
  9.     custom_settings = {
  10.         'HTTPCACHE_ENABLED': False,
  11.         'SELENIUM_DRIVER_NAME': 'chrome',
  12.         'SELENIUM_DRIVER_EXECUTABLE_PATH': which('chromedriver'),
  13.         'SELENIUM_DRIVER_ARGUMENTS': ['--headless'],
  14.         'DOWNLOADER_MIDDLEWARES': {
  15.             'scrapy_selenium.SeleniumMiddleware': 800
  16.         },
  17.         # 'CONCURRENT_REQUESTS': 1,
  18.         'RETRY_TIMES': 1,
  19.         'DOWNLOAD_DELAY': 5
  20.     }
  21.  
  22.     user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.125 Safari/537.36"
  23.  
  24.     def start_requests(self):
  25.         # set location to Norway
  26.         js_code = "document.querySelector('.countriesRight+ .countriesRight a:nth-child(17)').click()"
  27.         yield SeleniumRequest(url="https://www.sigmaaldrich.com", callback=self.set_location, script=js_code, wait_time=10)
  28.         # check if location was set
  29.         # yield SeleniumRequest(url="https://www.sigmaaldrich.com", callback=self.set_location, dont_filter=True)
  30.  
  31.     def set_location(self, response):
  32.         from scrapy.shell import inspect_response
  33.         inspect_response(response, self)
  34.  
Add Comment
Please, Sign In to add comment