rhat398

Untitled

Feb 27th, 2023
657
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.34 KB | None | 0 0
  1. class SFAProxyMiddleware(object):
  2.     @classmethod
  3.     def from_crawler(cls, crawler):
  4.         return cls(crawler.settings)
  5.  
  6.     def __init__(self, settings):
  7.  
  8.         self.packetstream_proxies = [
  9.             settings.get("PS_PROXY_USA"),
  10.             settings.get("PS_PROXY_CA"),
  11.             settings.get("PS_PROXY_IT"),
  12.             settings.get("PS_PROXY_GLOBAL"),
  13.         ]
  14.  
  15.         self.unlimited_proxies = [
  16.             settings.get("UNLIMITED_PROXY_1"),
  17.             settings.get("UNLIMITED_PROXY_2"),
  18.             # settings.get("UNLIMITED_PROXY_3"),
  19.             # settings.get("UNLIMITED_PROXY_4"),
  20.             # settings.get("UNLIMITED_PROXY_5"),
  21.             # settings.get("UNLIMITED_PROXY_6"),
  22.         ]
  23.  
  24.     def add_proxy(self, request, host):
  25.         request.meta["proxy"] = host
  26.  
  27.     def process_request(self, request, spider):
  28.         retries = request.meta.get("retry_times", 0)
  29.  
  30.         if retries <= 8:
  31.             self.add_proxy(request, random.choice(self.unlimited_proxies))
  32.             spider.logger.info("In unlimiited proxies block")
  33.             return None
  34.         spider.logger.info("Using packetstream proxies now")
  35.  
  36.         self.add_proxy(request, random.choice(self.packetstream_proxies))
  37.         return None
  38.  
  39.     def process_response(self, request, response, spider):
  40.        
  41.         return response
Advertisement
Add Comment
Please, Sign In to add comment