Advertisement
arceny

Untitled

Jan 23rd, 2016
90
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.08 KB | None | 0 0
  1. # -*- coding: utf-8 -*-
  2. import re
  3. import six
  4. import grab.proxylist
  5. from collections import namedtuple
  6. from grab.proxylist import RE_AUTH_PROXY, RE_SIMPLE_PROXY, InvalidProxyLine, logger
  7.  
  8. PROXY_FIELDS = ('host', 'port', 'username', 'password', 'proxy_type', 'proxy_ip')
  9. RE_AUTH_PROXY_WITH_PROXY_IP = re.compile(r'^([^:]+):([^:]+):([^:]+):([^:]+)\t([\d\.]+)$')
  10.  
  11.  
  12. class Proxy(namedtuple('Proxy', PROXY_FIELDS)):
  13.     def get_address(self):
  14.         return '%s:%s' % (self.host, self.port)
  15.  
  16.     def get_userpwd(self):
  17.         if self.username:
  18.             return '%s:%s' % (self.username, self.password or '')
  19.  
  20.  
  21. def parse_proxy_line(line):
  22.     line = line.strip()
  23.     match = RE_SIMPLE_PROXY.search(line)
  24.  
  25.     if match:
  26.         return match.group(1), match.group(2), None, None, None
  27.  
  28.     match = RE_AUTH_PROXY_WITH_PROXY_IP.search(line)
  29.     if match:
  30.         host, port, user, pwd, proxy_ip = match.groups()
  31.         return host, port, user, pwd, proxy_ip
  32.  
  33.     match = RE_AUTH_PROXY.search(line)
  34.     if match:
  35.         host, port, user, pwd = match.groups()
  36.         return host, port, user, pwd, None
  37.  
  38.     raise InvalidProxyLine('Invalid proxy line: %s' % line)
  39.  
  40.  
  41. def parse_raw_list_data(data, proxy_type='http', proxy_userpwd=None):
  42.     "Iterate over proxy servers found in the raw data"
  43.     if not isinstance(data, six.text_type):
  44.         data = data.decode('utf-8')
  45.     for orig_line in data.splitlines():
  46.         line = orig_line.strip().replace(' ', '')
  47.         if line and not line.startswith('#'):
  48.             try:
  49.                 host, port, username, password, proxy_ip = parse_proxy_line(line)
  50.             except InvalidProxyLine as ex:
  51.                 logger.error(ex)
  52.             else:
  53.                 if username is None and proxy_userpwd is not None:
  54.                     username, password = proxy_userpwd.split(':')
  55.                 yield Proxy(host, port, username, password, proxy_type, proxy_ip)
  56.  
  57.  
  58. def patch():
  59.     grab.proxylist.Proxy = Proxy
  60.     grab.proxylist.parse_proxy_line = parse_proxy_line
  61.     grab.proxylist.parse_raw_list_data = parse_raw_list_data
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement