Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # -*- coding: utf-8 -*-
- import re
- import six
- import grab.proxylist
- from collections import namedtuple
- from grab.proxylist import RE_AUTH_PROXY, RE_SIMPLE_PROXY, InvalidProxyLine, logger
# Field order of the Proxy record; 'proxy_ip' is the field this module adds.
PROXY_FIELDS = ('host', 'port', 'username', 'password', 'proxy_type', 'proxy_ip')

# host:port:user:password, then a TAB, then a run of digits/dots (the proxy IP).
RE_AUTH_PROXY_WITH_PROXY_IP = re.compile(r'^([^:]+):([^:]+):([^:]+):([^:]+)\t([\d\.]+)$')


class Proxy(namedtuple('Proxy', PROXY_FIELDS)):
    """Immutable proxy record with the fields listed in PROXY_FIELDS.

    ``proxy_ip`` is optional and defaults to None, so code that builds a
    record from only the first five fields keeps working.
    """

    # Keep instances as small as plain tuples (no per-instance __dict__).
    __slots__ = ()

    def __new__(cls, host, port, username, password, proxy_type,
                proxy_ip=None):
        """Create the record; ``proxy_ip`` may be omitted."""
        return super(Proxy, cls).__new__(
            cls, host, port, username, password, proxy_type, proxy_ip)

    def get_address(self):
        """Return the proxy address formatted as 'host:port'."""
        return '%s:%s' % (self.host, self.port)

    def get_userpwd(self):
        """Return 'username:password', or None when no username is set.

        A missing password is rendered as an empty string.
        """
        if self.username:
            return '%s:%s' % (self.username, self.password or '')
        return None
def parse_proxy_line(line):
    """Split one proxy line into (host, port, username, password, proxy_ip).

    The stripped line is tried against three patterns in order:
    RE_SIMPLE_PROXY, RE_AUTH_PROXY_WITH_PROXY_IP, then RE_AUTH_PROXY.
    Fields a pattern does not provide are returned as None.

    Raises InvalidProxyLine when none of the patterns match.
    """
    text = line.strip()

    # NOTE(review): RE_SIMPLE_PROXY comes from grab.proxylist; presumably it
    # matches a bare "host:port" -- confirm against grab.
    simple_hit = RE_SIMPLE_PROXY.search(text)
    if simple_hit is not None:
        return simple_hit.group(1), simple_hit.group(2), None, None, None

    # Must be tried before RE_AUTH_PROXY so the trailing TAB + IP is captured
    # as its own field.
    ip_hit = RE_AUTH_PROXY_WITH_PROXY_IP.search(text)
    if ip_hit is not None:
        host, port, login, secret, address = ip_hit.groups()
        return host, port, login, secret, address

    auth_hit = RE_AUTH_PROXY.search(text)
    if auth_hit is not None:
        host, port, login, secret = auth_hit.groups()
        return host, port, login, secret, None

    raise InvalidProxyLine('Invalid proxy line: %s' % text)
def parse_raw_list_data(data, proxy_type='http', proxy_userpwd=None):
    """Iterate over proxy servers found in the raw data.

    Args:
        data: Text or bytes with one proxy per line; non-text input is
            decoded as UTF-8.
        proxy_type: Value stored in the ``proxy_type`` field of every
            yielded Proxy.
        proxy_userpwd: Optional "username:password" fallback applied to
            lines that carry no credentials of their own.

    Yields:
        A Proxy for every non-empty, non-comment line that parses. Lines
        rejected by parse_proxy_line are logged and skipped.
    """
    if not isinstance(data, six.text_type):
        data = data.decode('utf-8')

    for orig_line in data.splitlines():
        # Only spaces are removed; a TAB separating the proxy IP is kept.
        line = orig_line.strip().replace(' ', '')
        if not line or line.startswith('#'):
            continue

        try:
            host, port, username, password, proxy_ip = parse_proxy_line(line)
        except InvalidProxyLine as ex:
            logger.error(ex)
            continue

        if username is None and proxy_userpwd is not None:
            # Split on the first colon only, so a password that itself
            # contains ':' no longer breaks the unpacking.
            username, password = proxy_userpwd.split(':', 1)

        yield Proxy(host, port, username, password, proxy_type, proxy_ip)
def patch():
    """Replace grab.proxylist's Proxy and parsing functions with this module's.

    Rebinds ``Proxy``, ``parse_proxy_line`` and ``parse_raw_list_data`` on
    the ``grab.proxylist`` module object.
    """
    overrides = (
        ('Proxy', Proxy),
        ('parse_proxy_line', parse_proxy_line),
        ('parse_raw_list_data', parse_raw_list_data),
    )
    for attr_name, replacement in overrides:
        setattr(grab.proxylist, attr_name, replacement)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement