import requests
import urllib3

# Suppress urllib3's InsecureRequestWarning: requests are made with
# verify=False by default (see CrawlConfig), which would otherwise warn
# on every call.
urllib3.disable_warnings()


class CrawlConfig:
    """Container for the HTTP settings used by :class:`Crawl`.

    Attributes:
        proxy: ``proxies`` mapping passed to ``requests.get``.
        headers: HTTP headers mapping.
        cookies: cookies mapping.
        timeout: request timeout in seconds (default 30.0).
        verify: whether to verify TLS certificates (default False).
    """

    def __init__(self, proxy=None, headers=None, cookies=None, timeout=30.0, verify=False):
        # None sentinels replace the original mutable `{}` defaults so that
        # separate instances never share (and mutate) the same dict object.
        # Callers that passed nothing still end up with an empty dict.
        self.proxy = {} if proxy is None else proxy
        self.headers = {} if headers is None else headers
        self.timeout = timeout
        self.verify = verify
        self.cookies = {} if cookies is None else cookies


class Crawl:
    """Thin wrapper around ``requests.get`` configured by a CrawlConfig."""

    def __init__(self, config: CrawlConfig):
        self.set_config(config)

    def set_config(self, config: CrawlConfig):
        """Copy every setting from *config* onto this crawler instance."""
        self.proxy = config.proxy
        self.headers = config.headers
        self.timeout = config.timeout
        self.verify = config.verify
        self.cookies = config.cookies

    def crawl(self, url):
        """GET *url* and return the decoded response body.

        Returns "" on any request or decode failure (best-effort fetch).
        """
        try:
            r = requests.get(
                url,
                headers=self.headers,
                cookies=self.cookies,
                timeout=self.timeout,
                verify=self.verify,
                proxies=self.proxy,
            )
            return r.content.decode()
        except (requests.RequestException, UnicodeDecodeError):
            # Narrowed from a bare `except:`: still swallows network and
            # decode errors (keeping the original best-effort contract) but
            # no longer traps KeyboardInterrupt/SystemExit.
            return ""


from enum import Enum
from urllib.parse import urlparse
import re


class RE:
    # Regex constants for HTML scraping.
    TAG = r"<(|[a-z]+)(| ([^<]+))>"
    # NOTE(review): the IMG and JS patterns below arrived truncated/garbled
    # in this copy of the source (JS was an unterminated string literal).
    # The dangling literal is terminated here so the module parses; restore
    # the original patterns from upstream before relying on them.
    IMG = r"]+)"
    JS = r""