Venusaur

Proxy Scraper

Jul 21st, 2018
# You must install requests and bs4 (beautifulsoup4) for this to work.
# Python 3.6+
import zipfile
from io import BytesIO

import requests
from bs4 import BeautifulSoup

session = requests.Session()
proxies = set()
url = 'http://www.sslproxies24.top/'
proxy_links = []
download_links = []
response = session.get(url)
soup = BeautifulSoup(response.text, 'html.parser')

print('Beginning to scrape proxies...')

# Collect links to the individual "Free SSL Proxies" posts on the index page.
for tag in soup.find_all('a'):
    if tag.get('href') is not None and 'Free SSL Proxies' in tag.get_text():
        proxy_links.append(tag['href'])

# Each post links to a Google Drive download holding a zipped proxy list.
for link in proxy_links:
    response = session.get(link)
    soup = BeautifulSoup(response.text, 'html.parser')
    for tag in soup.find_all('a'):
        if tag.get('href') is not None and tag['href'].startswith('https://drive.google.com'):
            download_links.append(tag['href'])

# Fetch each zip archive into memory and pull the proxies out of ssl.txt.
for link in download_links:
    response = session.get(link)
    with zipfile.ZipFile(BytesIO(response.content)) as z:
        with z.open('ssl.txt') as fp:
            proxies.update({proxy.decode() for proxy in fp.read().splitlines()})

print(f'Scraped {len(proxies)} proxies.')

# Write the de-duplicated proxies to disk, one per line.
with open('proxies.txt', 'w') as fp:
    fp.write('\n'.join(proxies))
    print('Proxies have been written to proxies.txt.')
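As a rough illustration of how the output file might be consumed, the sketch below reads proxies.txt and tries each address through requests until one responds. The httpbin.org test URL, the host:port proxy format, and the ten-second timeout are assumptions, not part of the original script.

# A minimal sketch of one way to use the scraped list; the test URL and
# timeout are assumptions, not taken from the script above.
import requests

with open('proxies.txt') as fp:
    candidates = [line.strip() for line in fp if line.strip()]

for proxy in candidates:
    try:
        # Route the request through the candidate proxy (assumed host:port form).
        response = requests.get(
            'https://httpbin.org/ip',
            proxies={'http': f'http://{proxy}', 'https': f'http://{proxy}'},
            timeout=10,
        )
        response.raise_for_status()
    except requests.RequestException:
        continue  # Dead or slow proxy; try the next one.
    print(f'Working proxy found: {proxy}')
    break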