Advertisement
Not a member of Pastebin yet?
Sign up — it unlocks many cool features!
"""Scrape free SSL proxy lists from sslproxies24.top and write them to proxies.txt.

Requires third-party packages: requests, beautifulsoup4 (bs4).
Python 3.6+ (uses f-strings).
"""
import zipfile
from io import BytesIO

import requests
from bs4 import BeautifulSoup

# Seconds to wait per HTTP request — without a timeout, requests can hang forever.
TIMEOUT = 30

session = requests.Session()
proxies = set()
url = 'http://www.sslproxies24.top/'
proxy_links = []
download_links = []

# Fetch the index page and collect links to the individual proxy-list posts.
response = session.get(url, timeout=TIMEOUT)
response.raise_for_status()  # fail fast instead of scraping an HTTP error page
soup = BeautifulSoup(response.text, 'html.parser')
print('Beginning to scrape proxies...')
for tag in soup.find_all('a'):
    # Keep anchors that have an href and advertise a free SSL proxy list.
    if tag.get('href') is not None and 'Free SSL Proxies' in tag.get_text():
        proxy_links.append(tag['href'])

# Each post page links to a Google Drive zip containing the proxy list.
for link in proxy_links:
    response = session.get(link, timeout=TIMEOUT)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    for tag in soup.find_all('a'):
        if tag.get('href') is not None and tag['href'].startswith('https://drive.google.com'):
            download_links.append(tag['href'])

# Download each zip in memory and pull the proxy addresses out of ssl.txt.
for link in download_links:
    response = session.get(link, timeout=TIMEOUT)
    try:
        with zipfile.ZipFile(BytesIO(response.content)) as z:
            with z.open('ssl.txt') as fp:
                proxies.update(line.decode() for line in fp.read().splitlines())
    except (zipfile.BadZipFile, KeyError):
        # Google Drive sometimes serves an HTML interstitial instead of the
        # archive, or the archive may lack ssl.txt — skip it rather than crash
        # and lose everything scraped so far.
        continue

print(f'Scraped {len(proxies)} proxies.')
with open('proxies.txt', 'w') as fp:
    fp.write('\n'.join(proxies))
print('Proxies have been written to proxies.txt.')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement