Venusaur

Proxy Scraper

Jul 21st, 2018
# You must install requests and bs4 (beautifulsoup4) for this to work.
# Python 3.6+
import zipfile
from io import BytesIO

import requests
from bs4 import BeautifulSoup

session = requests.Session()
proxies = set()
url = 'http://www.sslproxies24.top/'
proxy_links = []
download_links = []
response = session.get(url)
soup = BeautifulSoup(response.text, 'html.parser')

print('Beginning to scrape proxies...')

# Collect links to the individual "Free SSL Proxies" posts on the index page.
for tag in soup.find_all('a'):
    if tag.get('href') is not None and 'Free SSL Proxies' in tag.get_text():
        proxy_links.append(tag['href'])

# Each post links to a Google Drive download holding a zipped proxy list.
for link in proxy_links:
    response = session.get(link)
    soup = BeautifulSoup(response.text, 'html.parser')
    for tag in soup.find_all('a'):
        if tag.get('href') is not None and tag['href'].startswith('https://drive.google.com'):
            download_links.append(tag['href'])

# Fetch each zip archive into memory and pull the proxies out of ssl.txt.
for link in download_links:
    response = session.get(link)
    with zipfile.ZipFile(BytesIO(response.content)) as z:
        with z.open('ssl.txt') as fp:
            proxies.update({proxy.decode() for proxy in fp.read().splitlines()})

print(f'Scraped {len(proxies)} proxies.')

# Write the de-duplicated proxies to disk, one per line.
with open('proxies.txt', 'w') as fp:
    fp.write('\n'.join(proxies))
    print('Proxies have been written to proxies.txt.')
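As a rough illustration of how the output file might be consumed, the sketch below reads proxies.txt and tries each address through requests until one responds. The httpbin.org test URL, the host:port proxy format, and the ten-second timeout are assumptions, not part of the original script.

# A minimal sketch of one way to use the scraped list; the test URL and
# timeout are assumptions, not taken from the script above.
import requests

with open('proxies.txt') as fp:
    candidates = [line.strip() for line in fp if line.strip()]

for proxy in candidates:
    try:
        # Route the request through the candidate proxy (assumed host:port form).
        response = requests.get(
            'https://httpbin.org/ip',
            proxies={'http': f'http://{proxy}', 'https': f'http://{proxy}'},
            timeout=10,
        )
        response.raise_for_status()
    except requests.RequestException:
        continue  # Dead or slow proxy; try the next one.
    print(f'Working proxy found: {proxy}')
    break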