Advertisement
skip420

proxy_scrape_me

Jun 13th, 2020
4,677
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.81 KB | None | 0 0
  1. # User-Agent_Proxy_rotator_python
  2. # Usage: python3 scrape_me.py
  3.  
  4. # Example output: Proxy 180.246.242.155:8080 deleted.
  5. #                 Proxy 105.27.238.166:80 deleted.
  6. # Proxy list source: https://free-proxy-list.net/
  7.  
  8.  
  9. from urllib.request import Request, urlopen
  10. from bs4 import BeautifulSoup
  11. from fake_useragent import UserAgent
  12. import random
  13.  
  14. ua = UserAgent() # From here we generate a random user agent
  15. proxies = [] # Will contain proxies [ip, port]
  16.  
  17. # Main function
  18. def main():
  19.   # Retrieve latest proxies
  20.   proxies_req = Request('https://free-proxy-list.net/')
  21.   proxies_req.add_header('User-Agent', ua.random)
  22.   proxies_doc = urlopen(proxies_req).read().decode('utf8')
  23.  
  24.   soup = BeautifulSoup(proxies_doc, 'html.parser')
  25.   proxies_table = soup.find(id='proxylisttable')
  26.  
  27.   # Save proxies in the array
  28.   for row in proxies_table.tbody.find_all('tr'):
  29.     proxies.append({
  30.       'ip':   row.find_all('td')[0].string,
  31.       'port': row.find_all('td')[1].string
  32.     })
  33.  
  34.   # Choose a random proxy
  35.   proxy_index = random_proxy()
  36.   proxy = proxies[proxy_index]
  37.  
  38.   for n in range(1, 100):
  39.     req = Request('https://free-proxy-list.net/')
  40.     req.set_proxy(proxy['ip'] + ':' + proxy['port'], 'http')
  41.  
  42.     # Every 10 requests, generate a new proxy
  43.     if n % 10 == 0:
  44.       proxy_index = random_proxy()
  45.       proxy = proxies[proxy_index]
  46.  
  47.     # Make the call
  48.     try:
  49.       my_ip = urlopen(req).read().decode('utf8')
  50.       print('#' + str(n) + ': ' + my_ip)
  51.     except: # If error, delete this proxy and find another one
  52.       del proxies[proxy_index]
  53.       print('Proxy ' + proxy['ip'] + ':' + proxy['port'] + ' deleted.')
  54.       proxy_index = random_proxy()
  55.       proxy = proxies[proxy_index]
  56.  
  57. # Retrieve a random index proxy (we need the index to delete it if not working)
  58. def random_proxy():
  59.   return random.randint(0, len(proxies) - 1)
  60.  
  61. if __name__ == '__main__':
  62.   main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement