Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# User-Agent_Proxy_rotator_python
# Usage: python3 scrape_me.py
# Scrapes the free proxy list at https://free-proxy-list.net/ and rotates
# through the proxies, deleting any that fail, e.g.:
#   Proxy 180.246.242.155:8080 deleted.
#   Proxy 105.27.238.166:80 deleted.
from urllib.request import Request, urlopen

from bs4 import BeautifulSoup
from fake_useragent import UserAgent

import random

ua = UserAgent()  # generator of random User-Agent header strings
proxies = []      # scraped proxies, each a dict {'ip': str, 'port': str}
# Main function
def main():
    """Scrape the proxy list, then make up to 99 requests through
    rotating proxies, deleting any proxy that fails.

    Side effects: populates and mutates the module-level ``proxies``
    list and prints progress to stdout.
    """
    # Retrieve latest proxies (timeout so a dead endpoint can't hang us)
    proxies_req = Request('https://free-proxy-list.net/')
    proxies_req.add_header('User-Agent', ua.random)
    proxies_doc = urlopen(proxies_req, timeout=10).read().decode('utf8')

    soup = BeautifulSoup(proxies_doc, 'html.parser')
    proxies_table = soup.find(id='proxylisttable')

    # Save proxies in the array
    for row in proxies_table.tbody.find_all('tr'):
        cells = row.find_all('td')  # hoisted: original queried the row twice
        proxies.append({
            'ip': cells[0].string,
            'port': cells[1].string
        })

    # Choose a random proxy
    proxy_index = random_proxy()
    proxy = proxies[proxy_index]

    for n in range(1, 100):
        # Every 10 requests, generate a new proxy.  Rotate BEFORE building
        # the request: the original called set_proxy first and rotated
        # afterwards, so the new proxy never applied to that iteration.
        if n % 10 == 0:
            proxy_index = random_proxy()
            proxy = proxies[proxy_index]

        req = Request('https://free-proxy-list.net/')
        req.set_proxy(proxy['ip'] + ':' + proxy['port'], 'http')

        # Make the call
        try:
            my_ip = urlopen(req, timeout=10).read().decode('utf8')
            print('#' + str(n) + ': ' + my_ip)
        except Exception:
            # Narrowed from a bare except: so Ctrl-C / SystemExit still work.
            # If error, delete this proxy and find another one.
            del proxies[proxy_index]
            print('Proxy ' + proxy['ip'] + ':' + proxy['port'] + ' deleted.')
            if not proxies:
                # Original crashed in random_proxy() once the list emptied.
                print('No proxies left; giving up.')
                return
            proxy_index = random_proxy()
            proxy = proxies[proxy_index]
# Retrieve a random index proxy (we need the index to delete it if not working)
def random_proxy(pool=None):
    """Return a random valid index into *pool*.

    Args:
        pool: sequence to pick from; defaults to the module-level
            ``proxies`` list (backward compatible with the old
            zero-argument call).

    Returns:
        int index in ``range(len(pool))``.

    Raises:
        ValueError: if the pool is empty (randint on an empty range).
    """
    if pool is None:
        pool = proxies
    return random.randint(0, len(pool) - 1)
# Run the scraper only when executed as a script, not on import.
if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement