Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
"""Scrape http(s) proxies from https://free-proxy-list.net as ``ip:port`` lines.

Run as a script to print one ``ip:port`` entry per line (the original author
reports roughly 300 entries).
"""
from typing import Iterable, List

PROXY_LIST_URL = 'https://free-proxy-list.net'
REQUEST_TIMEOUT_SECONDS = 30


def extract_proxies(cell_texts: Iterable[str]) -> List[str]:
    """Pair IP cells with the port cell that follows them.

    Args:
        cell_texts: Text of the table cells, in document order.

    Returns:
        A list of ``"ip:port"`` strings in the order they were encountered.
    """
    proxies = []
    pending_ip = None
    for raw in cell_texts:
        text = raw.strip()
        # Cells containing any letter (country, anonymity, "1 minute ago", ...)
        # are never an IP or a port; lower().islower() is True exactly when
        # the text holds at least one cased character.
        if not text or text.lower().islower():
            continue
        if '.' in text:
            # Letter-free text with a dot is treated as an IP address.
            pending_ip = text
        elif pending_ip is not None and text.isdigit():
            # First purely numeric cell after an IP is its port.
            proxies.append(f'{pending_ip}:{text}')
            pending_ip = None
    return proxies


def scrape_proxies(url: str = PROXY_LIST_URL) -> List[str]:
    """Fetch *url* and return every ``ip:port`` found in its table cells.

    Raises:
        requests.exceptions.RequestException: if the request fails or exceeds
            ``REQUEST_TIMEOUT_SECONDS``.
    """
    # Imported here so that extract_proxies() stays usable (and testable)
    # without the third-party requests_html package installed.
    from requests_html import HTMLSession

    # The context manager closes the session's connections once we are done.
    with HTMLSession() as session:
        response = session.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
        cells = response.html.find('td')  # every table cell on the page
        return extract_proxies(cell.text for cell in cells)


def main() -> None:
    """Print the scraped proxies, one ``ip:port`` per line."""
    print('\n'.join(scrape_proxies()))


if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement