Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python3
- import requests
- from bs4 import BeautifulSoup
- import subprocess
- from json import loads
- import os
- from os.path import exists
- debug_success = True
- debug_error = False
- Timeout = 5 #How long to wait for a proxy to respond
- keep_individual_lists = False #Delete lists when consolodating or not
- def get_soup(url):
- return BeautifulSoup(requests.get(url).text, "html.parser")
- def appendToFile(filename, content):
- with open(filename, "a") as file:
- file.write(content)
- def clearFile(filename):
- with open(filename, "w") as file:
- file.write("")
- def parse_freeproxyupdate():
- print("[INFO] Parsing https://freeproxyupdate.com/")
- out_array = []
- clearFile("fpu.list")
- pages = ["socks5-proxy", "socks4-proxy", "https-ssl-proxy", "http-proxy"]
- for page in pages:
- url = f"https://freeproxyupdate.com/{page}"
- html = get_soup(url)
- table = html.select(".list-proxy")[0]
- rows = table.find_all("tr")
- for row in rows:
- if "IP address" in row.text: continue
- cells_legend = ["IP address","Port","Country","Region/City","Protocol","Anonimity","Speed", "Latency", "Response", "Uptime", "Last Checked"]
- cells = list(row.children)
- data = [cell.text for cell in cells if "\n" not in cell]
- out_array.append(",".join(data))
- appendToFile("fpu.list", "\n".join(out_array))
- def parse_proxylist():
- print("[INFO] Parsing https://www.proxy-list.download/ using their API")
- url = "https://www.proxy-list.download/api/v2/get?l=en&t="
- pages = ["socks5", "socks4", "https", "http"]
- out_array = []
- clearFile("pl.list")
- for page in pages:
- obj_legend = ["IP","PORT","ANON","COUNTRY","ISO","PING"]
- obj = loads(requests.get(f"{url}{page}").text)["LISTA"]
- #todo output to file as f-string
- for i in obj:
- out_array.append(f"{i['IP']},{i['PORT']},{page},{i['ANON']},{i['COUNTRY']},{i['ISO']},{i['PING']}")
- appendToFile("pl.list", "\n".join(out_array))
- def parse_dupesites(site):
- urls = [("https://www.socks-proxy.net/", "socks-proxy.net.list"), ("https://www.sslproxies.org/", "sslproxies.org.list")]
- for i in urls:
- if site in i[0]:
- url = i[0]
- filename = i[1]
- clearFile(filename)
- print(f"[INFO] Parsing {url}")
- page = get_soup(url)
- table = page.select(".table.table-striped.table-bordered")[0]
- rows = table.select("tr")
- cells_legend_socks = ["IP Address", "Port", "Code", "Country", "Version", "Anonymity", "Https", "Last Checked"]
- cells_legend_ssl = ["IP Address", "Port", "Code", "Country", "anonymity", "Google", "Https", "Last Checked"]
- out_array = []
- for row in rows:
- cells = [i.text for i in row.children]
- out_array.append(",".join(cells))
- out_array.pop(0)#removes header legend text
- appendToFile(filename, "\n".join(out_array))
- def parse_proxyscrape():
- print(f"[INFO] Parsing https://www.proxyscrape.com/free-proxy-list using their API")
- clearFile("proxyscrape.list")
- socks_url = "https://api.proxyscrape.com/v2/?request=getproxies&protocol=$SOCKSVERSION&timeout=10000&country=all"
- https_url = "https://api.proxyscrape.com/v2/?request=getproxies&protocol=http&timeout=10000&country=all&ssl=$SSL&anonymity=all&simplified=true"
- urls = [
- (socks_url.replace("$SOCKSVERSION", "socks5"), "socks5"),
- (socks_url.replace("$SOCKSVERSION", "socks4"), "socks4"),
- (https_url.replace("$SSL", "yes"), "https"),
- (https_url.replace("$SSL", "no"), "http")
- ]
- for url in urls:
- req = requests.get(url[0]).text
- lst = req.split("\r\n")
- lst.pop()
- for i, line in enumerate(lst):
- lst[i] = f"{url[1]}://" + line
- appendToFile("proxyscrape.list", "\n".join(lst)+"\n")
- def check_proxy(url):
- global Timeout
- proxy = {"http":url, "https":url, "socks4":url, "socks5":url}
- try:
- test = requests.get("http://ifconfig.me/", proxies=proxy, timeout=Timeout)
- if debug_success:print(test.text)
- if test.ok and len(test.text) < 30: return True
- except Exception as err:
- if debug_error: print(type(err))
- return False
- def check_all():
- print("[INFO] Checking proxies")
- with open("proxies.list","r") as file:
- urls = file.read().split("\n")
- for url in urls:
- print(f"[INFO] Checking url: {url}")
- if check_proxy(url):
- appendToFile("good_proxies.list", f"{url}\n")
- print(f"[GOOD]")
- os.remove("proxies.list")
- def consolodate():
- print("[INFO] Consolidating proxy urls into a single file")
- with open("proxies.list","w") as file:
- try:
- #freeproxyupdate.com
- lst = open("fpu.list", "r").read().split("\n")
- for line in lst:
- data = line.split(",")
- url = f"{data[4]}://{data[0]}:{data[1]}/"
- file.write(url + "\n")
- except Exception as err:
- print(err)
- pass;
- #proxy-list.download
- lst = open("pl.list", "r").read().split("\n")
- for line in lst:
- data = line.split(",")
- url = f"{data[2]}://{data[0]}:{data[1]}"
- file.write(url + "\n")
- #socks-proxy.net
- lst = open("socks-proxy.net.list", "r").read().split("\n")
- for line in lst:
- data = line.split(",")
- url = f"{data[4]}://{data[0]}:{data[1]}/"
- file.write(url + "\n")
- #sslproxies.org
- lst = open("sslproxies.org.list", "r").read().split("\n")
- for line in lst:
- data = line.split(",")
- url = f"https://{data[0]}:{data[1]}"
- file.write(url + "\n")
- #proxyscrape.com is already formatted
- lst = open("proxyscrape.list", "r").read()
- file.write(lst)
- if not keep_individual_lists:
- os.remove("fpu.list")
- os.remove("pl.list")
- os.remove("socks-proxy.net.list")
- os.remove("sslproxies.org.list")
- os.remove("proxyscrape.list")
- def main():
- if not exists("proxies.list"):
- parse_freeproxyupdate()
- parse_proxylist()
- parse_dupesites("sslproxies")
- parse_dupesites("socks-proxy")
- parse_proxyscrape()
- consolodate()
- else:
- print("[INFO] proxies.list already exists. Delete it to reaquire IPS. Commence checking.")
- check_all()
- if __name__ == "__main__":
- main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement