Advertisement
_c0mrad

[+] proxyscraper [+]

Oct 24th, 2016
165
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.39 KB | None | 0 0
  1. from eventlet.green import urllib2
  2. import eventlet
  3. import re
  4.  
  5. def getProxies(url):
  6.   src = urllib2.urlopen(url).read().split("\n")
  7.  
  8.   css = False
  9.   classes = {}
  10.   ips = []
  11.   proxies = []
  12.  
  13.   for i in xrange(len(src)):
  14.     line = src[i]
  15.    
  16.     # Start of style
  17.     if "<td><span><style>" in line:
  18.       css = True
  19.       continue
  20.    
  21.     # Handle the CSS
  22.     if css == True:
  23.       # End of style
  24.       if "</style>" in line:
  25.         css = False
  26.      
  27.       if "display:none" in line:
  28.         classes[line[1:5]] = "none"
  29.        
  30.       if "display:inline" in line:
  31.         classes[line[1:5]] = "inline"
  32.    
  33.     # IP line
  34.     if len(classes) > 0 and css == False:
  35.       ip = line
  36.       linePort = src[i + 2]
  37.       lineCountry = src[i + 4]
  38.       lineResponseTime = src[i + 7]
  39.       lineConnectionTime = src[i + 11]
  40.       lineType = src[i + 16]
  41.       lineAnonymity = src[i + 17]
  42.      
  43.       # Replace class declarations with style ones
  44.       for class_ in classes:
  45.         ip = ip.replace("class=\"%s\"" % (class_), "style=\"display:%s\"" % (classes[class_]))
  46.      
  47.       # Remove all unecessary poop :)
  48.       ip = re.sub(r"<(div|span) style=\"display:none\">[\.0-9]+</(div|span)>", r"", ip)
  49.       ip = re.sub(r"class=\"[0-9]+\"", r"", ip)
  50.       ip = re.sub(r"[^0123456789\.]", r"", ip)
  51.      
  52.       # Port
  53.       port = linePort.replace("</td>", "")
  54.      
  55.       # Country
  56.       country = lineCountry.split("/> ")[1].split("<")[0]
  57.      
  58.       # Response Time Percents
  59.       responseTime = lineResponseTime.split(":")[1].split("%")[0]
  60.      
  61.       # Connection Time Percents
  62.       connectionTime = lineConnectionTime.split(":")[1].split("%")[0]
  63.      
  64.       # Connection Type
  65.       type = lineType.split(">")[1].split("<")[0]
  66.      
  67.       # Anonymity
  68.       anonymity = lineAnonymity.split(">")[1].split("<")[0]
  69.      
  70.       proxies.append({"ip":ip, "port":port, "country":country, "responseTime":responseTime, "connectionTime":connectionTime, "type":type, "anonymity":anonymity})
  71.      
  72.       classes = {}
  73.  
  74.   return url, proxies
  75.  
  76. proxies = []
  77.  
  78. pool = eventlet.GreenPool(12)
  79. for url, proxyList in pool.imap(getProxies, ["https://hidemyass.com/proxy-list/" + str(i + 1) for i in xrange(12)]):
  80.   for proxy in proxyList:
  81.     proxies.append(proxy)
  82.  
  83. for proxy in proxies:
  84.   print "%s:%s" % (proxy["ip"], proxy["port"])
  85.  
  86. print "\nGot %s proxies!" % (len(proxies))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement