Advertisement
Not a member of Pastebin yet? Sign up — it unlocks many cool features!
- import urllib2 , urllib
- import random
- import re
- import sys
- import threading
- class extracter:
- def __init__(self):
- self.linkl = []
- self.useragent = ['Mozilla/4.0 (compatible; MSIE 5.0; SunOS 5.10 sun4u; X11)',
- 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.2pre) Gecko/20100207 Ubuntu/9.04 (jaunty) Namoroka/3.6.2pre',
- 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Avant Browser;',
- 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 5.0)',
- 'Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.1)',
- 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.0.6)',
- 'Microsoft Internet Explorer/4.0b1 (Windows 95)',
- 'Mozilla/4.0 (compatible; MSIE 5.0; AOL 4.0; Windows 95; c_athome)',
- 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)',
- 'Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko) (Kubuntu)',
- 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; ZoomSpider.net bot; .NET CLR 1.1.4322)',
- 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; QihooBot 1.0 [email protected])',
- 'Mozilla/4.0 (compatible; MSIE 5.0; Windows ME) Opera 5.11 [en]']
- def fetcher(self,url,semaphore,counter,lenlinks,tip):
- try:
- data = urllib2.Request(url,headers={"User-Agent": random.choice(self.useragent)})
- data = urllib2.urlopen(data).read()
- regex= re.compile('h3><a href="(.*?)" h=')
- links= regex.findall(data)
- for link in links:
- if key:
- if re.search(key,link):
- link = link.split("/")
- link = "http://"+link[2]
- link = link.replace("www.","")
- if not re.search("[0-9]\.[0-9]|php|msn|drupal|e2bn|microsoft|soundcloud|wp-plugins|wpguide|godaddy|fan-sites|godaddy|zone-h|osvdb|aa.org|aa.se|inspcloud|mondounix|yellowpages|iamtheproudownerofthelongestlongestlongestdomainnameinthisworld|cnet.com|encyclopedia|go.com|thepiratebay|wpbeginner|tripod|infospace|linkedin|ovh.net|a9.com|exploit|logo|music|altavista|github|gamesville|whowhere|gigablast|stackoverflow|teoma|download|wik|dictionary|theme|free|video|startpagina|startgoogle|lygo|dogpile|secure|security|hack|myspace|conduit|amfibi|lycos|blekko|metacrawler|exactseek|bing|dmoz|pathfinder|feedback|live\.com|w3|aol|yahoo|ask\.com|youtube|twitter|google|facebook|blogspot|wiki|sourceforge|phpmyadmin|forum|blog|share|wordpress|pastebin|4shared|tracker|yahoo|python|host|lib|app|yandex|wphelp|helpcenter|digitalsports",link) and link not in self.linkl:
- self.linkl.append(link)
- self.save(link)
- sys.stdout.write("\rKeyword: %s || Links: %s || Fetching %s of %s" % (key,len(self.linkl),counter,lenlinks))
- sys.stdout.flush()
- else:
- link = link.split("/")
- link = "http://"+link[2]
- link = link.replace("www.","")
- if not re.search("[0-9]\.[0-9]|php|msn|drupal|e2bn|microsoft|soundcloud|wp-plugins|wpguide|godaddy|fan-sites|godaddy|zone-h|osvdb|aa.org|aa.se|inspcloud|mondounix|yellowpages|iamtheproudownerofthelongestlongestlongestdomainnameinthisworld|cnet.com|encyclopedia|go.com|thepiratebay|wpbeginner|tripod|infospace|linkedin|ovh.net|a9.com|exploit|logo|music|altavista|github|gamesville|whowhere|gigablast|stackoverflow|teoma|download|wik|dictionary|theme|free|video|startpagina|startgoogle|lygo|dogpile|secure|security|hack|myspace|conduit|amfibi|lycos|blekko|metacrawler|exactseek|bing|dmoz|pathfinder|feedback|live\.com|w3|aol|yahoo|ask\.com|youtube|twitter|google|facebook|blogspot|wiki|sourceforge|phpmyadmin|forum|blog|share|wordpress|pastebin|4shared|tracker|yahoo|python|host|lib|app|yandex|wphelp|helpcenter|digitalsports",link) and link not in self.linkl:
- self.linkl.append(link)
- self.save(link)
- sys.stdout.write("\rLinks: %s || Fetching %s of %s" % (len(self.linkl),counter,lenlinks))
- sys.stdout.flush()
- except:
- pass
- semaphore.release()
- def save(self,link):
- try:
- check = open('bing.txt').read()
- if re.search(link,check):
- return False
- else:
- write = open('bing.txt','ab')
- write.write(link+"\r\n")
- write.close()
- except IOError:
- create = open('bing.txt','ab')
- extract = extracter()
- dorks = open(sys.argv[1]).readlines()
- semaphore = threading.BoundedSemaphore(value=int(sys.argv[2]))
- try:
- key = sys.argv[3]
- except:
- key = ""
- url_list = []
- for dork in dorks:
- dork = urllib.quote_plus(dork.strip())
- i = 1
- while i <= 201:
- url_list.append("http://www.bing.com/search?q="+dork+"&count=50&first="+str(i))
- i += 50
- print "[ + ] %s url created for fetch" % len(url_list)
- threadlist = []
- counter = 1
- for url in url_list:
- semaphore.acquire()
- thrd = threading.Thread(target=extract.fetcher,args=(url,semaphore,counter,len(url_list),key,))
- thrd.start()
- threadlist.append(thrd)
- counter += 1
- for t in threadlist:
- t.join()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement