
Bing Grabber

Jul 22nd, 2014
import urllib2
import urllib
import random
import re
import sys
import threading

class extracter:
    # Domains matching this pattern are noise (search engines, CMS and
    # hosting vendors, code-hosting sites, ...) and are never saved.
    blacklist = re.compile(r"[0-9]\.[0-9]|php|msn|drupal|e2bn|microsoft|soundcloud|wp-plugins|wpguide|godaddy|fan-sites|zone-h|osvdb|aa.org|aa.se|inspcloud|mondounix|yellowpages|iamtheproudownerofthelongestlongestlongestdomainnameinthisworld|cnet.com|encyclopedia|go.com|thepiratebay|wpbeginner|tripod|infospace|linkedin|ovh.net|a9.com|exploit|logo|music|altavista|github|gamesville|whowhere|gigablast|stackoverflow|teoma|download|wik|dictionary|theme|free|video|startpagina|startgoogle|lygo|dogpile|secure|security|hack|myspace|conduit|amfibi|lycos|blekko|metacrawler|exactseek|bing|dmoz|pathfinder|feedback|live\.com|w3|aol|yahoo|ask\.com|youtube|twitter|google|facebook|blogspot|wiki|sourceforge|phpmyadmin|forum|blog|share|wordpress|pastebin|4shared|tracker|python|host|lib|app|yandex|wphelp|helpcenter|digitalsports")
    # Matches result links in Bing's 2014-era result markup.
    result_re = re.compile('h3><a href="(.*?)" h=')

    def __init__(self):
        self.linkl = []  # unique domains collected so far
        # User agents rotated per request so the traffic looks less uniform.
        self.useragent = ['Mozilla/4.0 (compatible; MSIE 5.0; SunOS 5.10 sun4u; X11)',
                  'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.2pre) Gecko/20100207 Ubuntu/9.04 (jaunty) Namoroka/3.6.2pre',
                  'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Avant Browser;',
                  'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 5.0)',
                  'Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.1)',
                  'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.0.6)',
                  'Microsoft Internet Explorer/4.0b1 (Windows 95)',
                  'Mozilla/4.0 (compatible; MSIE 5.0; AOL 4.0; Windows 95; c_athome)',
                  'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)',
                  'Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko) (Kubuntu)',
                  'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; ZoomSpider.net bot; .NET CLR 1.1.4322)',
                  'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; QihooBot 1.0 [email protected])',
                  'Mozilla/4.0 (compatible; MSIE 5.0; Windows ME) Opera 5.11 [en]']

    def fetcher(self, url, semaphore, counter, lenlinks, key):
        """Fetch one Bing result page and save every new, non-blacklisted domain."""
        try:
            request = urllib2.Request(url, headers={"User-Agent": random.choice(self.useragent)})
            data = urllib2.urlopen(request).read()
            for link in self.result_re.findall(data):
                # Optional keyword filter given on the command line.
                if key and not re.search(key, link):
                    continue
                # Reduce the result URL to a bare http://domain form.
                link = "http://" + link.split("/")[2].replace("www.", "")
                if not self.blacklist.search(link) and link not in self.linkl:
                    self.linkl.append(link)
                    self.save(link)
            if key:
                sys.stdout.write("\rKeyword: %s || Links: %s || Fetching %s of %s" % (key, len(self.linkl), counter, lenlinks))
            else:
                sys.stdout.write("\rLinks: %s || Fetching %s of %s" % (len(self.linkl), counter, lenlinks))
            sys.stdout.flush()
        except Exception:
            pass  # a page that fails to download or parse is simply skipped
        semaphore.release()

    def save(self, link):
        """Append link to bing.txt unless the file already contains it."""
        try:
            check = open('bing.txt').read()
        except IOError:
            check = ""  # file does not exist yet
        if link in check:
            return False
        write = open('bing.txt', 'ab')
        write.write(link + "\r\n")
        write.close()

extract   = extracter()
dorks     = open(sys.argv[1]).readlines()
semaphore = threading.BoundedSemaphore(value=int(sys.argv[2]))
try:
    key = sys.argv[3]
except IndexError:
    key = ""

# One URL per 50-result page, covering the first ~250 results of each dork
# (first = 1, 51, 101, 151, 201).
url_list = []
for dork in dorks:
    dork = urllib.quote_plus(dork.strip())
    for first in range(1, 202, 50):
        url_list.append("http://www.bing.com/search?q=" + dork + "&count=50&first=" + str(first))
print "[ + ] %s URLs created to fetch" % len(url_list)

# One thread per result page; the bounded semaphore caps how many run at once.
threadlist = []
counter    = 1
for url in url_list:
    semaphore.acquire()
    thrd = threading.Thread(target=extract.fetcher, args=(url, semaphore, counter, len(url_list), key))
    thrd.start()
    threadlist.append(thrd)
    counter += 1
for t in threadlist:
    t.join()
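For reference, a typical invocation might look like the following (bing_grabber.py and dorks.txt are placeholder names; the dork file holds one search query per line):

    python bing_grabber.py dorks.txt 10 wordpress

The first argument names the dork file, the second caps the number of concurrent threads, and the optional third is a keyword that result URLs must match. Every new domain that passes the blacklist is appended to bing.txt in the working directory.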