import re, requests, time
from urlparse import urlparse
# Downloading the JSON data directly is a real pain,
# so just use this instead :D
# Sorry it's a mess, folks
# Got lazy building tools like this :"
# If you want to re-code this,,, be decent and give credit :D
print """
urlsearch.commoncrawl.org Grabber
Coded by Zeerx7 # XploitSec ID
"""
print 'Enter a domain! (ex: jp)'
d = raw_input('Domain: ')  # d = 'uk'
u = 'http://urlsearch.commoncrawl.org/?q=' + d + '&start='
tmp = []  # hosts already seen, to avoid duplicate saves
def x():
    # co = count_page()  # the endpoint 502s a lot, so better not to use that function!
    print 'Total number of pages to grab (ex: 10000)'
    co = raw_input('Total Page: ')
    print 'domain: %s total pages to grab: %s' % (d, co)
    time.sleep(2)
    print 'start'
    for o in range(int(co)):
        print 'page: %s' % (o)
        try:
            # nth-term formula of an arithmetic sequence, lucky I still remember it :v
            # (20 results per page, so page o starts at offset o * 20; the original
            # (o - 1) * 20 made the very first request start at -20)
            rum = o * 20
            z = requests.get(u + str(rum))
            f = z.text.replace('\n', '')
            # each result is an <li><a ...>http...</a></li> entry
            r = re.findall('http(.+?)</a></li>', f)
            for j in r:
                if 'Content-Type' in j:
                    continue
                uu = 'http' + j
                uuu = urlparse(uu).netloc
                if uuu not in tmp:
                    save('domains.txt', uuu)
                    tmp.append(uuu)
                    print uuu + ' [Saved!]'
                    print uu
            print z.status_code
        except:
            pass
def count_page():
    # probe a huge offset and scrape the pagination links; the
    # second-to-last link text should be the last page number
    c = u + str(999999999)
    p = requests.get(c)
    h = re.findall('">(.+?)</a>', p.text)
    b = len(h)
    print c
    print p.status_code
    return h[b - 2]
def save(a, b):
    # append one line to the output file
    fx = open(a, "a")
    fx.write(b + "\n")
    fx.close()

x()
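
# ----------------------------------------------------------------------
# For reference, below is a minimal Python 3 sketch of the same grabber.
# It is an assumption-laden port, not part of the original paste: it
# assumes urlsearch.commoncrawl.org still paginates 20 results per
# '&start=' offset (the service has since been retired in favour of
# index.commoncrawl.org, so treat this as illustrative only).
import re
import time
from urllib.parse import urlparse

import requests

def grab(domain, pages, out_path='domains.txt'):
    base = 'http://urlsearch.commoncrawl.org/?q=' + domain + '&start='
    seen = set()  # dedupe hosts across pages
    with open(out_path, 'a') as out:
        for page in range(pages):
            try:
                resp = requests.get(base + str(page * 20), timeout=30)
            except requests.RequestException:
                continue  # skip pages that 502 or time out
            body = resp.text.replace('\n', '')
            for tail in re.findall(r'http(.+?)</a></li>', body):
                if 'Content-Type' in tail:
                    continue
                host = urlparse('http' + tail).netloc
                if host and host not in seen:
                    seen.add(host)
                    out.write(host + '\n')
                    print(host + ' [Saved!]')
            print('page %d -> HTTP %d' % (page, resp.status_code))
            time.sleep(2)  # be gentle with the endpoint

if __name__ == '__main__':
    grab(input('Domain: '), int(input('Total Page: ')))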