Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import re
- import time
- import urllib2
- import hashlib
- import os
- import random
- import Queue
- import threading
- import argparse
- print """
- Malc0de.com samples downloader v3.3
- )\._.,--....,'``.
- .b--. /; _.. \ _\ (`._ ,.
- `=,-,-'~~~ `----(,_..'--(,_..'`-.;.'
- http://virii.tk http://twitter.com/ViRiiTk
- """
- parser = argparse.ArgumentParser(description="Malc0de.com samples downloader v3.3")
- parser.add_argument("nr_samples", type=int,
- help= "Number of samples you want to download")
- parser.add_argument("-t", "--nr_threads", metavar="threads", type=int, default=200,
- help= "Threads number (Default: 200)")
- parser.add_argument("-a", "--agent", default="Mozilla/5.0 (Windows NT 5.1; rv:31.0) Gecko/20100101 Firefox/31.0",
- help= "User Agent used to download samples")
- parser.add_argument("-d", "--dldfolder", default = "C:\malware\\",
- help= "Local folder to download samples (Default: C:\malware\\ )")
- parser.add_argument("-i", "--info", default = "_files.txt",
- help = "file to store info about downloaded samples (Default: _files.txt)")
- parser.add_argument("-e", "--error", default = "_errors.txt",
- help = "file to store errors (Default: _errors.txt)")
- parser.add_argument("-u", "--malurl", default = "_mal_url.txt",
- help = "file to store malware urls (Default: _mal_url.txt)")
- args = parser.parse_args()
- #create download folder if not exist
- if not os.path.isdir(args.dldfolder):
- os.mkdir(args.dldfolder)
- #limit the number of download samples
- if args.nr_samples > 10000:
- print "You need very Very VERY many samples, 5k is enough for you"
- args.nr_samples = 4999
- if args.nr_threads >= args.nr_samples:
- args.nr_threads = args.nr_samples
- print "Try to download latest %i samples" %(args.nr_samples)
- print "Threads: %i" %(args.nr_threads)
- print "Malware samples will be downloaded to %s" %(args.dldfolder), "\n"
- #construct user agents
- dldagent = {'User-Agent' : args.agent}
- useragent = { 'User-Agent' : 'Malc0de.com samples downloader v3.3 http://ViRii.Tk'}
- #queue
- q = Queue.Queue()
#generate random string
def get_random_word(a):
    """Return a random string of ``a`` ASCII letters and digits.

    Each character is drawn independently with ``random.choice``; a
    non-positive ``a`` yields an empty string.
    """
    alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'
    return ''.join(random.choice(alphabet) for _ in range(a))
#md5 file
def md5Checksum(filePath):
    """Return the hexadecimal MD5 digest of the file at ``filePath``.

    The file is read in binary mode in 8192-byte chunks, so large files are
    hashed without being loaded into memory at once. The handle is closed
    even when reading fails.

    Raises:
        IOError/OSError: if the file cannot be opened or read.
    """
    digest = hashlib.md5()
    with open(filePath, 'rb') as fh:
        # iter() with a sentinel keeps calling read() until it returns b''.
        for chunk in iter(lambda: fh.read(8192), b''):
            digest.update(chunk)
    return digest.hexdigest()
# Number of listing pages that must be visited: 50 samples per page,
# rounded up so a partial last page is still fetched.
counter = 0  # samples queued so far
pages, leftover = divmod(args.nr_samples, 50)
if leftover:
    pages += 1
#find all malware address on curent page
def getmalware(pagina):
    """Queue the sample addresses found in one listing page.

    Scans ``pagina`` (the page's HTML text) for a ``<td>YYYY-MM-DD</td>``
    cell followed by the next line, takes the tab-prefixed ``<td>`` cell on
    that line as the sample address, strips any ``<br/>`` from it and puts
    it on the global queue ``q``. The global ``counter`` is incremented for
    every queued address, and the function returns as soon as it has reached
    ``args.nr_samples``.
    """
    global counter
    rows = re.findall("<td>[\d]{4}-[\d]{2}-[\d]{2}<\/td>\n.+\n", pagina)
    for row in rows:
        date_cell = re.search("<td>([\d]{4}-[\d]{2}-[\d]{2})<\/td>", row)
        url_cell = re.search("\t<td>(.+)<\/td>", row)
        # Skip rows where either the date or the address cell is missing.
        if not (date_cell and url_cell):
            continue
        # Enough samples already queued: stop scanning this page.
        if counter >= args.nr_samples:
            return
        q.put(re.sub("<br\/>", "", url_cell.group(1)))
        counter += 1
- #browsing pages
- print "Browsing pages:"
- for i in range(1, pages + 1):
- adresa = "http://malc0de.com/database/?&page=" + str(i)
- print "Searching on:", adresa
- time.sleep(3) # pauza intre pagini (s)
- try:
- req = urllib2.Request(adresa, None, useragent)
- response = urllib2.urlopen(req)
- continut = response.read()
- getmalware(continut)
- except Exception as e:
- print e
- pass
- def dld_mal(url_mal):
- #downloading malware samples
- #write address of this sample
- with open(args.dldfolder + args.malurl, "a") as handle:
- handle.write(url_mal + "\n")
- handle.close()
- #get file name
- file_name = url_mal.split("/")[-1]
- #remove bad characters from file name
- if len(file_name)==0 or re.search("\?", file_name) or re.search("\&", file_name):
- file_name = "No_name" + str(get_random_word(8))
- #try to download sample
- try:
- #check if url start with "http://
- if url_mal[:7] != "http://":
- url_mal = "http://" + url_mal
- #construct url and set timeout
- url_construct = urllib2.Request(url_mal, None, dldagent)
- u = urllib2.urlopen(url_construct, timeout = 59) #timeout 1 min
- # every downloaded malware will have a uniq name: "Malware_sample" + "_" + 3 random characters
- f_name = args.dldfolder + str(file_name) +"_" + get_random_word(3)
- #write to file
- f = open(f_name, 'wb')
- block_sz = 8192
- while True:
- buffer = u.read(block_sz)
- if not buffer:
- break
- f.write(buffer)
- f.close()
- #write info to _files.txt
- with open(args.dldfolder + args.info, "a") as handle:
- md5hash = md5Checksum(f_name)
- handle.write(str(md5Checksum(f_name)) +"\t" + str(file_name)+ "\t" + url_mal + "\n")
- handle.close
- print "\n" + "Am descarcat: " + file_name,
- except Exception as e:
- #adding error to _errors.txt
- with open(args.dldfolder + args.error, "a") as handle:
- handle.write(url_mal + "\t" + str(e) + "\n")
- handle.close()
- pass
- print "Downloading:",
- def worker():
- while True:
- if not q.empty():
- item = q.get()
- dld_mal(item)
- q.task_done()
# Start the pool of daemon worker threads, block until every queued item has
# been marked done, then terminate the script (daemon threads die with it).
for _ in range(args.nr_threads):
    downloader = threading.Thread(target=worker)
    downloader.daemon = True
    downloader.start()
q.join()
exit()
Add Comment
Please, Sign In to add comment