SH1NU11b1

Pydownloader.py

Feb 9th, 2015
298
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.75 KB | None | 0 0
  1. import re
  2. import time
  3. import urllib2
  4. import hashlib
  5. import os
  6. import random
  7. import Queue
  8. import threading
  9. import argparse
  10.  
  11. print """
  12. Malc0de.com samples downloader v3.3
  13. )\._.,--....,'``.
  14. .b--. /; _.. \ _\ (`._ ,.
  15. `=,-,-'~~~ `----(,_..'--(,_..'`-.;.'
  16. http://virii.tk http://twitter.com/ViRiiTk
  17. """
  18.  
  19. parser = argparse.ArgumentParser(description="Malc0de.com samples downloader v3.3")
  20.  
  21. parser.add_argument("nr_samples", type=int,
  22. help= "Number of samples you want to download")
  23.  
  24. parser.add_argument("-t", "--nr_threads", metavar="threads", type=int, default=200,
  25. help= "Threads number (Default: 200)")
  26.  
  27. parser.add_argument("-a", "--agent", default="Mozilla/5.0 (Windows NT 5.1; rv:31.0) Gecko/20100101 Firefox/31.0",
  28. help= "User Agent used to download samples")
  29.  
  30. parser.add_argument("-d", "--dldfolder", default = "C:\malware\\",
  31. help= "Local folder to download samples (Default: C:\malware\\ )")
  32.  
  33. parser.add_argument("-i", "--info", default = "_files.txt",
  34. help = "file to store info about downloaded samples (Default: _files.txt)")
  35.  
  36. parser.add_argument("-e", "--error", default = "_errors.txt",
  37. help = "file to store errors (Default: _errors.txt)")
  38.  
  39. parser.add_argument("-u", "--malurl", default = "_mal_url.txt",
  40. help = "file to store malware urls (Default: _mal_url.txt)")
  41. args = parser.parse_args()
  42.  
  43. #create download folder if not exist
  44. if not os.path.isdir(args.dldfolder):
  45. os.mkdir(args.dldfolder)
  46.  
  47. #limit the number of download samples
  48. if args.nr_samples > 10000:
  49. print "You need very Very VERY many samples, 5k is enough for you"
  50. args.nr_samples = 4999
  51.  
  52. if args.nr_threads >= args.nr_samples:
  53. args.nr_threads = args.nr_samples
  54.  
  55. print "Try to download latest %i samples" %(args.nr_samples)
  56. print "Threads: %i" %(args.nr_threads)
  57. print "Malware samples will be downloaded to %s" %(args.dldfolder), "\n"
  58.  
  59. #construct user agents
  60. dldagent = {'User-Agent' : args.agent}
  61. useragent = { 'User-Agent' : 'Malc0de.com samples downloader v3.3 http://ViRii.Tk'}
  62.  
  63. #queue
  64. q = Queue.Queue()
  65.  
  66. #generate random string
  67. def get_random_word(a):
  68. word = ''
  69. for i in range(a):
  70. word += random.choice('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789')
  71. return word
  72.  
  73. #md5 file
  74. def md5Checksum(filePath):
  75. fh = open(filePath, 'rb')
  76. m = hashlib.md5()
  77. while True:
  78. data = fh.read(8192)
  79. if not data:
  80. break
  81. m.update(data)
  82. return m.hexdigest()
  83.  
  84. #nr paginilor ce trebuie vizitate
  85. counter = 0
  86. if args.nr_samples % 50 == 0:
  87. pages = args.nr_samples / 50
  88. else :
  89. pages = (args.nr_samples / 50) + 1
  90.  
  91. #find all malware address on curent page
  92. def getmalware(pagina):
  93. global counter
  94. b = re.findall("<td>[\d]{4}-[\d]{2}-[\d]{2}<\/td>\n.+\n", pagina)
  95. if b:
  96. for i in b:
  97. data = re.search("<td>([\d]{4}-[\d]{2}-[\d]{2})<\/td>", i)
  98. malware = re.search("\t<td>(.+)<\/td>", i)
  99. if data and malware:
  100. malware= re.sub("<br\/>", "",malware.group(1) )
  101. #print data.group(1), malware
  102. if counter >= args.nr_samples:
  103. return
  104. else:
  105. q.put(malware)
  106. counter += 1
  107. #browsing pages
  108. print "Browsing pages:"
  109. for i in range(1, pages + 1):
  110.  
  111. adresa = "http://malc0de.com/database/?&page=" + str(i)
  112. print "Searching on:", adresa
  113. time.sleep(3) # pauza intre pagini (s)
  114.  
  115. try:
  116. req = urllib2.Request(adresa, None, useragent)
  117. response = urllib2.urlopen(req)
  118. continut = response.read()
  119. getmalware(continut)
  120. except Exception as e:
  121. print e
  122. pass
  123.  
  124. def dld_mal(url_mal):
  125. #downloading malware samples
  126.  
  127. #write address of this sample
  128. with open(args.dldfolder + args.malurl, "a") as handle:
  129. handle.write(url_mal + "\n")
  130. handle.close()
  131.  
  132. #get file name
  133. file_name = url_mal.split("/")[-1]
  134.  
  135. #remove bad characters from file name
  136. if len(file_name)==0 or re.search("\?", file_name) or re.search("\&", file_name):
  137. file_name = "No_name" + str(get_random_word(8))
  138.  
  139. #try to download sample
  140. try:
  141. #check if url start with "http://
  142. if url_mal[:7] != "http://":
  143. url_mal = "http://" + url_mal
  144.  
  145. #construct url and set timeout
  146. url_construct = urllib2.Request(url_mal, None, dldagent)
  147. u = urllib2.urlopen(url_construct, timeout = 59) #timeout 1 min
  148.  
  149. # every downloaded malware will have a uniq name: "Malware_sample" + "_" + 3 random characters
  150. f_name = args.dldfolder + str(file_name) +"_" + get_random_word(3)
  151.  
  152. #write to file
  153. f = open(f_name, 'wb')
  154. block_sz = 8192
  155. while True:
  156. buffer = u.read(block_sz)
  157. if not buffer:
  158. break
  159. f.write(buffer)
  160. f.close()
  161.  
  162. #write info to _files.txt
  163. with open(args.dldfolder + args.info, "a") as handle:
  164. md5hash = md5Checksum(f_name)
  165. handle.write(str(md5Checksum(f_name)) +"\t" + str(file_name)+ "\t" + url_mal + "\n")
  166. handle.close
  167.  
  168. print "\n" + "Am descarcat: " + file_name,
  169. except Exception as e:
  170. #adding error to _errors.txt
  171. with open(args.dldfolder + args.error, "a") as handle:
  172. handle.write(url_mal + "\t" + str(e) + "\n")
  173. handle.close()
  174. pass
  175.  
  176. print "Downloading:",
  177. def worker():
  178. while True:
  179. if not q.empty():
  180. item = q.get()
  181. dld_mal(item)
  182. q.task_done()
  183.  
  184. for i in range(args.nr_threads):
  185. t = threading.Thread(target=worker)
  186. t.daemon = True
  187. t.start()
  188.  
  189. q.join()
  190. exit()
Add Comment
Please, Sign In to add comment