Advertisement
Guest User

Untitled

a guest
Dec 17th, 2017
81
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.90 KB | None | 0 0
  1. # -*- coding: utf-8 -*-
  2. from bs4 import BeautifulSoup, Comment
  3. import requests, re, sys, os, json
  4.  
  5. maleware_name_file = ""
  6. info_dir = "info"
  7. all_infos = []
  8.  
  9. if len(sys.argv) <= 1:
  10. print('Usage: [target_file]')
  11. exit(0)
  12. if len(sys.argv) == 2:
  13. maleware_name_file = sys.argv[1]
  14. if not os.path.exists(info_dir):
  15. os.makedirs(info_dir)
  16.  
  17. def wrtie2json(d, info,text):
  18. text = str(text.encode("utf8").decode("cp950", "ignore"))
  19. text = text.replace('What to do now', '\n[What to do now]')
  20. text = re.sub(r'\n+', '\n', text).strip() +'\n\n'
  21. text = text.replace('\n', ' <br> ')
  22. d[info.lower()] = text
  23.  
  24. def crawl(maleware_name):
  25. search_item = maleware_name
  26. base = "http://www.google.com"
  27. url = "http://www.google.com/search?q="+ search_item
  28.  
  29. response = requests.get(url)
  30. soup = BeautifulSoup(response.text,"html.parser")
  31. fileCount = 0
  32. mal = {}
  33. mal['name'] = maleware_name
  34. mal['content'] = []
  35. for item in soup.select(".r a"):
  36. # FOR MICROSOFT
  37. if "microsoft" in item['href']:
  38. d = {}
  39. r = requests.get(base + item['href'])
  40. data = r.text
  41. soup = BeautifulSoup(data, "html.parser")
  42. # for br in soup.find_all("br"):
  43. # br.replace_with("<br>")
  44. def writeText(file, text):
  45. text = str(text.encode("utf8").decode("cp950", "ignore"))
  46. text = text.replace('What to do now', '\n[What to do now]')
  47. text = re.sub(r'\n+', '\n', text).strip() +'\n\n'
  48. text = text.replace('\n', '<br>')
  49. file.write(text)
  50.  
  51. with open(os.path.join(info_dir, maleware_name +'_' + str(fileCount+1) + '.txt'), 'w') as file:
  52. # summary id:SummaryDrawerStub
  53. summary = soup.find("button", {"id": "SummaryDrawerStub"})
  54. if(summary):
  55. summary = summary.find_next_sibling("div")
  56. file.write('[Summary]\n')
  57. writeText(file, summary.text)
  58. wrtie2json(d, 'summary', summary.text)
  59.  
  60. # # tech-description class:tech-info-content
  61. techInfo = soup.find("div", {"class": "tech-info-content"})
  62. if(techInfo):
  63. file.write('[Technical description]\n')
  64. writeText(file, techInfo.text)
  65. wrtie2json(d, 'tech', techInfo.text)
  66.  
  67. # symptoms
  68. symptoms = soup.find('h2', {'class': 'drawer-headings-d'}, text='Symptoms')
  69. if(symptoms):
  70. file.write('[Symptoms]\n')
  71. symptoms = symptoms.parent.find_next_sibling('div')
  72. writeText(file, symptoms.text)
  73. wrtie2json(d, 'symptoms', symptoms.text)
  74.  
  75. # what to do now
  76. wtdn = soup.find('h2', {'class': 'drawer-headings-d'}, text='What to do now')
  77. if(wtdn):
  78. file.write('[What to do now]\n')
  79. wtdn= wtdn.parent.find_next_sibling('div')
  80. writeText(file, wtdn.text)
  81. wrtie2json(d, 'what2do', wtdn.text)
  82.  
  83. fileCount = fileCount + 1
  84. print(d)
  85. mal['content'].append(d)
  86.  
  87. # remove blank files
  88. for i in range(fileCount):
  89. filename = maleware_name + '_' + str(i+1) + '.txt'
  90. if(os.path.getsize(os.path.join(info_dir,filename)) < 30):
  91. os.remove(os.path.join(info_dir,filename))
  92. print("Done.")
  93. return mal
  94.  
  95.  
  96.  
  97. with open(maleware_name_file, 'r') as f:
  98. all_infos = []
  99. for line in f:
  100. maleware_name = line.rstrip('\n')
  101. all_infos.append(crawl(maleware_name))
  102. with open(os.path.join(info_dir, 'malware_info.json') , 'w') as outfile:
  103. json.dump(all_infos, outfile)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement