Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # -*- coding: utf-8 -*-
- from bs4 import BeautifulSoup, Comment
- import requests, re, sys, os, json
# Path of the text file that lists the malware names to look up; taken from
# the first command-line argument below.
maleware_name_file = ""
# Directory that receives the per-result .txt reports and the combined JSON.
info_dir = "info"
all_infos = []

if len(sys.argv) < 2:
    print('Usage: [target_file]')
    # A missing argument is a usage error, so report failure to the caller.
    # sys.exit is used because the bare ``exit`` helper is only injected by
    # the ``site`` module and is not guaranteed to exist.
    sys.exit(1)

# Always take the first argument. Previously any extra argument left the
# path empty, which made the later open("") fail with a confusing error.
maleware_name_file = sys.argv[1]

# exist_ok avoids the check-then-create race of a separate os.path.exists().
os.makedirs(info_dir, exist_ok=True)
def wrtie2json(d, info, text):
    """Store a cleaned-up copy of *text* in *d* under the lower-cased *info* key.

    Cleaning steps:
      * the phrase ``What to do now`` is turned into a ``[What to do now]``
        heading on its own line,
      * runs of newlines are collapsed and surrounding whitespace stripped,
      * every remaining newline (plus the two appended at the end) is
        replaced by ``' <br> '``.

    Args:
        d: Dictionary that is updated in place.
        info: Section name; used lower-cased as the dictionary key.
        text: Raw section text.

    Returns:
        None. The result is written into *d*.
    """
    # The text is kept as-is (no utf8 -> cp950 round-trip): decoding UTF-8
    # bytes as cp950 turns every non-ASCII character into mojibake, and the
    # JSON writer can serialise any Unicode string anyway.
    text = text.replace('What to do now', '\n[What to do now]')
    text = re.sub(r'\n+', '\n', text).strip() + '\n\n'
    text = text.replace('\n', ' <br> ')
    d[info.lower()] = text
# Seconds to wait for any single HTTP response before giving up, so that a
# stalled server cannot hang the whole batch indefinitely.
_REQUEST_TIMEOUT = 30


def _format_report_text(text):
    """Return *text* normalised for the per-result ``.txt`` report.

    The phrase ``What to do now`` becomes a ``[What to do now]`` heading,
    runs of newlines are collapsed, surrounding whitespace is stripped and
    every newline (plus the two appended at the end) is replaced by ``<br>``.
    """
    text = text.replace('What to do now', '\n[What to do now]')
    text = re.sub(r'\n+', '\n', text).strip() + '\n\n'
    return text.replace('\n', '<br>')


def _iter_sections(page):
    """Yield ``(heading, json_key, node)`` for each section located on *page*.

    *node* is the element whose text holds the section body; it may be
    ``None`` when the anchor element exists but the expected content
    element next to it does not.
    """
    # Summary: the <div> following the button with id "SummaryDrawerStub".
    summary_button = page.find("button", {"id": "SummaryDrawerStub"})
    if summary_button is not None:
        yield 'Summary', 'summary', summary_button.find_next_sibling("div")

    # Technical description: the <div class="tech-info-content"> itself.
    yield ('Technical description', 'tech',
           page.find("div", {"class": "tech-info-content"}))

    # Symptoms / What to do now: the <div> following the parent of the
    # matching <h2 class="drawer-headings-d"> heading.
    for heading, key in (('Symptoms', 'symptoms'),
                         ('What to do now', 'what2do')):
        title = page.find('h2', {'class': 'drawer-headings-d'}, text=heading)
        if title is not None:
            yield heading, key, title.parent.find_next_sibling('div')


def crawl(maleware_name):
    """Search Google for *maleware_name* and scrape the Microsoft results.

    For every search-result link whose ``href`` contains ``"microsoft"`` the
    linked page is downloaded, its Summary / Technical description /
    Symptoms / What to do now sections are extracted, and they are written to
    ``<info_dir>/<maleware_name>_<n>.txt``. Report files smaller than 30
    bytes are deleted again at the end.

    Args:
        maleware_name: Name to search for; also used as the report file
            name prefix.

    Returns:
        A dict ``{'name': maleware_name, 'content': [...]}`` where
        ``content`` holds one dict per scraped page, keyed by ``summary``,
        ``tech``, ``symptoms`` and ``what2do`` for the sections found.
    """
    base = "http://www.google.com"
    # Let requests build the query string so that characters such as '&',
    # '#', '+' or spaces in a malware name are percent-encoded instead of
    # silently changing the query.
    response = requests.get(base + "/search",
                            params={"q": maleware_name},
                            timeout=_REQUEST_TIMEOUT)
    results = BeautifulSoup(response.text, "html.parser")

    fileCount = 0
    mal = {'name': maleware_name, 'content': []}

    for item in results.select(".r a"):
        # Anchors without an href are skipped rather than raising KeyError.
        href = item.get('href', '')
        # FOR MICROSOFT
        if "microsoft" not in href:
            continue

        page_response = requests.get(base + href, timeout=_REQUEST_TIMEOUT)
        # A separate name keeps the search-result document intact while the
        # result page is being parsed.
        page = BeautifulSoup(page_response.text, "html.parser")

        d = {}
        # NOTE(review): maleware_name goes into the file name unchanged; a
        # name containing a path separator will make open() fail -- confirm
        # whether names need sanitising.
        report_path = os.path.join(
            info_dir, maleware_name + '_' + str(fileCount + 1) + '.txt')
        # Explicit UTF-8 replaces the old utf8 -> cp950 re-decoding trick,
        # which avoided encode errors only by corrupting non-ASCII text.
        with open(report_path, 'w', encoding='utf-8') as report:
            for heading, key, node in _iter_sections(page):
                if node is None:
                    continue
                report.write('[' + heading + ']\n')
                report.write(_format_report_text(node.text))
                wrtie2json(d, key, node.text)

        fileCount += 1
        print(d)
        mal['content'].append(d)

    # remove blank files
    for i in range(fileCount):
        path = os.path.join(info_dir, maleware_name + '_' + str(i + 1) + '.txt')
        if os.path.getsize(path) < 30:
            os.remove(path)

    print("Done.")
    return mal
# Crawl every malware name listed in the input file (one name per line) and
# collect the results for the combined JSON dump.
all_infos = []
with open(maleware_name_file, 'r') as f:
    for line in f:
        # Strip all surrounding whitespace, not just '\n', and skip blank
        # lines so an empty search term is never sent to crawl().
        maleware_name = line.strip()
        if not maleware_name:
            continue
        all_infos.append(crawl(maleware_name))

# Write the results of all lookups as a single JSON array.
with open(os.path.join(info_dir, 'malware_info.json'), 'w') as outfile:
    json.dump(all_infos, outfile)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement