Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import json
- import pandas as pd
- from urllib.parse import urlparse
class HarAnalyzer:
    """Compute per-protocol statistics over the entries of a HAR capture.

    The parsed HAR document is kept in ``harJson``; ``json_by_protocol`` maps
    each request ``httpVersion`` string to the list of entries that used it.
    Every ``get_*`` method returns plain nested dicts (suitable for feeding to
    ``pandas.DataFrame``).
    """

    def __init__(self, harPath):
        """Load the HAR file at *harPath*, index it and print a short summary.

        Args:
            harPath: path of the HAR (JSON) file to analyse.

        Raises:
            OSError: if the file cannot be opened.
            json.JSONDecodeError: if the file is not valid JSON.
        """
        # The context manager closes the handle even if parsing fails.
        # "utf-8-sig" reads plain UTF-8 and also tolerates a leading BOM,
        # instead of depending on the platform's default encoding.
        with open(harPath, "r", encoding="utf-8-sig") as har_file:
            self.harJson = json.load(har_file)
        self.build_dict_by_protocol()
        print("Total of request: {}".format(self.get_total_request()))
        for protocol, entries in self.json_by_protocol.items():
            print("{} : {}".format(protocol, len(entries)))

    @staticmethod
    def _domain_of(entry):
        """Return the network location (``netloc``) of the entry's request URL."""
        return urlparse(entry.get("request").get("url")).netloc

    @staticmethod
    def _is_header(header, wanted):
        """Tell whether *header* is named *wanted* (lowercase), ignoring case."""
        return (header.get("name") or "").lower() == wanted

    @staticmethod
    def _count_header_names(headers, counts):
        """Increment ``counts[name]`` once for every header in *headers*."""
        for header in headers:
            name = header.get("name")
            counts[name] = counts.get(name, 0) + 1

    def build_dict_by_protocol(self):
        """(Re)build ``json_by_protocol`` from ``harJson``.

        Entries are grouped by the ``httpVersion`` string of their request,
        exactly as written in the capture (no case normalisation).
        """
        self.json_by_protocol = {}
        for entry in self.harJson.get("log").get("entries"):
            protocol = entry.get("request").get("httpVersion")
            self.json_by_protocol.setdefault(protocol, []).append(entry)

    def get_total_request(self):
        """Return the total number of entries in the capture."""
        return len(self.harJson.get("log").get("entries"))

    def get_content_type_repartition(self):
        """Count response media types per protocol.

        Returns:
            ``{protocol: {media_type: count}}``. The media type is the
            Content-Type value up to the first ``;`` (parameters such as
            ``charset`` are dropped), with surrounding whitespace removed.
        """
        content_by_protocol = {}
        for protocol, entries in self.json_by_protocol.items():
            counts = content_by_protocol[protocol] = {}
            for entry in entries:
                for header in entry.get("response").get("headers"):
                    # Header names are case-insensitive, so match any casing
                    # rather than only "Content-Type" / "content-type".
                    if not self._is_header(header, "content-type"):
                        continue
                    content_type = header.get("value").split(";")[0].strip()
                    counts[content_type] = counts.get(content_type, 0) + 1
        return content_by_protocol

    def get_domain_ip(self):
        """List the distinct server IPs seen for each domain, per protocol.

        Returns:
            ``{protocol: {domain: [ip, ...]}}`` with IPs in first-seen order.
            An entry without ``serverIPAddress`` contributes ``None``.
        """
        protocol_domain = {}
        for protocol, entries in self.json_by_protocol.items():
            ips_by_domain = protocol_domain[protocol] = {}
            for entry in entries:
                ip = entry.get("serverIPAddress")
                known_ips = ips_by_domain.setdefault(self._domain_of(entry), [])
                if ip not in known_ips:
                    known_ips.append(ip)
        return protocol_domain

    def get_nb_request_by_domain(self):
        """Count requests per domain, per protocol.

        Returns:
            ``{protocol: {domain: request_count}}``.
        """
        domain_request = {}
        for protocol, entries in self.json_by_protocol.items():
            counts = domain_request[protocol] = {}
            for entry in entries:
                domain = self._domain_of(entry)
                counts[domain] = counts.get(domain, 0) + 1
        return domain_request

    def get_headers_stats(self):
        """Count header-name occurrences per domain, per protocol.

        Header names are counted exactly as captured (case-sensitive).

        Returns:
            ``{"request": {protocol: {domain: {name: count}}},
            "response": {protocol: {domain: {name: count}}}}``.
            Every domain that has at least one entry appears in both halves,
            even when it carried no headers.
        """
        protocol_header_request = {}
        protocol_header_response = {}
        for protocol, entries in self.json_by_protocol.items():
            request_stats = protocol_header_request[protocol] = {}
            response_stats = protocol_header_response[protocol] = {}
            for entry in entries:
                domain = self._domain_of(entry)
                self._count_header_names(
                    entry.get("request").get("headers"),
                    request_stats.setdefault(domain, {}),
                )
                self._count_header_names(
                    entry.get("response").get("headers"),
                    response_stats.setdefault(domain, {}),
                )
        return {"request": protocol_header_request, "response": protocol_header_response}

    def get_cookie_domain_stats(self):
        """Count the unique cookie names set by and sent to each domain.

        Uniqueness is tracked per domain across all protocols: a cookie name
        is counted the first time it is seen for that domain and ignored
        afterwards.

        Returns:
            ``{domain: {"set-cookie": n_set, "cookies-send": n_sent}}``.
            Domains with no cookies in either direction are omitted.
        """
        cookie_domain = {}
        set_names_by_domain = {}
        sent_names_by_domain = {}
        for entries in self.json_by_protocol.values():
            for entry in entries:
                domain = self._domain_of(entry)
                count_set = 0
                count_send = 0

                sent_names = sent_names_by_domain.setdefault(domain, set())
                for cookie in entry.get("request").get("cookies"):
                    cookie_name = cookie.get("name")
                    if cookie_name not in sent_names:
                        sent_names.add(cookie_name)
                        count_send += 1

                for header in entry.get("response").get("headers"):
                    # Match "Set-Cookie" in any casing; a lowercase-only test
                    # misses captures that keep the server's original casing.
                    if not self._is_header(header, "set-cookie"):
                        continue
                    # The cookie name is everything before the first "=".
                    cookie_name = header.get("value").split("=", 1)[0].strip()
                    set_names = set_names_by_domain.setdefault(domain, set())
                    if cookie_name not in set_names:
                        set_names.add(cookie_name)
                        count_set += 1

                if domain in cookie_domain:
                    cookie_domain[domain]["set-cookie"] += count_set
                    cookie_domain[domain]["cookies-send"] += count_send
                elif count_set or count_send:
                    cookie_domain[domain] = {"set-cookie": count_set, "cookies-send": count_send}
        return cookie_domain
# Script driver: load the capture (the constructor prints the total number of
# requests and the per-protocol counts). Uncomment one of the snippets below
# to print a specific report.
analyzer = HarAnalyzer("./very_curious.har")
# Table of the number of responses per content type, for each protocol.
#print(pd.DataFrame(analyzer.get_content_type_repartition()).to_string())
# Table of the domains and their server IPs, for each protocol.
#print(pd.DataFrame(analyzer.get_domain_ip()).to_string())
# Table of the domains and their number of requests, for each protocol.
#print(pd.DataFrame(analyzer.get_nb_request_by_domain()).to_string())
# (next 3 lines) Print the request header counts for HTTP/2, summed over all
# domains (replace "http/2.0" with another protocol key if desired).
#pandas_df_request_http2_header = pd.DataFrame(analyzer.get_headers_stats()["request"]["http/2.0"])
#sum_request_http2_header = pandas_df_request_http2_header.sum(axis=1)
#print(sum_request_http2_header.to_string())
# (next 3 lines) Print the response header counts for HTTP/2, summed over all
# domains (replace "http/2.0" with another protocol key if desired).
#pandas_df_response_http2_header = pd.DataFrame(analyzer.get_headers_stats()["response"]["http/2.0"])
#sum_response_http2_header = pandas_df_response_http2_header.sum(axis=1)
#print(sum_response_http2_header.to_string())
# Number of unique cookies sent to and received from each domain.
#print(pd.DataFrame(analyzer.get_cookie_domain_stats()).transpose().to_string())
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement