Advertisement
Guest User

Har analyzer

a guest
Dec 8th, 2019
304
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 8.27 KB | None | 0 0
  1. import json
  2. import pandas as pd
  3. from urllib.parse import urlparse
  4.  
  5. class HarAnalyzer:
  6.     def __init__(self, harPath):
  7.         file = open(harPath, "r")
  8.         self.harJson = json.load(file)
  9.         self.build_dict_by_protocol()
  10.  
  11.         print("Total of request: {}".format(self.get_total_request()))
  12.         for protocol in self.json_by_protocol:
  13.             print("{} : {}".format(protocol, len(self.json_by_protocol[protocol])))
  14.    
  15.     def build_dict_by_protocol(self):
  16.         self.json_by_protocol = {}
  17.  
  18.         for entry in self.harJson.get("log").get("entries"):
  19.             protocol_str_request = entry.get("request").get("httpVersion")
  20.             if protocol_str_request in self.json_by_protocol:
  21.                 self.json_by_protocol[protocol_str_request].append(entry)
  22.             else:
  23.                 self.json_by_protocol[protocol_str_request] = [entry]
  24.    
  25.     def get_total_request(self):
  26.         return len(self.harJson.get("log").get("entries"))
  27.    
  28.     def get_content_type_repartition(self):
  29.         content_by_protocol = {}
  30.         for protocol in self.json_by_protocol:
  31.             list_entries = self.json_by_protocol[protocol]
  32.             content_by_protocol[protocol] = {}
  33.             for entry in list_entries:
  34.                 headers_list = entry.get("response").get("headers")
  35.                 for header in headers_list:
  36.                     if header.get("name") == "Content-Type" or header.get("name") == "content-type":
  37.                         content_type = header.get("value")
  38.                         content_type = content_type.split(";")[0]
  39.                         if content_type in content_by_protocol[protocol]:
  40.                             content_by_protocol[protocol][content_type] += 1
  41.                         else:
  42.                             content_by_protocol[protocol][content_type] = 1
  43.         return content_by_protocol
  44.    
  45.     def get_domain_ip(self):
  46.         protocol_domain = {}
  47.        
  48.         for protocol in self.json_by_protocol:
  49.             protocol_domain[protocol] = {}
  50.             for entry in self.json_by_protocol[protocol]:
  51.                 parsed_uri = urlparse(entry.get("request").get("url") )
  52.                 ip = entry.get("serverIPAddress")
  53.                 domain = parsed_uri.netloc
  54.                 if domain in protocol_domain[protocol]:
  55.                     if not ip in protocol_domain[protocol][domain]:
  56.                         protocol_domain[protocol][domain].append(ip)
  57.                        
  58.                 else:
  59.                     protocol_domain[protocol][domain] = [ip]
  60.         return protocol_domain
  61.  
  62.     def get_nb_request_by_domain(self):
  63.         domain_request = {}
  64.         for protocol in self.json_by_protocol:
  65.             domain_request[protocol] = {}
  66.             for entry in self.json_by_protocol[protocol]:
  67.                 parsed_uri = urlparse(entry.get("request").get("url") )
  68.                 ip = entry.get("serverIPAddress")
  69.                 domain = parsed_uri.netloc
  70.                 if domain in domain_request[protocol]:
  71.                     domain_request[protocol][domain] += 1  
  72.                 else:
  73.                     domain_request[protocol][domain] = 1
  74.         return domain_request
  75.    
  76.     def get_headers_stats(self):
  77.         protocol_header_request = {}
  78.         protocol_header_response = {}
  79.  
  80.         for protocol in self.json_by_protocol:
  81.             protocol_header_request[protocol] = {}
  82.             protocol_header_response[protocol] = {}
  83.            
  84.             for entry in self.json_by_protocol[protocol]:
  85.                 headers_request = entry.get("request").get("headers")
  86.                 headers_response = entry.get("response").get("headers")
  87.                
  88.                 parsed_uri = urlparse(entry.get("request").get("url"))
  89.                 domain = parsed_uri.netloc
  90.                
  91.                 if not domain in protocol_header_request[protocol]:
  92.                     # {'name': "count"}
  93.                     protocol_header_request[protocol][domain] = {}
  94.                
  95.                 for header in headers_request:
  96.                     if header.get("name") in protocol_header_request[protocol][domain]:
  97.                         protocol_header_request[protocol][domain][header.get("name")] +=1
  98.                     else:
  99.                         protocol_header_request[protocol][domain][header.get("name")] = 1
  100.                        
  101.                 if not domain in protocol_header_response[protocol]:
  102.                     # {'name': "count"}
  103.                     protocol_header_response[protocol][domain] = {}
  104.                
  105.                 for header in headers_response:
  106.                     if header.get("name") in protocol_header_response[protocol][domain]:
  107.                         protocol_header_response[protocol][domain][header.get("name")] +=1
  108.                     else:
  109.                         protocol_header_response[protocol][domain][header.get("name")] = 1
  110.         return {"request": protocol_header_request, "response": protocol_header_response}
  111.  
  112.     def get_cookie_domain_stats(self):
  113.         cookie_domain = {}
  114.         cookie_domain_exist = {}
  115.         cookie_domain_exist_req = {}
  116.         for protocol in self.json_by_protocol:
  117.             entries = self.json_by_protocol[protocol]
  118.             for entry in entries:
  119.                 headers_response = entry.get("response").get("headers")
  120.                 headers_request = entry.get("request").get("headers")
  121.                 parsed_uri = urlparse(entry.get("request").get("url"))
  122.                 domain = parsed_uri.netloc
  123.                
  124.                 count_set = 0
  125.                 count_send = 0
  126.                
  127.                 if not domain in cookie_domain_exist_req:
  128.                     cookie_domain_exist_req[domain] = []
  129.                 for cookie in entry.get("request").get("cookies"):
  130.                     if cookie.get("name") not in cookie_domain_exist_req[domain]:
  131.                         cookie_domain_exist_req[domain].append(cookie.get("name"))
  132.                         count_send +=1
  133.                    
  134.                    
  135.                 for header in headers_response:
  136.                     if header.get("name") == "set-cookie":
  137.                         name = header.get("value").split("=")[0]
  138.                         if not domain in cookie_domain_exist:
  139.                             cookie_domain_exist[domain] = []
  140.                        
  141.                         if not name in cookie_domain_exist[domain]:
  142.                             cookie_domain_exist[domain].append(name)
  143.                             count_set +=1
  144.                
  145.                 if not domain in cookie_domain:
  146.                     if count_set != 0 or count_send != 0:
  147.                         cookie_domain[domain]= {"set-cookie": count_set, "cookies-send": count_send}
  148.                 else:
  149.                     cookie_domain[domain]["set-cookie"] += count_set
  150.                     cookie_domain[domain]["cookies-send"] += count_send
  151.         return cookie_domain
  152.        
  153.  
  154. analyzer = HarAnalyzer("./very_curious.har")
  155.  
  156. # Return a table with the number of content type received for each protocol
  157. #print(pd.DataFrame(analyzer.get_content_type_repartition()).to_string())
  158.  
  159. # Return a table with the domain list and their IP for each protocol
  160. #print(pd.DataFrame(analyzer.get_domain_ip()).to_string())
  161.  
  162. # Return a table with the domain list and the number of request for those domain for each protocol
  163. #print(pd.DataFrame(analyzer.get_nb_request_by_domain()).to_string())
  164.  
  165. # (3 next lines) Print all the request headers for http2 (replace http/2.0 by another protocol if desired)
  166. #pandas_df_request_http2_header = pd.DataFrame(analyzer.get_headers_stats()["request"]["http/2.0"])
  167. #sum_request_http2_header = pandas_df_request_http2_header.sum(axis=1)
  168. #print(sum_request_http2_header.to_string())
  169.  
  170. # (3 next lines) Print all the response headers for http2 (replace http/2.0 by another protocol if desired)
  171. #pandas_df_response_http2_header = pd.DataFrame(analyzer.get_headers_stats()["response"]["http/2.0"])
  172. #sum_response_http2_header = pandas_df_response_http2_header.sum(axis=1)
  173. #print(sum_response_http2_header.to_string())
  174.  
  175.  
  176. # Get the number of unique cookies send and received to/from each domain
  177. #print(pd.DataFrame(analyzer.get_cookie_domain_stats()).transpose().to_string())
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement