Advertisement
Appendko

Chatlog Keyword Analyzer

Aug 30th, 2022
873
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.69 KB | None | 0 0
  1. # ### Chatlog Keyword Analyzer by Append Huang
  2. # Timestamp: 20220831 01:50 UTC+8
  3. #
  4. # Usage: python keyword_analyzer.py chat.log
  5. # Use Chatty log (or, part of Chatty log) / Rechat log (txt format) as input
  6. # Please save the log as chat.log or put the filename as a command-line arguments
  7. #
  8. # for detailed output, use:
  9. # python keyword_analyzer.py chat.log 1
  10.  
  11. import sys
  12. from collections import Counter
  13.  
  14. filename = "chat.log"
  15. if len(sys.argv) > 1:
  16.     filename = sys.argv[1]
  17.    
  18. VERBOSE = False
  19. if len(sys.argv) > 2:
  20.     VERBOSE = bool(int(sys.argv[2]))
  21.    
  22. with open(filename,'r', encoding='utf-8') as f:
  23.     f_lst = f.readlines()
  24.  
  25. user_c = Counter()
  26. msg_c = Counter()
  27. user_msg_lst = []
  28. for line in f_lst:
  29.     line2 = line.strip().split(']',1)[1]
  30.     if line2.startswith(" MOD_ACTION:"):
  31.         # MOD_ACTION:  (automod_rejected [racism1; "Ching Chong":identity7] <luffymtlqpb> Ching Chong)
  32.         try:
  33.             line2 = line2.split(']',1)[1][:-1]
  34.            
  35.         # [23:29:53] MOD_ACTION: id9000 (emoteonly) --> skip this line
  36.         except IndexError:
  37.             continue
  38.  
  39.     def user_helper(x):
  40.         return x.isalnum() or x=='(' or x==')' or x==' '
  41.        
  42.     user2, msg2 = "", ""
  43.     if "<" in line2 and ">" in line2: # <nastasyaebu> colored
  44.         user, msg = line2.split(">")
  45.         user2 = user.split("<",1)[1].strip()
  46.         user2 = "".join([x.lower() for x in user2 if user_helper(x)]) # remove symbols
  47.         msg2 = msg.strip()
  48.  
  49.     elif ":" in line2: #[10:45:51.135] recilurn: 老二 老二 老二
  50.         user, msg = line2.split(":", 1)
  51.         user2 = user.strip()
  52.         msg2 = msg.strip()
  53.        
  54.     if user2 and msg2:
  55.         user_c[user2] += 1
  56.         msg_c[msg2] +=1
  57.         user_msg_lst.append([user2, msg2])
  58.  
  59. if VERBOSE:
  60.     print("# User List:")
  61.     for user_count in sorted(user_c.items(), key=lambda x:x[1], reverse=True):
  62.         print("    %s: %d"%user_count)
  63.     print()
  64.     print("# Msg List:")
  65.     for msg_count in sorted(msg_c.items(), key=lambda x:x[1], reverse=True):
  66.         print("    %s: %d"%msg_count)
  67.     print()
  68.  
  69. key_lst = [x for x,c in sorted(msg_c.items(), key=lambda x:x[1], reverse=True) if c >1]
  70. user_key_lst = [(i,v) for i,v in user_msg_lst if v in key_lst]    
  71. user_c2 = Counter()
  72. msg_c2 = Counter()
  73. for user, msg in user_key_lst:
  74.     user_c2[user] += 1
  75.     msg_c2[msg] += 1
  76.    
  77. print("# User List with Repeated Keywords:")
  78. for user_count in sorted(user_c2.items(), key=lambda x:x[1], reverse=True):
  79.     print("    %s: %d"%user_count)
  80. print()
  81. print("# Msg List with Repeated Keywords:")
  82. for msg_count in sorted(msg_c2.items(), key=lambda x:x[1], reverse=True):
  83.     print("    %s: %d"%msg_count)
  84.    
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement