Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # ### Chatlog Keyword Analyzer by Append Huang
- # Timestamp: 20220831 01:50 UTC+8
- #
- # Usage: python keyword_analyzer.py chat.log
- # Use Chatty log (or, part of Chatty log) / Rechat log (txt format) as input
- # Please save the log as chat.log, or pass the filename as a command-line argument
- #
- # for detailed output, use:
- # python keyword_analyzer.py chat.log 1
import sys
from collections import Counter

# Log file read when no filename is given on the command line.
DEFAULT_LOG = "chat.log"


def clean_username(raw):
    """Return *raw* lower-cased with badge/decoration symbols removed.

    Letters, digits, spaces and parentheses are kept, as before. The
    underscore is kept as well, because dropping it would merge distinct
    logins such as ``a_b`` and ``ab`` into one counter entry.
    """
    return "".join(ch.lower() for ch in raw if ch.isalnum() or ch in "()_ ")


def parse_line(line):
    """Extract ``(user, message)`` from one chat-log line.

    Two layouts are recognised after the leading ``[timestamp]``:

    * ``<user> message`` (also used inside automod ``MOD_ACTION`` lines)
    * ``user: message``

    Returns ``None`` for anything that does not yield both a user and a
    message: lines without a ``]``, ``MOD_ACTION`` lines that carry no
    quoted chat message, or lines with an empty user/message part.
    """
    # Drop the "[timestamp]" prefix; lines without one (blank lines,
    # "# Log started" headers, ...) are not chat messages.
    _, closing, rest = line.strip().partition("]")
    if not closing:
        return None

    if rest.startswith(" MOD_ACTION:"):
        # MOD_ACTION: (automod_rejected [reason] <user> message)
        # Keep only what follows the "[reason]" part and drop the final ")".
        # e.g. "[23:29:53] MOD_ACTION: id9000 (emoteonly)" has no second "]"
        # and is skipped.
        _, closing, rest = rest.partition("]")
        if not closing:
            return None
        rest = rest[:-1]

    lt = rest.find("<")
    gt = rest.find(">", lt + 1) if lt >= 0 else -1
    colon = rest.find(":")

    # Whichever delimiter comes first decides the layout, so a "user: msg"
    # line whose message happens to contain "<...>" is not mistaken for the
    # "<user> msg" layout, and extra ">" characters in the message are kept.
    if gt >= 0 and (colon < 0 or lt < colon):
        user = clean_username(rest[lt + 1:gt].strip())
        msg = rest[gt + 1:].strip()
    elif colon >= 0:
        user = rest[:colon].strip()
        msg = rest[colon + 1:].strip()
    else:
        return None

    if user and msg:
        return user, msg
    return None


def tally_messages(lines):
    """Count messages per user and per message text.

    *lines* is any iterable of raw log lines. Returns a tuple
    ``(user_counts, msg_counts, records)`` where the first two are
    ``Counter`` objects and *records* is the list of ``(user, message)``
    pairs in log order.
    """
    user_counts = Counter()
    msg_counts = Counter()
    records = []
    for line in lines:
        parsed = parse_line(line)
        if parsed is None:
            continue
        user, msg = parsed
        user_counts[user] += 1
        msg_counts[msg] += 1
        records.append((user, msg))
    return user_counts, msg_counts, records


def tally_repeated(msg_counts, records):
    """Count only the messages whose text occurs more than once.

    Returns ``(user_counts, msg_counts)`` restricted to *records* whose
    message appears at least twice according to *msg_counts*.
    """
    # A set gives O(1) membership tests instead of scanning a list per record.
    repeated = {msg for msg, count in msg_counts.items() if count > 1}
    user_counts = Counter()
    rep_msg_counts = Counter()
    for user, msg in records:
        if msg in repeated:
            user_counts[user] += 1
            rep_msg_counts[msg] += 1
    return user_counts, rep_msg_counts


def print_counts(title, counts):
    """Print *title* followed by the entries of *counts*, most frequent first."""
    print(title)
    # most_common() keeps first-seen order among equal counts, matching a
    # stable descending sort on the count.
    for name, count in counts.most_common():
        print(" %s: %d" % (name, count))


def main(argv=None):
    """Run the analyzer; *argv* defaults to ``sys.argv``.

    ``argv[1]`` is the log filename (default ``chat.log``) and ``argv[2]``,
    when present, is an integer flag enabling the detailed listing.
    """
    argv = sys.argv if argv is None else argv
    filename = argv[1] if len(argv) > 1 else DEFAULT_LOG
    verbose = bool(int(argv[2])) if len(argv) > 2 else False

    with open(filename, "r", encoding="utf-8") as f:
        user_counts, msg_counts, records = tally_messages(f)

    if verbose:
        print_counts("# User List:", user_counts)
        print()
        print_counts("# Msg List:", msg_counts)
        print()

    rep_users, rep_msgs = tally_repeated(msg_counts, records)
    print_counts("# User List with Repeated Keywords:", rep_users)
    print()
    print_counts("# Msg List with Repeated Keywords:", rep_msgs)


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement