SHARE
TWEET

Trillian Log Parser

a guest Nov 14th, 2019 107 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import re
  2. import urllib.parse
  3. from datetime import datetime
  4. import os
  5. import html
  6.  
  7. if not os.path.exists("OUTPUT"):
  8.     os.mkdir("OUTPUT")
  9. else:
  10.     for root, dirs, files in os.walk("OUTPUT"):
  11.         for file in files:
  12.             os.remove(root+"\\"+file)
  13.  
  14. users = {}
  15. myname = input("Enter your own nickname: ")
  16.  
  17. for root, dirs, files in os.walk("_CLOUD"):
  18.     for filename in files:
  19.         if filename.endswith(".xml"):
  20.             username = re.search(r'(.*?).xml',filename).group(1)
  21.             users[username] = None
  22.  
  23. for user in users:
  24.     if users[user] == None:
  25.         alias = input("Enter an alias for \""+user+"\": ")
  26.         if alias == "":
  27.             defaultname = re.search(r'.*?-(.*)',user).group(1)
  28.             print("Using default "+defaultname)
  29.             users[user] = defaultname
  30.         else:
  31.             users[user] = alias
  32.            
  33. for user in users:
  34.     print("Parsing logs for "+user+" ("+users[user]+")...")
  35.     posts = {}
  36.     protocol = re.search(r'(.*?)-.*', user).group(1).lower()
  37.     if not os.path.exists("OUTPUT\\"+protocol):
  38.         os.mkdir("OUTPUT\\"+protocol)
  39.     for root, dirs, files in os.walk("_CLOUD"):
  40.         for filename in files:
  41.             if filename.endswith(".xml"):
  42.                 if re.match(user+'.xml', filename) is not None:
  43.                     # print("OK, reading "+root+"\\"+filename)
  44.                     with open(root+"\\"+filename) as file:
  45.                         filelines = file.readlines()
  46.                         for line in filelines:
  47.                             if line.startswith("<message"):
  48.                                 try:
  49.                                     time = re.search(r'time=\"(.+?)\"', line).group(1)
  50.                                     type = re.search(r'type=\"(.+?)\"', line).group(1)
  51.                                     text = re.search(r'text=\"(%20)?(.+?)\"', line).group(2)
  52.                                 except:
  53.                                     continue
  54.                                 text = urllib.parse.unquote(text)
  55.                                 text = html.unescape(text)
  56.                                 text = re.sub(r'</?a(.*?)>', '', text)
  57.                                 text = re.sub(r'</?b(.*?)>', '**', text)
  58.                                 text = re.sub(r'</?i(.*?)>', '_', text)
  59.                                
  60.                                 timetext = datetime.utcfromtimestamp(int(time)/1000).strftime('%Y-%m-%d %H:%M:%S')
  61.                                 if type.startswith("incoming"):
  62.                                     name = users[user]
  63.                                 else:
  64.                                     name = myname
  65.                                 if text.startswith("/me"):
  66.                                     posts[time] = "["+timetext+"] "+name+" "+text[4:]
  67.                                 else:
  68.                                     posts[time] = "["+timetext+"] "+name+": "+text
  69.                                
  70.     outputfile = open("OUTPUT\\"+protocol+"\\"+users[user]+".txt", "a", encoding="utf-8")
  71.     for post in posts:
  72.         # print("writing "+post)
  73.         outputfile.write(posts[post]+"\n")
  74.     outputfile.close()
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top