Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import html
import os
import re
import urllib.parse
from datetime import datetime, timezone
# Converts per-contact XML chat logs found under CLOUD_DIR into plain-text
# transcripts under OUTPUT_DIR/<protocol>/<alias>.txt.

CLOUD_DIR = "_CLOUD"
OUTPUT_DIR = "OUTPUT"
LOG_SUFFIX = ".xml"

# Attribute extractors for a single '<message .../>' log line.
_TIME_RE = re.compile(r'time="(.+?)"')
_TYPE_RE = re.compile(r'type="(.+?)"')
# An encoded leading space ("%20") in front of the message text is dropped.
_TEXT_RE = re.compile(r'text="(%20)?(.+?)"')

# The \b keeps each pattern on its own tag name, so <br>, <img>, <abbr>
# and similar tags are not mistaken for <b>, <i> or <a>.
_ANCHOR_TAG_RE = re.compile(r'</?a\b[^>]*>')
_BOLD_TAG_RE = re.compile(r'</?b\b[^>]*>')
_ITALIC_TAG_RE = re.compile(r'</?i\b[^>]*>')


def clear_output_dir(output_dir):
    """Create *output_dir* if missing, otherwise delete every file below it.

    Sub-directories are kept; only the files inside them are removed.
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
        return
    for root, _dirs, files in os.walk(output_dir):
        for name in files:
            os.remove(os.path.join(root, name))


def find_logs(cloud_dir):
    """Return ``{log_name: [paths]}`` for every ``*.xml`` file below *cloud_dir*.

    ``log_name`` is the file name without its ``.xml`` suffix. Files that share
    a name in different directories are grouped under one key, in the order
    ``os.walk`` yields them.
    """
    logs = {}
    for root, _dirs, files in os.walk(cloud_dir):
        for filename in files:
            if not filename.endswith(LOG_SUFFIX):
                continue
            log_name = filename[:-len(LOG_SUFFIX)]
            if not log_name:
                # A file literally named ".xml" carries no contact name.
                continue
            logs.setdefault(log_name, []).append(os.path.join(root, filename))
    return logs


def split_log_name(log_name):
    """Split ``"<protocol>-<account>"`` at the first hyphen.

    Returns ``(protocol_lowercased, account)``. A name without a hyphen yields
    ``("unknown", log_name)`` instead of raising.
    """
    protocol, hyphen, account = log_name.partition("-")
    if not hyphen:
        return "unknown", log_name
    return protocol.lower(), account


def format_message(line, contact_name, own_name):
    """Turn one ``<message ...>`` line into ``(timestamp, transcript_line)``.

    *timestamp* is the raw ``time`` attribute (a string of milliseconds since
    the epoch). Messages whose ``type`` starts with ``"incoming"`` are
    attributed to *contact_name*, all others to *own_name*.

    Returns ``None`` when the line lacks a ``time``, ``type`` or non-empty
    ``text`` attribute, or when ``time`` is not a usable number.
    """
    time_match = _TIME_RE.search(line)
    type_match = _TYPE_RE.search(line)
    text_match = _TEXT_RE.search(line)
    if time_match is None or type_match is None or text_match is None:
        return None

    stamp = time_match.group(1)
    try:
        moment = datetime.fromtimestamp(int(stamp) / 1000, tz=timezone.utc)
    except (ValueError, OverflowError, OSError):
        return None
    timetext = moment.strftime('%Y-%m-%d %H:%M:%S')

    # The text attribute is percent-encoded and may contain HTML entities.
    text = html.unescape(urllib.parse.unquote(text_match.group(2)))
    text = _ANCHOR_TAG_RE.sub('', text)
    text = _BOLD_TAG_RE.sub('**', text)
    text = _ITALIC_TAG_RE.sub('_', text)

    if type_match.group(1).startswith("incoming"):
        name = contact_name
    else:
        name = own_name

    # "/me <action>" is rendered as "<name> <action>"; require the command to
    # be the whole word so that e.g. "/meow" stays an ordinary message.
    if text == "/me" or text.startswith("/me "):
        body = name + " " + text[4:]
    else:
        body = name + ": " + text
    return stamp, "[" + timetext + "] " + body


def collect_posts(paths, contact_name, own_name):
    """Read every log in *paths* and return ``{timestamp: transcript_line}``.

    Keying by timestamp means a message that appears in several files is kept
    once; a later message with the same timestamp replaces the earlier one
    while keeping its position.
    """
    posts = {}
    for path in paths:
        # Undecodable bytes are replaced rather than aborting the whole run.
        with open(path, encoding="utf-8", errors="replace") as handle:
            for line in handle:
                if not line.startswith("<message"):
                    continue
                parsed = format_message(line, contact_name, own_name)
                if parsed is not None:
                    stamp, entry = parsed
                    posts[stamp] = entry
    return posts


def main(cloud_dir=CLOUD_DIR, output_dir=OUTPUT_DIR, ask=input):
    """Convert all logs under *cloud_dir* into transcripts under *output_dir*.

    *ask* is called with a prompt string and must return the user's answer; it
    is used for the user's own nickname and for one alias per log name. An
    empty alias falls back to the account part of the log name.
    """
    clear_output_dir(output_dir)
    own_name = ask("Enter your own nickname: ")
    logs = find_logs(cloud_dir)

    aliases = {}
    for log_name in logs:
        alias = ask("Enter an alias for \"" + log_name + "\": ")
        if alias == "":
            alias = split_log_name(log_name)[1]
            print("Using default " + alias)
        aliases[log_name] = alias

    for log_name, paths in logs.items():
        alias = aliases[log_name]
        print("Parsing logs for " + log_name + " (" + alias + ")...")
        protocol = split_log_name(log_name)[0]
        protocol_dir = os.path.join(output_dir, protocol)
        os.makedirs(protocol_dir, exist_ok=True)
        posts = collect_posts(paths, alias, own_name)
        # Append mode: two log names given the same alias share one transcript.
        target = os.path.join(protocol_dir, alias + ".txt")
        with open(target, "a", encoding="utf-8") as outputfile:
            outputfile.writelines(entry + "\n" for entry in posts.values())


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement