Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python3
- # Script to process / prettify twitter dump file direct-message.js
- import json
- import pickle
- import pprint
- import re
- import requests
- import time
picklefile = 'known_users.db'  # on-disk cache of users that were already looked up
default_users = {}  # hand-maintained fallbacks: user id -> (handle, name)
real_names = {}


def load_known_users(path):
    """Load the pickled user cache from *path* on a best-effort basis.

    Args:
        path: File name of a pickle previously written by this script.

    Returns:
        The cached dict, or an empty dict when the file is missing,
        unreadable, or does not contain a dict.
    """
    import sys  # local import: only needed for the warnings below

    try:
        # Pickle data is bytes, so the file must be opened in binary mode;
        # text mode makes pickle.load() fail under Python 3.
        # NOTE: unpickling can execute arbitrary code -- only point this at
        # a cache file this script wrote itself.
        with open(path, 'rb') as pfile:
            cached = pickle.load(pfile)
    except FileNotFoundError:
        # No cache file yet: start with an empty cache.
        return {}
    except Exception as exc:
        # A damaged cache must not stop the run, but say that it was dropped.
        print("warning: ignoring unreadable cache {!r}: {}".format(path, exc),
              file=sys.stderr)
        return {}
    if not isinstance(cached, dict):
        print("warning: ignoring cache {!r}: not a dict".format(path),
              file=sys.stderr)
        return {}
    return cached


known_users = load_known_users(picklefile)

perspective = "3390728889"  # split convos, focusing on this user
default_users["3390728889"] = ("@Arron_banks", "")
def get_user(id):
    """Return the cached tuple for user *id*, looking it up on first use.

    An id missing from ``known_users`` triggers one ``query_twitter`` call.
    If the id is still absent afterwards, ``(id, '')`` is returned instead.
    """
    cache_miss = id not in known_users
    if cache_miss:
        query_twitter(id)
    return known_users.get(id, (id, ''))
def query_twitter(id):
    """Fetch the twitter.com intent page for *id* and cache what it shows.

    On a successful request ``known_users[id]`` is set to ``(handle, name)``
    scraped from the page, else to the ``default_users`` entry for the id,
    else to ``(id, '')``.  When the request itself fails, nothing is cached,
    so a transient network problem is not written into the cache.

    Args:
        id: User id as a string; it is appended to the lookup URL.
    """
    import sys  # local import: only used for the warning below

    url = "https://twitter.com/intent/user?user_id=" + id
    time.sleep(0.5)  # pause before every request
    try:
        # Without a timeout a stalled connection would hang the script.
        response = requests.get(url, timeout=10)
    except requests.RequestException as exc:
        print("warning: lookup of user {} failed: {}".format(id, exc),
              file=sys.stderr)
        return
    page = response.text
    # Raw strings: "\(" and "\/" are invalid escapes in ordinary literals.
    fn = do_re(r'(?<=<title>)(.*)(?= \()', page)
    handle = do_re(r'(?<=<p><span class="nickname">)(.*)(?=</span></p>)', page)
    if handle:
        known_users[id] = (handle, fn)
    elif id in default_users:
        known_users[id] = default_users[id]
    else:
        known_users[id] = (id, '')
class Dm:
    """A single direct message as extracted by ``shitty_dm_parser``."""

    def __init__(self):
        # Every field starts empty; the parser fills them in afterwards.
        self.created = None  # value of "createdAt"
        self.mid = None      # value of "id"
        self.rid = None      # value of "recipientId"
        self.sid = None      # value of "senderId"
        self.murl = None     # value of "mediaUrls"
        self.text = ''       # value of "text"

    def _handles(self):
        """Return ``(sender_handle, recipient_handle)`` via ``get_user``."""
        sender_handle, _ = get_user(self.sid)
        recipient_handle, _ = get_user(self.rid)
        return sender_handle, recipient_handle

    def __str__(self):
        """Format the message as one line with a compact ``->`` arrow."""
        sender_handle, recipient_handle = self._handles()
        return "{} {}->{}: {}\n".format(
            self.created, sender_handle, recipient_handle, self.text)

    def show(self):
        """Format the message as one line with a spaced `` -> `` arrow."""
        sender_handle, recipient_handle = self._handles()
        return "{} {} -> {}: {}\n".format(
            self.created, sender_handle, recipient_handle, self.text)
def do_re(reg, s):
    """Return the text of the first match of *reg* in *s*, or None."""
    found = re.search(reg, s)
    return None if found is None else found.group(0)
def _decode_json_text(raw):
    """Decode JSON string escapes in *raw*; return *raw* itself on failure."""
    try:
        decoded = json.loads('"' + raw + '"')
        # Lone surrogates decode fine but cannot be encoded for output, so
        # treat them as a failure (UnicodeEncodeError is a ValueError).
        decoded.encode('utf-8')
    except ValueError:
        return raw
    return decoded


def shitty_dm_parser(dmstr):
    """Build a ``Dm`` from one chunk of the dump using per-field regexes.

    Args:
        dmstr: Text of one message object, i.e. what follows a
            ``"messageCreate" : `` marker in the dump.

    Returns:
        A ``Dm`` whose fields hold the first match for each key.  Fields
        that are not found are None, except ``text``, which stays ``''``.
    """
    dm = Dm()
    dm.mid = do_re(r'(?<="id" : ")(.*)(?=",)', dmstr)
    dm.rid = do_re(r'(?<="recipientId" : ")(.*)(?=",)', dmstr)
    dm.sid = do_re(r'(?<="senderId" : ")(.*)(?=",)', dmstr)
    raw_text = do_re(r'(?<="text" : ")(.*)(?=",)', dmstr)
    # The captured text is still JSON-escaped (\n, \", \uXXXX); decode it so
    # the output shows the message as written.  Keep '' when nothing matched.
    dm.text = '' if raw_text is None else _decode_json_text(raw_text)
    dm.murl = do_re(r'(?<="mediaUrls" : )(.*)(?=,)', dmstr)
    # This pattern has no trailing comma in its lookahead, unlike the others.
    dm.created = do_re(r'(?<="createdAt" : ")(.*)(?=")', dmstr)
    return dm
# Read the whole dump and cut it into one chunk per message.
# NOTE(review): the dump is assumed to be UTF-8 -- confirm.  Naming the
# encoding avoids depending on the platform default.
with open('direct-message.js', 'r', encoding='utf-8') as messages:
    dms = messages.read().split("\"messageCreate\" : ")
dms.pop(0)  # whatever precedes the first marker is not a message

last_contact = None
try:
    for dmstr in dms:
        dm = shitty_dm_parser(dmstr)
        # Print a header whenever the other party of the conversation with
        # `perspective` changes.
        if ((dm.sid == perspective and dm.rid != last_contact)
                or (dm.rid == perspective and dm.sid != last_contact)):
            last_contact = dm.rid if (dm.rid != perspective) else dm.sid
            sh, sn = get_user(perspective)
            rh, rn = get_user(last_contact)
            print("\n------ {} {} <-> {} {} ------".format(sn, sh, rn, rh))
        print(dm.show())
finally:
    # Save the lookups even when the loop is interrupted, so they do not
    # have to be repeated on the next run.
    with open(picklefile, 'wb') as pfile:
        # Pickle the 'data' dictionary using the highest protocol available.
        pickle.dump(known_users, pfile, pickle.HIGHEST_PROTOCOL)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement