Advertisement
Guest User

Untitled

a guest
Nov 19th, 2019
305
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.22 KB | None | 0 0
  1. #!/usr/bin/env python3
  2.  
  3. # Script to process / prettify twitter dump file direct-message.js
  4.  
  5. import json
  6. import pickle
  7. import pprint
  8. import re
  9. import requests
  10. import time
  11.  
  12. picklefile = 'known_users.db'
  13.  
  14. known_users = {}
  15. default_users = {}
  16. real_names = {}
  17. try:
  18.     with open(picklefile, 'r') as pfile:
  19.         known_users = pickle.load(pfile)
  20. except Exception as e:
  21.     pass
  22.  
  23. perspective = "3390728889"  # split convos, focusing on this user
  24.  
  25. default_users["3390728889"] = ( "@Arron_banks", "" )
  26.  
  27. def get_user(id):
  28.     if id not in known_users:
  29.         query_twitter(id)
  30.  
  31.     if id in known_users:
  32.         return(known_users[id])
  33.     else:
  34.         return (id, '')
  35.  
  36.  
  37. def query_twitter(id):
  38.     url = "https://twitter.com/intent/user?user_id=" + id
  39.     time.sleep(0.5)
  40.     request = requests.get(url)
  41.     fn = do_re("(?<=<title>)(.*)(?= \()", request.text)
  42.     handle = do_re("(?<=<p><span class=\"nickname\">)(.*)(?=<\/span><\/p>)", request.text)
  43.  
  44.     if handle:
  45.         known_users[id] = (handle, fn)
  46.     elif id in default_users:
  47.         known_users[id] = default_users[id]
  48.     else:
  49.         known_users[id] = (id, '')
  50.  
  51.  
  52.  
  53.  
  54. class Dm:
  55.     def __init__(self):
  56.         self.mid = None
  57.         self.rid = None
  58.         self.sid = None
  59.         self.text = ''
  60.         self.murl = None
  61.         self.created = None
  62.  
  63.     def __str__(self):
  64.         #print("created: " + self.created)
  65.         #print("mid: " + self.mid)
  66.         #print("sid: " + self.sid)
  67.         #print("rid: " + self.rid)
  68.         #print("text: " + self.text)
  69.         #print("murl: " + self.murl)
  70.         sh, sn = get_user(self.sid)
  71.         rh, rn = get_user(self.rid)
  72.         return "{} {}->{}: {}\n".format(self.created, sh, rh, self.text)
  73.  
  74.     def show(self):
  75.         sh, sn = get_user(self.sid)
  76.         rh, rn = get_user(self.rid)
  77.         return "{} {} -> {}: {}\n".format(self.created, sh, rh, self.text)
  78.  
  79.  
  80. def do_re(reg, s):
  81.     m = re.search(reg, s)
  82.     if m:
  83.         return m.group(0)
  84.     else:
  85.         return None
  86.  
  87. def shitty_dm_parser(dmstr):
  88.     dm = Dm()
  89.     dm.mid = do_re("(?<=\"id\" : \")(.*)(?=\",)", dmstr)
  90.     dm.rid = do_re("(?<=\"recipientId\" : \")(.*)(?=\",)", dmstr)
  91.     dm.sid = do_re("(?<=\"senderId\" : \")(.*)(?=\",)", dmstr)
  92.     dm.text = do_re("(?<=\"text\" : \")(.*)(?=\",)", dmstr)
  93.     dm.murl = do_re("(?<=\"mediaUrls\" : )(.*)(?=,)", dmstr)
  94.     dm.created = do_re("(?<=\"createdAt\" : \")(.*)(?=\")", dmstr)
  95.     return dm
  96.  
  97.  
  98. with open('direct-message.js', 'r') as messages:
  99.     dms = messages.read().split("\"messageCreate\" : ")
  100.     dms.pop(0)
  101.  
  102.  
  103.     last_contact = None
  104.  
  105.     for dmstr in dms:
  106.         dm = shitty_dm_parser(dmstr)
  107.         if ((dm.sid == perspective and dm.rid != last_contact)
  108.             or (dm.rid == perspective and dm.sid != last_contact)):
  109.             last_contact = dm.rid if (dm.rid != perspective) else dm.sid
  110.             sh, sn = get_user(perspective)
  111.             rh, rn = get_user(last_contact)
  112.             print("\n------ {} {} <-> {} {} ------".format(sn, sh, rn, rh))
  113.         print(dm.show())
  114.  
  115. with open(picklefile, 'wb') as pfile:
  116.     # Pickle the 'data' dictionary using the highest protocol available.
  117.     pickle.dump(known_users, pfile, pickle.HIGHEST_PROTOCOL)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement