Advertisement
Guest User

Untitled

a guest
Jan 22nd, 2019
109
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.22 KB | None | 0 0
  1. import json
  2. import glob
  3. import re
  4.  
  5. ROB_ID = 'U0MF2V8TX'
  6. CHANNELS = ['animu', 'articles', 'coordination', 'dnd-coordination',
  7.             'dnd-discussion', 'fantasy', 'gaming', 'general', 'mtg',
  8.             'random', 'tv']
  9.  
  10. def filter_line(s):
  11.     regex = re.compile(r"\&gt;|<@U[a-zA-Z0-9]*>\s|:[a-z_]*:")
  12.     return regex.sub("", s)
  13.  
  14. def pull_lines():
  15.     lines = []
  16.     files = []
  17.  
  18.     hm_lines = 1000
  19.  
  20.     for chan in CHANNELS:
  21.         files.extend(glob.glob(chan + '/*.json'))
  22.  
  23.         i = 1
  24.         for file in files:
  25.             with open(file, 'r') as f:
  26.                 print("Processing file " + str(i) + '/' + str(len(files)))
  27.                 contents = json.loads(f.read())
  28.                 for post in contents[:hm_lines]:
  29.                     if "user" in post:
  30.                         if "subtype" not in post:
  31.                             if (post['user'] == ROB_ID):
  32.                                 if post["text"][0] != '<':
  33.                                     lines.append(filter_line(post["text"]))
  34.             i += 1
  35.    
  36.     str_list = list(filter(None, lines))
  37.    
  38.     with open('rob_lines.txt', 'w', encoding='utf-8') as f:
  39.         for item in str_list:
  40.             f.write("%s\n" % item)
  41.  
  42. if __name__ == '__main__':
  43.     pull_lines()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement