Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import json
- import glob
- import re
# User id whose posts are extracted (compared against each post's "user" field).
ROB_ID = 'U0MF2V8TX'

# Directory names searched for '*.json' files, one directory per channel.
CHANNELS = [
    'animu',
    'articles',
    'coordination',
    'dnd-coordination',
    'dnd-discussion',
    'fantasy',
    'gaming',
    'general',
    'mtg',
    'random',
    'tv',
]
# Compiled once at import time instead of on every call. Alternatives:
#   >                    a literal '>' character
#   <@U[a-zA-Z0-9]*>\s   a '<@U...>' token plus the one whitespace char after it
#                        (presumably a user mention -- confirm against the export)
#   :[a-z_]+:            a ':name:' token of lowercase letters/underscores
#                        (presumably an emoji shortcode); '+' rather than '*'
#                        so a bare '::' (e.g. 'std::vector') is left intact
_FILTER_RE = re.compile(r">|<@U[a-zA-Z0-9]*>\s|:[a-z_]+:")


def filter_line(s):
    """Return *s* with markup tokens removed.

    Strips every '>' character, every '<@U...>' token that is followed by
    a whitespace character (that whitespace is removed too), and every
    ':name:' token made of lowercase letters and underscores.

    Args:
        s: The message text to clean.

    Returns:
        The cleaned string; may be empty if *s* consisted only of markup.
    """
    return _FILTER_RE.sub("", s)
def pull_lines():
    """Collect ROB_ID's plain-text posts and write them to 'rob_lines.txt'.

    For every directory named in CHANNELS, each '*.json' file is parsed as
    a JSON list of post objects. A post is kept when its "user" equals
    ROB_ID, it has no "subtype" key, and its text does not start with '<'.
    Kept texts are cleaned with filter_line(); texts that end up empty are
    dropped. The survivors are written one per line, UTF-8 encoded, to
    'rob_lines.txt' in the current working directory.

    Progress is printed to stdout as "Processing file i/N".

    Raises:
        OSError: If an input file or the output file cannot be opened.
        json.JSONDecodeError: If an input file is not valid JSON.
    """
    max_posts = 1000  # only the first 1000 posts of each file are examined

    paths = []
    for chan in CHANNELS:
        # glob returns matches in arbitrary order; sort within each channel
        # so the output is deterministic from run to run.
        paths.extend(sorted(glob.glob(chan + '/*.json')))

    lines = []
    total = len(paths)
    for index, path in enumerate(paths, start=1):
        # Read explicitly as UTF-8 to match the output encoding rather than
        # depending on the platform's default encoding.
        with open(path, 'r', encoding='utf-8') as f:
            print("Processing file " + str(index) + '/' + str(total))
            posts = json.load(f)

        for post in posts[:max_posts]:
            if post.get('user') != ROB_ID or 'subtype' in post:
                continue
            # A missing or empty text is skipped instead of raising
            # KeyError / IndexError.
            text = post.get('text') or ''
            if not text or text.startswith('<'):
                continue
            cleaned = filter_line(text)
            if cleaned:
                lines.append(cleaned)

    with open('rob_lines.txt', 'w', encoding='utf-8') as f:
        f.writelines(line + "\n" for line in lines)
# Script entry point: run the extraction only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    pull_lines()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement