Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # -*- coding: utf-8 -*-
- import pprint
- import operator
- import os
- import csv
- import json
# Startup banner: author credits, a separator, and the stopwords hint,
# emitted one per line by a single print call.
print(
    "written by /u/kittens_from_space",
    "discord: kittenswolf#8723",
    "====",
    "If you want the script to ignore common words, create a stopwords.txt file with one word per line. If you're too lazy, google '<your language> stopwords' and copy them.",
    sep="\n",
)
def parse_csv(file_path):
    """Read a comma-delimited CSV file and return all of its rows.

    Args:
        file_path: Path of the CSV file to read; it is decoded as UTF-8.

    Returns:
        A list with one entry per CSV record, each entry being the list of
        that record's field strings.

    Raises:
        OSError: If the file cannot be opened.
    """
    # newline="" leaves line-ending handling to the csv module, as its
    # documentation requires. Without it, universal-newline translation
    # rewrites line breaks that sit inside quoted fields (multi-line text).
    with open(file_path, "r", encoding="utf8", newline="") as f:
        return list(csv.reader(f, delimiter=","))
def get_most_used_words(messages):
    """Count how often each word occurs in the text column of the messages.

    The text of a message is taken from index 2 of its row. Words are
    compared case-insensitively (lower-cased). A word is ignored when it is
    a single character long or when it appears in the optional
    "stopwords.txt" file (one word per line) in the current working
    directory.

    Args:
        messages: Iterable of rows (sequences of strings). Rows with fewer
            than three fields are skipped.

    Returns:
        A dict mapping each lower-cased word to its number of occurrences.
        Only words that were actually counted appear in the result.
    """
    # The stopwords file is optional: a missing or unreadable file simply
    # means nothing is filtered. A set keeps the per-word lookup cheap.
    try:
        with open("stopwords.txt", "r") as stopwords_file:
            stopwords = {
                item.lower() for item in stopwords_file.read().split("\n")
            }
    except (OSError, UnicodeError):
        stopwords = set()

    counts = {}
    for message in messages:
        # Blank or truncated CSV rows have no text column to inspect.
        if len(message) < 3:
            continue
        # split() without arguments breaks on any whitespace, so words on
        # different lines of a multi-line message are not glued together,
        # and it never yields empty strings.
        for token in message[2].split():
            word = token.lower()
            if len(token) > 1 and word not in stopwords:
                counts[word] = counts.get(word, 0) + 1
    return counts
# Script body: collect every channel's messages.csv below "messages",
# count word usage across all of them, and print the 100 most used words.

print("Loading channels...")
# os.walk yields every nested directory; only those that actually contain
# a messages.csv are treated as channels, so unrelated folders cannot make
# the loading step fail with a missing-file error.
message_channels = [
    dirpath
    for dirpath, _dirnames, filenames in os.walk("messages")
    if dirpath != "messages" and "messages.csv" in filenames
]
print("Loaded {} channels.".format(len(message_channels)))

print("Loading messages...")
all_messages = []
for channel in message_channels:
    all_messages.extend(parse_csv(os.path.join(channel, "messages.csv")))
print("Loaded {} messages.".format(len(all_messages)))

print("Getting most used words...")
most_used = get_most_used_words(all_messages)
# Sort ascending by count and then reverse, rather than passing
# reverse=True, so that words with equal counts keep the same relative
# order this script has always printed.
sorted_most_used = sorted(most_used.items(), key=operator.itemgetter(1))
sorted_most_used.reverse()

print()
print("Your 100 top used words:")
print("No. | Word | Usage")
for i, (word, usage) in enumerate(sorted_most_used[:100], start=1):
    print('{}. "{}" | x{}'.format(i, word, usage))
Add Comment
Please, Sign In to add comment