# Untitled

# -*- coding: utf-8 -*-

import pprint

import operator
import os
import csv
import json

# Startup banner: author credits, a separator, then a hint on how to supply
# an optional stopwords.txt file.
for banner_line in (
    "written by /u/kittens_from_space",
    "discord: kittenswolf#8723",
    "====",
    "If you want the script to ignore common words, create a stopwords.txt file with one word per line. If you're too lazy, google '<your language> stopwords' and copy them.",
):
    print(banner_line)

def parse_csv(file_path):
    """Read a comma-delimited CSV file and return all of its rows.

    Args:
        file_path: Path of the CSV file to read; it is decoded as UTF-8.

    Returns:
        A list with one entry per CSV row, each entry being a list of the
        row's field strings. Nothing is filtered out, so a header row (if
        the file has one) is part of the result.

    Raises:
        OSError: If the file cannot be opened.
    """
    # newline="" leaves line-ending handling to the csv module, as its
    # documentation requires; without it, newlines embedded inside quoted
    # fields are translated before the parser ever sees them.
    with open(file_path, "r", encoding="utf8", newline="") as f:
        return list(csv.reader(f, delimiter=","))

def get_most_used_words(messages):
    """Count how often each word occurs across all messages.

    Words are compared case-insensitively (they are lower-cased). A word is
    skipped when it is a single character long or when it is listed in
    ``stopwords.txt`` (one word per line, looked up in the current working
    directory). If that file is missing or unreadable, no stopword
    filtering is applied.

    Args:
        messages: Iterable of rows (sequences of strings) whose third
            element (index 2) is the message text. Rows with fewer than
            three elements are skipped.

    Returns:
        A dict mapping each lower-cased word to its occurrence count.
    """
    try:
        # "utf-8-sig" reads plain UTF-8 and also drops a leading BOM, so the
        # first stopword is not polluted by editors that write one. The
        # `with` block guarantees the handle is closed.
        with open("stopwords.txt", "r", encoding="utf-8-sig") as f:
            # A set gives O(1) membership tests in the counting loop below.
            stopwords = {line.strip().lower() for line in f}
    except (OSError, UnicodeError):
        # The stopwords file is optional: fall back to no filtering.
        stopwords = set()

    word_counts = {}
    for message in messages:
        if len(message) < 3:
            # e.g. the empty list csv.reader yields for a blank line
            continue

        # split() with no argument breaks on any run of whitespace, so tabs
        # and newlines inside a message do not end up glued into a "word",
        # and no empty tokens are produced.
        for word in message[2].split():
            key = word.lower()
            if len(word) > 1 and key not in stopwords:
                word_counts[key] = word_counts.get(key, 0) + 1

    return word_counts

# Discover channels: every directory below "messages" (the root itself is
# excluded) that actually contains a messages.csv file. os.walk descends
# into all nested directories, so folders without that file are filtered
# out here instead of aborting the load below with FileNotFoundError.
print("Loading channels...")
message_channels = [
    dirpath
    for dirpath, _dirnames, _filenames in os.walk("messages")
    if dirpath != "messages"
    and os.path.isfile(os.path.join(dirpath, "messages.csv"))
]
print("Loaded {} channels.".format(len(message_channels)))

# Concatenate the rows of every channel's messages.csv into one list.
print("Loading messages...")
all_messages = []
for channel in message_channels:
    all_messages.extend(parse_csv(os.path.join(channel, "messages.csv")))

print("Loaded {} messages.".format(len(all_messages)))

print("Getting most used words...")
most_used = get_most_used_words(all_messages)
# Sort ascending by count, then reverse in place so the most used words
# come first. Sort-then-reverse (rather than reverse=True) keeps the
# ordering among equally frequent words the same as before.
sorted_most_used = sorted(most_used.items(), key=operator.itemgetter(1))
sorted_most_used.reverse()

print()
print("Your 100 top used words:")
print("No. | Word | Usage")

# Each entry is a (word, count) pair; ranks are 1-based.
for i, word in enumerate(sorted_most_used[:100], start=1):
    print('{}. "{}" | x{}'.format(i, word[0], word[1]))