Guest User

Untitled

a guest
Jun 24th, 2018
87
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.87 KB | None | 0 0
  1. # -*- coding: utf-8 -*-
  2.  
  3. import pprint
  4.  
  5. import operator
  6. import os
  7. import csv
  8. import json
  9.  
  10. print("written by /u/kittens_from_space")
  11. print("discord: kittenswolf#8723")
  12. print("====")
  13. print("If you want the script to ignore common words, create a stopwords.txt file with one word per line. If you're too lazy, google '<your language> stopwords' and copy them.")
  14.  
  15. def parse_csv(file_path):
  16. with open(file_path, "r", encoding="utf8") as f:
  17. readCSV = csv.reader(f, delimiter=',')
  18.  
  19. return list(readCSV)
  20.  
  21. def get_most_used_words(messages):
  22. try:
  23. stopwords = [item.lower() for item in open("stopwords.txt", "r").read().split("\n")]
  24. except Exception:
  25. stopwords = []
  26.  
  27. stopwords.append('')
  28.  
  29. reverse_dict = {}
  30. for message in messages:
  31. msg_words = message[2].split(" ")
  32.  
  33. for word in msg_words:
  34. try:
  35. cur = reverse_dict[word.lower()]
  36. except KeyError:
  37. cur = 0
  38.  
  39. if word.lower() not in stopwords:
  40. if len(word) > 1:
  41. cur += 1
  42. reverse_dict[word.lower()] = cur
  43.  
  44. return reverse_dict
  45.  
  46. print("Loading channels...")
  47. message_channels = [x[0] for x in os.walk("messages") if not x[0] == "messages"]
  48. print("Loaded {} channels.".format(len(message_channels)))
  49.  
  50. print("Loading messages...")
  51. all_messages = []
  52. for channel in message_channels:
  53. all_messages += parse_csv(channel + "/messages.csv")
  54.  
  55. print("Loaded {} messages.".format(len(all_messages)))
  56.  
  57. print("Getting most used words...")
  58. most_used = get_most_used_words(all_messages)
  59. sorted_most_used = sorted(most_used.items(), key=operator.itemgetter(1))
  60.  
  61. sorted_most_used = list(reversed(sorted_most_used))
  62.  
  63. print()
  64. print("Your 100 top used words:")
  65. print("No. | Word | Usage")
  66.  
  67. i = 1
  68. for word in sorted_most_used[:100]:
  69. print('{}. "{}" | x{}'.format(i, word[0], word[1]))
  70.  
  71. i += 1
Add Comment
Please, Sign In to add comment