Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import collections
- import string
def retrieveMostFrequentlyUseWords(helpText, wordsToExclude):
    """Return the most frequently used words in *helpText*.

    The text is lower-cased and every ASCII punctuation character is
    treated as a word separator, so ``"Jack's"`` yields the two words
    ``"jack"`` and ``"s"``. Words listed in *wordsToExclude* (compared
    case-insensitively) are ignored.

    Args:
        helpText: The text to analyse (a ``str``). ``None`` or an empty
            string yields an empty result.
        wordsToExclude: An iterable of words that must not be counted.
            ``None`` is treated as "exclude nothing".

    Returns:
        A list of the lower-cased words that share the highest
        occurrence count, ordered by first appearance in the text.
        The list is empty when no countable word remains.
    """
    if not helpText:
        return []

    excluded = {word.lower() for word in wordsToExclude or ()}

    # Map each punctuation character to a space (rather than deleting it)
    # so punctuation separates words instead of gluing neighbours together.
    punctuation_to_space = str.maketrans(
        string.punctuation, " " * len(string.punctuation)
    )
    words = helpText.lower().translate(punctuation_to_space).split()

    counts = collections.Counter(
        word for word in words if word not in excluded
    )
    if not counts:
        return []

    highest = max(counts.values())
    # Counter keeps first-insertion order, so tied words come back in the
    # order they first appear in the text; no full sort is needed.
    return [word for word, count in counts.items() if count == highest]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement