Guest User

Untitled

a guest
Apr 23rd, 2018
67
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.65 KB | None | 0 0
  1. import re
  2. from nltk import ngrams
  3. from collections import Counter
  4.  
  5. def tokenize(string):
  6. return re.findall(r'\w+', string.lower())
  7.  
  8. def tag_cloud(list, degree):
  9. grams = ngrams(list,int(degree))
  10. return (grams)
  11.  
  12. def word_counter(list):
  13. counts = Counter(list).most_common(10)
  14. print(counts)
  15.  
  16. def main():
  17. final = []
  18. filename = input('enter filename:\n')
  19. degree = input('please define degree:')
  20. with open(filename) as f:
  21. content = f.readlines()
  22. content = [x.strip() for x in content]
  23. final = []
  24. for line in content:
  25. lst = tokenize(line)
  26. tag = tag_cloud(lst, degree)
  27. tail = list(tag)
  28. final.extend(tail)
  29. unque = set(final)
  30. word_counter(final)
Add Comment
Please, Sign In to add comment