Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import re
- from nltk import ngrams
- from collections import Counter
def tokenize(string):
    """Return every word token (run of \\w characters) in *string*, lowercased."""
    lowered = string.lower()
    return [match.group(0) for match in re.finditer(r'\w+', lowered)]
def tag_cloud(list, degree):
    """Return an iterator over the n-grams of *list*.

    Each n-gram is a tuple of *degree* consecutive items, exactly as
    ``nltk.ngrams`` produces — but built with the stdlib ``zip`` idiom so
    the heavyweight nltk dependency is not needed here.

    Args:
        list: the sequence of tokens (name kept for caller compatibility,
            though it shadows the builtin).
        degree: n-gram size; may be a string (as read from input()) and is
            coerced to int.

    Returns:
        An iterator of tuples of length ``int(degree)``.
    """
    n = int(degree)
    seq = tuple(list)  # materialize once so slicing works on any iterable
    # zip over n staggered views of seq: each step yields one window of
    # n consecutive items; exhausts when the shortest (last) view ends.
    return zip(*(seq[i:] for i in range(n)))
def word_counter(list):
    """Print and return the 10 most common items in *list*.

    Args:
        list: any iterable of hashable items (name kept for caller
            compatibility, though it shadows the builtin).

    Returns:
        The ``Counter.most_common(10)`` result — a list of
        ``(item, count)`` pairs, most frequent first — so callers can use
        the counts programmatically instead of only reading stdout.
        (Original returned None; returning the value is backward
        compatible since callers ignored it.)
    """
    counts = Counter(list).most_common(10)
    print(counts)
    return counts
def main():
    """Prompt for a filename and n-gram degree, then print the 10 most
    common n-grams across the file.

    Lines are tokenized independently, so n-grams never span a line break
    (preserves the original behavior). The degree is read as a string and
    converted inside tag_cloud.
    """
    filename = input('enter filename:\n')
    degree = input('please define degree:')
    with open(filename) as f:
        # strip surrounding whitespace (incl. the trailing newline) per line
        content = [line.strip() for line in f]
    final = []
    for line in content:
        tokens = tokenize(line)
        # extend accepts any iterable, so no intermediate list is needed
        final.extend(tag_cloud(tokens, degree))
    word_counter(final)
Add Comment
Please, Sign In to add comment