Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import re
import sys
from collections import Counter
from operator import itemgetter
def make_dict(text):
    """Count how often each word occurs in an iterable of text lines.

    A word is a run of 1-20 word characters, optionally followed by a
    single apostrophe and up to 10 more word characters, so "don't" is
    counted as one word.  Matching is case-sensitive: "The" and "the"
    are tallied separately.

    Args:
        text: Iterable of strings, e.g. an open text file or a list of
            lines.

    Returns:
        A dict mapping each matched word to its number of occurrences.
    """
    # Raw string: the previous non-raw literal depended on the invalid
    # string escape "\w", which newer Python versions warn about.  The
    # compiled pattern itself is unchanged.
    word_re = re.compile(r"\b\w{1,20}'{0,1}\w{0,10}\b")
    counts = Counter(
        match.group() for line in text for match in word_re.finditer(line)
    )
    # Hand back a plain dict, which is what callers received before.
    return dict(counts)
def sort_dict(word_dict):
    """Return the dict's (word, count) pairs ordered by count, highest first.

    Args:
        word_dict: Mapping of word to occurrence count.

    Returns:
        A list of (word, count) tuples sorted by count in descending order.
    """
    # .items() exists on both Python 2 and Python 3 dicts, whereas
    # .iteritems() is Python 2 only.
    return sorted(word_dict.items(), key=itemgetter(1), reverse=True)
def main(argv):
    """Print the most frequent words of a text file.

    Args:
        argv: Command-line style argument list.  argv[1] is the path of
            the file to read and argv[2] is how many of the top entries
            to print.

    The output is a single list of (word, count) pairs, most frequent
    first.  Exits with a usage message when fewer than two arguments
    follow the program name.

    Raises:
        ValueError: If argv[2] is not an integer literal.
    """
    if len(argv) < 3:
        sys.exit('usage: %s FILE COUNT' % (argv[0] if argv else 'wordcount'))
    # Parse the count before touching the file so a bad value fails fast.
    top_n = int(argv[2])
    # The context manager closes the file even if counting raises.
    with open(argv[1], 'r') as text:
        word_dict = make_dict(text)
    word_list = sort_dict(word_dict)
    # Parenthesised single argument: valid as both the Python 2 print
    # statement and the Python 3 print function.
    print(word_list[:top_n])
# Run the word counter only when executed as a script, not when imported.
if __name__ == '__main__':
    main(sys.argv)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement