from operator import itemgetter
import re
import sys
def make_dict(text):
    """Count how often each word occurs in *text*.

    Args:
        text: An iterable of strings, e.g. an open text file, which yields
            one line at a time.

    Returns:
        A plain ``dict`` mapping each matched word to the number of times it
        was matched. Matching is case-sensitive: no case folding is applied
        to the matched text.
    """
    # A "word" is 1-20 word characters, optionally followed by a single
    # apostrophe and up to 10 more word characters (e.g. "don't").  A raw
    # string is used so the regex escapes are not interpreted (or warned
    # about) as Python string escapes.
    pattern = re.compile(r"\b\w{1,20}'{0,1}\w{0,10}\b")
    word_dict = {}
    for line in text:
        for match in pattern.finditer(line):
            word = match.group()
            word_dict[word] = word_dict.get(word, 0) + 1
    return word_dict
def sort_dict(word_dict):
    """Return the (word, count) pairs of *word_dict*, highest count first.

    Args:
        word_dict: A mapping of word -> count.

    Returns:
        A list of ``(word, count)`` tuples sorted by count in descending
        order.
    """
    # ``items()`` is available on both Python 2 and Python 3 dicts, whereas
    # ``iteritems()`` exists only on Python 2.
    return sorted(word_dict.items(), key=itemgetter(1), reverse=True)
def main(argv):
    """Print the most frequent words of a text file.

    Args:
        argv: Command-line style argument list. ``argv[1]`` is the path of
            the file to read and ``argv[2]`` is the number of
            ``(word, count)`` entries to print.
    """
    # The context manager closes the file even if counting raises.
    with open(argv[1], 'r') as text:
        word_dict = make_dict(text)
    word_list = sort_dict(word_dict)
    # The single-argument parenthesised form prints identically under the
    # Python 2 print statement and the Python 3 print function.
    print(word_list[:int(argv[2])])
# Script entry point: hand the raw command-line arguments to main() only when
# this file is executed directly, not when it is imported.
if __name__ == "__main__":
    main(sys.argv)