Advertisement
Guest User

cdsboy

a guest
Mar 10th, 2009
304
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.74 KB | None | 0 0
  1. from operator import itemgetter
  2. import re
  3. import sys
  4.  
  5. def make_dict(text):
  6.   word_dict = {}
  7.  
  8.   p = re.compile('\\b\w{1,20}\'{0,1}\\w{0,10}\\b')
  9.  
  10.   for line in text:
  11.     m = p.finditer(line)
  12.     for item in m:
  13.       if line[item.start():item.end()] not in word_dict:
  14.         word_dict[line[item.start():item.end()]] = 1
  15.       else:
  16.         word_dict[line[item.start():item.end()]] += 1
  17.  
  18.   return word_dict
  19.  
  20. def sort_dict(word_dict):
  21.   return sorted(word_dict.iteritems(), key=itemgetter(1), reverse=True)
  22.  
  23. def main(argv):
  24.   text = open(argv[1], 'r')
  25.  
  26.   word_dict = make_dict(text)
  27.   text.close()
  28.  
  29.   word_list = sort_dict(word_dict)
  30.  
  31.   print word_list[:int(argv[2])]
  32.  
  33. if __name__ == '__main__':
  34.   main(sys.argv)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement