Want more features on Pastebin? Sign Up, it's FREE!
Guest

cdsboy

By: a guest on Mar 10th, 2009  |  syntax: Python  |  size: 0.74 KB  |  views: 166  |  expires: Never
download  |  raw  |  embed  |  report abuse  |  print
Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
  1. from operator import itemgetter
  2. import re
  3. import sys
  4.  
  5. def make_dict(text):
  6.   word_dict = {}
  7.  
  8.   p = re.compile('\\b\w{1,20}\'{0,1}\\w{0,10}\\b')
  9.  
  10.   for line in text:
  11.     m = p.finditer(line)
  12.     for item in m:
  13.       if line[item.start():item.end()] not in word_dict:
  14.         word_dict[line[item.start():item.end()]] = 1
  15.       else:
  16.         word_dict[line[item.start():item.end()]] += 1
  17.  
  18.   return word_dict
  19.  
  20. def sort_dict(word_dict):
  21.   return sorted(word_dict.iteritems(), key=itemgetter(1), reverse=True)
  22.  
  23. def main(argv):
  24.   text = open(argv[1], 'r')
  25.  
  26.   word_dict = make_dict(text)
  27.   text.close()
  28.  
  29.   word_list = sort_dict(word_dict)
  30.  
  31.   print word_list[:int(argv[2])]
  32.  
  33. if __name__ == '__main__':
  34.   main(sys.argv)
clone this paste RAW Paste Data