talloaktrees

word frequency program

Feb 4th, 2012
398
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.60 KB | None | 0 0
  1. import string, time, math, webbrowser
  2.  
  3. def timefunc(function, *args):
  4.     start = time.time()
  5.     data = function(*args)
  6.     end = time.time()
  7.     timetaken = end - start
  8.     print "Function: "+function.__name__+"\nTime taken:",timetaken
  9.     return data
  10.  
  11. def process_text(filename):
  12.     t = []
  13.     for line in fin:
  14.         for i in line.split():
  15.             word = i.lower()
  16.             word = word.strip(string.punctuation)
  17.             if word != '':
  18.                 t.append(word)
  19.     return t
  20.  
  21. def create_freq_dict(wordlist):
  22.     d = dict()
  23.     for word in wordlist:
  24.         if word not in d.keys():
  25.             d[word] = 1
  26.         else:
  27.             d[word] += 1
  28.     return d
  29.  
  30. def sort_dict(in_dict):
  31.     t = []
  32.     for key,value in in_dict.items():
  33.         t.append((value, key))
  34.     t.sort(reverse=True)
  35.     return t
  36.    
  37. def write_results(sorted_list):
  38.     fout = open('wordfreq.txt','w')
  39.     fout.write('Word Frequency List\n\n')
  40.     r = 0
  41.     for i in sorted_list:
  42.         r += 1
  43.         fillamount = 20 - (len(i[1]) + len(str(r)))
  44.         write_str = str(r)+': '+i[1]+' '+('-' * (fillamount-2))+' '+str(i[0])+'\n'
  45.         fout.write(write_str)
  46.     fout.close()
  47.  
  48.  
  49. ## The Brothers Grimm
  50. ## This file can be obtained from Project Gutenberg:
  51. ## http://www.gutenberg.org/cache/epub/5314/pg5314.txt
  52. fin = open('c:\Python27\My Programs\wx\grimm.txt')
  53.  
  54. wordlist = timefunc(process_text,fin)
  55. freqdict = timefunc(create_freq_dict,wordlist)
  56. sorted_list = timefunc(sort_dict,freqdict)
  57. results = timefunc(write_results,sorted_list)
  58.  
  59. webbrowser.open('wordfreq.txt')
  60.  
  61.  
  62. print "END"
Advertisement
Add Comment
Please, Sign In to add comment