Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import math
import string
import time
import timeit
import webbrowser
def timefunc(function, *args):
    """Call ``function(*args)``, print how long the call took, return its result.

    Args:
        function: Any callable.
        *args: Positional arguments forwarded to ``function``.

    Returns:
        Whatever ``function(*args)`` returns.

    Any exception raised by ``function`` propagates unchanged; in that case
    nothing is printed.
    """
    # timeit.default_timer selects the highest-resolution clock available on
    # the running interpreter/platform, unlike the coarse wall-clock time.time().
    clock = timeit.default_timer
    start = clock()
    data = function(*args)
    timetaken = clock() - start
    # Callables such as functools.partial objects have no __name__.
    name = getattr(function, "__name__", repr(function))
    # A single pre-built string keeps the output identical whether print is a
    # statement (Python 2) or a function (Python 3).
    print("Function: " + name + "\nTime taken: " + str(timetaken))
    return data
def process_text(filename):
    """Return every word of a text, lower-cased and stripped of punctuation.

    Args:
        filename: Either an iterable of text lines (for example an open file
            object) or a ``str`` path to a text file, which is opened, read
            and closed here.

    Returns:
        A list of words in reading order. Each whitespace-separated token is
        lower-cased and has leading/trailing ``string.punctuation`` characters
        removed; tokens that are empty after stripping are dropped.
    """
    if isinstance(filename, str):
        with open(filename) as handle:
            return process_text(handle)

    words = []
    # Read from the argument itself rather than from a module-level handle.
    for line in filename:
        for token in line.split():
            word = token.lower().strip(string.punctuation)
            if word:
                words.append(word)
    return words
def create_freq_dict(wordlist):
    """Count how many times each word occurs in ``wordlist``.

    Args:
        wordlist: Iterable of hashable items (words).

    Returns:
        A plain ``dict`` mapping each distinct word to its occurrence count.
        An empty input yields an empty dict.
    """
    counts = {}
    for word in wordlist:
        # dict.get is a single hash lookup; testing against d.keys() would
        # scan a freshly built list on Python 2 for every word.
        counts[word] = counts.get(word, 0) + 1
    return counts
def sort_dict(in_dict):
    """Return the dict's items as ``(value, key)`` tuples, largest first.

    Args:
        in_dict: Mapping whose values (and, for ties, keys) are mutually
            comparable.

    Returns:
        A new list of ``(value, key)`` tuples sorted in descending order.
        Because whole tuples are compared, entries with equal values are
        ordered by key, also descending.
    """
    return sorted(((value, key) for key, value in in_dict.items()), reverse=True)
def write_results(sorted_list, out_path='wordfreq.txt'):
    """Write a ranked word-frequency report to a text file.

    Args:
        sorted_list: Sequence of entries where ``entry[0]`` is the count and
            ``entry[1]`` is the word, already in the desired output order.
        out_path: Destination file; defaults to ``'wordfreq.txt'`` in the
            current working directory. An existing file is overwritten.

    Returns:
        None.

    Each line has the form ``"<rank>: <word> <dashes> <count>"``. The dash
    run shrinks as the rank and word get longer; when they are too long for
    any padding the run is simply empty.
    """
    # The with-block guarantees the file is closed even if a write fails.
    with open(out_path, 'w') as fout:
        fout.write('Word Frequency List\n\n')
        for rank, entry in enumerate(sorted_list, 1):
            count, word = entry[0], entry[1]
            rank_text = str(rank)
            fillamount = 20 - (len(word) + len(rank_text))
            # A negative repeat count yields '', so long words get no dashes.
            dashes = '-' * (fillamount - 2)
            fout.write(rank_text + ': ' + word + ' ' + dashes + ' ' + str(count) + '\n')
## The Brothers Grimm
## This file can be obtained from Project Gutenberg:
## http://www.gutenberg.org/cache/epub/5314/pg5314.txt

# Pipeline: read words -> count them -> rank by frequency -> write report,
# timing each stage with timefunc.
# The raw string keeps the Windows backslashes literal instead of relying on
# them not forming recognised escape sequences.
with open(r'c:\Python27\My Programs\wx\grimm.txt') as fin:
    # The input handle is only needed while the words are being read.
    wordlist = timefunc(process_text, fin)
freqdict = timefunc(create_freq_dict, wordlist)
sorted_list = timefunc(sort_dict, freqdict)
results = timefunc(write_results, sorted_list)  # write_results returns None
# Open the generated report with the system's default handler.
webbrowser.open('wordfreq.txt')
print("END")
Advertisement
Add Comment
Please, Sign In to add comment