Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import os
import sys
from collections import Counter

# Python 2 names the line-reading prompt ``raw_input``; Python 3 renamed it to
# ``input``. Bind whichever one exists so the script runs on both versions.
try:
    read_line = raw_input
except NameError:
    read_line = input


def count_words(text):
    """Return a Counter mapping each whitespace-separated word to its frequency.

    Words are compared exactly as they appear: no case folding and no
    punctuation stripping is performed.
    """
    return Counter(text.split())


def top_words(counts, n):
    """Return the ``n`` most frequent ``(word, count)`` pairs from ``counts``.

    Pairs are ordered by descending count; ties are broken alphabetically so
    the report does not depend on dictionary iteration order. A non-positive
    ``n`` yields an empty list, and an ``n`` larger than the number of distinct
    words yields every word.
    """
    if n <= 0:
        return []
    ranked = sorted(counts.items(), key=lambda pair: (-pair[1], pair[0]))
    return ranked[:n]


def format_row(word, count):
    """Format one report line: word left-aligned in 20 columns, count right-aligned in 10."""
    return "%-20s %10d" % (word, count)


def main():
    """Prompt for a file and a word limit, then print the most frequent words.

    Exits via ``sys.exit()`` when the file does not exist or when the
    requested number of words is not an integer.
    """
    print("")
    print("Running Script...")
    print("")
    print("This program analyzes word frequency in a file and")
    print("prints a report on the n most frequent words.")
    print("")

    filename = read_line("File to analyze? ")
    if os.path.isfile(filename):
        print("The file %s exists!" % filename)
    else:
        print("The file %s doesn't exist!" % filename)
        sys.exit()
    print("")

    answer = read_line("Output analysis of how many words? ")
    try:
        limit = int(answer)
    except ValueError:
        print("%r is not a whole number." % answer)
        sys.exit()

    # Read-only access is enough; the context manager closes the handle even
    # if reading fails.
    with open(filename, "r") as handle:
        counts = count_words(handle.read())

    for word, count in top_words(counts, limit):
        print(format_row(word, count))


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement