salawank

wordcount.py

Aug 5th, 2013
138
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import sys
  2.  
  3. # +++your code here+++
  4. # Define print_words(filename) and print_top(filename) functions.
  5. # You could write a helper utility function that reads a file
  6. # and builds and returns a word/count dict for it.
  7. # Then print_words() and print_top() can just call the utility function.
  8.    
  9. ###
  10.  
  11. def open_file_dict(filename):
  12.  
  13.     word_count = {}
  14.     f = open(filename, 'r')
  15.    
  16.     for line in f:
  17.         words = line.split()
  18.         for to_lower in words:
  19.             lower_words = to_lower.lower()
  20.             #print lower_words
  21.            
  22.             if not lower_words in word_count: # if not exist yet lower_words in word_count
  23.                 word_count[lower_words] = 1   # put it in the dictionary
  24.             else: # if it is already exist
  25.                 word_count[lower_words] = word_count[lower_words] + 1 #increment it by how many there is by 1
  26.     return word_count
  27.  
  28.  
  29. def print_words(filename):
  30.  
  31.        
  32.     word_count = open_file_dict(filename)
  33.    
  34.     words = sorted(word_count.keys())
  35.    
  36.     for word in words:
  37.         print word, word_count[word]
  38.    
  39.    
  40. def get_count(word_count_tuple):
  41.   """Returns the count from a dict word/count tuple  -- used for custom sort."""
  42.   return word_count_tuple[1]
  43.    
  44.  
  45.    
  46. def print_top(filename):
  47.  
  48.     word_count = word_count_dict(filename)
  49.  
  50.     items = sorted(word_count.items(), key=get_count, reverse=True)
  51.  
  52.   # Print the first 20
  53.     for item in items[:20]:
  54.         print item[0], item[1]
  55.  
  56.  
  57.  
  58. # This basic command line argument parsing code is provided and
  59. # calls the print_words() and print_top() functions which you must define.
  60. def main():
  61.   if len(sys.argv) != 3:
  62.     print 'usage: ./wordcount.py {--count | --topcount} file'
  63.     sys.exit(1)
  64.  
  65.   option = sys.argv[1]
  66.   filename = sys.argv[2]
  67.   if option == '--count':
  68.     print_words(filename)
  69.   elif option == '--topcount':
  70.     print_top(filename)
  71.   else:
  72.     print 'unknown option: ' + option
  73.     sys.exit(1)
  74.  
  75. if __name__ == '__main__':
  76.   main()
RAW Paste Data