Advertisement
FamiHug

WordCounter

Jul 1st, 2012
59
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.09 KB | None | 0 0
  1. #!/usr/bin/env python
  2. import sys
  3.  
  4. def nice_print(li):
  5.     for i in li:
  6.         print i[1][0] , i[0] ,
  7.         for j in i[1][1]:
  8.             print j,
  9.         print
  10.  
  11. def count_words(filename):
  12.     fin = open(filename, 'r')
  13.     lines = fin.readlines()
  14.    
  15.     words = []
  16.     for line in lines:
  17.         words += line.split()
  18.    
  19.     #Tolower
  20.     words = [w.lower() for w in words]
  21.    
  22.     #remove dup, sort
  23.     removed_dup = sorted(list(set(words)))
  24.    
  25.     d = {}
  26.     for w in removed_dup:
  27.         #first contain cntr, second is lines
  28.         d[w] = [0, []]
  29.    
  30.     #count word and line
  31.     for line in lines:
  32.         line_number = lines.index(line) + 1
  33.         for w in line.split():
  34.             lower = w.lower()
  35.             if lower in removed_dup:
  36.                 d[lower][0] += 1
  37.                 if line_number not in d[lower][1]:
  38.                     d[lower][1].append(line_number)
  39.    
  40.     #sorted_d = sorted(d.values()[0])
  41.     sorted_d = sorted(d.iteritems() , key = lambda x: (x[1][0] * -1, x[0]))
  42.     nice_print(sorted_d)
  43.  
  44. def main():
  45.     if len(sys.argv) != 2:
  46.         print 'Use: python wordcount2.py filename'
  47.         sys.exit(1)
  48.  
  49.     filename = sys.argv[1]
  50.     count_words(filename)
  51.  
  52. if __name__ == '__main__':
  53.     main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement