Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/env python
import sys
from collections import defaultdict
def nice_print(li):
    """Write one report line per word entry to standard output.

    ``li`` is an iterable of ``(word, [count, line_numbers])`` pairs.  Each
    entry is written as the count, the word and then every line number,
    separated by single spaces and terminated by a newline.
    """
    for entry in li:
        word = entry[0]
        count = entry[1][0]
        line_numbers = entry[1][1]
        fields = [count, word]
        fields.extend(line_numbers)
        # sys.stdout.write behaves the same on Python 2 and Python 3,
        # whereas the print statement is a syntax error on Python 3.
        sys.stdout.write(' '.join(str(field) for field in fields) + '\n')
def count_words(filename):
    """Print a word-frequency index for the text file ``filename``.

    Words are whitespace-separated tokens compared case-insensitively
    (they are lower-cased).  For every distinct word the total number of
    occurrences and the 1-based numbers of the lines containing it are
    collected.  The entries are then passed to ``nice_print`` ordered by
    descending count, with ties broken alphabetically by word.
    """
    # word -> [occurrence count, ascending list of distinct line numbers].
    # nice_print reads entries in exactly this [count, lines] layout.
    index = defaultdict(lambda: [0, []])

    # The context manager closes the file even if reading raises.
    with open(filename, 'r') as fin:
        # enumerate() numbers each line by its actual position; finding a
        # line's number by searching for its text would report the first
        # identical line and mis-number any repeated line.
        for line_number, line in enumerate(fin, 1):
            for token in line.split():
                entry = index[token.lower()]
                entry[0] += 1
                line_numbers = entry[1]
                # Line numbers only ever increase, so a repeat within the
                # same line can only equal the most recently stored number.
                if not line_numbers or line_numbers[-1] != line_number:
                    line_numbers.append(line_number)

    # Negating the count sorts the most frequent words first while keeping
    # the alphabetical tie-break ascending.
    ranked = sorted(index.items(), key=lambda item: (-item[1][0], item[0]))
    nice_print(ranked)
def main():
    """Command-line entry point; expects exactly one argument, a file name.

    When the argument count is wrong, a usage line is written to standard
    output and the process exits with status 1.  Otherwise ``count_words``
    is run on the given file.
    """
    if len(sys.argv) != 2:
        # sys.stdout.write keeps the message on stdout, as before, while
        # also working on Python 3 where the print statement does not parse.
        sys.stdout.write('Use: python wordcount2.py filename\n')
        sys.exit(1)
    count_words(sys.argv[1])
# Run the command-line interface only when executed as a script, so that
# importing this module has no side effects.
if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement