Advertisement
homer512

Word count class

Jan 7th, 2016
74
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #!/usr/bin/python2
  2.  
  3.  
  4. """Object-based word count demonstration"""
  5.  
  6.  
  7. import sys
  8. import collections
  9. import csv
  10. import re
  11.  
  12.  
  13. class WordCount(object):
  14.     """Count of words in a text
  15.  
  16.    Attributes:
  17.    counts -- collections.Counter of words
  18.    """
  19.     def __init__(self, counts):
  20.         """Intializes a WordCount
  21.  
  22.        Do not call directly, use WordCount.fromfile or WordCount.fromstring
  23.        instead
  24.        """
  25.         self.counts = counts
  26.  
  27.     @classmethod
  28.     def fromfile(cls, filename):
  29.         """Creates a WordCount for the given file"""
  30.         with open(filename, 'r') as infile:
  31.             text = infile.read()
  32.         return cls.fromstring(text)
  33.  
  34.     @classmethod
  35.     def fromstring(cls, text):
  36.         """Creates a WordCount for the given string"""
  37.         wordlist = re.findall(r'\w+', text)
  38.         counts = collections.Counter(wordlist)
  39.         return cls(counts)
  40.  
  41.     @property
  42.     def ordered(self):
  43.         """List of (word, count) tuples in descending frequency"""
  44.         items = self.counts.items()
  45.         items.sort(key=lambda (word, count): (-count, word))
  46.         return items
  47.  
  48.     def to_csv(self, filename):
  49.         """Writes the ordered counts to file"""
  50.         with open(filename, 'w') as outfile:
  51.             writer = csv.writer(outfile)
  52.             writer.writerows(self.ordered)
  53.  
  54.  
  55.  
  56. def main():
  57.     """Counts words (and numbers) in a file"""
  58.     try:
  59.         infilename, outfilename = sys.argv[1:3]
  60.     except IndexError:
  61.         print "Usage: %s INFILE.txt OUTFILE.csv" % sys.argv[0]
  62.         sys.exit(1)
  63.     count = WordCount.fromfile(infilename)
  64.     count.to_csv(outfilename)
  65.  
  66.  
# Run the command line interface only when executed as a script, so that
# importing this module has no side effects.
if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement