SHOW:
|
|
- or go back to the newest paste.
1 | #!/usr/bin/python2 | |
2 | ||
3 | ||
4 | - | import string |
4 | + | """Object-based word count demonstration""" |
5 | ||
6 | ||
7 | import sys | |
8 | import collections | |
9 | import csv | |
10 | import re | |
11 | ||
12 | ||
class WordCount(object):
    """Count of words in a text

    Attributes:
    counts -- collections.Counter of words
    """

    def __init__(self, counts):
        """Initializes a WordCount

        Do not call directly, use WordCount.fromfile or WordCount.fromstring
        instead

        Arguments:
        counts -- collections.Counter mapping each word to its frequency
        """
        self.counts = counts

    @classmethod
    def fromfile(cls, filename):
        """Creates a WordCount for the given file

        Arguments:
        filename -- path of the text file to read in full
        """
        with open(filename, 'r') as infile:
            text = infile.read()
        return cls.fromstring(text)

    @classmethod
    def fromstring(cls, text):
        """Creates a WordCount for the given string

        A word is any maximal run of regex word characters (\\w+), so
        numbers are counted too and matching is case-sensitive.
        """
        wordlist = re.findall(r'\w+', text)
        counts = collections.Counter(wordlist)
        return cls(counts)

    @property
    def ordered(self):
        """List of (word, count) tuples in descending frequency

        Words with equal counts are ordered alphabetically.
        """
        # sorted() with an index-based key works on Python 2 and 3 alike;
        # tuple-unpacking lambdas and list-returning items() are Python 2 only.
        return sorted(self.counts.items(),
                      key=lambda item: (-item[1], item[0]))

    def to_csv(self, filename):
        """Writes the ordered counts to file

        Arguments:
        filename -- path of the CSV file to create or overwrite
        """
        # The csv module emits its own line terminators; the file must be
        # opened so the platform does not translate them a second time.
        if sys.version_info[0] >= 3:
            outfile = open(filename, 'w', newline='')
        else:
            outfile = open(filename, 'wb')
        with outfile:
            writer = csv.writer(outfile)
            writer.writerows(self.ordered)
53 | ||
54 | ||
55 | ||
def main():
    """Counts words (and numbers) in a file

    Takes the input text file and the output CSV file from the first two
    command-line arguments. Prints a usage message and exits with status 1
    when fewer than two arguments are given.
    """
    try:
        # Slicing never raises IndexError; it is the unpacking of a
        # too-short slice that fails, and it does so with ValueError.
        infilename, outfilename = sys.argv[1:3]
    except ValueError:
        print("Usage: %s INFILE.txt OUTFILE.csv" % sys.argv[0])
        sys.exit(1)
    count = WordCount.fromfile(infilename)
    count.to_csv(outfilename)
65 | ||
66 | ||
# Run the word count only when executed as a script, not when imported.
if __name__ == '__main__':
    main()