Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python
- import sys
- import logging
# Module-level logger used by the command-line entry point to report errors.
logger = logging.getLogger("wordsfrequency")
class WrongInputError(Exception):
    """Raised when the script is started without a file path argument."""
def count_words_frequency(text):
    """Count how often each word occurs in text.

    Words are the whitespace-separated tokens of ``text`` (``str.split()``),
    compared case-insensitively by lowercasing each token. Punctuation is
    not stripped, so ``"word"`` and ``"word,"`` are counted separately.

    Returns a tuple of ``(word, frequency)`` tuples, for example
    ``(("b", 3), ("a", 2), ("c", 1))``. Entries are ordered by frequency in
    descending order; words with equal frequency are ordered alphabetically.
    An empty or whitespace-only text yields an empty tuple.

    Arguments:
    text -- string
    """
    # Imported locally so this function does not depend on the module's
    # import block.
    from collections import Counter

    stats = Counter(word.lower() for word in text.split())
    # One sort with a composite key: negated count gives descending
    # frequency, the word itself breaks ties alphabetically.
    ordered = sorted(stats.items(), key=lambda item: (-item[1], item[0]))
    return tuple(ordered)
# Command-line entry point: read the file named by the first argument and
# print one "word:frequency" line per distinct word, most frequent first.
if __name__ == "__main__":
    file_data = None
    try:
        if len(sys.argv) <= 1:
            raise WrongInputError("Cant find file path in script arguments")
        # The context manager closes the file handle even if read() fails.
        with open(sys.argv[1]) as source_file:
            file_data = source_file.read()
    except IOError:
        logger.error("""Error: cant read the file.\n\nAborting.""")
        # Exit with a non-zero status so callers can detect the failure.
        sys.exit(1)
    except WrongInputError:
        logger.error(
            """Error: Please, provide real path to the file as first argument.\n\n"""
            """For example: python words_frequency_count.py ./source.txt\n\n"""
            """Aborting."""
        )
        sys.exit(1)
    # An empty file produces no output at all (not even a blank line).
    if file_data:
        stats = count_words_frequency(file_data)
        print("\n".join('{}:{}'.format(*row) for row in stats))
Add Comment
Please, Sign In to add comment