Guest User

Untitled

a guest
Nov 25th, 2017
78
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.79 KB | None | 0 0
  1. #!/usr/bin/env python
  2. import sys
  3. import logging
  4.  
  5.  
  6. logger = logging.getLogger('wordsfrequency')
  7.  
  8.  
  9. class WrongInputError(Exception):
  10. pass
  11.  
  12.  
  13. def count_words_frequency(text):
  14. """ Counts words frequency in text.
  15. Returns tuple, structure is illustrated by example - ((word, frequency), <...>).
  16. Tuple is ordered by frequency in desc order.
  17. Word with equal frequency ordered by alphabet.
  18.  
  19. Arguments:
  20. text -- string
  21. """
  22. words = text.split()
  23. stats = dict()
  24. for word in words:
  25. word = word.lower()
  26. if word not in stats:
  27. stats[word] = 0
  28. stats[word] += 1
  29. # prepare stats for ordering
  30. output = list(stats.items())
  31. # python sort is stable, so we need to sort stats by key in alphabetical order
  32. output.sort(key=lambda item: item[0])
  33. # and then sort it by frequency (elements with equal frequency
  34. # will not be changed, because sort is stable)
  35. output.sort(key=lambda item: item[1], reverse=True)
  36. return tuple(output)
  37.  
  38.  
  39. if __name__ == "__main__":
  40. file_data = None
  41. try:
  42. if(len(sys.argv) > 1):
  43. source = sys.argv[1]
  44. file_data = open(source).read()
  45. else:
  46. raise WrongInputError("Cant find file path in script arguments")
  47. except IOError:
  48. logger.error("""Error: cant read the file.\n\nAborting.""")
  49. sys.exit()
  50. except WrongInputError:
  51. logger.error("""Error: Please, provide real path to the file as first argument.\n\n"""\
  52. """For example: python words_frequency_count.py ./source.txt\n\n"""\
  53. """Aborting.""")
  54. sys.exit()
  55.  
  56. if file_data:
  57. buf = []
  58. stats = count_words_frequency(file_data)
  59. for row in stats:
  60. buf.append('{}:{}'.format(*row))
  61. print("\n".join(buf))
Add Comment
Please, Sign In to add comment