Advertisement
Guest User

programaN02V01.py

a guest
Mar 18th, 2019
67
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.02 KB | None | 0 0
import os
import sys
  3.  
  4. def main():  
  5.    filepath = "MyADCP.txt"
  6.  
  7.    if not os.path.isfile(filepath):
  8.        print("File path {} does not exist. Exiting...".format(filepath))
  9.        sys.exit()
  10.  
  11.    bag_of_words = {}
  12.    with open(filepath) as fp:
  13.        cnt = 0
  14.        for line in fp:
  15.            print("line {} contents {}".format(cnt, line))
  16.            record_word_cnt(line.strip().split(' '), bag_of_words)
  17.            cnt += 1
  18.    sorted_words = order_bag_of_words(bag_of_words, desc=True)
  19.    print("Most frequent 10 words {}".format(sorted_words[:10]))
  20.  
  21. def order_bag_of_words(bag_of_words, desc=False):  
  22.    words = [(word, cnt) for word, cnt in bag_of_words.items()]
  23.    return sorted(words, key=lambda x: x[1], reverse=desc)
  24.  
  25. def record_word_cnt(words, bag_of_words):  
  26.    for word in words:
  27.        if word != '':
  28.            if word.lower() in bag_of_words:
  29.                bag_of_words[word.lower()] += 1
  30.            else:
  31.                bag_of_words[word.lower()] = 1
  32.  
  33. if __name__ == '__main__':  
  34.    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement