Advertisement
coletucker12

paradigms9

Dec 2nd, 2019
268
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.32 KB | None | 0 0
  1. import sys
  2. import operator
  3. import re
  4.  
  def count_combos(input_string):
    """Count how often each pair of adjacent words occurs in a string.

    The string is split on runs of whitespace, and every word is paired
    with the word that immediately follows it.

    Args:
      input_string: Text whose neighbouring words are to be paired.

    Returns:
      A dict mapping ``(word, next_word)`` tuples to the number of times
      that pair appears, keyed in order of first appearance. The dict is
      empty when the text holds fewer than two words.
    """
    words = input_string.split()
    freq = {}
    # Zipping the list against itself shifted by one yields each adjacent
    # pair exactly once, and yields nothing for zero or one word, so no
    # length guard or index arithmetic is needed.
    for word_pair in zip(words, words[1:]):
      freq[word_pair] = freq.get(word_pair, 0) + 1
    return freq
  18.  
  def main():
    """Print the five most frequent adjacent word pairs of a story file.

    Reads two paths from the command line: the story text and a file of
    comma-separated skip words. Both are lower-cased, the punctuation
    listed in the substitution pattern is turned into spaces, skip words
    are dropped, and the remaining neighbouring words are counted with
    count_combos().

    Exits with a usage message when either path is missing.
    """
    if len(sys.argv) < 3:
      sys.exit("usage: python <script> <story_file> <skip_words_file>")
    story_path = sys.argv[1]
    skip_path = sys.argv[2]

    with open(story_path, 'r') as content:
      story_content = content.read().lower()

    with open(skip_path, 'r') as content:
      raw_skip = content.read().lower().split(',')

    # Strip padding and the file's trailing newline: without this an entry
    # such as " the\n" (from "a, the\n") can never equal a story token.
    skip_words = [word.strip() for word in raw_skip]
    skip_words = [word for word in skip_words if word]
    # The list is kept for the report below; the set gives O(1) lookups.
    skip_lookup = set(skip_words)

    story_content = re.sub('[\n\t.?,;:\'\"]', ' ', story_content)
    story_list = [
      word for word in story_content.split(' ')
      if word and word not in skip_lookup
    ]

    freq_dict = count_combos(" ".join(story_list))
    ranked = sorted(freq_dict.items(), key=operator.itemgetter(1))
    # Slice with [-5:] instead of [len-5:len]: with fewer than five pairs
    # the computed start goes negative and wraps around, silently dropping
    # results (e.g. three pairs would yield only two).
    top_5 = ranked[-5:]

    print(f"Story file name: {story_path}")
    print(f"Skip words file name: {skip_path}")
    print(f"Skip words: {skip_words}")
    print("The five most frequently occuring word pairs are:")
    # ranked is ascending by count, so walk it backwards to list the most
    # frequent pair first.
    for pair in top_5[::-1]:
      print(f"('{pair[0][0]} {pair[0][1]}', {pair[1]})")
  44.  
  # Run the report only when this file is executed as a script, not when
  # it is imported as a module.
  if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement