Guest User

Find largest subset of words with unique letters

a guest
Apr 19th, 2022
90
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.86 KB | None | 0 0
  1. from itertools import islice
  2.  
  3. def load_words(filepath):
  4.     """Loads words from the word list into a list."""
  5.  
  6.     with open(filepath, "r") as fp:
  7.         return fp.read().splitlines()
  8.  
  9. def remove_words_with_repeat_letters(words):
  10.     """Removes words with duplicate letters and returns the filtered list."""
  11.  
  12.     return [word for word in words if len(set(word)) == len(word)]
  13.  
  14. def max_subset_without_repeat_letters(wordsets, disallowed_letters=set(),
  15.                                         index=-1, depth=0, hist=[]):
  16.     """Given an index, find the maximum number of words after the last word which
  17.    can be added to a pre-existing subset, without repeating any letters in the
  18.    subset. Returns the largest possible amount of words that can be added to
  19.    the subset.
  20.    """
  21.    
  22.     # Print progress
  23.     if depth == 1:
  24.         print(index, end=" ")
  25.  
  26.     # Iterate through all the words that come after index
  27.     for i in range(index + 1, len(wordsets)):
  28.         word, wordset = wordsets[i]
  29.        
  30.         # Check that the word does not contain previous letters
  31.         if disallowed_letters.isdisjoint(wordset):
  32.             new_hist = hist + [word]
  33.             max_subset_without_repeat_letters(wordsets, disallowed_letters |
  34.                                         wordset, i, depth + 1, new_hist)
  35.             if len(new_hist) >= 5:
  36.                 print("\n\n" + " ".join(new_hist) + "\n") # TODO: Save this
  37.  
  38. if __name__ == "__main__":
  39.     words_path = "path_to_word_list_here"
  40.  
  41.     words = load_words(words_path)
  42.  
  43.     # Remove words with duplicate letters
  44.     words = remove_words_with_repeat_letters(words)
  45.  
  46.     # import random
  47.     # words = random.sample(words, 3000)
  48.    
  49.     # Pre-compute sets of words for performance.
  50.     wordsets = tuple([(word, set(word)) for word in words])
  51.    
  52.     output = max_subset_without_repeat_letters(wordsets)
Advertisement
Add Comment
Please, Sign In to add comment