Advertisement
lucaswiman

Simple texttwist algorithm

Jun 18th, 2011
303
0
Never
Not a member of Pastebin yet? Sign up; it unlocks many cool features!
Python 2.16 KB | None | 0 0
  1. >>> from collections import defaultdict
  2. >>>
  3. >>> def read_file(f):
  4. ...     with open(f, 'r') as f:
  5. ...         return f.read()
  6. ...
  7. >>> def sort_string(s):
  8. ...     return ''.join(sorted(s))
  9. ...
  10. >>>
  11. >>> words = read_file('./scrabble_dict.txt').lower().split()
  12. >>> sorted_to_words = defaultdict(list)
  13. >>> all_sorted_words = map(sort_string, words)
  14. >>>
  15. >>> for word in words:
  16. ...     sorted_to_words[sort_string(word)].append(word)
  17. ...
  18. >>>
  19. >>> all_sorted_words = ' %s ' % ' '.join(all_sorted_words)
  20. >>>    
  21. ...
  22. >>> def get_subwords(word, all_sorted_words=all_sorted_words, sorted_to_words=sorted_to_words):
  23. ...     word = sort_string(word)
  24. ...     regex = re.compile(' (%s) ' % ''.join(c + '?' for c in word))
  25. ...     matches = regex.findall(all_sorted_words)
  26. ...     return sorted(set(sum(map(sorted_to_words.__getitem__, matches), [])), key=len, reverse=True)
  27. ...
  28. >>>
  29. >>> get_subwords('elephant')
  30. ['elephant', 'heeltap', 'heptane', 'phenate', 'haptene', 'lateen', 'hapten', 'thenal', 'planet', 'ethane', 'peahen', 'hantle', 'platen', 'paten', 'thane', 'plena', 'pleat', 'tepal', 'anele', 'panel', 'telae', 'eaten', 'leant', 'lepta', 'elate', 'plane', 'aleph', 'neath', 'penal', 'etape', 'plate', 'palet', 'petal', 'plant', 'leapt', 'enate', 'lethe', 'laten', 'help', 'pant', 'elan', 'pane', 'hate', 'pent', 'late', 'tape', 'lath', 'then', 'halt', 'thee', 'hale', 'haen', 'teal', 'alee', 'haet', 'heel', 'pale', 'pelt', 'lean', 'leap', 'pele', 'peen', 'peel', 'plea', 'etna', 'path', 'peal', 'tepa', 'pate', 'ante', 'tela', 'nape', 'tele', 'teen', 'teel', 'lept', 'heal', 'heat', 'heap', 'thae', 'than', 'tael', 'neap', 'neat', 'epha', 'phat', 'neep', 'hent', 'leet', 'peat', 'plat', 'pean', 'plan', 'lent', 'hant', 'lane', 'tale', 'eath', 'ale', 'alt', 'alp', 'hae', 'eat', 'hap', 'hat', 'pee', 'eel', 'tel', 'ten', 'tea', 'tee', 'pet', 'nee', 'peh', 'net', 'pea', 'lea', 'lee', 'nth', 'let', 'ate', 'eth', 'apt', 'pht', 'tap', 'ape', 'het', 'hep', 'hen', 'ane', 'ant', 'tae', 'pen', 'tan', 'pat', 'nae', 'nah', 'nap', 'eta', 'pah', 'pal', 'pan', 'lat', 'lap', 'the', 'pe', 'la', 'ta', 'el', 'en', 'eh', 'et', 'pa', 'ha', 'he', 'ae', 'ah', 'al', 'an', 'at', 'na', 'ne']
Advertisement
Add Comment
Please sign in to add a comment.
Advertisement