Advertisement
Guest User

Untitled

a guest
Jul 24th, 2017
55
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.42 KB | None | 0 0
  1. """This is a data definition class--Searchable_list.
  2. Searchable_list takes a list of strings and makes it searchable.
  3. Searchable meaning you can find which elements in the list have a pattern.
  4. """
  5. class Searchable_list(object):
  6. """this will make your word list searchable.
  7. Note, It will also loose the original order of the list."""
  8. def __init__(self, lis):
  9. assert hasattr(lis,"__iter__")
  10. self.search_dict=dict()
  11. for word in set(lis):self.add_word(word)
  12.  
  13. def add_word(self,word):
  14. """this will add a word to the search_dict
  15. search dict is of the form: {letter:{nextletter:{(index,word)}}}
  16. """
  17. assert type(word) is str#or isinstance(word,str)
  18. for index,val in enumerate(word[:-1]):
  19. next_letter=self.search_dict.setdefault(val,dict())
  20. words_list=next_letter.setdefault(word[index+1],set())#object modification
  21. words_list.add((index,word))#object modifification
  22.  
  23. def find_matches(self,seq):
  24. """finds all the words in the list with this sequence.
  25. Uses '.' as wildcard.
  26. """
  27. s_d=self.search_dict
  28. assert len(seq)>1
  29. #could put a try catch to catch key errors
  30. for index,letter in enumerate(seq[:-1]):
  31. if not(letter=="."and seq[index+1]=="."):
  32. #no point if they all match...
  33. if letter==".":
  34. L_m=set.union(*(i.get(seq[index+1],set()) for i in s_d.values()))
  35. #.get is important here. not all is have i[seq[index+1]]
  36. elif seq[index+1]==".":
  37. L_m=set.union(*(i for i in s_d[letter].values()))
  38. else:
  39. L_m=s_d[letter].get(seq[index+1],{})#this is a set.
  40. #L_m==letter_matches
  41. if index>0:
  42. m_m=((i-index,word) for i,word in L_m)
  43. #m_m=matches_matches. These words still have the pattern.
  44. #your matching all indexes to the original m_s
  45. m_s.intersection_update(m_m)
  46. #m_s=matches_set
  47. else:
  48. m_s=L_m.copy()
  49. #http://stackoverflow.com/questions/23200969/how-to-clone-or-copy-a-set-in-python
  50. return m_s
  51.  
  52. def find_matches(self,seq):
  53. """finds all the words in the list with this sequence.
  54. Uses '.' as wildcard.
  55. """
  56. assert len(seq)>1
  57. s_d = self.search_dict
  58. setsList =[]
  59. while seq[-1]=='.':
  60. #not solved by if index+1=='.' because there's no [letter][''] for word endings in self.search_dict.
  61. #without this, .f. wouldn't find (0,"of"), because the L_m in the seq[index+1]=="." if wouldn't include it.
  62. seq = seq[:-1]
  63. for index,letter in enumerate(seq[:-1]):
  64. if not(letter=="." and seq[index+1]=="."):#no point if they all match...
  65. if letter==".":
  66. L_m = set.union(*(i.get(seq[index+1],set()) for i in s_d.values()))
  67. #.get is important here. not all is have i[seq[index+1]]
  68. elif seq[index+1]==".":
  69. L_m = set.union(*s_d[letter].values())
  70. else:
  71. L_m = s_d[letter].get(seq[index+1],{})#this is a set.
  72. #not using s_d.get could cause errors here...
  73. #L_m==letter_matches
  74. setsList.append({(i-index,word) for i,word in L_m})
  75. return set.intersection(*setsList)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement