Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- """This is a data definition class--Searchable_list.
- Searchable_list takes a list of strings and makes it searchable.
- Searchable meaning you can find which elements in the list have a pattern.
- """
class Searchable_list(object):
    """Index a collection of words so they can be searched by pattern.

    Every pair of adjacent characters of every word is stored in
    ``search_dict``, a nested mapping of the form
    ``{letter: {next_letter: {(index, word), ...}}}`` where ``index`` is the
    position of ``letter`` inside ``word``.

    Note: duplicate words are dropped and the original order of the list is
    lost. Words shorter than two characters contain no character pair, so
    they are never indexed and can never be returned by ``find_matches``.
    """

    def __init__(self, lis):
        """Build the index from ``lis``, an iterable of strings.

        Raises:
            TypeError: if ``lis`` is not iterable or holds a non-string.
        """
        # Explicit raise instead of ``assert``: asserts vanish under ``-O``.
        if not hasattr(lis, "__iter__"):
            raise TypeError("lis must be an iterable of strings")
        self.search_dict = dict()
        for word in set(lis):
            self.add_word(word)

    def add_word(self, word):
        """Add every adjacent character pair of ``word`` to ``search_dict``.

        For the pair starting at position ``index`` the tuple
        ``(index, word)`` is added to
        ``search_dict[word[index]][word[index + 1]]``.

        Raises:
            TypeError: if ``word`` is not a string.
        """
        if not isinstance(word, str):
            raise TypeError(
                "word must be a str, got %s" % type(word).__name__
            )
        for index, (letter, following) in enumerate(zip(word, word[1:])):
            followers = self.search_dict.setdefault(letter, dict())
            followers.setdefault(following, set()).add((index, word))

    def find_matches(self, seq):
        """Return every place where the pattern ``seq`` occurs in a word.

        ``'.'`` is a wildcard that matches any single character.

        Trailing wildcards are ignored, so they may run past the end of a
        word (``".f."`` finds ``(0, "of")``). Likewise leading wildcards may
        run past the start of a word, in which case the reported start index
        is negative (``"..a"`` finds ``(-1, "ba")``).

        Args:
            seq: the pattern. Once trailing wildcards are removed it must
                still hold at least two characters, because the index only
                stores pairs of adjacent characters.

        Returns:
            A new set of ``(start_index, word)`` tuples; empty when nothing
            matches.

        Raises:
            ValueError: if ``seq`` has fewer than two characters, or fewer
                than two remain after dropping trailing wildcards.
        """
        if len(seq) < 2:
            raise ValueError("seq must contain at least two characters")

        # There is no [letter][''] entry for word endings in search_dict, so
        # a trailing wildcard could never be satisfied by the last letter of
        # a word; dropping trailing wildcards lets ".f." still find "of".
        while seq and seq[-1] == ".":
            seq = seq[:-1]
        if len(seq) < 2:
            raise ValueError(
                "seq needs at least two characters before its trailing "
                "wildcards"
            )

        s_d = self.search_dict
        matches = None
        for index, (letter, following) in enumerate(zip(seq, seq[1:])):
            if letter == "." and following == ".":
                # Two wildcards constrain nothing; the surrounding pairs
                # already pin down the distance between concrete letters.
                continue
            if letter == ".":
                # Any first letter: gather the pair sets of every letter
                # that can be followed by ``following``.
                pair_matches = set()
                for followers in s_d.values():
                    pair_matches.update(followers.get(following, ()))
            elif following == ".":
                # Any second letter: gather everything stored under letter.
                pair_matches = set()
                for positions in s_d.get(letter, {}).values():
                    pair_matches.update(positions)
            else:
                pair_matches = s_d.get(letter, {}).get(following, ())

            # Shift each hit back to where the whole pattern would start so
            # that hits from different pairs of the same occurrence agree.
            shifted = {(i - index, word) for i, word in pair_matches}
            if matches is None:
                matches = shifted
            else:
                matches &= shifted
            if not matches:
                # Nothing can survive further intersections.
                return matches
        # After stripping, the last character is concrete, so the final pair
        # always constrains the result and ``matches`` is a set here.
        return matches
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement