from itertools import chain EPSILON = object() class GNFA: """ tests: >>> a = GNFA('a', ['ab1'], set('b')) >>> a.pre('a') set([]) >>> a.post('a') set(['b']) >>> a.pre('b') set(['a']) >>> a.post('b') set([]) """ def __init__(self, startState, transitions, finalStates): ## infer state set from transitions self.states = set() self.states.update(q for q, r, lab in transitions) self.states.update(r for q, r, lab in transitions) assert startState in self.states self.startState = startState self._labels = dict(((q, r), lab) for q, r, lab in transitions) self.transitions = Index(transitions, 'start end label'.split()) assert finalStates <= self.states self.finalStates = finalStates def __str__(self): return ("startState => %(startState)s\n" "transitions => %(transitions)s\n" "finalStates => %(finalStates)s" % self.__dict__ ) def re(self): """ test: >>> a = GNFA('a', ['ab1'], set('b')) >>> a.re() '1' >>> import re >>> a2 = GNFA('a', ['ab1', 'ba1', 'ac0', 'ca0', 'bd0', 'db0', 'cd1', 'dc1'], set('b')) >>> eo = re.compile(a2.re() + '$') >>> def binary(n): ... if n == 0: return '0' ... digits = [] ... while n: ... if n % 2: digits.append('1') ... else: digits.append('0') ... n >>= 1 ... return ''.join(reversed(digits)) >>> binary(0) '0' >>> binary(1) '1' >>> binary(2) '10' >>> binary(3) '11' >>> binary(4) '100' >>> binary(5) '101' >>> binary(6) '110' >>> binary(7) '111' >>> def eo_p(s): return s.count('1') % 2 == 1 and s.count('0') % 2 == 0 ... >>> for n in range(10000): ... s = binary(n) ... if eo_p(s) != bool(eo.match(s)): ... print n ... break ... """ normalized = self.normalized() ## rip states in self from normalized for q in self.states: normalized.irip(q) ## return single remaining label in normlized res = list(normalized._labels.values()) assert len(res) == 1 return res[0] def normalized(self): startState = object() # construct a new start and end states finalState = object() ## add transitions from new start state and to new end state transitions = set(self.transitions) transitions.add( (startState, self.startState, EPSILON) ) transitions.update((f, finalState, EPSILON) for f in self.finalStates) return GNFA(startState, transitions, set([finalState])) def irip(self, q): """ tests: >>> a = GNFA('a', ['ab0', 'bc0'], set(['c'])) >>> a.irip('b') >>> print a startState => a transitions => set([('a', 'c', '00')]) finalStates => set(['c']) >>> a._labels {('a', 'c'): '00'} """ pre = self.pre(q) post = self.post(q) selfLoop = self._labels.get( (q,q), EPSILON ) ## add new new transitions corresponding to paths going through q for p in pre: for r in post: lab = reConcat(self._labels[p,q], reStar(selfLoop), self._labels[q,r] ) self.addTransition(p, r, lab) self.removeState(q) def addTransition(self, q, r, lab): """ test: >>> a = GNFA('a', ['ab1'], set('b')) >>> a.addTransition('a', 'b', 'the world is ending') '(1|the world is ending)' >>> print a startState => a transitions => set([('a', 'b', '(1|the world is ending)')]) finalStates => set(['b']) >>> a._labels {('a', 'b'): '(1|the world is ending)'} """ if (q, r) in self._labels: oldLab = self._labels[q,r] newLab = reUnion(oldLab, lab) # union labels self._labels[q,r] = newLab # update self._labels with new label self.transitions.remove(q, r, oldLab) # update self.transitions with new transition self.transitions.add(q, r, newLab) return newLab else: self._labels[q,r] = lab self.transitions.add(q, r, lab) return lab def removeState(self, q): """ tests: >>> a = GNFA('a', ['ab1'], set('b')) >>> a.removeState('b') >>> print a startState => a transitions => set([]) finalStates => set([]) >>> a._labels {} """ if q == self.startState: raise ValueError('Cannot remove the start state.') pre = self.pre(q) for p in pre: lab = self._labels[p,q] self.transitions.remove(p, q, lab) del self._labels[p,q] post = self.post(q) for r in post: lab = self._labels[q,r] self.transitions.remove(q, r, lab) del self._labels[q,r] if (q,q) in self._labels: lab = self._labels[q,q] self.transitions.remove(q, q, lab) del self._labels[q,q] self.finalStates.discard(q) # discard succeeds even if q does not exist def pre(self, q): """Returns a set of predecessors. Result does not include argument. """ return self.transitions.project1( self.transitions.select(1, q), 'start' ) - set([q]) def post(self, q): """Returns a set of successors. Result does not include argument. """ return self.transitions.project1( self.transitions.select(0, q), 'end' ) - set([q]) class Index: """ tests: >>> i = Index(['abc', 'adc', 'bbb']) >>> i.select(0, 'crazy') set([]) >>> i.select(1, 'crazy') set([]) >>> i.select(2, 'crazy') set([]) >>> i.select(0, 'a') set([('a', 'b', 'c'), ('a', 'd', 'c')]) >>> i.select(0, 'b') set([('b', 'b', 'b')]) >>> i.select(1, 'b') set([('b', 'b', 'b'), ('a', 'b', 'c')]) >>> i.select(2, 'c') set([('a', 'b', 'c'), ('a', 'd', 'c')]) >>> i.removeTuple('bbb') >>> print i set([('a', 'b', 'c'), ('a', 'd', 'c')]) """ def __init__(self, tuples, columnNames=None): ## set self.columnNames and determine arity if columnNames is None: arity, tuples = inferArity(tuples) self.columnNames = columnNames = range(arity) else: self.columnNames = columnNames arity = len(columnNames) ## for determining column number self._columnName2columnNumber = dict((name, n) for n, name in enumerate(columnNames)) self._tables = [{} for x in range(arity)] # initialize indeces self.update(tuples) # populate indeces def getArity(self): return len(self.columnNames) arity = property(getArity) def __iter__(self): return chain(*self._tables[0].values()) def __str__(self): return str(set(iter(self))) def select(self, col, val): return self._tables[self.columnNumber(col)].get(val, set()) def project(self, tuples, *cols): cols = map(self.columnNumber, cols) return set(tuple(tup[i] for i in cols) for tup in tuples) def project1(self, tuples, col): col = self.columnNumber(col) return set(tup[col] for tup in tuples) def columnNumber(self, col): try: return self._columnName2columnNumber[col] except KeyError as ex: if isinstance(col, int): return col raise def update(self, tuples): for tup in tuples: self.addTuple(tup) def add(self, *tup): self.assertArity(tup) ## update all tables for val, tab in zip(tup, self._tables): tab.setdefault(val, set()).add(tup) def addTuple(self, tup): self.add(*tup) def remove(self, *tup): """Removes the tuple. Tuple must exist. """ self.assertArity(tup) ## remove from all tables for val, tab in zip(tup, self._tables): tab[val].remove(tup) def removeTuple(self, tup): self.remove(*tup) def assertArity(self, tup): if len(tup) != self.arity: raise ValueError("%d arguments given to an Index with arity %d." % (len(tup), self.arity) ) def reConcat(*res): """ tests: >>> reConcat('a', 'b') 'ab' >>> reConcat('a', EPSILON) 'a' """ return ''.join(re for re in res if re is not EPSILON) def reUnion(*res): """ tests: >>> reUnion('a', 'b') '(a|b)' >>> reUnion('a', EPSILON) '(a)' """ return '(' + '|'.join(re for re in res if re is not EPSILON) + ')' def reStar(re): """ tests: >>> reStar('a') '(a)*' >>> reStar(EPSILON) is EPSILON True """ if re is EPSILON: return EPSILON else: return '(%s)*' % re def inferArity(tuples): tup, rest = first(tuples) return len(tup), chain((tup,), rest) def first(seq): """Returns the first element, and an iterator with the remaining elements. If there is no first element, throw StopIteration. """ it = iter(seq) return it.next(), it