from itertools import chain
EPSILON = object()
class GNFA:
"""
tests:
>>> a = GNFA('a', ['ab1'], set('b'))
>>> a.pre('a')
set([])
>>> a.post('a')
set(['b'])
>>> a.pre('b')
set(['a'])
>>> a.post('b')
set([])
"""
def __init__(self, startState, transitions, finalStates):
## infer state set from transitions
self.states = set()
self.states.update(q for q, r, lab in transitions)
self.states.update(r for q, r, lab in transitions)
assert startState in self.states
self.startState = startState
self._labels = dict(((q, r), lab) for q, r, lab in transitions)
self.transitions = Index(transitions, 'start end label'.split())
assert finalStates <= self.states
self.finalStates = finalStates
def __str__(self):
return ("startState => %(startState)s\n"
"transitions => %(transitions)s\n"
"finalStates => %(finalStates)s" %
self.__dict__ )
def re(self):
"""
test:
>>> a = GNFA('a', ['ab1'], set('b'))
>>> a.re()
'1'
>>> import re
>>> a2 = GNFA('a', ['ab1', 'ba1', 'ac0', 'ca0', 'bd0', 'db0', 'cd1', 'dc1'], set('b'))
>>> eo = re.compile(a2.re() + '$')
>>> def binary(n):
... if n == 0: return '0'
... digits = []
... while n:
... if n % 2: digits.append('1')
... else: digits.append('0')
... n >>= 1
... return ''.join(reversed(digits))
>>> binary(0)
'0'
>>> binary(1)
'1'
>>> binary(2)
'10'
>>> binary(3)
'11'
>>> binary(4)
'100'
>>> binary(5)
'101'
>>> binary(6)
'110'
>>> binary(7)
'111'
>>> def eo_p(s): return s.count('1') % 2 == 1 and s.count('0') % 2 == 0
...
>>> for n in range(10000):
... s = binary(n)
... if eo_p(s) != bool(eo.match(s)):
... print n
... break
...
"""
normalized = self.normalized()
## rip states in self from normalized
for q in self.states:
normalized.irip(q)
## return single remaining label in normlized
res = list(normalized._labels.values())
assert len(res) == 1
return res[0]
def normalized(self):
startState = object() # construct a new start and end states
finalState = object()
## add transitions from new start state and to new end state
transitions = set(self.transitions)
transitions.add( (startState, self.startState, EPSILON) )
transitions.update((f, finalState, EPSILON) for f in self.finalStates)
return GNFA(startState, transitions, set([finalState]))
def irip(self, q):
"""
tests:
>>> a = GNFA('a', ['ab0', 'bc0'], set(['c']))
>>> a.irip('b')
>>> print a
startState => a
transitions => set([('a', 'c', '00')])
finalStates => set(['c'])
>>> a._labels
{('a', 'c'): '00'}
"""
pre = self.pre(q)
post = self.post(q)
selfLoop = self._labels.get( (q,q), EPSILON )
## add new new transitions corresponding to paths going through q
for p in pre:
for r in post:
lab = reConcat(self._labels[p,q],
reStar(selfLoop),
self._labels[q,r] )
self.addTransition(p, r, lab)
self.removeState(q)
def addTransition(self, q, r, lab):
"""
test:
>>> a = GNFA('a', ['ab1'], set('b'))
>>> a.addTransition('a', 'b', 'the world is ending')
'(1|the world is ending)'
>>> print a
startState => a
transitions => set([('a', 'b', '(1|the world is ending)')])
finalStates => set(['b'])
>>> a._labels
{('a', 'b'): '(1|the world is ending)'}
"""
if (q, r) in self._labels:
oldLab = self._labels[q,r]
newLab = reUnion(oldLab, lab) # union labels
self._labels[q,r] = newLab # update self._labels with new label
self.transitions.remove(q, r, oldLab) # update self.transitions with new transition
self.transitions.add(q, r, newLab)
return newLab
else:
self._labels[q,r] = lab
self.transitions.add(q, r, lab)
return lab
def removeState(self, q):
"""
tests:
>>> a = GNFA('a', ['ab1'], set('b'))
>>> a.removeState('b')
>>> print a
startState => a
transitions => set([])
finalStates => set([])
>>> a._labels
{}
"""
if q == self.startState:
raise ValueError('Cannot remove the start state.')
pre = self.pre(q)
for p in pre:
lab = self._labels[p,q]
self.transitions.remove(p, q, lab)
del self._labels[p,q]
post = self.post(q)
for r in post:
lab = self._labels[q,r]
self.transitions.remove(q, r, lab)
del self._labels[q,r]
if (q,q) in self._labels:
lab = self._labels[q,q]
self.transitions.remove(q, q, lab)
del self._labels[q,q]
self.finalStates.discard(q) # discard succeeds even if q does not exist
def pre(self, q):
"""Returns a set of predecessors.
Result does not include argument.
"""
return self.transitions.project1(
self.transitions.select(1, q),
'start' ) - set([q])
def post(self, q):
"""Returns a set of successors.
Result does not include argument.
"""
return self.transitions.project1(
self.transitions.select(0, q),
'end' ) - set([q])
class Index:
"""
tests:
>>> i = Index(['abc', 'adc', 'bbb'])
>>> i.select(0, 'crazy')
set([])
>>> i.select(1, 'crazy')
set([])
>>> i.select(2, 'crazy')
set([])
>>> i.select(0, 'a')
set([('a', 'b', 'c'), ('a', 'd', 'c')])
>>> i.select(0, 'b')
set([('b', 'b', 'b')])
>>> i.select(1, 'b')
set([('b', 'b', 'b'), ('a', 'b', 'c')])
>>> i.select(2, 'c')
set([('a', 'b', 'c'), ('a', 'd', 'c')])
>>> i.removeTuple('bbb')
>>> print i
set([('a', 'b', 'c'), ('a', 'd', 'c')])
"""
def __init__(self, tuples, columnNames=None):
## set self.columnNames and determine arity
if columnNames is None:
arity, tuples = inferArity(tuples)
self.columnNames = columnNames = range(arity)
else:
self.columnNames = columnNames
arity = len(columnNames)
## for determining column number
self._columnName2columnNumber = dict((name, n) for n, name in enumerate(columnNames))
self._tables = [{} for x in range(arity)] # initialize indeces
self.update(tuples) # populate indeces
def getArity(self): return len(self.columnNames)
arity = property(getArity)
def __iter__(self):
return chain(*self._tables[0].values())
def __str__(self):
return str(set(iter(self)))
def select(self, col, val):
return self._tables[self.columnNumber(col)].get(val, set())
def project(self, tuples, *cols):
cols = map(self.columnNumber, cols)
return set(tuple(tup[i] for i in cols) for tup in tuples)
def project1(self, tuples, col):
col = self.columnNumber(col)
return set(tup[col] for tup in tuples)
def columnNumber(self, col):
try:
return self._columnName2columnNumber[col]
except KeyError as ex:
if isinstance(col, int):
return col
raise
def update(self, tuples):
for tup in tuples:
self.addTuple(tup)
def add(self, *tup):
self.assertArity(tup)
## update all tables
for val, tab in zip(tup, self._tables):
tab.setdefault(val, set()).add(tup)
def addTuple(self, tup):
self.add(*tup)
def remove(self, *tup):
"""Removes the tuple.
Tuple must exist.
"""
self.assertArity(tup)
## remove from all tables
for val, tab in zip(tup, self._tables):
tab[val].remove(tup)
def removeTuple(self, tup):
self.remove(*tup)
def assertArity(self, tup):
if len(tup) != self.arity:
raise ValueError("%d arguments given to an Index with arity %d." %
(len(tup), self.arity) )
def reConcat(*res):
"""
tests:
>>> reConcat('a', 'b')
'ab'
>>> reConcat('a', EPSILON)
'a'
"""
return ''.join(re for re in res if re is not EPSILON)
def reUnion(*res):
"""
tests:
>>> reUnion('a', 'b')
'(a|b)'
>>> reUnion('a', EPSILON)
'(a)'
"""
return '(' + '|'.join(re for re in res if re is not EPSILON) + ')'
def reStar(re):
"""
tests:
>>> reStar('a')
'(a)*'
>>> reStar(EPSILON) is EPSILON
True
"""
if re is EPSILON:
return EPSILON
else:
return '(%s)*' % re
def inferArity(tuples):
tup, rest = first(tuples)
return len(tup), chain((tup,), rest)
def first(seq):
"""Returns the first element, and an iterator with the remaining elements.
If there is no first element, throw StopIteration.
"""
it = iter(seq)
return it.next(), it