Share Pastebin
Guest
Public paste!

Untitled

By: a guest | Feb 9th, 2010 | Syntax: Python | Size: 7.77 KB | Hits: 48 | Expires: Never
Copy text to clipboard
  1. #!/usr/bin/env python
  2.  
  3. """
  4. FSA module
  5.  
  6. This module implements simple deterministic and non-deterministic finite
  7. state automata. It follows the description in Chapter 2 of Jurafsky and
  8. Martin (2000) fairly closely.
  9.  
  10. @author: Rob Malouf
  11. @organization: Dept. of Linguistics, San Diego State University
  12. @contact: rmalouf@mail.sdsu.edu
  13. @version: 1
  14. @since: 30-January-2008
  15. """
  16.  
  17. class FSA(object):
  18.  
  19.     """
  20.    General finite-state automaton class
  21.    
  22.    An FSA consists of:
  23.      - an alphabet (C{self.alphabet})
  24.      - a state-transition table (C{self.transition_table})
  25.      - an initial state (C{self.initial})
  26.      - a set of final states (C{self.final})
  27.    """
  28.  
  29.     def __init__(self,T,finals,initial=0):
  30.  
  31.         """
  32.        Create a new FSA object
  33.                
  34.        @param T: the transition function
  35.        @type T: a C{list} of C{dict}s
  36.        @param finals: final states
  37.        @type finals: C{list}
  38.        @param initial: initial state (by default, 0)
  39.        @type initial: C{int}
  40.  
  41.        @raise ValueError: the FSA is mis-specified somehow
  42.        """
  43.  
  44.         # transition function
  45.         self.transition_table = T
  46.         for state in self.transition_table:
  47.             for symbol in state:
  48.                 # the value of the transition function should be a list of states
  49.                 if not hasattr(state[symbol],'__getitem__'):
  50.                     state[symbol] = [ state[symbol] ]                  
  51.                 for newState in state[symbol]:
  52.                     try:
  53.                         self.transition_table[newState]
  54.                     except:
  55.                         raise ValueError,'%s not a valid state'%newState
  56.  
  57.         # initial state
  58.         self.initial = initial
  59.         try:
  60.             self.transition_table[self.initial]
  61.         except:
  62.             raise ValueError,'%s not a valid initial state'%initial
  63.        
  64.         # final states
  65.         try:
  66.             self.finals = set(finals)
  67.             for final in self.finals:
  68.                 self.transition_table[final]
  69.         except:
  70.             raise ValueError,'%s not valid final states'%finals
  71.  
  72.         # alphabet
  73.         self.alphabet = set()
  74.         for state in self.transition_table:
  75.             self.alphabet.update(state.keys())
  76.  
  77.     def __repr__(self):
  78.         """Produce a string representation of an FSA (e.g., for printing)."""
  79.         result = '{ %s\n' % (self.__class__.__name__)
  80.         result += '  Alphabet : %s\n'%self.alphabet
  81.         for index in xrange(len(self.transition_table)):
  82.             result += '  %s:' % index
  83.             if index == self.initial:
  84.                 result += ' initial'
  85.             if index in self.finals:
  86.                 result += ' final'
  87.             result += '\n'
  88.             for key in self.transition_table[index]:
  89.                 result += '    %s => %s\n'%(key,self.transition_table[index][key])
  90.         result += '}'
  91.         return result
  92.  
  93.  
  94.     def visit(self,trace,index,state):
  95.         """Trace: visit a node in an FSA"""
  96.         if trace:
  97.             print 'Obs %d at state %d'%(index,state)
  98.  
  99.     def accept(self,trace):
  100.         """Trace: FSA accepts a tape"""
  101.         if trace:
  102.             print 'ACCEPT'
  103.  
  104.     def reject(self,trace,reason=None):
  105.         """Trace: FSA rejects a tape"""
  106.         if trace:
  107.             if reason:
  108.                 print 'REJECT (%s)'%reason
  109.             else:
  110.                 print 'REJECT'
  111.  
  112.    
  113. class DFSA(FSA):
  114.     """Deterministic FSA class"""
  115.  
  116.     def __init__(self,*args,**kwargs):
  117.  
  118.         # create FSA
  119.         super(DFSA,self).__init__(*args,**kwargs)
  120.        
  121.         # verify that it is deterministic
  122.         if '' in self.alphabet:
  123.             raise ValueError,'epsilon transitions not allowed in DFSAs'
  124.         for state in self.transition_table:
  125.             for newStates in state.values():
  126.                 if len(newStates) > 1:
  127.                     raise ValueError,'non-deterministic transitions not allowed in DFSAs'
  128.  
  129.     def recognize(self,tape,trace=False):
  130.         """verify whether an input tape is accepted by the FSA (see fig. 2.12)"""
  131.  
  132.         index = 0
  133.         current_state = self.initial
  134.  
  135.         while True:
  136.  
  137.             self.visit(trace,index,current_state)
  138.  
  139.             if index == len(tape):
  140.  
  141.                 # end of input tape
  142.                 if current_state in self.finals:
  143.                     self.accept(trace)
  144.                     return True
  145.                 else:
  146.                     self.reject(trace,'%d is not an accepting state'%current_state)
  147.                     return False
  148.  
  149.             else:
  150.  
  151.                 if tape[index] in self.transition_table[current_state]:
  152.                     # follow transition to next state                  
  153.                     current_state = self.transition_table[current_state][tape[index]][0]
  154.                     index += 1
  155.                 else:
  156.                     # no transition!
  157.                     self.reject(trace,'no transition for "%s" at state %d'%(tape[index],current_state))
  158.                     return False
  159.  
  160. class NFSA(FSA):
  161.     """Non-deterministic FSA class"""
  162.  
  163.     def recognize(self,tape,trace=False):
  164.         """verify whether an input tape is accepted by the FSA (see fig. 2.19)"""
  165.  
  166.         agenda = [(self.initial,0)]
  167.  
  168.         while agenda:
  169.        
  170.             (state,index) = agenda.pop()
  171.             self.visit(trace,index,state)
  172.  
  173.             if index == len(tape):  
  174.  
  175.                 # end of input tape
  176.                 if state in self.finals:
  177.                     self.accept(trace)
  178.                     return True
  179.                 else:
  180.                     self.reject(trace,'%d is not an accepting state'%state)
  181.                     return False
  182.  
  183.             else:
  184.    
  185.                 # add new agenda items
  186.                 if tape[index] in self.transition_table[state]:
  187.                     for newState in self.transition_table[state][tape[index]]:
  188.                         agenda.append((newState,index+1))
  189.                 if '' in self.transition_table[state]:
  190.                     for newState in self.transition_table[state]['']:
  191.                         agenda.append((newState,index))
  192.  
  193.         self.reject(trace,'no more agenda items')
  194.         return False
  195.  
  196. ### some examples
  197.  
  198. # The sheep language machine (Fig 2.10, J & M)  
  199. fsa1 = DFSA( [ { 'f':1 },            # state 0  b => 1
  200.                { 'a':2 },            # state 1  a => 2
  201.                { 'a':3 },            # state 2  a => 3
  202.                { 'a':3, '!':4},      # state 3  a => 3,  ! => 4
  203.                {  } ],               # state 4  undef for everything.
  204.              [ 4 ])
  205. """@var: An example DFSA"""
  206.  
  207. # Fig. 2.17
  208. fsa2 = NFSA( [ { 'b':1 },
  209.                { 'a':2 },
  210.                { 'a':[ 2, 3 ] },
  211.                { '!':4 },
  212.                { } ],
  213.              [ 4 ] )
  214. """@var: An example NFSA"""
  215.  
  216. # Fig. 2.18
  217. fsa3 = NFSA( [ { 'b':1 },
  218.                { 'a':2 },
  219.                { 'a':3 },
  220.                { '' :2,
  221.                  '!':4 },
  222.                { } ],
  223.              [ 4 ] )
  224. """@var: An example NFSA with epsilon transitions"""
  225. fsatest = DFSA( [ { 'one', 'two','three','four','five','six','seven','eight','nine','ten','eleven','twelve','mid-': 1,'a': 3},
  226.                                   { 'day', 'night','o`clock': 2 },
  227.                                   { 'quarter to', 'quarter after' : 3 },
  228.                                   { 'one', 'two','three','four','five','six','seven','eight','nine','ten','eleven','twelve': 4},
  229.                                   { } ],
  230.                                 [ 2, 4 ] )
  231.                                
  232.                                
  233. fsatest = NFSA( [ { 'one', 'two','three','four','five','six','seven','eight','nine','ten','eleven','twelve','mid-','a':[ 1, 3 ] },
  234.                                   { 'day', 'night','o`clock': 2 },
  235.                                   { 'quarter to', 'quarter after' : 3 },
  236.                                   { 'one', 'two','three','four','five','six','seven','eight','nine','ten','eleven','twelve':4 },
  237.                                   { } ],
  238.                                   [ 2, 4] )