generative grammar and langage generation

#!/bin/python
'''
grammar generator
'''

'''
todo
--check presence of nonterminal in  lr at in italisation
-- add sort on self.ect_R element
   sort by len on left part of the rules
'''
import string as ST
from operator import itemgetter
import warnings as WA
class Gramgen(object):
    '''
    '''
    def __init__(self,Vn,Vt,R,S,max_len):
        self.Vn = Vn
        self.Vt = Vt
        self.R = R
        self.Vn.sort()
        self.Vt.sort()
        self.R.sort()
        self.d = dict()
        self.L = list()
        self.S = S
        # this is used to mimit recursion when no terminal
        # charaters appers after dirivation.
        # it is  minimal nbr of terminal charater dirived from
        # one nonterminal charater
        # this an attempt to guess the length of the terminal string
        # when no terminal charaters appers after mutiples derivations
        self.max_len = max_len  # max lenght in the left side of rules
        self.shringking_rules()
    def shringking_rules(self):
        for lr,rr in self.R:
            l  = self.count_terminals(lr)
            r  = self.count_terminals(rr)
            if l > r:
                WA.warn("word length prediction not accurate due to shrinking rules")
                break

    def count_non_terminals(self,string):
        n=0
        for s in string:
            if s in self.Vn:
                n+=1
        return n
    def count_terminals(self,string):
        n=0
        for s in string:
            if s in self.Vt:
                n+=1
        return n

    def non_terminal_pos(self,P,start=0):
        post=0
        nbr_Vn=0
        for i in xrange(start,len(P)):
            if P[i] in self.Vn:
                return i
        return -1
    def extend_R (self):
        '''
        copy the rules list and append information:
        pos is the pos of the first non-terminal character
        in lr (left part of the rule) or rr (right part of rule)
        '''
        self.ext_R=list()
        for r in self.R:
            lr,rr=r
            lr_pos=self.non_terminal_pos(lr)
            rr_pos=self.non_terminal_pos(rr)
            lr_len=len(lr)
            rr_len=len(rr)
            self.ext_R.append((lr,lr_pos,rr,rr_pos,lr_len,rr_len))
        #print self.ext_R
    def order_R(self):
        '''
        dictionary:key is  lr_pos:
        position of the firs non terminal character
        in the left part  of the rules
        data all  extend rules having  the same lr_pos
        this is for optimisation:
        avoiding to always start at the begining
        of the word during derivation.
        we have also to order the liste of rules
        '''
        self.lr_poskey=dict()
        for ext_r in self.ext_R:
            try:
                lr_pos=ext_r[1]
                ext_r_list=self.lr_poskey[lr_pos]
            except (KeyError):
                self.lr_poskey[lr_pos]=[ext_r]
                continue
            ext_r_list.append(ext_r)
        for key, value in self.lr_poskey.iteritems():
            value=sorted(value,key=itemgetter(4))
            self.lr_poskey[key]=value

        print self.lr_poskey

    def gen (self):
        '''
        '''
        self. extend_R()
        self.order_R()
        self.gen2(self.S,0)

    def gen2(self,P,nt_pos):
        '''
        '''
        #print "!!! P,nt_pos-->",P,nt_pos
        if nt_pos <0:
            if len(P) <= self.max_len:
                #print "XXX apennd->",P
                self.L.append(P)
                return
            else:
                return
        elif nt_pos >=self.max_len :
            return
        derivation=False
        for lr_pos,ext_r in self.lr_poskey.iteritems():
            #print "----",lr_pos,ext_r
            # no replacement possibl
            start = nt_pos- lr_pos
            if start  < 0 :
                continue
            else:
                for er in ext_r:
                    #print "===",er
                    lr,lr_pos,rr,rr_pos,lr_len,rr_len=er

                    end=start+len(lr)
                    #print "start,end",start,end
                    if end > len(P):
                        print "P lr",P,lr
                        break
                    if P[start:end]==lr:
                        derivation=True
                        #print "    ooo: P,lr,rr,rr_pos ->",P,lr,rr,rr_pos
                        newP = P[:start] + rr + P[end:]
                        if rr_pos < 0:
                            new_nt_pos = self.non_terminal_pos(newP,start+rr_len)
                        else:
                            new_nt_pos = start + rr_pos
                        self.gen2(newP,new_nt_pos)
        if  derivation==False:
            raise Exception("Derivation impossible")
if __name__ =="__main__":
    '''
    exemple with rules generating langage with Na(P)=Nb(P)
    same number of a and b in each word of L
    '''
    S="S"
    Vn=["S","A","B"] # non terminal
    Vt=["a","b"]     # terminal
    print Vn,Vt
    L=list()
    R=[("S","aB"),  ("A","a"),("A","aS"),("A","bAA"),
       ("S","bA"),("B","b"),("B","bS"),("B","aBB")]
    Lg=Gramgen(Vn,Vt,R,S,16)
    Lg.gen()
    print Lg.L