Advertisement
dechicom

generative grammar and langage generation

Feb 9th, 2020
437
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 5.27 KB | None | 0 0
  1. #!/bin/python
  2. '''
  3. grammar generator
  4. '''
  5.  
  6. '''
  7. todo
  8. --check presence of nonterminal in  lr at in italisation
  9. -- add sort on self.ect_R element
  10.   sort by len on left part of the rules
  11. '''
  12. import string as ST
  13. from operator import itemgetter
  14. import warnings as WA
  15. class Gramgen(object):
  16.     '''
  17.    '''
  18.     def __init__(self,Vn,Vt,R,S,max_len):
  19.         self.Vn = Vn
  20.         self.Vt = Vt
  21.         self.R = R
  22.         self.Vn.sort()
  23.         self.Vt.sort()
  24.         self.R.sort()
  25.         self.d = dict()
  26.         self.L = list()
  27.         self.S = S
  28.         # this is used to mimit recursion when no terminal
  29.         # charaters appers after dirivation.
  30.         # it is  minimal nbr of terminal charater dirived from
  31.         # one nonterminal charater
  32.         # this an attempt to guess the length of the terminal string
  33.         # when no terminal charaters appers after mutiples derivations
  34.         self.max_len = max_len  # max lenght in the left side of rules
  35.         self.shringking_rules()
  36.     def shringking_rules(self):
  37.         for lr,rr in self.R:
  38.             l  = self.count_terminals(lr)
  39.             r  = self.count_terminals(rr)
  40.             if l > r:
  41.                 WA.warn("word length prediction not accurate due to shrinking rules")
  42.                 break
  43.        
  44.     def count_non_terminals(self,string):
  45.         n=0
  46.         for s in string:
  47.             if s in self.Vn:
  48.                 n+=1
  49.         return n
  50.     def count_terminals(self,string):
  51.         n=0
  52.         for s in string:
  53.             if s in self.Vt:
  54.                 n+=1
  55.         return n
  56.            
  57.     def non_terminal_pos(self,P,start=0):
  58.         post=0
  59.         nbr_Vn=0
  60.         for i in xrange(start,len(P)):
  61.             if P[i] in self.Vn:
  62.                 return i
  63.         return -1
  64.     def extend_R (self):
  65.         '''
  66.        copy the rules list and append information:
  67.        pos is the pos of the first non-terminal character
  68.        in lr (left part of the rule) or rr (right part of rule)
  69.        '''
  70.         self.ext_R=list()
  71.         for r in self.R:
  72.             lr,rr=r
  73.             lr_pos=self.non_terminal_pos(lr)
  74.             rr_pos=self.non_terminal_pos(rr)
  75.             lr_len=len(lr)
  76.             rr_len=len(rr)
  77.             self.ext_R.append((lr,lr_pos,rr,rr_pos,lr_len,rr_len))
  78.         #print self.ext_R
  79.     def order_R(self):
  80.         '''
  81.        dictionary:key is  lr_pos:
  82.        position of the firs non terminal character
  83.        in the left part  of the rules
  84.        data all  extend rules having  the same lr_pos
  85.        this is for optimisation:
  86.        avoiding to always start at the begining
  87.        of the word during derivation.
  88.        we have also to order the liste of rules
  89.        '''
  90.         self.lr_poskey=dict()
  91.         for ext_r in self.ext_R:
  92.             try:
  93.                 lr_pos=ext_r[1]
  94.                 ext_r_list=self.lr_poskey[lr_pos]
  95.             except (KeyError):
  96.                 self.lr_poskey[lr_pos]=[ext_r]
  97.                 continue
  98.             ext_r_list.append(ext_r)
  99.         for key, value in self.lr_poskey.iteritems():
  100.             value=sorted(value,key=itemgetter(4))
  101.             self.lr_poskey[key]=value
  102.  
  103.         print self.lr_poskey
  104.  
  105.     def gen (self):
  106.         '''
  107.        '''
  108.         self. extend_R()
  109.         self.order_R()
  110.         self.gen2(self.S,0)
  111.        
  112.     def gen2(self,P,nt_pos):
  113.         '''
  114.        '''
  115.         #print "!!! P,nt_pos-->",P,nt_pos
  116.         if nt_pos <0:
  117.             if len(P) <= self.max_len:
  118.                 #print "XXX apennd->",P
  119.                 self.L.append(P)
  120.                 return
  121.             else:
  122.                 return
  123.         elif nt_pos >=self.max_len :
  124.             return
  125.         derivation=False
  126.         for lr_pos,ext_r in self.lr_poskey.iteritems():
  127.             #print "----",lr_pos,ext_r
  128.             # no replacement possibl
  129.             start = nt_pos- lr_pos
  130.             if start  < 0 :
  131.                 continue
  132.             else:
  133.                 for er in ext_r:
  134.                     #print "===",er
  135.                     lr,lr_pos,rr,rr_pos,lr_len,rr_len=er
  136.                    
  137.                     end=start+len(lr)
  138.                     #print "start,end",start,end
  139.                     if end > len(P):
  140.                         print "P lr",P,lr
  141.                         break
  142.                     if P[start:end]==lr:
  143.                         derivation=True
  144.                         #print "    ooo: P,lr,rr,rr_pos ->",P,lr,rr,rr_pos
  145.                         newP = P[:start] + rr + P[end:]
  146.                         if rr_pos < 0:
  147.                             new_nt_pos = self.non_terminal_pos(newP,start+rr_len)
  148.                         else:
  149.                             new_nt_pos = start + rr_pos
  150.                         self.gen2(newP,new_nt_pos)
  151.         if  derivation==False:
  152.             raise Exception("Derivation impossible")
  153. if __name__ =="__main__":
  154.     '''
  155.    exemple with rules generating langage with Na(P)=Nb(P)
  156.    same number of a and b in each word of L
  157.    '''    
  158.     S="S"
  159.     Vn=["S","A","B"] # non terminal
  160.     Vt=["a","b"]     # terminal
  161.     print Vn,Vt
  162.     L=list()      
  163.     R=[("S","aB"),  ("A","a"),("A","aS"),("A","bAA"),
  164.        ("S","bA"),("B","b"),("B","bS"),("B","aBB")]
  165.     Lg=Gramgen(Vn,Vt,R,S,16)
  166.     Lg.gen()
  167.     print Lg.L
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement