Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/bin/python
- '''
- grammar generator
- '''
- '''
- todo
- --check presence of nonterminal in lr at in italisation
- -- add sort on self.ect_R element
- sort by len on left part of the rules
- '''
- import string as ST
- from operator import itemgetter
- import warnings as WA
- class Gramgen(object):
- '''
- '''
- def __init__(self,Vn,Vt,R,S,max_len):
- self.Vn = Vn
- self.Vt = Vt
- self.R = R
- self.Vn.sort()
- self.Vt.sort()
- self.R.sort()
- self.d = dict()
- self.L = list()
- self.S = S
- # this is used to mimit recursion when no terminal
- # charaters appers after dirivation.
- # it is minimal nbr of terminal charater dirived from
- # one nonterminal charater
- # this an attempt to guess the length of the terminal string
- # when no terminal charaters appers after mutiples derivations
- self.max_len = max_len # max lenght in the left side of rules
- self.shringking_rules()
- def shringking_rules(self):
- for lr,rr in self.R:
- l = self.count_terminals(lr)
- r = self.count_terminals(rr)
- if l > r:
- WA.warn("word length prediction not accurate due to shrinking rules")
- break
- def count_non_terminals(self,string):
- n=0
- for s in string:
- if s in self.Vn:
- n+=1
- return n
- def count_terminals(self,string):
- n=0
- for s in string:
- if s in self.Vt:
- n+=1
- return n
- def non_terminal_pos(self,P,start=0):
- post=0
- nbr_Vn=0
- for i in xrange(start,len(P)):
- if P[i] in self.Vn:
- return i
- return -1
- def extend_R (self):
- '''
- copy the rules list and append information:
- pos is the pos of the first non-terminal character
- in lr (left part of the rule) or rr (right part of rule)
- '''
- self.ext_R=list()
- for r in self.R:
- lr,rr=r
- lr_pos=self.non_terminal_pos(lr)
- rr_pos=self.non_terminal_pos(rr)
- lr_len=len(lr)
- rr_len=len(rr)
- self.ext_R.append((lr,lr_pos,rr,rr_pos,lr_len,rr_len))
- #print self.ext_R
- def order_R(self):
- '''
- dictionary:key is lr_pos:
- position of the firs non terminal character
- in the left part of the rules
- data all extend rules having the same lr_pos
- this is for optimisation:
- avoiding to always start at the begining
- of the word during derivation.
- we have also to order the liste of rules
- '''
- self.lr_poskey=dict()
- for ext_r in self.ext_R:
- try:
- lr_pos=ext_r[1]
- ext_r_list=self.lr_poskey[lr_pos]
- except (KeyError):
- self.lr_poskey[lr_pos]=[ext_r]
- continue
- ext_r_list.append(ext_r)
- for key, value in self.lr_poskey.iteritems():
- value=sorted(value,key=itemgetter(4))
- self.lr_poskey[key]=value
- print self.lr_poskey
- def gen (self):
- '''
- '''
- self. extend_R()
- self.order_R()
- self.gen2(self.S,0)
- def gen2(self,P,nt_pos):
- '''
- '''
- #print "!!! P,nt_pos-->",P,nt_pos
- if nt_pos <0:
- if len(P) <= self.max_len:
- #print "XXX apennd->",P
- self.L.append(P)
- return
- else:
- return
- elif nt_pos >=self.max_len :
- return
- derivation=False
- for lr_pos,ext_r in self.lr_poskey.iteritems():
- #print "----",lr_pos,ext_r
- # no replacement possibl
- start = nt_pos- lr_pos
- if start < 0 :
- continue
- else:
- for er in ext_r:
- #print "===",er
- lr,lr_pos,rr,rr_pos,lr_len,rr_len=er
- end=start+len(lr)
- #print "start,end",start,end
- if end > len(P):
- print "P lr",P,lr
- break
- if P[start:end]==lr:
- derivation=True
- #print " ooo: P,lr,rr,rr_pos ->",P,lr,rr,rr_pos
- newP = P[:start] + rr + P[end:]
- if rr_pos < 0:
- new_nt_pos = self.non_terminal_pos(newP,start+rr_len)
- else:
- new_nt_pos = start + rr_pos
- self.gen2(newP,new_nt_pos)
- if derivation==False:
- raise Exception("Derivation impossible")
- if __name__ =="__main__":
- '''
- exemple with rules generating langage with Na(P)=Nb(P)
- same number of a and b in each word of L
- '''
- S="S"
- Vn=["S","A","B"] # non terminal
- Vt=["a","b"] # terminal
- print Vn,Vt
- L=list()
- R=[("S","aB"), ("A","a"),("A","aS"),("A","bAA"),
- ("S","bA"),("B","b"),("B","bS"),("B","aBB")]
- Lg=Gramgen(Vn,Vt,R,S,16)
- Lg.gen()
- print Lg.L
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement