Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/bin/python
- '''
- Egramgen.py
- grammar generator
- '''
- import string as ST
- from operator import itemgetter
- import warnings as WA
- class Gramgen(object):
- '''
- '''
- def __init__(self,Vn,Vt,R,S,max_len):
- self.Vn = Vn
- self.Vt = Vt
- self.R = R
- self.Vn.sort()
- self.Vt.sort()
- self.R.sort()
- self.d = dict()
- self.L = list()
- self.S = S
- self.shringking_rules()
- self.max_len=max_len
- def shringking_rules(self):
- for lr,rr in self.R:
- l = self.count_terminals(lr)
- r = self.count_terminals(rr)
- if l > r:
- WA.warn("word length prediction not accurate due to shrinking rules")
- break
- def count_non_terminals(self,string):
- n=0
- for s in string:
- if s in self.Vn:
- n+=1
- return n
- def count_terminals(self,string):
- n=0
- for s in string:
- if s in self.Vt:
- n+=1
- return n
- def non_terminal_pos(self,Q,start=0):
- post=0
- nbr_Vn=0
- P=Q[0]
- for i in xrange(start,len(P)):
- if P[i] in self.Vn:
- return i
- return -1
- def extend_R (self):
- '''
- copy the rules list and append information:
- pos is the pos of the first non-terminal character
- in lr (left part of the rule) or rr (right part of rule)
- '''
- self.ext_R=list()
- for r in self.R:
- lr,rr=r
- lr_pos=self.non_terminal_pos([lr])
- rr_pos=self.non_terminal_pos([rr])
- lr_len=len(lr)
- rr_len=len(rr)
- self.ext_R.append((lr,lr_pos,rr,rr_pos,lr_len,rr_len))
- def order_R(self):
- '''
- '''
- self.lr_poskey=dict()
- for ext_r in self.ext_R:
- try:
- lr_pos=ext_r[1]
- ext_r_list=self.lr_poskey[lr_pos]
- except (KeyError):
- self.lr_poskey[lr_pos]=[ext_r]
- continue
- ext_r_list.append(ext_r)
- for key, value in self.lr_poskey.iteritems():
- value=sorted(value,key=itemgetter(4))
- self.lr_poskey[key]=value
- def order_R2(self):
- '''
- we want to produce 3 imbricated dictionaries
- self.lr_pos_dic={lr_pos:{lr_len:{lr:[[rr1,rrpos1],[rr2,rrpos2]]}},.....}
- '''
- self.lr_pos_dic=dict()
- for ext_r in self.ext_R:
- lr,lr_pos,rr,rr_pos,lr_len,rr_len=ext_r
- if lr_pos in self.lr_pos_dic:
- lr_len_dic=self.lr_pos_dic[lr_pos]
- else:
- self.lr_pos_dic[lr_pos]=lr_len_dic=dict()
- if lr_len in lr_len_dic:
- lr_dic=lr_len_dic[lr_len]
- else:
- lr_len_dic[lr_len]=lr_dic=dict()
- if lr in lr_dic:
- lr_dic[lr].append([rr,rr_pos])
- else:
- lr_dic[lr]=[[rr,rr_pos],]
- for key, value in self.lr_pos_dic.iteritems():
- value=list(value.iteritems())
- value=sorted(value,key=itemgetter(0))
- self.lr_pos_dic[key]=value
- def gen (self):
- '''
- '''
- self. extend_R()
- self.order_R()
- self.order_R2()
- self.gen2D(self.S,0)
- self.L=list(set(self.L))
- def gen2D(self,P,nt_pos):
- '''
- this is a more optimized algorithm that should work better
- when there are a lot of rules. need further tests
- '''
- print P
- if nt_pos <0:
- if len(P) <= self.max_len:
- self.L.append(P)
- return
- else:
- return
- elif len(P) > self.max_len :
- return
- derivation=False
- for lr_pos,lr_len_list in self.lr_pos_dic.iteritems():
- start = max(0,nt_pos- lr_pos)
- for lr_len,lr_dic in lr_len_list:
- for start2 in xrange(start,len(P)):
- end=start2+lr_len
- if end > len(P):
- break
- try:
- rr_list=lr_dic[P[start2:end]]
- except:
- continue
- for rr,rr_pos in rr_list:
- derivation=True
- newP = P[:start2] + rr + P[end:]
- new_nt_pos = self.non_terminal_pos([newP],start)
- self.gen2D(newP,new_nt_pos)
- if derivation==False:
- raise Exception("Derivation impossible:%s" %(P))
- #return
- if __name__ =="__main__":
- '''
- exemple with rules generating langage with Na(P)=Nb(P)
- same number of a and b in each word of L
- '''
- S="S"
- Vn=["S","A","B"] # non terminal
- Vt=["a","b"] # terminal
- R=[("S","aB"), ("A","a"),("A","aS"),("A","bAA"),
- ("S","bA"),("B","b"),("B","bS"),("B","aBB")]
- Lg=Gramgen(Vn,Vt,R,S,8)
- Lg.gen()
- print Lg.L
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement