Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Column indices of the ten tab-separated fields of a token line
# (the names follow the CoNLL-U field names).
ID, FORM, LEMMA, UPOS, XPOS, FEATS, HEAD, DEPREL, DEPS, MISC = range(10)


def read_conll(inp, max_sent=0, drop_tokens=True):
    """Yield ``(sentence, comments)`` pairs read from CoNLL-style lines.

    Args:
        inp: Iterable of text lines (an open file, a list of strings, ...).
        max_sent: Stop after this many sentences; ``0`` (or any value
            that is not positive) means no limit.
        drop_tokens: When true, skip every token line whose ID column
            contains ``"-"`` (e.g. range lines such as ``1-2``).

    Yields:
        Tuples ``(sent, comments)``: ``sent`` is a list of token lines,
        each split on tabs into a list of column strings; ``comments``
        is the list of stripped ``#`` lines collected for that sentence.

    Sentences are delimited by blank (whitespace-only) lines. A final
    sentence that is not followed by a blank line is still yielded.
    """
    comments = []
    sent = []
    yielded = 0
    for line in inp:
        line = line.strip()
        if line.startswith("#"):
            comments.append(line)
        elif not line:
            # Blank line: sentence boundary. Consecutive blank lines are
            # ignored, and comments seen so far stay pending until a
            # sentence with at least one token is emitted.
            if sent:
                yield sent, comments
                yielded += 1
                if max_sent > 0 and yielded == max_sent:
                    # Limit reached: stop without the end-of-input flush.
                    return
                sent, comments = [], []
        else:
            cols = line.split("\t")
            if drop_tokens and "-" in cols[ID]:
                continue
            sent.append(cols)
    # Input exhausted: flush a last sentence that had no trailing blank line.
    if sent:
        yield sent, comments
Add Comment
Please, Sign In to add comment