Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import nltk
import spacy

# Load the small English pipeline once, at import time; the example document
# below is produced by it.
nlp = spacy.load('en_core_web_sm')

# Example sentence and its parsed document, used by the demo call at the end
# of the script.
sent = "The speed limit is 90 kilometres per hour on roads outside built-up areas."
doc = nlp(sent)
def sent_root(sent):
    """Return the first token that is its own head, together with its position.

    Args:
        sent: An iterable of tokens, each exposing a ``head`` attribute
            (this script passes a spaCy document).

    Returns:
        A ``(token, index)`` tuple for the first token whose ``head`` compares
        equal to the token itself, where ``index`` is the token's position in
        ``sent``. Returns ``None`` when no token qualifies (for example, when
        ``sent`` is empty), so callers must check before unpacking.
    """
    # Compare with ``==`` rather than ``is``: the original code relies on
    # token equality, not on object identity.
    return next(
        (
            (token, index)
            for index, token in enumerate(sent)
            if token.head == token
        ),
        None,
    )
def sent_subj(sent):
    """Return the first nominal-subject token, together with its position.

    Args:
        sent: An iterable of tokens, each exposing a ``dep_`` attribute
            (this script passes a spaCy document).

    Returns:
        A ``(token, index)`` tuple for the first token whose ``dep_`` is
        ``'nsubj'``, where ``index`` is the token's position in ``sent``.
        Returns ``None`` when no token carries that label, so callers must
        check before unpacking.
    """
    return next(
        (
            (token, index)
            for index, token in enumerate(sent)
            if token.dep_ == 'nsubj'
        ),
        None,
    )
def sent_child(token):
    """Build a space-separated phrase from a token's children and grandchildren.

    Each non-punctuation child contributes its text. A child whose ``dep_`` is
    ``'compound'`` is immediately followed by the text of ``token`` itself.
    After each child, the texts of that child's own non-punctuation children
    are appended.

    Args:
        token: An object exposing ``text`` and ``children``; each child must
            expose ``text``, ``dep_``, ``is_punct`` and ``children``, and each
            grandchild ``text`` and ``is_punct``.

    Returns:
        The collected texts, each followed by a single space (so a non-empty
        result ends with a trailing space), or ``''`` when nothing qualifies.
    """
    # NOTE(review): the text of ``token`` is only emitted next to a
    # ``compound`` child, so a subject without one is missing from the result
    # and one with several is repeated. Kept as is; confirm intended output.
    parts = []
    for child in token.children:
        if child.is_punct:
            # A punctuation child contributes nothing, and neither does
            # anything attached to it.
            continue
        parts.append(child.text)
        if child.dep_ == 'compound':
            parts.append(token.text)
        # Filter on the grandchild itself; the previous check re-tested the
        # parent, so punctuation grandchildren were never skipped.
        parts.extend(
            grandchild.text
            for grandchild in child.children
            if not grandchild.is_punct
        )
    return ''.join(part + ' ' for part in parts)
def doc_ents_root(sent, root):
    """Join the root token's text with the text of every entity in ``sent``.

    Args:
        sent: An object exposing an iterable ``ents`` attribute whose items
            have a ``text`` attribute (this script passes a spaCy document).
        root: An object exposing ``text``; its text is placed first.

    Returns:
        ``root.text`` followed by each entity text in iteration order, every
        piece followed by a single space (the result always ends with a
        trailing space).
    """
    pieces = [root.text]
    pieces.extend(entity.text for entity in sent.ents)
    return ''.join(piece + ' ' for piece in pieces)
def action(sent):
    """Summarise a parsed sentence as "<subject phrase><root> <entities>".

    The subject phrase comes from ``sent_child`` applied to the first
    ``nsubj`` token; the remainder comes from ``doc_ents_root`` applied to the
    sentence and its root token.

    Args:
        sent: The parsed sentence (this script passes a spaCy document).

    Returns:
        The subject phrase concatenated with the root/entity phrase. When the
        sentence has no ``nsubj`` token the subject phrase is empty; when no
        root token is found at all, ``''`` is returned.
    """
    # Obtaining the sentence root. The helpers return None when nothing
    # matches, so check before unpacking instead of raising TypeError.
    root_match = sent_root(sent)
    if root_match is None:
        return ''
    root, _ = root_match

    # Obtaining the subject; sentences without an nsubj token simply
    # contribute no subject phrase.
    subj_match = sent_subj(sent)
    if subj_match is None:
        complete_subj = ''
    else:
        # Obtaining the whole subject (subj + comps)
        complete_subj = sent_child(subj_match[0])

    complete_ents = doc_ents_root(sent, root)
    return complete_subj + complete_ents
# Run the extraction on the example document; the returned phrase is neither
# stored nor printed here.
action(doc)
Add Comment
Please sign in to add a comment.