Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def query_NLP_server(my_text, to_print=False):
    '''
    Query the CoreNLP server to tokenize, tag and parse my_text, and return
    the (word, tag) pairs of the first sentence.

    Only the first sentence of the server answer is processed. Its parse tree
    is expected to have a top node labelled 'ROOT'; one entry is produced for
    each direct child of that node.

    :param my_text (string): The sentence we want to extract the tokens and the tags from
    :param to_print (boolean): Option to print the resulting tokens extracted from the NLP server
    :return: my_tokens (list of list of tuples): One list of (word, tag) tuples per direct
             child of the ROOT node; empty when the server returned no sentence or the
             tree is not rooted at 'ROOT'
    '''
    # 1- Send the query to the NLP server
    with CoreNLPClient(
        annotators=['tokenize', 'ssplit', 'pos', 'parse'],
        timeout=30000,
        output_format="json",
        properties={'tokenize.language': 'en'},
    ) as client:
        ann = client.annotate(my_text)

    # 2- Process the output of the NLP server to build the token list
    my_tokens = []
    sentences = ann['sentences']
    if sentences:
        tree = ParentedTree.fromstring(sentences[0]['parse'])
        # The previous implementation walked every subtree and relied on the
        # filter raising on the first leaf to stop. Reading the children of
        # ROOT directly gives the same tokens without using an exception as
        # control flow.
        if tree.label() == 'ROOT':
            # Skip bare string leaves: only subtrees have a pos() method.
            my_tokens = [child.pos() for child in tree
                         if isinstance(child, ParentedTree)]

    if to_print:
        print('The tokens extracted from NLP Server are :\n', my_tokens, '\n')
    return my_tokens
# Variant of the CoreNLP request that overrides the POS tagger and the parser
# with the French models shipped in the CoreNLP model jars.
# NOTE(review): 'tokenize.language' is still 'en' while both models are
# French - confirm this is intended.
# `my_text` is not defined in this snippet; it must already be bound by the
# surrounding code.
with CoreNLPClient(
    properties={
        'tokenize.language': 'en',
        'pos.model': 'edu/stanford/nlp/models/pos-tagger/french/french-ud.tagger',
        'parse.model': 'edu/stanford/nlp/models/lexparser/frenchFactored.ser.gz',
    },
    annotators=['tokenize', 'ssplit', 'pos', 'parse'],
    output_format="json",
    timeout=30000,
) as client:
    ann = client.annotate(my_text)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement