# Imports assumed rather than shown in the paste: the client comes from the
# stanfordnlp package (its server module is now also distributed as
# stanza.server), and ParentedTree comes from NLTK; adjust to your install.
from stanfordnlp.server import CoreNLPClient
from nltk.tree import ParentedTree


def query_NLP_server(my_text, to_print=False):
    '''
    Query the NLP server to tokenize, tag and parse my_text, then post-process
    the result into a clean token list.
    :param my_text (string): The sentence from which to extract the tokens and their tags
    :param to_print (boolean): If True, print the tokens returned by the NLP server
    :return: my_tokens (list of lists of tuples): The (word, tag) tokens extracted from my_text
    '''

    # 1- Send the query to the NLP server
    with CoreNLPClient(annotators=['tokenize', 'ssplit', 'pos', 'parse'],
                       timeout=30000,
                       output_format="json",
                       properties={'tokenize.language': 'en'}
                       ) as client:
        ann = client.annotate(my_text)

    # 2- Process the server output into a clean token list
    output = ann['sentences'][0]['parse']
    tree = ParentedTree.fromstring(output)
    my_tokens = []
    try:
        # Select the subtree(s) whose first child is attached to a node
        # labelled 'ROOT', then collect the (word, tag) pairs of each child.
        for subtree in tree.subtrees(filter=lambda t: t[0].parent().label() == 'ROOT'):
            for subtree2 in subtree:
                my_tokens.append(subtree2.pos())
    except AttributeError:
        # The filter eventually reaches a leaf (a plain string with no
        # parent()); by then the ROOT subtree has already been collected,
        # so the exception simply marks the end of the traversal.
        pass
    if to_print:
        print('The tokens extracted from the NLP server are:\n', my_tokens, '\n')
    return my_tokens

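# A minimal usage sketch (not in the original paste): it assumes a local
# CoreNLP installation is reachable by the client (e.g. CORENLP_HOME is set)
# and uses an illustrative English sentence of my own choosing.
example_sentence = "The quick brown fox jumps over the lazy dog."
example_tokens = query_NLP_server(example_sentence, to_print=True)
# example_tokens holds one list per child of ROOT, each a list of
# (word, POS-tag) tuples such as ('The', 'DT'), ('quick', 'JJ'), ...
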
# Same query, but with the POS tagger and the parser pointed at the French
# model files shipped with CoreNLP (my_text is assumed to be defined above).
with CoreNLPClient(annotators=['tokenize', 'ssplit', 'pos', 'parse'],
                   timeout=30000,
                   output_format="json",
                   properties={'tokenize.language': 'en',
                               'pos.model': 'edu/stanford/nlp/models/pos-tagger/french/french-ud.tagger',
                               'parse.model': 'edu/stanford/nlp/models/lexparser/frenchFactored.ser.gz'}
                   ) as client:
    ann = client.annotate(my_text)
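
# The French annotation can be post-processed with the same tree traversal as
# in query_NLP_server; a short sketch (it assumes my_text above held a French
# sentence, which the paste does not show):
french_tree = ParentedTree.fromstring(ann['sentences'][0]['parse'])
french_tokens = [child.pos() for child in french_tree]  # one (word, tag) list per child of ROOT
print('The tokens extracted with the French models are:\n', french_tokens)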