Guest User

Untitled

a guest
Jul 18th, 2018
82
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.96 KB | None | 0 0
import requests
import spacy
from spacy.lang.en import English
from spacy.language import Language
from spacy.matcher import PhraseMatcher
from spacy.tokens import Doc, Span, Token
  5.  
  6. class RESTCountriesComponent(object):
  7. name = 'countries'
  8. def __init__(self, nlp, label='GPE'):
  9. self.countries = [u'MyCountry', u'MyOtherCountry']
  10. self.label = nlp.vocab.strings[label]
  11. patterns = [nlp(c) for c in self.countries]
  12. self.matcher = PhraseMatcher(nlp.vocab)
  13. self.matcher.add('COUNTRIES', None, *patterns)
  14. def __call__(self, doc):
  15. matches = self.matcher(doc)
  16. spans = []
  17. for _, start, end in matches:
  18. entity = Span(doc, start, end, label=self.label)
  19. spans.append(entity)
  20. doc.ents = list(doc.ents) + spans
  21. for span in spans:
  22. span.merge()
  23. return doc
  24.  
  25. nlp = English()
  26. rest_countries = RESTCountriesComponent(nlp)
  27. nlp.add_pipe(rest_countries)
  28. nlp.to_disk('myNlp')
  29.  
  30. nlp = spacy.load('myNlp')
Add Comment
Please, Sign In to add comment