Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import requests
- from spacy.lang.en import English
- from spacy.matcher import PhraseMatcher
- from spacy.tokens import Doc, Span, Token
class RESTCountriesComponent(object):
    """Callable pipeline component that marks known country names as entities.

    A ``PhraseMatcher`` is built once from the fixed list in
    ``self.countries``. Calling the component on a ``Doc`` wraps every match
    in a labelled ``Span``, appends those spans to ``doc.ents`` and then
    merges each span.
    """

    # Identifier of this component (presumably the name the pipeline
    # registers it under -- confirm against the spaCy version in use).
    name = 'countries'

    def __init__(self, nlp, label='GPE'):
        """Prepare the matcher.

        nlp:   language object; supplies the vocab and is called on each
               country name to build the match patterns.
        label: entity label as a string; stored as the value returned by
               ``nlp.vocab.strings[label]``.
        """
        self.countries = [u'MyCountry', u'MyOtherCountry']
        self.label = nlp.vocab.strings[label]
        self.matcher = PhraseMatcher(nlp.vocab)
        # One pattern per country name, produced by running the name
        # through ``nlp``.
        country_docs = [nlp(country) for country in self.countries]
        self.matcher.add('COUNTRIES', None, *country_docs)

    def __call__(self, doc):
        """Annotate ``doc`` with the matched countries and return it."""
        # Each match is a (match_id, start, end) triple; the id is unused.
        found = [
            Span(doc, start, end, label=self.label)
            for _match_id, start, end in self.matcher(doc)
        ]
        # Extend the existing entities rather than replacing them.
        doc.ents = list(doc.ents) + found
        # Merge only after all spans have been collected.
        # NOTE(review): Span.merge() is reported as deprecated in newer
        # spaCy 2.x releases in favour of Doc.retokenize() -- confirm the
        # target version before migrating.
        for entity in found:
            entity.merge()
        return doc
# Save a pipeline containing the custom 'countries' component, then load it
# back from disk.

# Imported here because the script calls spacy.load() below, and nothing
# earlier in the file brings `spacy` (or `Language`) into scope.
import spacy
from spacy.language import Language

# spacy.load() rebuilds every component named in the saved pipeline through
# Language.factories (spaCy v2 behaviour, see the "Saving and loading" docs).
# Without an entry for the custom component, loading fails because it has no
# factory for 'countries'. Register it before loading.
Language.factories[RESTCountriesComponent.name] = (
    lambda nlp, **cfg: RESTCountriesComponent(nlp, **cfg)
)

# Build a blank English pipeline and append the custom component.
nlp = English()
rest_countries = RESTCountriesComponent(nlp)
nlp.add_pipe(rest_countries)

# Round-trip through disk: write the pipeline to ./myNlp and reload it.
nlp.to_disk('myNlp')
nlp = spacy.load('myNlp')
Add Comment
Please, Sign In to add comment