Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import spacy

# Use the English model. spaCy 3 removed shortcut links such as 'en' and
# spacy.load raises OSError for them, so fall back to the full package name.
try:
    spacy_en = spacy.load('en')
except OSError:
    spacy_en = spacy.load('en_core_web_sm')


# create a tokenizer function
def tokenizer(text):
    """Split *text* into a list of token strings with spaCy's English tokenizer.

    Every literal "<br />" in the input is replaced with a single space
    before tokenizing, so the HTML line-break tag does not end up in the
    tokens.

    Args:
        text: The raw string to tokenize.

    Returns:
        A list containing the text of each token, in order.
    """
    text = text.replace("<br />", " ")
    return [tok.text for tok in spacy_en.tokenizer(text)]
Add Comment
Please, Sign In to add comment