Advertisement
Not a member of Pastebin yet?
Sign up —
it unlocks many cool features!
# NOTE: `nltk.tokenize.moses` was removed from NLTK (3.3+); MosesDetokenizer now
# lives in the third-party `sacremoses` package. NLTK's own
# TreebankWordDetokenizer is used here as the in-tree replacement.
import nltk
from nltk.tokenize.treebank import TreebankWordDetokenizer

my_dict = {'abc': 'aBc'}  # put your special dict here


def replace_words(raw_sentence, mapping=None):
    """Replace words of *raw_sentence* according to *mapping*.

    Each token is looked up by its lowercase form in *mapping*
    (defaults to the module-level ``my_dict``); tokens with no entry
    are kept unchanged. Returns the rebuilt sentence as one string.
    """
    if mapping is None:
        mapping = my_dict
    # split the sentence in words
    words = nltk.word_tokenize(raw_sentence)
    # replace those that map in the dict (case-insensitive lookup)
    new_words = [mapping.get(word.lower(), word) for word in words]
    # now we construct it back to a sentence
    return TreebankWordDetokenizer().detokenize(new_words)
Advertisement
Add Comment
Please sign in to add a comment
Advertisement