Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from nltk.stem import PorterStemmer
- from nltk.tokenize import word_tokenize
- from slugify import slugify
def get_root_words(text: str) -> str:
    """Return crude "root" forms of the words in *text*, separated by spaces.

    The text is normalised with ``slugify`` (space separator), split into
    tokens with ``word_tokenize`` and each token is Porter-stemmed. If a
    stem ends in one of ``a e i o u y`` that single trailing letter is
    dropped. The stems are joined with spaces and passed through
    ``slugify`` once more before being returned.

    Args:
        text: Arbitrary input text.

    Returns:
        The processed stems as one space-separated string.
    """
    # A tuple so it can be handed straight to str.endswith().
    vowels = ('a', 'e', 'i', 'o', 'u', 'y')
    stemmer = PorterStemmer()
    tokens = word_tokenize(slugify(text, separator=' '))

    stems = []
    for token in tokens:
        stem = stemmer.stem(token)
        # endswith() is safe on an empty stem, unlike indexing stem[-1].
        if stem.endswith(vowels):
            stem = stem[:-1]
        stems.append(stem)

    # Stems that were a lone vowel are now empty strings; the final
    # slugify pass tidies the joined string the same way the original did.
    return slugify(' '.join(stems), separator=' ')
- print(get_root_words("Toto, I have a feeling we're not in Kansas anymore."))
- # Expected output of the call above: tot hav feel w r not in kans anymor
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement