Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import nltk
from nltk.corpus import wordnet, stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk import pos_tag

# Fetch every NLTK resource the script relies on, in the same order as the
# original one-call-per-resource version (tokenizer models, tagger models,
# WordNet data, and stopword lists).
for resource in (
    'punkt',
    'averaged_perceptron_tagger',
    'wordnet',
    'omw-1.4',
    'stopwords',
    'averaged_perceptron_tagger_eng',
):
    nltk.download(resource)

# Single shared lemmatizer instance used by the rest of the script.
lemmatizer = WordNetLemmatizer()
def get_wordnet_pos(nltk_pos_tag):
    """Map a Penn Treebank POS tag to the matching WordNet POS constant.

    Only the first character of the tag matters: J -> adjective,
    V -> verb, N -> noun, R -> adverb. Anything else (including an
    empty tag) falls back to NOUN, mirroring the original if/elif chain.
    """
    prefix_to_pos = {
        'J': wordnet.ADJ,
        'V': wordnet.VERB,
        'N': wordnet.NOUN,
        'R': wordnet.ADV,
    }
    # tag[:1] is '' for an empty tag, which misses the table and
    # yields the NOUN default — same result as startswith on ''.
    return prefix_to_pos.get(nltk_pos_tag[:1], wordnet.NOUN)
# Sample sentence to run through the tokenize -> POS-tag -> lemmatize pipeline.
text = """
The striped bats are hanging on their feet for best.
"""

tokens = word_tokenize(text)
pos_tags = pos_tag(tokens)

# Lemmatize each token using its tagged part of speech so that, e.g.,
# verbs and nouns are reduced to the correct base form.
lemmatized_tokens = [
    lemmatizer.lemmatize(word, get_wordnet_pos(tag))
    for word, tag in pos_tags
]

print("Original Tokens and POS Tags:")
print(pos_tags)
print("\nLemmatized Tokens:")
print(lemmatized_tokens)
Advertisement
Add Comment
Please, Sign In to add comment