Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import nltk
- nltk.download('punkt')
- nltk.download('stopwords')
- from nltk.tokenize import sent_tokenize, word_tokenize
- from nltk.probability import FreqDist
- import matplotlib.pyplot as plt
- from nltk.corpus import stopwords
- from nltk.stem import PorterStemmer
- from nltk.stem.wordnet import WordNetLemmatizer
- from sklearn.feature_extraction.text import TfidfVectorizer
- from sklearn.metrics.pairwise import cosine_similarity
- documents = (genetic_algorithm_text, life_text, programming_language_text)  # the three input texts are not defined in the visible part of the file
- tfidf_vectorizer = TfidfVectorizer()
- tfidf_matrix = tfidf_vectorizer.fit_transform(documents)  # fits on the three documents; one TF-IDF row per document, in the order above
- # Cosine similarity between the genetic-algorithm text and the life and programming-language texts; the 0:3 slice covers all three rows, so the result holds every document-vs-document pair (row 0 is the genetic-algorithm text).
- cos = cosine_similarity(tfidf_matrix[0:3], tfidf_matrix)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement