Advertisement
Guest User

Untitled

a guest
Jan 25th, 2020
91
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.71 KB | None | 0 0
  1. import nltk
  2. nltk.download('punkt')
  3. nltk.download('stopwords')
  4. from nltk.tokenize import sent_tokenize, word_tokenize
  5. from nltk.probability import FreqDist
  6. import matplotlib.pyplot as plt
  7. from nltk.corpus import stopwords
  8. from nltk.stem import PorterStemmer
  9. from nltk.stem.wordnet import WordNetLemmatizer
  10. from sklearn.feature_extraction.text import TfidfVectorizer
  11. from sklearn.metrics.pairwise import cosine_similarity
  12.  
  13.  
  14. documents = (genetic_algorithm_text, life_text, programming_language_text)  # the three texts to compare; these names are not defined in this snippet -- presumably assigned elsewhere, TODO confirm
  15. tfidf_vectorizer = TfidfVectorizer()  # vectorizer created with its default settings
  16. tfidf_matrix = tfidf_vectorizer.fit_transform(documents)  # fit on the three texts and transform them into a TF-IDF matrix, one row per document
  17.  
  18. # Cosine similarity between the genetic-algorithm text and the life and programming-language texts:
  19. cos = cosine_similarity(tfidf_matrix[0:3], tfidf_matrix)  # NOTE(review): with three documents, rows 0:3 are all rows, so this yields every pairwise similarity; row 0 holds the comparisons described above -- confirm whether only tfidf_matrix[0:1] was intended
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement