Untitled | a guest | May 6th, 2015

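The snippet below assumes a list of raw text documents called data and a cluster/topic count k, neither of which is defined in the paste. A minimal placeholder setup (the sample strings and the value of k are illustrative only):

data = [
    "the cat sat on the mat",
    "dogs and cats make good pets",
    "stock markets fell sharply on monday",
    "investors worry about rising interest rates",
]
k = 2  # number of clusters / topics to extract
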
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.cluster import AgglomerativeClustering

vectorizer = TfidfVectorizer(stop_words='english')
X = vectorizer.fit_transform(data)            # TF-IDF document-term matrix
C = 1 - cosine_similarity(X)                  # cosine distances between documents
# Ward linkage in scikit-learn only supports Euclidean distances, so the
# distance matrix is passed in as an ordinary feature matrix (one row of
# pairwise distances per document) rather than as a precomputed metric.
ward = AgglomerativeClustering(n_clusters=k, linkage='ward').fit(C)
labels = ward.labels_

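A quick way to inspect what the hierarchical clustering produced, assuming only that data and labels line up index for index:

from collections import Counter

print(Counter(labels))                        # how many documents landed in each cluster
for doc, lab in zip(data, labels):
    print(lab, doc[:60])                      # cluster id plus a short preview of the document
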
from sklearn.cluster import KMeans

km = KMeans(n_clusters=k, init='k-means++', max_iter=100, n_init=1)
km.fit(X)

# Terms with the largest weight in each centroid, highest first.
order_centroids = km.cluster_centers_.argsort()[:, ::-1]
terms = vectorizer.get_feature_names_out()    # get_feature_names() on older scikit-learn versions
for i in range(k):
    print("Cluster %d:" % i, end='')
    for ind in order_centroids[i, :10]:
        print(' %s' % terms[ind], end='')
    print()

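One way to sanity-check the k-means result is a silhouette score on the same TF-IDF matrix; a small sketch, assuming cosine distance is the appropriate metric for these vectors:

from sklearn.metrics import silhouette_score

# Values near 1 indicate tight, well-separated clusters; values near 0 indicate overlap.
print(silhouette_score(X, km.labels_, metric='cosine'))
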
from sklearn.decomposition import NMF

# Non-negative matrix factorization of the TF-IDF matrix into k topics.
nmf = NMF(n_components=k, random_state=1).fit(X)

feature_names = vectorizer.get_feature_names_out()

# Ten highest-weighted terms per topic.
for topic_idx, topic in enumerate(nmf.components_):
    print("Topic #%d:" % topic_idx)
    print(" ".join([feature_names[i]
                    for i in topic.argsort()[:-10 - 1:-1]]))
    print()

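The NMF components give the term side of each topic; the document side comes from transforming X. A brief sketch (the hard assignment via argmax is an illustrative choice, not part of the original paste):

W = nmf.transform(X)                          # document-topic weight matrix
dominant_topic = W.argmax(axis=1)             # strongest topic for each document
for doc, t in zip(data, dominant_topic):
    print(t, doc[:60])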