Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import numpy as np
from sklearn.cluster import KMeans
from sklearn.metrics import pairwise_distances_argmin_min

# Extractive summary via k-means: cluster the rows of `encoded`, pick the row
# nearest to each cluster centre, and join the corresponding `sentences`
# entries, ordering clusters by the mean row index of their members.
# NOTE(review): `encoded` and `sentences` are not defined in this snippet;
# presumably `encoded[i]` is the embedding vector of `sentences[i]` -- confirm
# against the code that precedes it.

# The cluster count has to exist before the model is constructed; it grows
# sub-linearly (len ** 0.6, rounded up) with the number of encoded rows.
n_clusters = int(np.ceil(len(encoded)**0.6))
print(n_clusters)

kmeans = KMeans(n_clusters=n_clusters)
kmeans = kmeans.fit(encoded)

# avg[j] is the mean row index of the members of cluster j; it is used below
# as the sort key that decides where cluster j's sentence goes in the output.
avg = []
for j in range(n_clusters):
    idx = np.where(kmeans.labels_ == j)[0]
    avg.append(np.mean(idx))

# closest[j] is the index of the row of `encoded` nearest to centre j.
closest, _ = pairwise_distances_argmin_min(kmeans.cluster_centers_, encoded)

# Emit one representative sentence per cluster, clusters sorted by avg[j].
ordering = sorted(range(n_clusters), key=lambda k: avg[k])
summary = ' '.join([sentences[closest[idx]] for idx in ordering])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement