Advertisement
Guest User

Untitled

a guest
Oct 16th, 2019
107
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.39 KB | None | 0 0
  1. #!/usr/bin/env python3.7
  2. from scipy.cluster import hierarchy
  3. from matplotlib import pyplot as plt
  4. import numpy as np
  5.  
  class ClusteringItem:
      """A labelled object carrying the feature data attached to it."""

      def __init__(self, label, features):
          """Keep ``label`` and ``features`` exactly as they were passed in."""
          self.label, self.features = label, features

      def __str__(self):
          """Return the label converted to a string."""
          text = str(self.label)
          return text
  13.  
  class Clustering:
      """Hierarchical clustering over items that expose a ``features`` attribute.

      Call ``linkage`` first to build the linkage matrix (stored in ``self.Z``),
      then ``cluster`` to cut it into flat clusters (stored in
      ``self.clusters``).
      """

      def __init__(self, items):
          """Store the items to be clustered.

          Args:
              items: Sized, indexable collection of objects; each one must
                  have a ``features`` attribute, which is handed to the
                  distance function in ``linkage``.
          """
          self.items = items
          # A list from the start, so the attribute has the same type before
          # and after cluster() has run.
          self.clusters = []

      def linkage(self, dist, method="complete"):
          """Compute all pairwise distances and build the linkage matrix.

          Args:
              dist: Callable taking the ``features`` of two items and
                  returning their distance as a number.
              method: Linkage method name forwarded to
                  ``scipy.cluster.hierarchy.linkage``.

          The result is stored in ``self.Z``.
          NOTE(review): SciPy is expected to reject an empty distance vector,
          so fewer than two items will presumably raise -- confirm.
          """
          count = len(self.items)
          # Integer arithmetic: count * (count - 1) is always even, so floor
          # division is exact and avoids a round trip through float.
          condensed = np.zeros(count * (count - 1) // 2)
          slot = 0
          # Fill the condensed (upper-triangular, row-major) distance vector.
          for i in range(count):
              for j in range(i + 1, count):
                  condensed[slot] = dist(
                      self.items[i].features, self.items[j].features
                  )
                  slot += 1
          self.Z = hierarchy.linkage(condensed, method=method)

      def cluster(self, threshold, dendrogram=False):
          """Cut the linkage matrix into flat clusters at ``threshold``.

          ``linkage`` must have been called beforehand so ``self.Z`` exists.

          Args:
              threshold: Distance cut-off passed to ``fcluster`` with
                  ``criterion="distance"``.
              dendrogram: When true, also draw the dendrogram and show it
                  with matplotlib.

          The clusters (lists of items) are stored in ``self.clusters``,
          largest first.
          """
          labels = hierarchy.fcluster(self.Z, threshold, criterion="distance")
          # fcluster labels are 1-based, hence the "- 1" when indexing.
          groups = [[] for _ in range(labels.max())]
          for item, label in zip(self.items, labels):
              groups[label - 1].append(item)
          self.clusters = sorted(groups, key=len, reverse=True)

          if dendrogram:
              hierarchy.dendrogram(
                  self.Z,
                  # Nudge the colour cut to the next representable float above
                  # the threshold so a merge exactly at the threshold is
                  # coloured like the flat clusters. Adding machine epsilon,
                  # as before, is lost to rounding for larger thresholds.
                  color_threshold=np.nextafter(threshold, np.inf),
                  leaf_rotation=90.0,
                  # Label the leaves directly instead of parsing the tick
                  # texts back into item indices after plotting.
                  labels=[str(item) for item in self.items],
              )
              plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement