Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python3.7
- from scipy.cluster import hierarchy
- from matplotlib import pyplot as plt
- import numpy as np
class ClusteringItem:
    """A labelled observation that can be fed to ``Clustering``.

    Attributes are stored exactly as given: ``label`` is used for display
    (see ``__str__``) and ``features`` is whatever value the caller's
    distance function knows how to compare.
    """

    def __init__(self, label, features):
        """Store *label* and *features* on the instance unchanged."""
        self.features = features
        self.label = label

    def __str__(self):
        """Return the label converted to a string."""
        return f"{self.label!s}"
class Clustering:
    """Hierarchical (agglomerative) clustering over a sequence of items.

    Each item must expose a ``features`` attribute (passed to the distance
    function) and be convertible with ``str()`` (used for dendrogram leaf
    labels).

    Typical use: call ``linkage()`` once to build the linkage matrix, then
    ``cluster()`` one or more times with different thresholds.
    """

    def __init__(self, items):
        """Remember *items*; no clustering is computed yet."""
        self.items = items
        # ``cluster()`` stores a list of lists here, so start with an empty
        # list rather than a set to keep the attribute's type consistent.
        self.clusters = []

    def linkage(self, dist, method: str = "complete") -> None:
        """Compute the linkage matrix and store it in ``self.Z``.

        Args:
            dist: Callable ``dist(features_a, features_b)`` returning the
                distance between two items' features as a number.
            method: Linkage method forwarded to
                ``scipy.cluster.hierarchy.linkage``.
        """
        n = len(self.items)
        # Condensed distance matrix: one entry per unordered pair (i < j),
        # laid out row by row, which is the 1-D input form that
        # ``hierarchy.linkage`` accepts.
        condensed = np.zeros(n * (n - 1) // 2)
        pos = 0
        for i in range(n):
            features_i = self.items[i].features
            for j in range(i + 1, n):
                condensed[pos] = dist(features_i, self.items[j].features)
                pos += 1
        self.Z = hierarchy.linkage(condensed, method=method)

    def cluster(self, threshold, dendrogram: bool = False) -> None:
        """Cut the hierarchy at *threshold* and store the flat clusters.

        ``self.clusters`` becomes a list of lists of items, ordered from the
        largest cluster to the smallest. ``linkage()`` must have been called
        first so that ``self.Z`` exists.

        Args:
            threshold: Distance at which the hierarchy is cut (``fcluster``
                with ``criterion="distance"``).
            dendrogram: When true, also draw the dendrogram with matplotlib
                and block on ``plt.show()``.
        """
        labels = hierarchy.fcluster(self.Z, threshold, criterion="distance")
        # fcluster labels are 1-based, hence the ``- 1`` when bucketing.
        buckets = [[] for _ in range(labels.max())]
        for item, label in zip(self.items, labels):
            buckets[label - 1].append(item)
        self.clusters = sorted(buckets, key=len, reverse=True)

        if dendrogram:
            # The colour cut-off is nudged just above ``threshold``,
            # presumably so links sitting exactly at the threshold are
            # coloured with their cluster. ``threshold + eps`` only works
            # for thresholds below 2 (eps is the float spacing at 1.0 and is
            # rounded away for larger values); ``nextafter`` gives the next
            # representable float for any magnitude.
            color_cut = np.nextafter(threshold, np.inf)
            hierarchy.dendrogram(
                self.Z,
                color_threshold=color_cut,
                leaf_rotation=90.,
                # Let scipy label the leaves directly instead of parsing and
                # rewriting the tick labels after the plot is drawn.
                labels=[str(item) for item in self.items],
            )
            plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement