import numpy as np
from sklearn import datasets
from sklearn.metrics import pairwise_distances
from sklearn.preprocessing import LabelEncoder


def silhouette_samples(X, labels, metric='euclidean', **kwds):
    """Compute the silhouette coefficient of every sample.

    For a sample, let ``a`` be the mean distance to the other members of
    its own cluster and ``b`` the smallest mean distance to the members of
    any other cluster.  The coefficient is ``(b - a) / max(a, b)``.

    Parameters
    ----------
    X : array-like
        Input forwarded unchanged to
        ``sklearn.metrics.pairwise_distances``.
    labels : array-like
        Cluster label of each sample.  Any values accepted by
        ``LabelEncoder`` may be used; they do not have to be the
        integers ``0..k-1``.
    metric : str or callable, default='euclidean'
        Metric forwarded to ``pairwise_distances``.
    **kwds
        Extra keyword arguments forwarded to ``pairwise_distances``.

    Returns
    -------
    numpy.ndarray
        One silhouette coefficient per sample, in input order.

    Notes
    -----
    * Samples that are the only member of their cluster score 0.
    * With fewer than two clusters there is no "other" cluster, so ``b``
      stays infinite and the coefficient is NaN for clusters with more
      than one member.
    * A sample whose ``a`` and ``b`` are both zero yields NaN (0 / 0).
    """
    # Map arbitrary label values onto the contiguous codes 0..k-1.  All
    # masks below are built from these codes, never from the raw values.
    le = LabelEncoder()
    labels = le.fit_transform(labels)
    n_labels = len(le.classes_)
    n_samples_per_label = np.bincount(labels, minlength=n_labels)

    distances = pairwise_distances(X, metric=metric, **kwds)

    # intra_clust_dists[i]: mean distance from sample i to the rest of its
    # own cluster.  The initial 1.0 is only a placeholder for singleton
    # clusters, whose score is overwritten with 0 at the end.
    intra_clust_dists = np.ones(distances.shape[0], dtype=distances.dtype)
    # inter_clust_dists[i]: smallest mean distance from sample i to the
    # members of any other cluster.
    inter_clust_dists = np.inf * intra_clust_dists

    for curr_label in range(n_labels):
        mask = labels == curr_label
        current_distances = distances[mask]

        # Exclude the sample itself from its own cluster's mean; its
        # self-distance contributes nothing to the sum.
        n_samples_curr_lab = n_samples_per_label[curr_label] - 1
        if n_samples_curr_lab != 0:
            intra_clust_dists[mask] = (
                np.sum(current_distances[:, mask], axis=1)
                / n_samples_curr_lab
            )

        # Keep the closest of all the other clusters.
        for other_label in range(n_labels):
            if other_label == curr_label:
                continue
            other_mask = labels == other_label
            other_distances = np.mean(
                current_distances[:, other_mask], axis=1)
            inter_clust_dists[mask] = np.minimum(
                inter_clust_dists[mask], other_distances)

    sil_samples = inter_clust_dists - intra_clust_dists
    sil_samples /= np.maximum(intra_clust_dists, inter_clust_dists)
    # A cluster of size one has no intra-cluster distance; score it 0.
    sil_samples[n_samples_per_label[labels] == 1] = 0
    return sil_samples


# Demo: per-sample silhouette coefficients of the iris data set, using the
# true species labels as the clustering.
dataset = datasets.load_iris()
data = dataset.data
target = dataset.target
result = silhouette_samples(data, target)
print(result)