# Agglomerative (average-linkage) clustering of the 'diamond9.arff' dataset:
# fit the model, display the labelled points, then print three clustering
# quality scores.

import hdbscan
import matplotlib.pyplot as plt
import numpy as np
from scipy.io import arff
from sklearn import metrics
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import DBSCAN
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs

# Number of clusters requested from the agglomerative model.
n_clusters = 2

# loadarff returns a (records, metadata) pair; only the records are used.
data = arff.loadarff('diamond9.arff')[0]

# Each record is unpacked into exactly three fields. The first two are kept
# as point coordinates; the third is ignored here (presumably a class
# label -- verify against the dataset).
data_final = [[x, y] for x, y, _unused in data]
x_list = [point[0] for point in data_final]
y_list = [point[1] for point in data_final]

# fit() returns the estimator itself, so `clustering` is the fitted model.
clustering = AgglomerativeClustering(
    n_clusters=n_clusters,
    linkage='average',
)
clustering.fit(data_final)

# Predicted cluster labels double as the colour index for the scatter plot.
colors = clustering.labels_

plt.scatter(x_list, y_list, c=colors, s=5)
plt.show()

# Clustering quality scores computed from the points and predicted labels.
silh = metrics.silhouette_score(data_final, colors, metric='euclidean')
dbsc = metrics.davies_bouldin_score(data_final, colors)
caha = metrics.calinski_harabasz_score(data_final, colors)

for caption, score in (
    ("Coefficient de silhouette : ", silh),
    ("Indice de Davies Bouldin : ", dbsc),
    ("Indice de calinski harabasz : ", caha),
):
    print(caption, score)