"""Cluster the 2-D points of ``diamond9.arff`` with DBSCAN and score the result.

The script loads the ARFF file, runs DBSCAN on the first two attributes,
shows a scatter plot coloured by cluster label, and prints three internal
clustering-quality indices (silhouette, Davies-Bouldin, Calinski-Harabasz).
"""

from scipy.io import arff
import numpy as np
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.cluster import DBSCAN

# loadarff returns (records, metadata); only the records are needed here.
data = arff.loadarff('diamond9.arff')[0]

# The first two fields of each record are used as plane coordinates; any
# remaining field (presumably the ground-truth class — confirm against the
# dataset) is ignored.
x_list = [row[0] for row in data]
y_list = [row[1] for row in data]
data_final = [[x, y] for x, y in zip(x_list, y_list)]

clustering = DBSCAN(eps=0.5, min_samples=2).fit(data_final)
colors = clustering.labels_

plt.scatter(x_list, y_list, c=colors, s=5)
plt.show()

# DBSCAN labels noise points as -1. They are not a real cluster, so they are
# left out of the quality indices instead of being scored as one group.
points = np.asarray(data_final, dtype=float)
clustered = colors != -1
cluster_points = points[clustered]
cluster_labels = colors[clustered]
n_clusters = len(set(cluster_labels.tolist()))

# The three indices are only defined for 2 <= n_clusters <= n_samples - 1;
# scikit-learn raises ValueError otherwise, so guard before calling them.
if 2 <= n_clusters < len(cluster_labels):
    silh = metrics.silhouette_score(
        cluster_points, cluster_labels, metric='euclidean'
    )
    dbsc = metrics.davies_bouldin_score(cluster_points, cluster_labels)
    caha = metrics.calinski_harabasz_score(cluster_points, cluster_labels)

    print("Coefficient de silhouette : ", silh)
    print("Indice de Davies Bouldin : ", dbsc)
    print("Indice de calinski harabasz : ", caha)
else:
    silh = dbsc = caha = None
    print(
        "Indices non calculés : DBSCAN a trouvé", n_clusters,
        "cluster(s) hors bruit (il en faut au moins 2)."
    )