"""Choose the best KMeans cluster count for a 3-D dataset via internal metrics.

Loads whitespace-separated (x, y, z) points from 't.data', fits KMeans for
k = 2..19, plots the silhouette and Davies-Bouldin curves, then — for each of
the three metrics (silhouette: higher is better, Davies-Bouldin: lower is
better, Calinski-Harabasz: higher is better) — prints the winning k and shows
a 3-D scatter of the clustering re-fitted at that k.
"""
from scipy.io import arff  # NOTE(review): unused below — kept; file may be imported elsewhere
import numpy as np
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs  # NOTE(review): unused below
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.cluster import AgglomerativeClustering  # NOTE(review): unused below
from sklearn.cluster import DBSCAN  # NOTE(review): unused below
import hdbscan  # NOTE(review): unused below

# Cluster counts evaluated: 2..19 inclusive (silhouette is undefined for k < 2).
K_RANGE = range(2, 20)


def _score_kmeans_sweep(points, k_range=K_RANGE):
    """Fit KMeans for each k and return the three metric curves.

    Parameters
    ----------
    points : ndarray of shape (n_samples, 3)
        The data to cluster.
    k_range : range
        Cluster counts to try.

    Returns
    -------
    (silhouette, davies, calinski) : three lists of float, one entry per k.
    """
    silhouette, davies, calinski = [], [], []
    for k in k_range:
        labels = KMeans(n_clusters=k, init='k-means++').fit(points).labels_
        silhouette.append(metrics.silhouette_score(points, labels, metric='euclidean'))
        davies.append(metrics.davies_bouldin_score(points, labels))
        calinski.append(metrics.calinski_harabasz_score(points, labels))
    return silhouette, davies, calinski


def _plot_metric_curves(k_range, silhouette, davies):
    """Plot silhouette and Davies-Bouldin scores against k on shared axes."""
    plt.plot(k_range, silhouette, marker='o', label='Silhouette')
    plt.plot(k_range, davies, marker='o', label='Davies')
    plt.xlim(2, 20)
    plt.xlabel('Nb clusters')
    plt.legend()
    plt.show()


def _show_clustering(points, k):
    """Re-fit KMeans with k clusters and display a 3-D scatter colored by label."""
    labels = KMeans(n_clusters=k, init='k-means++').fit(points).labels_
    ax = plt.axes(projection='3d')
    ax.scatter3D(points[:, 0], points[:, 1], points[:, 2], c=labels)
    plt.show()


def main():
    """Run the full sweep: score every k, plot curves, show each metric's winner."""
    # np.loadtxt already yields the (n, 3) feature matrix; no per-row copying needed.
    points = np.loadtxt('t.data')
    silhouette, davies, calinski = _score_kmeans_sweep(points)

    _plot_metric_curves(K_RANGE, silhouette, davies)

    # k_range starts at 2, so best index + 2 converts position back to k.
    best_sil = int(np.argmax(silhouette)) + 2  # silhouette: maximize
    print("Silhouette : ", best_sil)
    _show_clustering(points, best_sil)

    best_db = int(np.argmin(davies)) + 2  # Davies-Bouldin: minimize
    print("Davies Bouldin : ", best_db)
    _show_clustering(points, best_db)

    best_ch = int(np.argmax(calinski)) + 2  # Calinski-Harabasz: maximize
    print("Calinski Harabasz : ", best_ch)
    _show_clustering(points, best_ch)


if __name__ == "__main__":
    main()