# Compare clustering-quality metrics (silhouette, Davies-Bouldin,
# Calinski-Harabasz) for average-linkage agglomerative clustering
# of the 2-D point set stored in 'zgo.data'.
# Third-party imports, grouped by package.
import hdbscan
import matplotlib.pyplot as plt
import numpy as np
from scipy.io import arff
from sklearn import metrics
from sklearn.cluster import AgglomerativeClustering, DBSCAN, KMeans
from sklearn.datasets import make_blobs
# Default cluster count. NOTE(review): unused by the scan below (which sweeps
# its own range); kept for backward compatibility with any external reader.
n_clusters = 2

# Per-metric scores, one entry per candidate cluster count k = 2..19.
silhouette = []
calinski = []
davies = []

# Load the 2-D point cloud: one "x y" pair per line.
data = np.loadtxt('zgo.data')

# Column views used for scatter plots, plus a nested [x, y] list for sklearn.
# (Replaces the original element-by-element append loop over the array rows.)
x_list = list(data[:, 0])
y_list = list(data[:, 1])
data_final = data.tolist()

# Score average-linkage agglomerative clustering for every candidate k.
for n in range(2, 20):
    clustering = AgglomerativeClustering(n_clusters=n, linkage='average').fit(data_final)
    labels = clustering.labels_

    # Silhouette and Calinski-Harabasz: higher is better; Davies-Bouldin: lower.
    silhouette.append(metrics.silhouette_score(data_final, labels, metric='euclidean'))
    davies.append(metrics.davies_bouldin_score(data_final, labels))
    calinski.append(metrics.calinski_harabasz_score(data_final, labels))
# Overlay the silhouette and Davies-Bouldin curves against cluster count.
# (The Calinski-Harabasz curve is not drawn here — presumably it was disabled
# because its values are on a much larger scale than the other two metrics;
# its optimum is still reported below.)
for scores, label in ((silhouette, 'Silhouette'), (davies, 'Davies')):
    plt.plot(range(2, 20), scores, marker='o', label=label)

# Axis setup once, instead of the original duplicated xlim/xlabel calls.
plt.xlim(2, 20)
plt.xlabel('Nb clusters')

plt.legend()
plt.show()
def _best_k(scores, best):
    """Return the cluster count whose score is optimal.

    scores -- per-k metric values, indexed from k = 2 upward
    best   -- `max` (silhouette, Calinski-Harabasz) or `min` (Davies-Bouldin)
    """
    # +2 maps a list index back to the cluster count it was scored at.
    return scores.index(best(scores)) + 2


def _show_best(scores, best, title, position):
    """Print the optimal cluster count for *scores* and scatter-plot the
    clustering obtained with that count in row *position* of a 3x1 grid."""
    k = _best_k(scores, best)
    print(title, ": ", k)

    plt.subplot(3, 1, position)
    # fit_predict both fits the model and returns the labels in one pass
    # (the original fitted twice: .fit(...) followed by .fit_predict(...)).
    labels = AgglomerativeClustering(n_clusters=k, linkage='average').fit_predict(data_final)
    plt.scatter(x_list, y_list, c=labels, s=5)


# Silhouette coefficient: higher is better.
_show_best(silhouette, max, "Silhouette", 1)

# Davies-Bouldin index: lower is better.
_show_best(davies, min, "Davies Bouldin", 2)

# Calinski-Harabasz index: higher is better.
_show_best(calinski, max, "Calinski Harabasz", 3)

plt.show()
|