Corrected code
This commit is contained in:
parent
3c778aedc1
commit
d49fca7c1e
7 changed files with 114 additions and 18 deletions
|
@ -19,7 +19,7 @@ calinski = []
|
||||||
davies = []
|
davies = []
|
||||||
|
|
||||||
|
|
||||||
data = np.loadtxt('tr.data')
|
data = np.loadtxt('zgo.data')
|
||||||
|
|
||||||
for (x, y) in data :
|
for (x, y) in data :
|
||||||
x_list.append(x)
|
x_list.append(x)
|
||||||
|
@ -37,6 +37,23 @@ for n in range(2, 20):
|
||||||
davies.append(dbsc)
|
davies.append(dbsc)
|
||||||
caha = metrics.calinski_harabasz_score(data_final, colors)
|
caha = metrics.calinski_harabasz_score(data_final, colors)
|
||||||
calinski.append(caha)
|
calinski.append(caha)
|
||||||
|
|
||||||
|
plt.plot(range(2,20), silhouette, marker='o', label='Silhouette')
|
||||||
|
plt.xlim(2,20)
|
||||||
|
plt.xlabel('Nb clusters')
|
||||||
|
|
||||||
|
plt.plot(range(2,20), davies, marker='o', label='Davies')
|
||||||
|
plt.xlim(2,20)
|
||||||
|
plt.xlabel('Nb clusters')
|
||||||
|
|
||||||
|
"""plt.plot(range(2,20), calinski, marker='o')
|
||||||
|
plt.xlim(2,20)
|
||||||
|
plt.xlabel('Nb minimum de voisins')
|
||||||
|
plt.ylabel('Calinski coeff')"""
|
||||||
|
|
||||||
|
plt.legend()
|
||||||
|
|
||||||
|
plt.show()
|
||||||
|
|
||||||
#silhouettte coefficient
|
#silhouettte coefficient
|
||||||
#get the index of the best result
|
#get the index of the best result
|
||||||
|
|
|
@ -19,7 +19,7 @@ calinski = []
|
||||||
davies = []
|
davies = []
|
||||||
|
|
||||||
|
|
||||||
data = np.loadtxt('zgo.data')
|
data = np.loadtxt('zgn.data')
|
||||||
|
|
||||||
for (x, y) in data :
|
for (x, y) in data :
|
||||||
x_list.append(x)
|
x_list.append(x)
|
||||||
|
@ -40,6 +40,23 @@ for n in range(2, 20):
|
||||||
caha = metrics.calinski_harabasz_score(data_final, colors)
|
caha = metrics.calinski_harabasz_score(data_final, colors)
|
||||||
calinski.append(caha)
|
calinski.append(caha)
|
||||||
|
|
||||||
|
plt.plot(range(2,20), silhouette, marker='o', label='Silhouette')
|
||||||
|
plt.xlim(2,20)
|
||||||
|
plt.xlabel('Nb minimum de voisins')
|
||||||
|
|
||||||
|
plt.plot(range(2,20), davies, marker='o', label='Davies')
|
||||||
|
plt.xlim(2,20)
|
||||||
|
plt.xlabel('Nb minimum de voisins')
|
||||||
|
|
||||||
|
"""plt.plot(range(2,20), calinski, marker='o')
|
||||||
|
plt.xlim(2,20)
|
||||||
|
plt.xlabel('Nb minimum de voisins')
|
||||||
|
plt.ylabel('Calinski coeff')"""
|
||||||
|
|
||||||
|
plt.legend()
|
||||||
|
|
||||||
|
plt.show()
|
||||||
|
|
||||||
#silhouettte coefficient
|
#silhouettte coefficient
|
||||||
#get the index of the best result
|
#get the index of the best result
|
||||||
m = max(silhouette)
|
m = max(silhouette)
|
||||||
|
|
|
@ -19,7 +19,7 @@ calinski = []
|
||||||
davies = []
|
davies = []
|
||||||
|
|
||||||
|
|
||||||
data = np.loadtxt('zgo.data')
|
data = np.loadtxt('tr.data')
|
||||||
|
|
||||||
for (x, y) in data :
|
for (x, y) in data :
|
||||||
x_list.append(x)
|
x_list.append(x)
|
||||||
|
@ -38,6 +38,23 @@ for n in range(2, 20):
|
||||||
caha = metrics.calinski_harabasz_score(data_final, colors)
|
caha = metrics.calinski_harabasz_score(data_final, colors)
|
||||||
calinski.append(caha)
|
calinski.append(caha)
|
||||||
|
|
||||||
|
"""plt.plot(range(2,20), silhouette, marker='o', label='Silhouette')
|
||||||
|
plt.xlim(2,20)
|
||||||
|
plt.xlabel('Nb clusters')
|
||||||
|
|
||||||
|
plt.plot(range(2,20), davies, marker='o', label='Davies')
|
||||||
|
plt.xlim(2,20)
|
||||||
|
plt.xlabel('Nb clusters')"""
|
||||||
|
|
||||||
|
plt.plot(range(2,20), calinski, marker='o')
|
||||||
|
plt.xlim(2,20)
|
||||||
|
plt.xlabel('Nb clusters')
|
||||||
|
plt.ylabel('Calinski coeff')
|
||||||
|
|
||||||
|
plt.legend()
|
||||||
|
|
||||||
|
plt.show()
|
||||||
|
|
||||||
#silhouettte coefficient
|
#silhouettte coefficient
|
||||||
#get the index of the best result
|
#get the index of the best result
|
||||||
m = max(silhouette)
|
m = max(silhouette)
|
||||||
|
@ -50,7 +67,6 @@ clustering = KMeans(n_clusters=indice, init='k-means++').fit(data_final)
|
||||||
colors = clustering.fit_predict(data_final)
|
colors = clustering.fit_predict(data_final)
|
||||||
plt.scatter(x_list, y_list, c=colors, s=5)
|
plt.scatter(x_list, y_list, c=colors, s=5)
|
||||||
|
|
||||||
plt.show()
|
|
||||||
|
|
||||||
#davies bouldin metrics
|
#davies bouldin metrics
|
||||||
#get the index of the best result
|
#get the index of the best result
|
||||||
|
@ -64,8 +80,6 @@ clustering = KMeans(n_clusters=indice, init='k-means++').fit(data_final)
|
||||||
colors = clustering.fit_predict(data_final)
|
colors = clustering.fit_predict(data_final)
|
||||||
plt.scatter(x_list, y_list, c=colors, s=5)
|
plt.scatter(x_list, y_list, c=colors, s=5)
|
||||||
|
|
||||||
plt.show()
|
|
||||||
|
|
||||||
#calinski metrics
|
#calinski metrics
|
||||||
#get the index of the best result
|
#get the index of the best result
|
||||||
m = max(calinski)
|
m = max(calinski)
|
||||||
|
|
|
@ -26,7 +26,7 @@ for (x, y, z) in data :
|
||||||
x_list.append(x)
|
x_list.append(x)
|
||||||
y_list.append(y)
|
y_list.append(y)
|
||||||
z_list.append(z)
|
z_list.append(z)
|
||||||
data_final.append([x,y])
|
data_final.append([x,y,z])
|
||||||
|
|
||||||
for n in range(2, 20):
|
for n in range(2, 20):
|
||||||
|
|
||||||
|
@ -39,7 +39,23 @@ for n in range(2, 20):
|
||||||
davies.append(dbsc)
|
davies.append(dbsc)
|
||||||
caha = metrics.calinski_harabasz_score(data_final, colors)
|
caha = metrics.calinski_harabasz_score(data_final, colors)
|
||||||
calinski.append(caha)
|
calinski.append(caha)
|
||||||
|
|
||||||
|
"""plt.plot(range(2,20), silhouette, marker='o', label='Silhouette')
|
||||||
|
plt.xlim(2,20)
|
||||||
|
plt.xlabel('Nb clusters')
|
||||||
|
|
||||||
|
plt.plot(range(2,20), davies, marker='o', label='Davies')
|
||||||
|
plt.xlim(2,20)
|
||||||
|
plt.xlabel('Nb clusters')"""
|
||||||
|
|
||||||
|
plt.plot(range(2,20), calinski, marker='o')
|
||||||
|
plt.xlim(2,20)
|
||||||
|
plt.xlabel('Nb clusters')
|
||||||
|
plt.ylabel('Calinski coeff')
|
||||||
|
|
||||||
|
plt.legend()
|
||||||
|
|
||||||
|
plt.show()
|
||||||
#silhouettte coefficient
|
#silhouettte coefficient
|
||||||
#get the index of the best result
|
#get the index of the best result
|
||||||
m = max(silhouette)
|
m = max(silhouette)
|
||||||
|
|
|
@ -26,7 +26,7 @@ for (x, y, z) in data :
|
||||||
x_list.append(x)
|
x_list.append(x)
|
||||||
y_list.append(y)
|
y_list.append(y)
|
||||||
z_list.append(z)
|
z_list.append(z)
|
||||||
data_final.append([x,y])
|
data_final.append([x,y,z])
|
||||||
|
|
||||||
clustering = DBSCAN(eps=0.25, min_samples=10).fit(data_final)
|
clustering = DBSCAN(eps=0.25, min_samples=10).fit(data_final)
|
||||||
colors = clustering.labels_
|
colors = clustering.labels_
|
||||||
|
|
|
@ -20,13 +20,13 @@ calinski = []
|
||||||
davies = []
|
davies = []
|
||||||
|
|
||||||
|
|
||||||
data = np.loadtxt('a.data')
|
data = np.loadtxt('h.data')
|
||||||
|
|
||||||
for (x, y, z) in data :
|
for (x, y, z) in data :
|
||||||
x_list.append(x)
|
x_list.append(x)
|
||||||
y_list.append(y)
|
y_list.append(y)
|
||||||
z_list.append(z)
|
z_list.append(z)
|
||||||
data_final.append([x,y])
|
data_final.append([x,y,z])
|
||||||
|
|
||||||
#get the values of the different coefficients for different min_samples values from 2 to 20
|
#get the values of the different coefficients for different min_samples values from 2 to 20
|
||||||
for n in range(2, 20):
|
for n in range(2, 20):
|
||||||
|
@ -42,6 +42,23 @@ for n in range(2, 20):
|
||||||
caha = metrics.calinski_harabasz_score(data_final, colors)
|
caha = metrics.calinski_harabasz_score(data_final, colors)
|
||||||
calinski.append(caha)
|
calinski.append(caha)
|
||||||
|
|
||||||
|
"""plt.plot(range(2,20), silhouette, marker='o', label='Silhouette')
|
||||||
|
plt.xlim(2,20)
|
||||||
|
plt.xlabel('Nb minimum de voisins')
|
||||||
|
|
||||||
|
plt.plot(range(2,20), davies, marker='o', label='Davies')
|
||||||
|
plt.xlim(2,20)
|
||||||
|
plt.xlabel('Nb minimum de voisins')"""
|
||||||
|
|
||||||
|
plt.plot(range(2,20), calinski, marker='o')
|
||||||
|
plt.xlim(2,20)
|
||||||
|
plt.xlabel('Nb minimum de voisins')
|
||||||
|
plt.ylabel('Calinski coeff')
|
||||||
|
|
||||||
|
plt.legend()
|
||||||
|
|
||||||
|
plt.show()
|
||||||
|
|
||||||
#silhouettte coefficient
|
#silhouettte coefficient
|
||||||
#get the index of the best result
|
#get the index of the best result
|
||||||
m = max(silhouette)
|
m = max(silhouette)
|
||||||
|
|
|
@ -8,8 +8,6 @@ from sklearn.cluster import AgglomerativeClustering
|
||||||
from sklearn.cluster import DBSCAN
|
from sklearn.cluster import DBSCAN
|
||||||
import hdbscan
|
import hdbscan
|
||||||
|
|
||||||
n_clusters = 2
|
|
||||||
|
|
||||||
data_final = []
|
data_final = []
|
||||||
x_list = []
|
x_list = []
|
||||||
y_list = []
|
y_list = []
|
||||||
|
@ -26,7 +24,7 @@ for (x, y, z) in data :
|
||||||
x_list.append(x)
|
x_list.append(x)
|
||||||
y_list.append(y)
|
y_list.append(y)
|
||||||
z_list.append(z)
|
z_list.append(z)
|
||||||
data_final.append([x,y])
|
data_final.append([x,y,z])
|
||||||
|
|
||||||
for n in range(2, 20):
|
for n in range(2, 20):
|
||||||
|
|
||||||
|
@ -39,7 +37,24 @@ for n in range(2, 20):
|
||||||
davies.append(dbsc)
|
davies.append(dbsc)
|
||||||
caha = metrics.calinski_harabasz_score(data_final, colors)
|
caha = metrics.calinski_harabasz_score(data_final, colors)
|
||||||
calinski.append(caha)
|
calinski.append(caha)
|
||||||
|
|
||||||
|
|
||||||
|
plt.plot(range(2,20), silhouette, marker='o', label='Silhouette')
|
||||||
|
plt.xlim(2,20)
|
||||||
|
plt.xlabel('Nb clusters')
|
||||||
|
|
||||||
|
plt.plot(range(2,20), davies, marker='o', label='Davies')
|
||||||
|
plt.xlim(2,20)
|
||||||
|
plt.xlabel('Nb clusters')
|
||||||
|
|
||||||
|
"""plt.plot(range(2,20), calinski, marker='o')
|
||||||
|
plt.xlim(2,20)
|
||||||
|
plt.xlabel('Nb clusters')
|
||||||
|
plt.ylabel('Calinski coeff')"""
|
||||||
|
|
||||||
|
plt.legend()
|
||||||
|
|
||||||
|
plt.show()
|
||||||
#silhouettte coefficient
|
#silhouettte coefficient
|
||||||
#get the index of the best result
|
#get the index of the best result
|
||||||
m = max(silhouette)
|
m = max(silhouette)
|
||||||
|
@ -49,7 +64,7 @@ print("Silhouette : ", indice)
|
||||||
plt.subplot(3,1,1)
|
plt.subplot(3,1,1)
|
||||||
#display the best obtained result
|
#display the best obtained result
|
||||||
clustering = KMeans(n_clusters=indice, init='k-means++').fit(data_final)
|
clustering = KMeans(n_clusters=indice, init='k-means++').fit(data_final)
|
||||||
colors = clustering.fit_predict(data_final)
|
colors = clustering.labels_
|
||||||
plt.axes(projection='3d').scatter3D(x_list, y_list, z_list, c=colors)
|
plt.axes(projection='3d').scatter3D(x_list, y_list, z_list, c=colors)
|
||||||
plt.show()
|
plt.show()
|
||||||
|
|
||||||
|
@ -62,7 +77,7 @@ print("Davies Bouldin : ", indice)
|
||||||
plt.subplot(3,1,2)
|
plt.subplot(3,1,2)
|
||||||
#display the best obtained result with davies bouldin metrics
|
#display the best obtained result with davies bouldin metrics
|
||||||
clustering = KMeans(n_clusters=indice, init='k-means++').fit(data_final)
|
clustering = KMeans(n_clusters=indice, init='k-means++').fit(data_final)
|
||||||
colors = clustering.fit_predict(data_final)
|
colors = clustering.labels_
|
||||||
plt.axes(projection='3d').scatter3D(x_list, y_list, z_list, c=colors)
|
plt.axes(projection='3d').scatter3D(x_list, y_list, z_list, c=colors)
|
||||||
plt.show()
|
plt.show()
|
||||||
|
|
||||||
|
@ -75,6 +90,6 @@ print("Calinski Harabasz : ", indice)
|
||||||
plt.subplot(3,1,3)
|
plt.subplot(3,1,3)
|
||||||
#display the best obtained result
|
#display the best obtained result
|
||||||
clustering = KMeans(n_clusters=indice, init='k-means++').fit(data_final)
|
clustering = KMeans(n_clusters=indice, init='k-means++').fit(data_final)
|
||||||
colors = clustering.fit_predict(data_final)
|
colors = clustering.labels_
|
||||||
plt.axes(projection='3d').scatter3D(x_list, y_list, z_list, c=colors)
|
plt.axes(projection='3d').scatter3D(x_list, y_list, z_list, c=colors)
|
||||||
plt.show()
|
plt.show()
|
||||||
|
|
Loading…
Reference in a new issue