
Corrected code

Foussats Morgane · 1 year ago · commit d49fca7c1e

real_world/2D/agglomerative2D.py  (+18 -1)

@@ -19,7 +19,7 @@ calinski = []
 davies = []
 
 
-data = np.loadtxt('tr.data')
+data = np.loadtxt('zgo.data')
 
 for (x, y) in data :
     x_list.append(x)
@@ -37,6 +37,23 @@ for n in range(2, 20):
     davies.append(dbsc)
     caha = metrics.calinski_harabasz_score(data_final, colors)
     calinski.append(caha)
+
+plt.plot(range(2,20), silhouette, marker='o', label='Silhouette')
+plt.xlim(2,20)
+plt.xlabel('Nb clusters')
+
+plt.plot(range(2,20), davies, marker='o', label='Davies')
+plt.xlim(2,20)
+plt.xlabel('Nb clusters')
+
+"""plt.plot(range(2,20), calinski, marker='o')
+plt.xlim(2,20)
+plt.xlabel('Nb minimum de voisins')
+plt.ylabel('Calinski coeff')"""
+
+plt.legend()
+
+plt.show()
 
 #silhouettte coefficient
 #get the index of the best result
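For readers skimming the diff: the block added above sweeps the cluster count from 2 to 19, collects the Silhouette and Davies-Bouldin scores, and plots both curves so the best value can be read off. A minimal self-contained sketch of that pattern follows; the make_blobs data is only a stand-in, since the 'zgo.data' file is not part of this commit.

import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.cluster import AgglomerativeClustering
from sklearn.datasets import make_blobs

# synthetic stand-in for the 2D points loaded from 'zgo.data'
data_final, _ = make_blobs(n_samples=300, centers=4, random_state=0)

silhouette, davies = [], []
for n in range(2, 20):
    # cluster with n clusters and score the resulting labels
    colors = AgglomerativeClustering(n_clusters=n).fit_predict(data_final)
    silhouette.append(metrics.silhouette_score(data_final, colors))
    davies.append(metrics.davies_bouldin_score(data_final, colors))

plt.plot(range(2, 20), silhouette, marker='o', label='Silhouette')
plt.plot(range(2, 20), davies, marker='o', label='Davies')
plt.xlim(2, 20)
plt.xlabel('Nb clusters')
plt.legend()
plt.show()

Note that a higher Silhouette score is better while a lower Davies-Bouldin score is better, so the two curves are read in opposite directions.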

real_world/2D/hdbscan2D.py  (+18 -1)

@@ -19,7 +19,7 @@ calinski = []
 davies = []
 
 
-data = np.loadtxt('zgo.data')
+data = np.loadtxt('zgn.data')
 
 for (x, y) in data :
     x_list.append(x)
@@ -40,6 +40,23 @@ for n in range(2, 20):
     caha = metrics.calinski_harabasz_score(data_final, colors)
     calinski.append(caha)
 
+plt.plot(range(2,20), silhouette, marker='o', label='Silhouette')
+plt.xlim(2,20)
+plt.xlabel('Nb minimum de voisins')
+
+plt.plot(range(2,20), davies, marker='o', label='Davies')
+plt.xlim(2,20)
+plt.xlabel('Nb minimum de voisins')
+
+"""plt.plot(range(2,20), calinski, marker='o')
+plt.xlim(2,20)
+plt.xlabel('Nb minimum de voisins')
+plt.ylabel('Calinski coeff')"""
+
+plt.legend()
+
+plt.show()
+
 #silhouettte coefficient
 #get the index of the best result
 m = max(silhouette)
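The hdbscan scripts run the same sweep, but the loop variable n is the min_samples parameter rather than a cluster count (per the comment visible in the hdbscan3D.py hunk below). A rough sketch of one pass, assuming the hdbscan package; the synthetic data and the label-count guard are illustrative additions, not repository code.

import hdbscan
from sklearn import metrics
from sklearn.datasets import make_blobs

# synthetic stand-in for the 2D points the script builds into data_final
data_final, _ = make_blobs(n_samples=300, centers=4, random_state=0)

for n in range(2, 20):
    # one clustering per candidate min_samples value
    colors = hdbscan.HDBSCAN(min_samples=n).fit(data_final).labels_
    # HDBSCAN labels noise as -1; the scores still need at least two distinct labels
    if len(set(colors)) > 1:
        print(n, metrics.silhouette_score(data_final, colors))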

real_world/2D/kmeans2D.py  (+18 -4)

@@ -19,7 +19,7 @@ calinski = []
 davies = []
 
 
-data = np.loadtxt('zgo.data')
+data = np.loadtxt('tr.data')
 
 for (x, y) in data :
     x_list.append(x)
@@ -38,6 +38,23 @@ for n in range(2, 20):
     caha = metrics.calinski_harabasz_score(data_final, colors)
     calinski.append(caha)
     
+"""plt.plot(range(2,20), silhouette, marker='o', label='Silhouette')
+plt.xlim(2,20)
+plt.xlabel('Nb clusters')
+
+plt.plot(range(2,20), davies, marker='o', label='Davies')
+plt.xlim(2,20)
+plt.xlabel('Nb clusters')"""
+
+plt.plot(range(2,20), calinski, marker='o')
+plt.xlim(2,20)
+plt.xlabel('Nb clusters')
+plt.ylabel('Calinski coeff')
+
+plt.legend()
+
+plt.show()
+
 #silhouettte coefficient
 #get the index of the best result
 m = max(silhouette)
@@ -50,7 +67,6 @@ clustering = KMeans(n_clusters=indice, init='k-means++').fit(data_final)
 colors = clustering.fit_predict(data_final)
 plt.scatter(x_list, y_list, c=colors, s=5)
 
-plt.show()
 
 #davies bouldin metrics
 #get the index of the best result
@@ -64,8 +80,6 @@ clustering = KMeans(n_clusters=indice, init='k-means++').fit(data_final)
 colors = clustering.fit_predict(data_final)
 plt.scatter(x_list, y_list, c=colors, s=5)
 
-plt.show()
-
 #calinski metrics
 #get the index of the best result
 m = max(calinski)

real_world/3D/agglomerative3D.py  (+18 -2)

@@ -26,7 +26,7 @@ for (x, y, z) in data :
     x_list.append(x)
     y_list.append(y)
     z_list.append(z)
-    data_final.append([x,y])
+    data_final.append([x,y,z])
 
 for n in range(2, 20):
 
@@ -39,7 +39,23 @@ for n in range(2, 20):
     davies.append(dbsc)
     caha = metrics.calinski_harabasz_score(data_final, colors)
     calinski.append(caha)
-    
+
+"""plt.plot(range(2,20), silhouette, marker='o', label='Silhouette')
+plt.xlim(2,20)
+plt.xlabel('Nb clusters')
+
+plt.plot(range(2,20), davies, marker='o', label='Davies')
+plt.xlim(2,20)
+plt.xlabel('Nb clusters')"""
+
+plt.plot(range(2,20), calinski, marker='o')
+plt.xlim(2,20)
+plt.xlabel('Nb clusters')
+plt.ylabel('Calinski coeff')
+
+plt.legend()
+
+plt.show()
 #silhouettte coefficient
 #get the index of the best result
 m = max(silhouette)
 m = max(silhouette)

real_world/3D/dbscan3D.py  (+1 -1)

@@ -26,7 +26,7 @@ for (x, y, z) in data :
     x_list.append(x)
     y_list.append(y)
     z_list.append(z)
-    data_final.append([x,y])
+    data_final.append([x,y,z])
 
 clustering = DBSCAN(eps=0.25, min_samples=10).fit(data_final)
 colors = clustering.labels_

real_world/3D/hdbscan3D.py  (+19 -2)

@@ -20,13 +20,13 @@ calinski = []
 davies = []
 
 
-data = np.loadtxt('a.data')
+data = np.loadtxt('h.data')
 
 for (x, y, z) in data :
     x_list.append(x)
     y_list.append(y)
     z_list.append(z)
-    data_final.append([x,y])
+    data_final.append([x,y,z])
 
 #get the values of the different coefficients for different min_samples values from 2 to 20
 for n in range(2, 20):
@@ -42,6 +42,23 @@ for n in range(2, 20):
     caha = metrics.calinski_harabasz_score(data_final, colors)
     calinski.append(caha)
 
+"""plt.plot(range(2,20), silhouette, marker='o', label='Silhouette')
+plt.xlim(2,20)
+plt.xlabel('Nb minimum de voisins')
+
+plt.plot(range(2,20), davies, marker='o', label='Davies')
+plt.xlim(2,20)
+plt.xlabel('Nb minimum de voisins')"""
+
+plt.plot(range(2,20), calinski, marker='o')
+plt.xlim(2,20)
+plt.xlabel('Nb minimum de voisins')
+plt.ylabel('Calinski coeff')
+
+plt.legend()
+
+plt.show()
+
 #silhouettte coefficient
 #get the index of the best result
 m = max(silhouette)

real_world/3D/kmeans3D.py  (+22 -7)

@@ -8,8 +8,6 @@ from sklearn.cluster import AgglomerativeClustering
 from sklearn.cluster import DBSCAN
 import hdbscan
 
-n_clusters = 2
-
 data_final = []
 x_list = []
 y_list = []
@@ -26,7 +24,7 @@ for (x, y, z) in data :
     x_list.append(x)
     y_list.append(y)
     z_list.append(z)
-    data_final.append([x,y])
+    data_final.append([x,y,z])
 
 for n in range(2, 20):
 
@@ -39,7 +37,24 @@ for n in range(2, 20):
     davies.append(dbsc)
     caha = metrics.calinski_harabasz_score(data_final, colors)
     calinski.append(caha)
-    
+
+
+plt.plot(range(2,20), silhouette, marker='o', label='Silhouette')
+plt.xlim(2,20)
+plt.xlabel('Nb clusters')
+
+plt.plot(range(2,20), davies, marker='o', label='Davies')
+plt.xlim(2,20)
+plt.xlabel('Nb clusters')
+
+"""plt.plot(range(2,20), calinski, marker='o')
+plt.xlim(2,20)
+plt.xlabel('Nb clusters')
+plt.ylabel('Calinski coeff')"""
+
+plt.legend()
+
+plt.show()
 #silhouettte coefficient
 #get the index of the best result
 m = max(silhouette)
@@ -49,7 +64,7 @@ print("Silhouette : ",  indice)
 plt.subplot(3,1,1)
 #display the best obtained result
 clustering = KMeans(n_clusters=indice, init='k-means++').fit(data_final)
-colors = clustering.fit_predict(data_final)
+colors = clustering.labels_
 plt.axes(projection='3d').scatter3D(x_list, y_list, z_list, c=colors)
 plt.show()
 
@@ -62,7 +77,7 @@ print("Davies Bouldin : ",  indice)
 plt.subplot(3,1,2)
 #display the best obtained result with davies bouldin metrics
 clustering = KMeans(n_clusters=indice, init='k-means++').fit(data_final)
-colors = clustering.fit_predict(data_final)
+colors = clustering.labels_
 plt.axes(projection='3d').scatter3D(x_list, y_list, z_list, c=colors)
 plt.show()
 
@@ -75,6 +90,6 @@ print("Calinski Harabasz : ",  indice)
 plt.subplot(3,1,3)
 #display the best obtained result
 clustering = KMeans(n_clusters=indice, init='k-means++').fit(data_final)
-colors = clustering.fit_predict(data_final)
+colors = clustering.labels_
 plt.axes(projection='3d').scatter3D(x_list, y_list, z_list, c=colors)
 plt.show()
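On the three fit_predict -> labels_ replacements in kmeans3D.py: once KMeans(...).fit(data_final) has run, labels_ already holds the assignment from that fit, whereas calling fit_predict again re-runs k-means with a fresh initialisation and can produce a clustering different from the one whose score selected indice. A small sketch of the distinction, on placeholder data (the random array and parameter values are illustrative only):

import numpy as np
from sklearn.cluster import KMeans

rng = np.random.default_rng(0)
data_final = rng.random((100, 3))  # placeholder for the 3D points

clustering = KMeans(n_clusters=4, init='k-means++', n_init=10).fit(data_final)
colors = clustering.labels_                  # labels from the fit above, no second run
relabeled = clustering.fit_predict(data_final)  # re-fits the model; labels may differ from colors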
