Browse Source

Creation of the Git repository and reorganisation of the code

Foussats Morgane 2 years ago
commit
3c778aedc1

+ 36
- 0
artificial/agglomerative.py View File

@@ -0,0 +1,36 @@
1
# Agglomerative (average-linkage) clustering of the diamond9 artificial
# dataset: scatter plot of the labels, then three internal quality indices.
from scipy.io import arff
import numpy as np
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import DBSCAN
import hdbscan

n_clusters = 2

# Each ARFF record holds (x, y, class-label); the label is ignored.
records = arff.loadarff('diamond9.arff')[0]

xs = [r[0] for r in records]
ys = [r[1] for r in records]
points = [[a, b] for a, b in zip(xs, ys)]

# Fit the hierarchical model and colour each point by its cluster label.
model = AgglomerativeClustering(n_clusters=n_clusters, linkage='average').fit(points)
labels = model.labels_

plt.scatter(xs, ys, c=labels, s=5)
plt.show()

# Internal quality indices (no ground truth used).
silh = metrics.silhouette_score(points, labels, metric='euclidean')
dbsc = metrics.davies_bouldin_score(points, labels)
caha = metrics.calinski_harabasz_score(points, labels)
print("Coefficient de silhouette : ", silh)
print("Indice de Davies Bouldin : ", dbsc)
print("Indice de calinski harabasz : ", caha)

+ 30
- 0
artificial/dbscan.py View File

@@ -0,0 +1,30 @@
1
# DBSCAN clustering of the diamond9 artificial dataset: scatter plot of the
# labels, then three internal quality indices.
from scipy.io import arff
import numpy as np
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.cluster import DBSCAN

# Each ARFF record holds (x, y, class-label); the label is ignored.
records = arff.loadarff('diamond9.arff')[0]

xs = [r[0] for r in records]
ys = [r[1] for r in records]
points = [[a, b] for a, b in zip(xs, ys)]

# Density-based clustering; points in no dense region get the label -1.
labels = DBSCAN(eps=0.5, min_samples=2).fit(points).labels_

plt.scatter(xs, ys, c=labels, s=5)
plt.show()

# Internal quality indices (no ground truth used).
silh = metrics.silhouette_score(points, labels, metric='euclidean')
dbsc = metrics.davies_bouldin_score(points, labels)
caha = metrics.calinski_harabasz_score(points, labels)
print("Coefficient de silhouette : ", silh)
print("Indice de Davies Bouldin : ", dbsc)
print("Indice de calinski harabasz : ", caha)

+ 34
- 0
artificial/hdbscan.py View File

@@ -0,0 +1,34 @@
1
# HDBSCAN clustering of the diamond9 artificial dataset: scatter plot of the
# labels, then three internal quality indices.
from scipy.io import arff
import numpy as np
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt
from sklearn import metrics
import hdbscan

# WARNING: rename the file and do not call it HDBSCAN
# (a module named hdbscan.py would shadow the hdbscan package on import).

n_clusters = 2  # unused here; HDBSCAN chooses its own number of clusters

# Each ARFF record holds (x, y, class-label); the label is ignored.
records = arff.loadarff('diamond9.arff')[0]

xs = [r[0] for r in records]
ys = [r[1] for r in records]
points = [[a, b] for a, b in zip(xs, ys)]

model = hdbscan.HDBSCAN(min_samples=10)
labels = model.fit_predict(points)

plt.scatter(xs, ys, c=labels, s=5)
plt.show()

# Internal quality indices (no ground truth used).
silh = metrics.silhouette_score(points, labels, metric='euclidean')
dbsc = metrics.davies_bouldin_score(points, labels)
caha = metrics.calinski_harabasz_score(points, labels)
print("Coefficient de silhouette : ", silh)
print("Indice de Davies Bouldin : ", dbsc)
print("Indice de calinski harabasz : ", caha)

+ 26
- 0
artificial/kmeans.py View File

@@ -0,0 +1,26 @@
1
# K-means clustering of an artificial 2D dataset, with a scatter plot of the
# resulting labels.
from scipy.io import arff
import numpy as np
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt

n_clusters = 3

# Each ARFF record holds (x, y, class-label); the label is ignored.
records = arff.loadarff('2d-4c-no9.arff')[0]

xs = [r[0] for r in records]
ys = [r[1] for r in records]
points = [[a, b] for a, b in zip(xs, ys)]

# k-means++ seeding, then colour each point by its assigned cluster.
model = KMeans(n_clusters, init='k-means++').fit(points)

plt.scatter(xs, ys, c=model.labels_, s=5)
plt.show()

+ 77
- 0
real_world/2D/agglomerative2D.py View File

@@ -0,0 +1,77 @@
1
# Agglomerative (average-linkage) clustering of a real-world 2D dataset.
#
# n_clusters is scanned from 2 to 19 and three internal quality indices are
# recorded; the best clustering according to each index is then re-fitted and
# drawn in a 3-row figure.
#
# Fix: the original fitted each display model with .fit() and then fitted it
# a second time via .fit_predict(); a single fit's labels_ is sufficient and
# identical (AgglomerativeClustering is deterministic).
from scipy.io import arff
import numpy as np
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import DBSCAN
import hdbscan

n_clusters = 2  # unused here; kept from the shared script template

data_final = []
x_list = []
y_list = []

silhouette = []
calinski = []
davies = []

data = np.loadtxt('tr.data')

for (x, y) in data:
    x_list.append(x)
    y_list.append(y)
    data_final.append([x, y])

# Record the three indices for every candidate number of clusters.
for n in range(2, 20):
    clustering = AgglomerativeClustering(n_clusters=n, linkage='average').fit(data_final)
    colors = clustering.labels_

    silhouette.append(metrics.silhouette_score(data_final, colors, metric='euclidean'))
    davies.append(metrics.davies_bouldin_score(data_final, colors))
    calinski.append(metrics.calinski_harabasz_score(data_final, colors))


def _best_n(scores, pick):
    """Map the best score back to its n_clusters value (the scan starts at 2)."""
    return scores.index(pick(scores)) + 2


def _plot_best(position, n):
    """Fit the model once for *n* clusters and draw its labels in one subplot."""
    plt.subplot(3, 1, position)
    model = AgglomerativeClustering(n_clusters=n, linkage='average').fit(data_final)
    plt.scatter(x_list, y_list, c=model.labels_, s=5)


# Silhouette coefficient: higher is better.
indice = _best_n(silhouette, max)
print("Silhouette : ",  indice)
_plot_best(1, indice)

# Davies-Bouldin index: lower is better.
indice = _best_n(davies, min)
print("Davies Bouldin : ",  indice)
_plot_best(2, indice)

# Calinski-Harabasz index: higher is better.
indice = _best_n(calinski, max)
print("Calinski Harabasz : ",  indice)
_plot_best(3, indice)

plt.show()

+ 43
- 0
real_world/2D/dbscan2D.py View File

@@ -0,0 +1,43 @@
1
# DBSCAN clustering of a real-world 2D dataset ('zgo.data'): prints three
# internal quality indices and shows a scatter plot of the labels.
from scipy.io import arff
import numpy as np
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import DBSCAN
import hdbscan

n_clusters = 2  # unused here; kept from the shared script template

points = []
xs = []
ys = []

# Unused accumulators kept from the shared script template.
silhouette = []
calinski = []
davies = []

samples = np.loadtxt('zgo.data')

for (a, b) in samples:
    xs.append(a)
    ys.append(b)
    points.append([a, b])

# Density-based clustering; sparse points receive the noise label -1.
labels = DBSCAN(eps=0.35, min_samples=10).fit(points).labels_

silh = metrics.silhouette_score(points, labels, metric='euclidean')
dbsc = metrics.davies_bouldin_score(points, labels)
caha = metrics.calinski_harabasz_score(points, labels)

print("Coefficient de silhouette : ", silh)
print("Indice de Davies Bouldin : ", dbsc)
print("Indice de calinski harabasz : ", caha)

plt.scatter(xs, ys, c=labels, s=5)

plt.show()

+ 83
- 0
real_world/2D/hdbscan2D.py View File

@@ -0,0 +1,83 @@
1
# HDBSCAN clustering of a real-world 2D dataset ('zgo.data').
#
# min_samples is scanned from 2 to 19 and three internal quality indices are
# recorded; the best clustering according to each index is then re-fitted and
# drawn in a 3-row figure.
#
# Fix: the original called plt.show() after the first and second panels, so
# each blocking call opened a window containing a single subplot and the
# intended 3-row grid (the layout used by the sibling agglomerative2D script)
# was never assembled.  All panels are now drawn first and shown once.
from scipy.io import arff
import numpy as np
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import DBSCAN
import hdbscan

n_clusters = 2  # unused here; kept from the shared script template

data_final = []
x_list = []
y_list = []

silhouette = []
calinski = []
davies = []

data = np.loadtxt('zgo.data')

for (x, y) in data:
    x_list.append(x)
    y_list.append(y)
    data_final.append([x, y])

# Get the values of the different coefficients for min_samples from 2 to 19.
for n in range(2, 20):
    colors = hdbscan.HDBSCAN(min_samples=n).fit_predict(data_final)

    silhouette.append(metrics.silhouette_score(data_final, colors, metric='euclidean'))
    davies.append(metrics.davies_bouldin_score(data_final, colors))
    calinski.append(metrics.calinski_harabasz_score(data_final, colors))


def _best_min_samples(scores, pick):
    """Map the best score back to its min_samples value (the scan starts at 2)."""
    return scores.index(pick(scores)) + 2


def _plot_best(position, min_samples):
    """Re-fit with the chosen min_samples and draw the labels in one subplot."""
    plt.subplot(3, 1, position)
    colors = hdbscan.HDBSCAN(min_samples=min_samples).fit_predict(data_final)
    plt.scatter(x_list, y_list, c=colors, s=5)


# Silhouette coefficient: higher is better.
indice = _best_min_samples(silhouette, max)
print("Silhouette : ",  indice)
_plot_best(1, indice)

# Davies-Bouldin index: lower is better.
indice = _best_min_samples(davies, min)
print("Davies Bouldin : ",  indice)
_plot_best(2, indice)

# Calinski-Harabasz index: higher is better.
indice = _best_min_samples(calinski, max)
print("Calinski Harabasz : ",  indice)
_plot_best(3, indice)

plt.show()

+ 81
- 0
real_world/2D/kmeans2D.py View File

@@ -0,0 +1,81 @@
1
# K-means clustering of a real-world 2D dataset ('zgo.data').
#
# n_clusters is scanned from 2 to 19 and three internal quality indices are
# recorded; the best clustering according to each index is then re-fitted and
# drawn in a 3-row figure.
#
# Fixes:
#  * the original display step called .fit() and then .fit_predict(), which
#    re-fits KMeans with a fresh random initialisation — the plotted labels
#    could come from a different clustering than the fitted (and selected)
#    one; the single fit's labels_ is used instead;
#  * plt.show() was called between panels, so each blocking call opened a
#    window with a single subplot instead of the intended 3-row grid (the
#    layout used by the sibling agglomerative2D script); the figure is now
#    shown once at the end.
from scipy.io import arff
import numpy as np
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import DBSCAN
import hdbscan

n_clusters = 2  # unused here; kept from the shared script template

data_final = []
x_list = []
y_list = []

silhouette = []
calinski = []
davies = []

data = np.loadtxt('zgo.data')

for (x, y) in data:
    x_list.append(x)
    y_list.append(y)
    data_final.append([x, y])

# Record the three indices for every candidate number of clusters.
for n in range(2, 20):
    clustering = KMeans(n_clusters=n, init='k-means++').fit(data_final)
    colors = clustering.labels_

    silhouette.append(metrics.silhouette_score(data_final, colors, metric='euclidean'))
    davies.append(metrics.davies_bouldin_score(data_final, colors))
    calinski.append(metrics.calinski_harabasz_score(data_final, colors))


def _best_n(scores, pick):
    """Map the best score back to its n_clusters value (the scan starts at 2)."""
    return scores.index(pick(scores)) + 2


def _plot_best(position, n):
    """Fit KMeans once for *n* clusters and draw its labels in one subplot."""
    plt.subplot(3, 1, position)
    model = KMeans(n_clusters=n, init='k-means++').fit(data_final)
    plt.scatter(x_list, y_list, c=model.labels_, s=5)


# Silhouette coefficient: higher is better.
indice = _best_n(silhouette, max)
print("Silhouette : ",  indice)
_plot_best(1, indice)

# Davies-Bouldin index: lower is better.
indice = _best_n(davies, min)
print("Davies Bouldin : ",  indice)
_plot_best(2, indice)

# Calinski-Harabasz index: higher is better.
indice = _best_n(calinski, max)
print("Calinski Harabasz : ",  indice)
_plot_best(3, indice)

plt.show()

+ 81
- 0
real_world/3D/agglomerative3D.py View File

@@ -0,0 +1,81 @@
1
# Agglomerative clustering of a real-world 3D dataset ('t.data').
#
# n_clusters is scanned from 2 to 19 with single linkage; three internal
# quality indices are recorded, and the best clustering per index is
# re-fitted and drawn in a 3-row figure of 3D scatter plots.
#
# Fixes relative to the original:
#  * the display step used linkage='average' although the scan that selected
#    n_clusters was run with linkage='single' — the plotted model was never
#    the one evaluated; the display now uses 'single' as well;
#  * the display model was fitted twice (.fit() then .fit_predict()); the
#    single fit's labels_ is used instead;
#  * plt.subplot(...) was immediately followed by plt.axes(projection='3d'),
#    which creates a new full-figure axes and ignores the grid position, and
#    plt.show() was called between panels; each panel is now created with
#    plt.subplot(..., projection='3d') and the figure is shown once.
#
# NOTE(review): z is plotted but excluded from the clustering features
# (data_final holds [x, y] only) — confirm whether this is intended.
from scipy.io import arff
import numpy as np
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import DBSCAN
import hdbscan

n_clusters = 2  # unused here; kept from the shared script template

data_final = []
x_list = []
y_list = []
z_list = []

silhouette = []
calinski = []
davies = []

data = np.loadtxt('t.data')

for (x, y, z) in data:
    x_list.append(x)
    y_list.append(y)
    z_list.append(z)
    data_final.append([x, y])

# Record the three indices for every candidate number of clusters.
for n in range(2, 20):
    clustering = AgglomerativeClustering(n_clusters=n, linkage='single').fit(data_final)
    colors = clustering.labels_

    silhouette.append(metrics.silhouette_score(data_final, colors, metric='euclidean'))
    davies.append(metrics.davies_bouldin_score(data_final, colors))
    calinski.append(metrics.calinski_harabasz_score(data_final, colors))


def _best_n(scores, pick):
    """Map the best score back to its n_clusters value (the scan starts at 2)."""
    return scores.index(pick(scores)) + 2


def _plot_best(position, n):
    """Fit once for *n* clusters and draw the labels in one 3D subplot."""
    ax = plt.subplot(3, 1, position, projection='3d')
    model = AgglomerativeClustering(n_clusters=n, linkage='single').fit(data_final)
    ax.scatter3D(x_list, y_list, z_list, c=model.labels_)


# Silhouette coefficient: higher is better.
indice = _best_n(silhouette, max)
print("Silhouette : ",  indice)
_plot_best(1, indice)

# Davies-Bouldin index: lower is better.
indice = _best_n(davies, min)
print("Davies Bouldin : ",  indice)
_plot_best(2, indice)

# Calinski-Harabasz index: higher is better.
indice = _best_n(calinski, max)
print("Calinski Harabasz : ",  indice)
_plot_best(3, indice)

plt.show()

+ 45
- 0
real_world/3D/dbscan3D.py View File

@@ -0,0 +1,45 @@
1
# DBSCAN clustering of a real-world 3D dataset ('t.data'): prints three
# internal quality indices and shows a 3D scatter plot of the labels.
#
# NOTE(review): z is plotted but excluded from the clustering features
# (the feature rows hold [x, y] only) — confirm whether this is intended.
from scipy.io import arff
import numpy as np
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import DBSCAN
import hdbscan

n_clusters = 2  # unused here; kept from the shared script template

points = []
xs = []
ys = []
zs = []

# Unused accumulators kept from the shared script template.
silhouette = []
calinski = []
davies = []

samples = np.loadtxt('t.data')

for (a, b, c) in samples:
    xs.append(a)
    ys.append(b)
    zs.append(c)
    points.append([a, b])

# Density-based clustering; sparse points receive the noise label -1.
labels = DBSCAN(eps=0.25, min_samples=10).fit(points).labels_

silh = metrics.silhouette_score(points, labels, metric='euclidean')
dbsc = metrics.davies_bouldin_score(points, labels)
caha = metrics.calinski_harabasz_score(points, labels)

print("Coefficient de silhouette : ", silh)
print("Indice de Davies Bouldin : ", dbsc)
print("Indice de calinski harabasz : ", caha)

plt.axes(projection='3d').scatter3D(xs, ys, zs, c=labels)

plt.show()

+ 84
- 0
real_world/3D/hdbscan3D.py View File

@@ -0,0 +1,84 @@
1
# HDBSCAN clustering of a real-world 3D dataset ('a.data').
#
# min_samples is scanned from 2 to 19 and three internal quality indices are
# recorded; the best clustering according to each index is then re-fitted and
# drawn in a 3-row figure of 3D scatter plots.
#
# Fixes relative to the original:
#  * plt.subplot(...) was immediately followed by plt.axes(projection='3d'),
#    which creates a new full-figure axes and ignores the grid position; each
#    panel is now created with plt.subplot(..., projection='3d');
#  * plt.show() was called between panels, so each blocking call opened a
#    window with a single panel; the figure is now shown once at the end.
#
# NOTE(review): z is plotted but excluded from the clustering features
# (data_final holds [x, y] only) — confirm whether this is intended.
from scipy.io import arff
import numpy as np
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import DBSCAN
import hdbscan

n_clusters = 2  # unused here; kept from the shared script template

data_final = []
x_list = []
y_list = []
z_list = []

silhouette = []
calinski = []
davies = []

data = np.loadtxt('a.data')

for (x, y, z) in data:
    x_list.append(x)
    y_list.append(y)
    z_list.append(z)
    data_final.append([x, y])

# Get the values of the different coefficients for min_samples from 2 to 19.
for n in range(2, 20):
    colors = hdbscan.HDBSCAN(min_samples=n).fit_predict(data_final)

    silhouette.append(metrics.silhouette_score(data_final, colors, metric='euclidean'))
    davies.append(metrics.davies_bouldin_score(data_final, colors))
    calinski.append(metrics.calinski_harabasz_score(data_final, colors))


def _best_min_samples(scores, pick):
    """Map the best score back to its min_samples value (the scan starts at 2)."""
    return scores.index(pick(scores)) + 2


def _plot_best(position, min_samples):
    """Re-fit with the chosen min_samples and draw the labels in one 3D subplot."""
    ax = plt.subplot(3, 1, position, projection='3d')
    colors = hdbscan.HDBSCAN(min_samples=min_samples).fit_predict(data_final)
    ax.scatter3D(x_list, y_list, z_list, c=colors)


# Silhouette coefficient: higher is better.
indice = _best_min_samples(silhouette, max)
print("Silhouette : ",  indice)
_plot_best(1, indice)

# Davies-Bouldin index: lower is better.
indice = _best_min_samples(davies, min)
print("Davies Bouldin : ",  indice)
_plot_best(2, indice)

# Calinski-Harabasz index: higher is better.
indice = _best_min_samples(calinski, max)
print("Calinski Harabasz : ",  indice)
_plot_best(3, indice)

plt.show()

+ 80
- 0
real_world/3D/kmeans3D.py View File

@@ -0,0 +1,80 @@
1
# K-means clustering of a real-world 3D dataset ('t.data').
#
# n_clusters is scanned from 2 to 19 and three internal quality indices are
# recorded; the best clustering according to each index is then re-fitted and
# drawn in a 3-row figure of 3D scatter plots.
#
# Fixes relative to the original:
#  * the display step called .fit() and then .fit_predict(), which re-fits
#    KMeans with a fresh random initialisation — the plotted labels could
#    come from a different clustering than the one selected; the single
#    fit's labels_ is used instead;
#  * plt.subplot(...) was immediately followed by plt.axes(projection='3d'),
#    which creates a new full-figure axes and ignores the grid position; each
#    panel is now created with plt.subplot(..., projection='3d');
#  * plt.show() was called between panels, so each blocking call opened a
#    window with a single panel; the figure is now shown once at the end.
#
# NOTE(review): z is plotted but excluded from the clustering features
# (data_final holds [x, y] only) — confirm whether this is intended.
from scipy.io import arff
import numpy as np
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import DBSCAN
import hdbscan

n_clusters = 2  # unused here; kept from the shared script template

data_final = []
x_list = []
y_list = []
z_list = []

silhouette = []
calinski = []
davies = []

data = np.loadtxt('t.data')

for (x, y, z) in data:
    x_list.append(x)
    y_list.append(y)
    z_list.append(z)
    data_final.append([x, y])

# Record the three indices for every candidate number of clusters.
for n in range(2, 20):
    clustering = KMeans(n_clusters=n, init='k-means++').fit(data_final)
    colors = clustering.labels_

    silhouette.append(metrics.silhouette_score(data_final, colors, metric='euclidean'))
    davies.append(metrics.davies_bouldin_score(data_final, colors))
    calinski.append(metrics.calinski_harabasz_score(data_final, colors))


def _best_n(scores, pick):
    """Map the best score back to its n_clusters value (the scan starts at 2)."""
    return scores.index(pick(scores)) + 2


def _plot_best(position, n):
    """Fit KMeans once for *n* clusters and draw the labels in one 3D subplot."""
    ax = plt.subplot(3, 1, position, projection='3d')
    model = KMeans(n_clusters=n, init='k-means++').fit(data_final)
    ax.scatter3D(x_list, y_list, z_list, c=model.labels_)


# Silhouette coefficient: higher is better.
indice = _best_n(silhouette, max)
print("Silhouette : ",  indice)
_plot_best(1, indice)

# Davies-Bouldin index: lower is better.
indice = _best_n(davies, min)
print("Davies Bouldin : ",  indice)
_plot_best(2, indice)

# Calinski-Harabasz index: higher is better.
indice = _best_n(calinski, max)
print("Calinski Harabasz : ",  indice)
_plot_best(3, indice)

plt.show()

Loading…
Cancel
Save