Creation of git and reorganisation of the code
This commit is contained in:
commit
3c778aedc1
12 changed files with 700 additions and 0 deletions
36
artificial/agglomerative.py
Normal file
36
artificial/agglomerative.py
Normal file
|
@ -0,0 +1,36 @@
|
||||||
|
from scipy.io import arff
|
||||||
|
import numpy as np
|
||||||
|
from sklearn.cluster import KMeans
|
||||||
|
from sklearn.datasets import make_blobs
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
from sklearn import metrics
|
||||||
|
from sklearn.cluster import AgglomerativeClustering
|
||||||
|
from sklearn.cluster import DBSCAN
|
||||||
|
import hdbscan
|
||||||
|
|
||||||
|
# Average-linkage agglomerative clustering of the points in diamond9.arff:
# fit the model, plot the labelled points, then print three internal
# validity indices for the resulting partition.
n_clusters = 2

data = arff.loadarff('diamond9.arff')[0]

# Every record unpacks into three fields; the first two are the coordinates
# used below and the third is ignored.
data_final = [[x, y] for (x, y, z) in data]
x_list = [point[0] for point in data_final]
y_list = [point[1] for point in data_final]

model = AgglomerativeClustering(linkage='average', n_clusters=n_clusters)
clustering = model.fit(data_final)
colors = clustering.labels_

plt.scatter(x_list, y_list, s=5, c=colors)
plt.show()

# The scores are computed on the same points and labels that were plotted.
silh = metrics.silhouette_score(data_final, colors, metric='euclidean')
dbsc = metrics.davies_bouldin_score(data_final, colors)
caha = metrics.calinski_harabasz_score(data_final, colors)

for caption, value in (
    ("Coefficient de silhouette : ", silh),
    ("Indice de Davies Bouldin : ", dbsc),
    ("Indice de calinski harabasz : ", caha),
):
    print(caption, value)
|
||||||
|
|
30
artificial/dbscan.py
Normal file
30
artificial/dbscan.py
Normal file
|
@ -0,0 +1,30 @@
|
||||||
|
from scipy.io import arff
|
||||||
|
import numpy as np
|
||||||
|
from sklearn.cluster import KMeans
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
from sklearn import metrics
|
||||||
|
from sklearn.cluster import DBSCAN
|
||||||
|
|
||||||
|
# DBSCAN clustering of the points in diamond9.arff: fit the model, plot the
# labelled points, then print three internal validity indices.
data = arff.loadarff('diamond9.arff')[0]
data_final = []
x_list = []
y_list = []

# Every record unpacks into three fields; only the first two are used as
# coordinates, the third is ignored.
for (x, y, z) in data:
    x_list.append(x)
    y_list.append(y)
    data_final.append([x, y])

clustering = DBSCAN(eps=0.5, min_samples=2).fit(data_final)

# DBSCAN marks noise samples with the label -1; they are coloured and scored
# here like any other label.
colors = clustering.labels_

plt.scatter(x_list, y_list, c=colors, s=5)
plt.show()

# The three indices are only defined for 2 <= n_labels <= n_samples - 1;
# scikit-learn raises ValueError outside that range, which happens as soon as
# DBSCAN returns a single cluster or only noise.
n_labels = len(set(colors))
if 2 <= n_labels < len(data_final):
    silh = metrics.silhouette_score(data_final, colors, metric='euclidean')
    dbsc = metrics.davies_bouldin_score(data_final, colors)
    caha = metrics.calinski_harabasz_score(data_final, colors)
    print("Coefficient de silhouette : ", silh)
    print("Indice de Davies Bouldin : ", dbsc)
    print("Indice de calinski harabasz : ", caha)
else:
    print("Indices non calculables : nombre de labels distincts = ", n_labels)
|
34
artificial/hdbscan.py
Normal file
34
artificial/hdbscan.py
Normal file
|
@ -0,0 +1,34 @@
|
||||||
|
from scipy.io import arff
|
||||||
|
import numpy as np
|
||||||
|
from sklearn.datasets import make_blobs
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
from sklearn import metrics
|
||||||
|
import hdbscan
|
||||||
|
|
||||||
|
# WARNING: this script must not be saved as hdbscan.py -- with that name
# `import hdbscan` resolves to this file instead of the hdbscan library.

# HDBSCAN clustering of the points in diamond9.arff: fit the model, plot the
# labelled points, then print three internal validity indices.
n_clusters = 2  # not used below

data = arff.loadarff('diamond9.arff')[0]
data_final = []
x_list = []
y_list = []

# Every record unpacks into three fields; only the first two are used as
# coordinates, the third is ignored.
for (x, y, z) in data:
    x_list.append(x)
    y_list.append(y)
    data_final.append([x, y])

clustering = hdbscan.HDBSCAN(min_samples=10)

colors = clustering.fit_predict(data_final)

plt.scatter(x_list, y_list, c=colors, s=5)
plt.show()

# The three indices are only defined for 2 <= n_labels <= n_samples - 1;
# scikit-learn raises ValueError outside that range, so the degenerate case
# is reported instead of crashing after the plot.
n_labels = len(set(colors))
if 2 <= n_labels < len(data_final):
    silh = metrics.silhouette_score(data_final, colors, metric='euclidean')
    dbsc = metrics.davies_bouldin_score(data_final, colors)
    caha = metrics.calinski_harabasz_score(data_final, colors)
    print("Coefficient de silhouette : ", silh)
    print("Indice de Davies Bouldin : ", dbsc)
    print("Indice de calinski harabasz : ", caha)
else:
    print("Indices non calculables : nombre de labels distincts = ", n_labels)
|
26
artificial/kmeans.py
Normal file
26
artificial/kmeans.py
Normal file
|
@ -0,0 +1,26 @@
|
||||||
|
from scipy.io import arff
|
||||||
|
import numpy as np
|
||||||
|
from sklearn.cluster import KMeans
|
||||||
|
from sklearn.datasets import make_blobs
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
|
# K-means clustering (k-means++ initialisation) of the points in
# 2d-4c-no9.arff, followed by a scatter plot coloured by cluster label.
n_clusters = 3

data = arff.loadarff('2d-4c-no9.arff')[0]

# Every record unpacks into three fields; the first two are the coordinates
# used below and the third is ignored.
data_final = [[x, y] for (x, y, z) in data]
x_list = [point[0] for point in data_final]
y_list = [point[1] for point in data_final]

kmeans = KMeans(n_clusters=n_clusters, init='k-means++')
kmeans.fit(data_final)
colors = kmeans.labels_

plt.scatter(x_list, y_list, s=5, c=colors)
plt.show()
|
77
real_world/2D/agglomerative2D.py
Normal file
77
real_world/2D/agglomerative2D.py
Normal file
|
@ -0,0 +1,77 @@
|
||||||
|
from scipy.io import arff
|
||||||
|
import numpy as np
|
||||||
|
from sklearn.cluster import KMeans
|
||||||
|
from sklearn.datasets import make_blobs
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
from sklearn import metrics
|
||||||
|
from sklearn.cluster import AgglomerativeClustering
|
||||||
|
from sklearn.cluster import DBSCAN
|
||||||
|
import hdbscan
|
||||||
|
|
||||||
|
# Sweep the number of clusters (2..19) for average-linkage agglomerative
# clustering of the 2-D points in tr.data, then show, one subplot per index,
# the partition preferred by the silhouette, Davies-Bouldin and
# Calinski-Harabasz indices.
n_clusters = 2  # not used below

data_final = []
x_list = []
y_list = []

silhouette = []
calinski = []
davies = []

data = np.loadtxt('tr.data')

for (x, y) in data:
    x_list.append(x)
    y_list.append(y)
    data_final.append([x, y])

# Cluster counts tried; position i in each score list belongs to candidates[i].
candidates = list(range(2, 20))
# Labels found for every cluster count, kept so the best partitions can be
# plotted without fitting the same model again.
labels_by_n = {}

for n in candidates:
    clustering = AgglomerativeClustering(n_clusters=n, linkage='average').fit(data_final)
    colors = clustering.labels_
    labels_by_n[n] = colors

    silhouette.append(metrics.silhouette_score(data_final, colors, metric='euclidean'))
    davies.append(metrics.davies_bouldin_score(data_final, colors))
    calinski.append(metrics.calinski_harabasz_score(data_final, colors))


def plot_best(caption, scores, choose, position):
    """Print and plot the cluster count selected by one index.

    caption  -- text printed in front of the selected cluster count
    scores   -- one score per entry of ``candidates``
    choose   -- ``max`` or ``min``, depending on the index
    position -- row of the 3x1 subplot grid to draw into
    """
    # list.index returns the first occurrence, so ties go to the smallest count.
    indice = candidates[scores.index(choose(scores))]
    print(caption, indice)
    plt.subplot(3, 1, position)
    plt.scatter(x_list, y_list, c=labels_by_n[indice], s=5)


# Davies-Bouldin is minimised, the other two indices are maximised.
plot_best("Silhouette : ", silhouette, max, 1)
plot_best("Davies Bouldin : ", davies, min, 2)
plot_best("Calinski Harabasz : ", calinski, max, 3)

plt.show()
|
43
real_world/2D/dbscan2D.py
Normal file
43
real_world/2D/dbscan2D.py
Normal file
|
@ -0,0 +1,43 @@
|
||||||
|
from scipy.io import arff
|
||||||
|
import numpy as np
|
||||||
|
from sklearn.cluster import KMeans
|
||||||
|
from sklearn.datasets import make_blobs
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
from sklearn import metrics
|
||||||
|
from sklearn.cluster import AgglomerativeClustering
|
||||||
|
from sklearn.cluster import DBSCAN
|
||||||
|
import hdbscan
|
||||||
|
|
||||||
|
# DBSCAN clustering of the 2-D points in zgo.data: fit the model, print three
# internal validity indices, then plot the labelled points.
n_clusters = 2  # not used below

data_final = []
x_list = []
y_list = []

# Not filled by this script.
silhouette = []
calinski = []
davies = []

data = np.loadtxt('zgo.data')

for (x, y) in data:
    x_list.append(x)
    y_list.append(y)
    data_final.append([x, y])

clustering = DBSCAN(eps=0.35, min_samples=10).fit(data_final)
# DBSCAN marks noise samples with the label -1; they are coloured and scored
# here like any other label.
colors = clustering.labels_

# The three indices are only defined for 2 <= n_labels <= n_samples - 1;
# scikit-learn raises ValueError outside that range, which would also prevent
# the plot below from being shown.
n_labels = len(set(colors))
if 2 <= n_labels < len(data_final):
    silh = metrics.silhouette_score(data_final, colors, metric='euclidean')
    dbsc = metrics.davies_bouldin_score(data_final, colors)
    caha = metrics.calinski_harabasz_score(data_final, colors)

    print("Coefficient de silhouette : ", silh)
    print("Indice de Davies Bouldin : ", dbsc)
    print("Indice de calinski harabasz : ", caha)
else:
    print("Indices non calculables : nombre de labels distincts = ", n_labels)

plt.scatter(x_list, y_list, c=colors, s=5)

plt.show()
|
83
real_world/2D/hdbscan2D.py
Normal file
83
real_world/2D/hdbscan2D.py
Normal file
|
@ -0,0 +1,83 @@
|
||||||
|
from scipy.io import arff
|
||||||
|
import numpy as np
|
||||||
|
from sklearn.cluster import KMeans
|
||||||
|
from sklearn.datasets import make_blobs
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
from sklearn import metrics
|
||||||
|
from sklearn.cluster import AgglomerativeClustering
|
||||||
|
from sklearn.cluster import DBSCAN
|
||||||
|
import hdbscan
|
||||||
|
|
||||||
|
# Sweep HDBSCAN's min_samples (2..19) on the 2-D points in zgo.data, then
# show, one subplot per index, the labelling preferred by the silhouette,
# Davies-Bouldin and Calinski-Harabasz indices.
n_clusters = 2  # not used below

data_final = []
x_list = []
y_list = []

silhouette = []
calinski = []
davies = []

data = np.loadtxt('zgo.data')

for (x, y) in data:
    x_list.append(x)
    y_list.append(y)
    data_final.append([x, y])

# min_samples values whose labelling could be scored; position i in each
# score list belongs to tested[i].
tested = []
# Labels found for every scored min_samples value, reused for the plots.
labels_by_n = {}

# Collect the three indices for min_samples = 2 .. 19.
for n in range(2, 20):
    clustering = hdbscan.HDBSCAN(min_samples=n)
    colors = clustering.fit_predict(data_final)

    # The indices are only defined for 2 <= n_labels <= n_samples - 1 and
    # scikit-learn raises ValueError otherwise; skip such a run rather than
    # abort the whole sweep.
    if not 2 <= len(set(colors)) < len(data_final):
        continue

    tested.append(n)
    labels_by_n[n] = colors
    silhouette.append(metrics.silhouette_score(data_final, colors, metric='euclidean'))
    davies.append(metrics.davies_bouldin_score(data_final, colors))
    calinski.append(metrics.calinski_harabasz_score(data_final, colors))


def plot_best(caption, scores, choose, position):
    """Print and plot the min_samples value selected by one index.

    caption  -- text printed in front of the selected min_samples value
    scores   -- one score per entry of ``tested``
    choose   -- ``max`` or ``min``, depending on the index
    position -- row of the 3x1 subplot grid to draw into
    """
    # list.index returns the first occurrence, so ties go to the smallest value.
    indice = tested[scores.index(choose(scores))]
    print(caption, indice)
    plt.subplot(3, 1, position)
    plt.scatter(x_list, y_list, c=labels_by_n[indice], s=5)


if tested:
    # Davies-Bouldin is minimised, the other two indices are maximised.
    plot_best("Silhouette : ", silhouette, max, 1)
    plot_best("Davies Bouldin : ", davies, min, 2)
    plot_best("Calinski Harabasz : ", calinski, max, 3)

    # A single show() so the three subplots share one figure.
    plt.show()
else:
    print("Indices non calculables : aucun min_samples ne donne au moins 2 labels")
|
81
real_world/2D/kmeans2D.py
Normal file
81
real_world/2D/kmeans2D.py
Normal file
|
@ -0,0 +1,81 @@
|
||||||
|
from scipy.io import arff
|
||||||
|
import numpy as np
|
||||||
|
from sklearn.cluster import KMeans
|
||||||
|
from sklearn.datasets import make_blobs
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
from sklearn import metrics
|
||||||
|
from sklearn.cluster import AgglomerativeClustering
|
||||||
|
from sklearn.cluster import DBSCAN
|
||||||
|
import hdbscan
|
||||||
|
|
||||||
|
# Sweep the number of clusters (2..19) for k-means on the 2-D points in
# zgo.data, then show, one subplot per index, the partition preferred by the
# silhouette, Davies-Bouldin and Calinski-Harabasz indices.
n_clusters = 2  # not used below

data_final = []
x_list = []
y_list = []

silhouette = []
calinski = []
davies = []

data = np.loadtxt('zgo.data')

for (x, y) in data:
    x_list.append(x)
    y_list.append(y)
    data_final.append([x, y])

# Cluster counts tried; position i in each score list belongs to candidates[i].
candidates = list(range(2, 20))
# Labels found for every cluster count. KMeans is created without a fixed
# random_state, so fitting again for display could yield a different
# partition from the one that was scored; the scored labels are kept instead.
labels_by_n = {}

for n in candidates:
    clustering = KMeans(n_clusters=n, init='k-means++').fit(data_final)
    colors = clustering.labels_
    labels_by_n[n] = colors

    silhouette.append(metrics.silhouette_score(data_final, colors, metric='euclidean'))
    davies.append(metrics.davies_bouldin_score(data_final, colors))
    calinski.append(metrics.calinski_harabasz_score(data_final, colors))


def plot_best(caption, scores, choose, position):
    """Print and plot the cluster count selected by one index.

    caption  -- text printed in front of the selected cluster count
    scores   -- one score per entry of ``candidates``
    choose   -- ``max`` or ``min``, depending on the index
    position -- row of the 3x1 subplot grid to draw into
    """
    # list.index returns the first occurrence, so ties go to the smallest count.
    indice = candidates[scores.index(choose(scores))]
    print(caption, indice)
    plt.subplot(3, 1, position)
    plt.scatter(x_list, y_list, c=labels_by_n[indice], s=5)


# Davies-Bouldin is minimised, the other two indices are maximised.
plot_best("Silhouette : ", silhouette, max, 1)
plot_best("Davies Bouldin : ", davies, min, 2)
plot_best("Calinski Harabasz : ", calinski, max, 3)

# A single show() so the three subplots share one figure.
plt.show()
|
81
real_world/3D/agglomerative3D.py
Normal file
81
real_world/3D/agglomerative3D.py
Normal file
|
@ -0,0 +1,81 @@
|
||||||
|
from scipy.io import arff
|
||||||
|
import numpy as np
|
||||||
|
from sklearn.cluster import KMeans
|
||||||
|
from sklearn.datasets import make_blobs
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
from sklearn import metrics
|
||||||
|
from sklearn.cluster import AgglomerativeClustering
|
||||||
|
from sklearn.cluster import DBSCAN
|
||||||
|
import hdbscan
|
||||||
|
|
||||||
|
# Sweep the number of clusters (2..19) for single-linkage agglomerative
# clustering of the 3-D points in t.data, then show, one 3-D subplot per
# index, the partition preferred by the silhouette, Davies-Bouldin and
# Calinski-Harabasz indices.
n_clusters = 2  # not used below

data_final = []
x_list = []
y_list = []
z_list = []

silhouette = []
calinski = []
davies = []

data = np.loadtxt('t.data')

for (x, y, z) in data:
    x_list.append(x)
    y_list.append(y)
    z_list.append(z)
    # All three coordinates are features; leaving z out would cluster a 2-D
    # projection of the points that are plotted in 3-D.
    data_final.append([x, y, z])

# Cluster counts tried; position i in each score list belongs to candidates[i].
candidates = list(range(2, 20))
# Labels found for every cluster count. Plotting these, rather than fitting
# again, guarantees the figure shows the partition that was scored.
labels_by_n = {}

for n in candidates:
    clustering = AgglomerativeClustering(n_clusters=n, linkage='single').fit(data_final)
    colors = clustering.labels_
    labels_by_n[n] = colors

    silhouette.append(metrics.silhouette_score(data_final, colors, metric='euclidean'))
    davies.append(metrics.davies_bouldin_score(data_final, colors))
    calinski.append(metrics.calinski_harabasz_score(data_final, colors))


def plot_best(caption, scores, choose, position):
    """Print and plot the cluster count selected by one index.

    caption  -- text printed in front of the selected cluster count
    scores   -- one score per entry of ``candidates``
    choose   -- ``max`` or ``min``, depending on the index
    position -- row of the 3x1 subplot grid to draw into
    """
    # list.index returns the first occurrence, so ties go to the smallest count.
    indice = candidates[scores.index(choose(scores))]
    print(caption, indice)
    # Create the subplot itself as a 3-D axes instead of stacking a separate
    # full-figure 3-D axes on top of an empty 2-D subplot.
    axes = plt.subplot(3, 1, position, projection='3d')
    axes.scatter3D(x_list, y_list, z_list, c=labels_by_n[indice])


# Davies-Bouldin is minimised, the other two indices are maximised.
plot_best("Silhouette : ", silhouette, max, 1)
plot_best("Davies Bouldin : ", davies, min, 2)
plot_best("Calinski Harabasz : ", calinski, max, 3)

# A single show() so the three subplots share one figure.
plt.show()
|
45
real_world/3D/dbscan3D.py
Normal file
45
real_world/3D/dbscan3D.py
Normal file
|
@ -0,0 +1,45 @@
|
||||||
|
from scipy.io import arff
|
||||||
|
import numpy as np
|
||||||
|
from sklearn.cluster import KMeans
|
||||||
|
from sklearn.datasets import make_blobs
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
from sklearn import metrics
|
||||||
|
from sklearn.cluster import AgglomerativeClustering
|
||||||
|
from sklearn.cluster import DBSCAN
|
||||||
|
import hdbscan
|
||||||
|
|
||||||
|
# DBSCAN clustering of the 3-D points in t.data: fit the model, print three
# internal validity indices, then plot the labelled points in 3-D.
n_clusters = 2  # not used below

data_final = []
x_list = []
y_list = []
z_list = []

# Not filled by this script.
silhouette = []
calinski = []
davies = []

data = np.loadtxt('t.data')

for (x, y, z) in data:
    x_list.append(x)
    y_list.append(y)
    z_list.append(z)
    # All three coordinates are features; leaving z out would cluster a 2-D
    # projection of the points that are plotted in 3-D.
    data_final.append([x, y, z])

# NOTE(review): eps=0.25 predates the inclusion of z in the feature vectors;
# distances are now measured in 3-D, so the value may need re-tuning.
clustering = DBSCAN(eps=0.25, min_samples=10).fit(data_final)
# DBSCAN marks noise samples with the label -1; they are coloured and scored
# here like any other label.
colors = clustering.labels_

# The three indices are only defined for 2 <= n_labels <= n_samples - 1;
# scikit-learn raises ValueError outside that range, which would also prevent
# the plot below from being shown.
n_labels = len(set(colors))
if 2 <= n_labels < len(data_final):
    silh = metrics.silhouette_score(data_final, colors, metric='euclidean')
    dbsc = metrics.davies_bouldin_score(data_final, colors)
    caha = metrics.calinski_harabasz_score(data_final, colors)

    print("Coefficient de silhouette : ", silh)
    print("Indice de Davies Bouldin : ", dbsc)
    print("Indice de calinski harabasz : ", caha)
else:
    print("Indices non calculables : nombre de labels distincts = ", n_labels)

plt.axes(projection='3d').scatter3D(x_list, y_list, z_list, c=colors)

plt.show()
|
84
real_world/3D/hdbscan3D.py
Normal file
84
real_world/3D/hdbscan3D.py
Normal file
|
@ -0,0 +1,84 @@
|
||||||
|
from scipy.io import arff
|
||||||
|
import numpy as np
|
||||||
|
from sklearn.cluster import KMeans
|
||||||
|
from sklearn.datasets import make_blobs
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
from sklearn import metrics
|
||||||
|
from sklearn.cluster import AgglomerativeClustering
|
||||||
|
from sklearn.cluster import DBSCAN
|
||||||
|
import hdbscan
|
||||||
|
|
||||||
|
# Sweep HDBSCAN's min_samples (2..19) on the 3-D points in a.data, then show,
# one 3-D subplot per index, the labelling preferred by the silhouette,
# Davies-Bouldin and Calinski-Harabasz indices.
n_clusters = 2  # not used below

data_final = []
x_list = []
y_list = []
z_list = []

silhouette = []
calinski = []
davies = []

data = np.loadtxt('a.data')

for (x, y, z) in data:
    x_list.append(x)
    y_list.append(y)
    z_list.append(z)
    # All three coordinates are features; leaving z out would cluster a 2-D
    # projection of the points that are plotted in 3-D.
    data_final.append([x, y, z])

# min_samples values whose labelling could be scored; position i in each
# score list belongs to tested[i].
tested = []
# Labels found for every scored min_samples value, reused for the plots.
labels_by_n = {}

# Collect the three indices for min_samples = 2 .. 19.
for n in range(2, 20):
    clustering = hdbscan.HDBSCAN(min_samples=n)
    colors = clustering.fit_predict(data_final)

    # The indices are only defined for 2 <= n_labels <= n_samples - 1 and
    # scikit-learn raises ValueError otherwise; skip such a run rather than
    # abort the whole sweep.
    if not 2 <= len(set(colors)) < len(data_final):
        continue

    tested.append(n)
    labels_by_n[n] = colors
    silhouette.append(metrics.silhouette_score(data_final, colors, metric='euclidean'))
    davies.append(metrics.davies_bouldin_score(data_final, colors))
    calinski.append(metrics.calinski_harabasz_score(data_final, colors))


def plot_best(caption, scores, choose, position):
    """Print and plot the min_samples value selected by one index.

    caption  -- text printed in front of the selected min_samples value
    scores   -- one score per entry of ``tested``
    choose   -- ``max`` or ``min``, depending on the index
    position -- row of the 3x1 subplot grid to draw into
    """
    # list.index returns the first occurrence, so ties go to the smallest value.
    indice = tested[scores.index(choose(scores))]
    print(caption, indice)
    # Create the subplot itself as a 3-D axes instead of stacking a separate
    # full-figure 3-D axes on top of an empty 2-D subplot.
    axes = plt.subplot(3, 1, position, projection='3d')
    axes.scatter3D(x_list, y_list, z_list, c=labels_by_n[indice])


if tested:
    # Davies-Bouldin is minimised, the other two indices are maximised.
    plot_best("Silhouette : ", silhouette, max, 1)
    plot_best("Davies Bouldin : ", davies, min, 2)
    plot_best("Calinski Harabasz : ", calinski, max, 3)

    # A single show() so the three subplots share one figure.
    plt.show()
else:
    print("Indices non calculables : aucun min_samples ne donne au moins 2 labels")
|
80
real_world/3D/kmeans3D.py
Normal file
80
real_world/3D/kmeans3D.py
Normal file
|
@ -0,0 +1,80 @@
|
||||||
|
from scipy.io import arff
|
||||||
|
import numpy as np
|
||||||
|
from sklearn.cluster import KMeans
|
||||||
|
from sklearn.datasets import make_blobs
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
from sklearn import metrics
|
||||||
|
from sklearn.cluster import AgglomerativeClustering
|
||||||
|
from sklearn.cluster import DBSCAN
|
||||||
|
import hdbscan
|
||||||
|
|
||||||
|
# Sweep the number of clusters (2..19) for k-means on the 3-D points in
# t.data, then show, one 3-D subplot per index, the partition preferred by
# the silhouette, Davies-Bouldin and Calinski-Harabasz indices.
n_clusters = 2  # not used below

data_final = []
x_list = []
y_list = []
z_list = []

silhouette = []
calinski = []
davies = []

data = np.loadtxt('t.data')

for (x, y, z) in data:
    x_list.append(x)
    y_list.append(y)
    z_list.append(z)
    # All three coordinates are features; leaving z out would cluster a 2-D
    # projection of the points that are plotted in 3-D.
    data_final.append([x, y, z])

# Cluster counts tried; position i in each score list belongs to candidates[i].
candidates = list(range(2, 20))
# Labels found for every cluster count. KMeans is created without a fixed
# random_state, so fitting again for display could yield a different
# partition from the one that was scored; the scored labels are kept instead.
labels_by_n = {}

for n in candidates:
    clustering = KMeans(n_clusters=n, init='k-means++').fit(data_final)
    colors = clustering.labels_
    labels_by_n[n] = colors

    silhouette.append(metrics.silhouette_score(data_final, colors, metric='euclidean'))
    davies.append(metrics.davies_bouldin_score(data_final, colors))
    calinski.append(metrics.calinski_harabasz_score(data_final, colors))


def plot_best(caption, scores, choose, position):
    """Print and plot the cluster count selected by one index.

    caption  -- text printed in front of the selected cluster count
    scores   -- one score per entry of ``candidates``
    choose   -- ``max`` or ``min``, depending on the index
    position -- row of the 3x1 subplot grid to draw into
    """
    # list.index returns the first occurrence, so ties go to the smallest count.
    indice = candidates[scores.index(choose(scores))]
    print(caption, indice)
    # Create the subplot itself as a 3-D axes instead of stacking a separate
    # full-figure 3-D axes on top of an empty 2-D subplot.
    axes = plt.subplot(3, 1, position, projection='3d')
    axes.scatter3D(x_list, y_list, z_list, c=labels_by_n[indice])


# Davies-Bouldin is minimised, the other two indices are maximised.
plot_best("Silhouette : ", silhouette, max, 1)
plot_best("Davies Bouldin : ", davies, min, 2)
plot_best("Calinski Harabasz : ", calinski, max, 3)

# A single show() so the three subplots share one figure.
plt.show()
|
Loading…
Reference in a new issue