# Listing metadata: 36 lines, 980 B, Python
from scipy.io import arff
|
|
import numpy as np
|
|
from sklearn.cluster import KMeans
|
|
from sklearn.datasets import make_blobs
|
|
import matplotlib.pyplot as plt
|
|
from sklearn import metrics
|
|
from sklearn.cluster import AgglomerativeClustering
|
|
from sklearn.cluster import DBSCAN
|
|
import hdbscan
|
|
|
|
# Agglomerative (average-linkage) clustering of a 2-D ARFF dataset, followed by
# a scatter plot of the resulting labels and three internal cluster-quality
# scores printed to stdout.

# Number of clusters requested from the agglomerative algorithm.
n_clusters = 2

# loadarff() returns a (records, metadata) pair; only the records are used.
data = arff.loadarff('diamond9.arff')[0]

# Each record is unpacked into exactly three fields; the first two are used as
# the point coordinates and the third is ignored.
# NOTE(review): the third field is presumably the reference class label of the
# dataset - confirm against the ARFF header.
data_final = [[x, y] for (x, y, _) in data]
x_list = [point[0] for point in data_final]
y_list = [point[1] for point in data_final]

clustering = AgglomerativeClustering(
    n_clusters=n_clusters,
    linkage='average',
).fit(data_final)

# One cluster label per sample; reused both as the point colours in the plot
# and as the label vector for the metrics below.
colors = clustering.labels_

plt.scatter(x_list, y_list, c=colors, s=5)
# The metrics below are computed and printed only after plt.show() returns.
plt.show()

silh = metrics.silhouette_score(data_final, colors, metric='euclidean')
dbsc = metrics.davies_bouldin_score(data_final, colors)
caha = metrics.calinski_harabasz_score(data_final, colors)

print("Coefficient de silhouette : ", silh)
print("Indice de Davies Bouldin : ", dbsc)
print("Indice de calinski harabasz : ", caha)
|
|
|