204 lines
8.6 KiB
Python
204 lines
8.6 KiB
Python
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
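"""
Benchmark k-means, agglomerative, DBSCAN and mean-shift clustering on the
datasets listed in ``dataset_name_list`` and plot the resulting metrics.

Created on Wed Dec 8 16:07:28 2021

@author: pfaure
"""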
|
|
from numpy import arange
|
|
from myplotlib import print_1d_data
|
|
from mydatalib import (extract_data_txt, scale_data, apply_kmeans,
|
|
apply_agglomerative_clustering, apply_DBSCAN,
|
|
apply_mean_shift, evaluate)
|
|
|
|
# Prefix prepended to each dataset name before it is handed to
# extract_data_txt.
path = './new-data/'

# Datasets to process. eps_list is matched to this list by position, so the
# two lists must always have the same length and order.
dataset_name_list = ["d32", "d64"]

# Distance passed to DBSCAN for each dataset above (same order).
# Previous values kept for reference: [0.6, 0.75, 0.2, 0.8, 0.8]
eps_list = [0.8, 0.8]

# Forwarded as save= to every print_1d_data call.
save = True
|
|
def _plot_metrics(x_values, metrics, x_name, dataset_name, method_name,
                  save, stop_on_last=False):
    """Plot every metric series against ``x_values`` with ``print_1d_data``.

    Parameters
    ----------
    x_values : list
        Values used for the x axis of every plot.
    metrics : list of (y_values, y_name, y_unit)
        Series to plot, in order. ``y_unit`` is ``None`` when the plot has
        no unit.
    x_name, dataset_name, method_name, save
        Forwarded unchanged to ``print_1d_data``.
    stop_on_last : bool
        When true, the last plot (and only the last) is issued with
        ``stop=True``; every other plot uses ``stop=False``.
    """
    last = len(metrics) - 1
    for position, (y_values, y_name, y_unit) in enumerate(metrics):
        # Forward y_unit only when one is given, so that unit-less plots are
        # called exactly as before (without the keyword).
        extra = {} if y_unit is None else {"y_unit": y_unit}
        print_1d_data(x_values, y_values, x_name=x_name, y_name=y_name,
                      dataset_name=dataset_name, method_name=method_name,
                      stop=stop_on_last and position == last, save=save,
                      **extra)


separator = "-----------------------------------------------------------"
k_max = 10           # k-means and agglomerative try k in [2, k_max)
min_sample_max = 30  # DBSCAN tries min_pts in [1, min_sample_max)
bandwidth_max = 2    # mean-shift tries bandwidths in [0.1, bandwidth_max)
linkage = "ward"     # linkage used for agglomerative clustering

# The two configuration lists are matched by position; fail loudly instead
# of silently dropping a dataset if they ever get out of sync.
if len(dataset_name_list) != len(eps_list):
    raise ValueError(
        "dataset_name_list and eps_list must have the same length")

for dataset_name, eps in zip(dataset_name_list, eps_list):
    print(separator)
    print(" Chargement du dataset : " + dataset_name)
    data = extract_data_txt(path + dataset_name)

    print(separator)
    print(" Mise à l'échelle")
    data_scaled = scale_data(data)

    # ------------------------------------------------------------------
    # k-means: fit one model per k and score each solution.
    # ------------------------------------------------------------------
    print(separator)
    print(" Application de k-means")
    ks = []
    durations = []
    silhouettes = []
    davies_scores = []
    calinski_scores = []
    inertias = []
    iterations = []
    for n_clusters in range(2, k_max):
        model, duration = apply_kmeans(data_scaled, k=n_clusters,
                                       init="k-means++")
        silhouette, davies, calinski = evaluate(data_scaled, model)
        ks.append(n_clusters)
        durations.append(duration)
        silhouettes.append(silhouette)
        davies_scores.append(davies)
        calinski_scores.append(calinski)
        inertias.append(model.inertia_)
        iterations.append(model.n_iter_)

    # NOTE(review): the y_name spellings ("coeficient_...") are kept as-is
    # because they are runtime strings consumed by print_1d_data.
    _plot_metrics(
        ks,
        [
            (ks, "k", None),
            (durations, "temps_de_calcul", "ms"),
            (silhouettes, "coeficient_de_silhouette", None),
            (davies_scores, "coeficient_de_Davies", None),
            (calinski_scores, "coeficient_de_Calinski", None),
            (inertias, "inertie", None),
            (iterations, "nombre_d_iterations", None),
        ],
        x_name="k", dataset_name=dataset_name, method_name="k-means",
        save=save,
        # Only the k-means section ends with stop=True.
        stop_on_last=True,
    )

    # ------------------------------------------------------------------
    # Agglomerative clustering: same sweep over k, without inertia and
    # iteration counts.
    # ------------------------------------------------------------------
    print(separator)
    print(" Création clusters : agglomerative ")
    ks = []
    durations = []
    silhouettes = []
    davies_scores = []
    calinski_scores = []
    for n_clusters in range(2, k_max):
        model, duration = apply_agglomerative_clustering(
            data_scaled, k=n_clusters, linkage=linkage)
        silhouette, davies, calinski = evaluate(data_scaled, model)
        ks.append(n_clusters)
        durations.append(duration)
        silhouettes.append(silhouette)
        davies_scores.append(davies)
        calinski_scores.append(calinski)

    _plot_metrics(
        ks,
        [
            (ks, "k", None),
            (durations, "temps_de_calcul", "ms"),
            (silhouettes, "coeficient_de_silhouette", None),
            (davies_scores, "coeficient_de_Davies", None),
            (calinski_scores, "coeficient_de_Calinski", None),
        ],
        x_name="k", dataset_name=dataset_name,
        method_name="agglomerative_" + linkage, save=save,
    )

    # ------------------------------------------------------------------
    # DBSCAN: fixed eps for this dataset, sweep over min_pts.
    # ------------------------------------------------------------------
    print(separator)
    print(" Création clusters : DBSCAN")
    params = [(eps, min_pts) for min_pts in range(1, min_sample_max)]
    durations = []
    silhouettes = []
    davies_scores = []
    calinski_scores = []
    cluster_counts = []
    noise_counts = []
    for distance, min_pts in params:
        model, duration = apply_DBSCAN(data_scaled, distance, min_pts)
        labels = model.labels_
        silhouette, davies, calinski = evaluate(data_scaled, model)
        durations.append(duration)
        silhouettes.append(silhouette)
        davies_scores.append(davies)
        calinski_scores.append(calinski)
        # Label -1 is counted as noise, so it is not counted as a cluster.
        cluster_counts.append(len(set(labels)) - (1 if -1 in labels else 0))
        noise_counts.append(list(labels).count(-1))

    # The x axis shows each (eps, min_pts) pair rendered as text.
    param_labels = [str(pair) for pair in params]
    _plot_metrics(
        param_labels,
        [
            (durations, "temps_de_calcul", "ms"),
            (silhouettes, "coeficient_de_silhouette", None),
            (davies_scores, "coeficient_de_Davies", None),
            (calinski_scores, "coeficient_de_Calinski", None),
            (cluster_counts, "nombre_de_clusters", None),
            (noise_counts, "points_de_bruit", None),
        ],
        x_name="(eps,min_pts)", dataset_name=dataset_name,
        method_name="DBSCAN", save=save,
    )

    # ------------------------------------------------------------------
    # Mean-shift: sweep over the bandwidth in steps of 0.2.
    # ------------------------------------------------------------------
    print(separator)
    print(" Création clusters : mean-shift")
    bandwidths = []
    durations = []
    silhouettes = []
    davies_scores = []
    calinski_scores = []
    for bandwidth in arange(0.1, bandwidth_max, 0.2):
        model, duration = apply_mean_shift(data_scaled, bandwidth=bandwidth)
        silhouette, davies, calinski = evaluate(data_scaled, model)
        bandwidths.append(bandwidth)
        durations.append(duration)
        silhouettes.append(silhouette)
        davies_scores.append(davies)
        calinski_scores.append(calinski)

    # NOTE(review): x_name/y_name stay "k" although the swept value is the
    # bandwidth; they are runtime strings and are left unchanged here.
    _plot_metrics(
        bandwidths,
        [
            (bandwidths, "k", None),
            (durations, "temps_de_calcul", "ms"),
            (silhouettes, "coeficient_de_silhouette", None),
            (davies_scores, "coeficient_de_Davies", None),
            (calinski_scores, "coeficient_de_Calinski", None),
        ],
        x_name="k", dataset_name=dataset_name, method_name="mean-shift",
        save=save,
    )
|