# tp-analyse-donnees/tp1-kmeans.py
# 2022-01-08 16:55:24 +01:00
#
# 77 lines
# 2.8 KiB
# Python

# -*- coding: utf-8 -*-
"""
Created on Fri Nov 19 23:08:23 2021
@author: pfaure
"""
from myplotlib import print_1d_data, print_2d_data, print_3d_data
from mydatalib import (extract_data_2d, extract_data_3d, scale_data,
apply_kmeans, evaluate)
# ---------------------------------------------------------------------------
# Run configuration
# ---------------------------------------------------------------------------
# Prefix prepended to the dataset name to build the argument of
# extract_data_2d.
path = './artificial/'
# Name of the dataset; also reused as the base of every plot's dataset_name.
dataset_name = "xclara"
# Forwarded unchanged as the `save` keyword of every plotting call.
save = True

# ---------------------------------------------------------------------------
# Load the raw 2D dataset and plot it
# ---------------------------------------------------------------------------
print("-----------------------------------------------------------")
print(f" Chargement du dataset : {dataset_name}")
data = extract_data_2d(f"{path}{dataset_name}")
print_2d_data(
    data,
    dataset_name=f"{dataset_name}_brute",
    stop=False,
    save=save,
)

# ---------------------------------------------------------------------------
# Scale the data and plot the scaled version
# ---------------------------------------------------------------------------
print("-----------------------------------------------------------")
print(" Mise à l'échelle")
data_scaled = scale_data(data)
print_2d_data(
    data_scaled,
    dataset_name=f"{dataset_name}_scaled",
    stop=False,
    save=save,
)
# Run k-means for several values of k and evaluate each clustering.
#
# One list per recorded quantity; position j in every list refers to the
# same run (the j-th value of k tried).
k = []
durations = []
silouettes = []
daviess = []
calinskis = []
inerties = []
iterations = []
for i in range(2, 50):  # k takes the values 2, 3, ..., 49
    # Fit k-means with i clusters; apply_kmeans returns the fitted model
    # together with a duration value.
    (model, duration) = apply_kmeans(data_scaled, k=i, init="k-means++")
    # Plot the scaled data coloured by the cluster label of each point.
    print_2d_data(data_scaled, dataset_name=dataset_name,
                  method_name="k-means", k=i, c=model.labels_,
                  stop=False, save=save)
    # Score the clustering; evaluate returns three values, stored here
    # under the names silouette, davies and calinski.
    (silouette, davies, calinski) = evaluate(data_scaled, model)
    # Record the values of this run.
    k.append(i)
    durations.append(duration)
    silouettes.append(silouette)
    daviess.append(davies)
    calinskis.append(calinski)
    # NOTE(review): inertia_ and n_iter_ look like scikit-learn KMeans
    # attributes -- confirm against apply_kmeans in mydatalib.
    inerties.append(model.inertia_)
    iterations.append(model.n_iter_)
# Plot every recorded series against k.
#
# Each entry is (series, y-axis name, extra keyword arguments for that plot).
# The order of the entries is the order in which the plots are produced.
_curves = [
    (k, "k", {}),
    (durations, "temps_de_calcul", {"y_unit": "ms"}),
    (silouettes, "coeficient_de_silhouette", {}),
    (daviess, "coeficient_de_Davies", {}),
    (calinskis, "coeficient_de_Calinski", {}),
    (inerties, "inertie", {}),
    (iterations, "nombre_d_iterations", {}),
]
for _rank, (_series, _y_name, _extra) in enumerate(_curves, start=1):
    # Only the very last plot is requested with stop=True; all the others
    # pass stop=False.
    _is_last = _rank == len(_curves)
    print_1d_data(k, _series, x_name="k", y_name=_y_name,
                  dataset_name=dataset_name, method_name="k-means",
                  stop=_is_last, save=save, **_extra)