# -*- coding: utf-8 -*-
"""
Created on Fri Nov 19 23:08:23 2021

@author: pfaure

K-means study script.

Loads a 2D dataset and a 3D dataset, plots them, rescales the 2D data, then
runs k-means on the rescaled data for k = 2 .. 49.  For every k the clustering
is plotted and its run time, evaluation scores, inertia and iteration count
are recorded; each recorded series is finally plotted against k.
"""

from myplotlib import print_1d_data, print_2d_data, print_3d_data
from mydatalib import (extract_data_2d, extract_data_3d, scale_data,
                       apply_kmeans, evaluate)

path = './artificial/'
dataset_name = "xclara"
save = True

# Load the 2D dataset and plot it before any preprocessing.
data = extract_data_2d(path + dataset_name)
print_2d_data(
    data,
    dataset_name=dataset_name+"_brute",
    stop=False,
    save=save,
)

# Load and plot a 3D dataset.
data_golfball = extract_data_3d(path+"golfball")
print_3d_data(
    data_golfball,
    dataset_name="golfball",
    stop=False,
    save=save,
)

# Rescale the 2D data and plot the result.
data_scaled = scale_data(data)
print_2d_data(
    data_scaled,
    dataset_name=dataset_name + "_scaled",
    stop=False,
    save=save,
)

# One entry per tested k in each of the series below.
k_values = []
run_times = []
silhouette_scores = []
davies_scores = []
calinski_scores = []
inertias = []
iteration_counts = []

# Run k-means for several values of k and evaluate each solution.
for n_clusters in range(2, 50):
    # Fit k-means; the helper returns the fitted model and a duration.
    model, elapsed = apply_kmeans(data_scaled, k=n_clusters, init="k-means++")

    # Plot the points coloured by their assigned cluster label.
    print_2d_data(
        data_scaled,
        dataset_name=dataset_name,
        method_name="k-means",
        k=n_clusters,
        c=model.labels_,
        stop=False,
        save=save,
    )

    # Score the clustering (three values; presumably the silhouette,
    # Davies-Bouldin and Calinski-Harabasz indices -- confirm in mydatalib).
    silhouette, davies, calinski = evaluate(data_scaled, model)

    # Record everything for this k.
    k_values.append(n_clusters)
    run_times.append(elapsed)
    silhouette_scores.append(silhouette)
    davies_scores.append(davies)
    calinski_scores.append(calinski)
    inertias.append(model.inertia_)
    iteration_counts.append(model.n_iter_)

# Plot every recorded series against k.  Each row is
# (y values, y-axis name, extra keyword arguments for that plot).
result_plots = [
    (k_values, "k", {}),
    (run_times, "temps_de_calcul", {"y_unit": "ms"}),
    (silhouette_scores, "coeficient_de_silhouette", {}),
    (davies_scores, "coeficient_de_Davies", {}),
    (calinski_scores, "coeficient_de_Calinski", {}),
    (inertias, "inertie", {}),
    (iteration_counts, "nombre_d_iterations", {}),
]
final_index = len(result_plots) - 1
for position, (y_values, y_label, extra_kwargs) in enumerate(result_plots):
    print_1d_data(
        k_values,
        y_values,
        x_name="k",
        y_name=y_label,
        dataset_name=dataset_name,
        method_name="k-means",
        # Only the very last plot is requested with stop=True.
        stop=(position == final_index),
        save=save,
        **extra_kwargs,
    )