# -*- coding: utf-8 -*-
"""
Sweep k-means over k = 2..49 on one 2-D dataset and plot per-k metrics.

The script loads the dataset, scales it, fits one model per value of k
(plotting the resulting labels each time) and finally plots, against k,
the fit duration, three clustering scores, the inertia and the number of
iterations. All loading, fitting, scoring and plotting is delegated to
the project helpers in ``mydatalib`` and ``myplotlib``.

Created on Fri Nov 19 23:08:23 2021

@author: pfaure
"""


from myplotlib import print_1d_data, print_2d_data, print_3d_data
from mydatalib import (extract_data_2d, extract_data_3d, scale_data,
                       apply_kmeans, evaluate)


# Location and name of the dataset to process.
path = './artificial/'
dataset_name = "xclara"

# Forwarded to every plotting call as ``save=``.
save = True

print("-----------------------------------------------------------")
print(" Chargement du dataset : " + dataset_name)
data = extract_data_2d(path + dataset_name)
print_2d_data(data, dataset_name=dataset_name+"_brute", stop=False, save=save)

print("-----------------------------------------------------------")
print(" Mise à l'échelle")
data_scaled = scale_data(data)
print_2d_data(data_scaled, dataset_name=dataset_name +
              "_scaled", stop=False, save=save)

# Run k-means for several values of k and evaluate each solution.
# One entry is appended to each list per value of k, so all lists stay
# index-aligned with ``k``.
k = []
durations = []
silouettes = []
daviess = []
calinskis = []
inerties = []
iterations = []
for n_clusters in range(2, 50):
    # Fit the model with "k-means++" initialisation.
    model, duration = apply_kmeans(data_scaled, k=n_clusters,
                                   init="k-means++")
    # Plot the points coloured by their assigned cluster label.
    print_2d_data(data_scaled, dataset_name=dataset_name,
                  method_name="k-means", k=n_clusters, c=model.labels_,
                  stop=False, save=save)
    # Score the clustering solution.
    silouette, davies, calinski = evaluate(data_scaled, model)
    # Record the values for this k.
    k.append(n_clusters)
    durations.append(duration)
    silouettes.append(silouette)
    daviess.append(davies)
    calinskis.append(calinski)
    inerties.append(model.inertia_)
    iterations.append(model.n_iter_)

# Plot the results: one curve per metric, always against k.
# Each entry is (y values, y_name, extra keyword arguments), in plot order.
# NOTE(review): the first entry plots k against itself, which looks like a
# smoke test of the plotting helper -- confirm it is intentional.
# NOTE(review): the "coeficient_..." labels are misspelled but are kept
# verbatim because they may be used in output file names -- confirm before
# renaming.
curves = [
    (k, "k", {}),
    (durations, "temps_de_calcul", {"y_unit": "ms"}),
    (silouettes, "coeficient_de_silhouette", {}),
    (daviess, "coeficient_de_Davies", {}),
    (calinskis, "coeficient_de_Calinski", {}),
    (inerties, "inertie", {}),
    (iterations, "nombre_d_iterations", {}),
]
last = len(curves) - 1
for index, (values, y_name, extra) in enumerate(curves):
    # Only the final plot is issued with stop=True; every earlier one uses
    # stop=False (presumably so the script does not block before the last
    # figure -- confirm against myplotlib).
    print_1d_data(k, values, x_name="k", y_name=y_name,
                  dataset_name=dataset_name, method_name="k-means",
                  stop=(index == last), save=save, **extra)