123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293 |
- #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
- """
- Created on Wed Dec 8 16:07:28 2021
-
- @author: pfaure
- """
-
- from sklearn.neighbors import NearestNeighbors
- import numpy as np
-
- from myplotlib import print_1d_data, print_2d_data
- from mydatalib import extract_data_2d, scale_data, apply_DBSCAN, evaluate
-
# Run configuration: dataset directory, name of the dataset to load, and the
# flag forwarded as `save=` to every plotting helper call below.
path = './artificial/'
dataset_name = "s-set1"
save = True

print("-----------------------------------------------------------")
print(f" Chargement du dataset : {dataset_name}")
# Load the dataset through the project helper and display the raw points
# under the "<dataset>_brutes" name.
data = extract_data_2d(f"{path}{dataset_name}")
print_2d_data(data, dataset_name=f"{dataset_name}_brutes",
              stop=False, save=save)
-
print("-----------------------------------------------------------")
print(" Mise à l'échelle")
# Rescale the data with the project helper, then display the result under
# the "<dataset>_scaled" name.
data_scaled = scale_data(data)
scaled_name = f"{dataset_name}_scaled"
print_2d_data(data_scaled, dataset_name=scaled_name, stop=False, save=save)
-
print("-----------------------------------------------------------")
print(" Calcul du voisinage")
# Number of neighbours requested per point. Because the query set is the
# fitted set itself, column 0 of each row is expected to be the point's own
# zero distance (scikit-learn behaviour when X is passed explicitly), leaving
# n - 1 real neighbours in columns 1..n-1.
n = 7
neighbors = NearestNeighbors(n_neighbors=n)
neighbors.fit(data_scaled)
distances, _ = neighbors.kneighbors(data_scaled)
# Mean distance from each point to its n - 1 real neighbours, sorted in
# ascending order for the plot below.
# The previous slice `x[1:n-1]` dropped the farthest neighbour while still
# dividing by n - 1, which understated every mean; averaging columns 1: keeps
# numerator and denominator consistent.
distances = np.sort(distances[:, 1:].mean(axis=1))
# x axis: sorted mean distance; y axis: rank of the point (1..number of points).
print_1d_data(distances, range(1, len(distances) + 1),
              x_name="distance_moyenne", y_name="nombre_de_points",
              dataset_name=dataset_name, method_name="DBSCAN",
              stop=False, save=save)
-
-
print("-----------------------------------------------------------")
print(" Création clusters : DBSCAN")
# Parameter grid: eps is fixed at 0.1 while min_pts goes from 1 to 29.
params = [(0.1, min_pts) for min_pts in range(1, 30)]
# One entry per parameter pair, appended in the same order as `params`.
durations, silouettes, daviess = [], [], []
calinskis, clusters, noise_points = [], [], []
for eps, min_pts in params:
    # Run DBSCAN for this (eps, min_pts) pair via the project helper, which
    # returns the fitted model together with a duration.
    model, duration = apply_DBSCAN(data_scaled, eps, min_pts)
    labels = model.labels_
    # Display the points, passing the predicted labels as `c`.
    print_2d_data(data_scaled, dataset_name=dataset_name,
                  method_name=f"DBSCAN-Eps={eps}-Minpt={min_pts}",
                  k=0, stop=False, save=save, c=labels)
    # Score the clustering with the project helper (three metrics).
    silhouette, davies, calinski = evaluate(data_scaled, model)
    # Record the measurements for this parameter pair.
    durations.append(duration)
    silouettes.append(silhouette)
    daviess.append(davies)
    calinskis.append(calinski)
    # Label -1 is counted as noise, so it is excluded from the number of
    # clusters whenever at least one point carries it.
    noise_count = list(labels).count(-1)
    clusters.append(len(set(labels)) - (1 if noise_count else 0))
    noise_points.append(noise_count)
-
# Display the results: one plot per recorded series, all sharing the
# stringified (eps, min_pts) pairs as x values.
params = list(map(str, params))
# Each entry pairs a series with the axis keywords specific to its plot; only
# the duration plot passes an explicit unit.
metric_plots = [
    (durations, {"y_name": "temps_de_calcul", "y_unit": "ms"}),
    (silouettes, {"y_name": "coeficient_de_silhouette"}),
    (daviess, {"y_name": "coeficient_de_Davies"}),
    (calinskis, {"y_name": "coeficient_de_Calinski"}),
    (clusters, {"y_name": "nombre_de_clusters"}),
    (noise_points, {"y_name": "points_de_bruit"}),
]
for series, axis_options in metric_plots:
    print_1d_data(params, series, x_name="(eps,min_pts)",
                  dataset_name=dataset_name, method_name="DBSCAN",
                  stop=False, save=save, **axis_options)
|