#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
DBSCAN experiment on a 2-D artificial dataset.

Steps performed, in order:
  1. load the dataset with ``extract_data_2d`` and display it;
  2. rescale it with ``scale_data`` and display it again;
  3. compute, for every point, the mean distance to its nearest neighbours
     and display the sorted values (presumably as a guide for choosing
     eps -- confirm with the author);
  4. run DBSCAN for a fixed eps and min_pts in 1..29, displaying each
     clustering and recording timing, quality scores, cluster count and
     noise count;
  5. display each recorded series against the (eps, min_pts) pairs.

Created on Wed Dec  8 16:07:28 2021

@author: pfaure
"""

from sklearn.neighbors import NearestNeighbors
import numpy as np
from myplotlib import print_1d_data, print_2d_data
from mydatalib import extract_data_2d, scale_data, apply_DBSCAN, evaluate

path = './artificial/'
dataset_name = "s-set1"
save = True

print("-----------------------------------------------------------")
print(" Chargement du dataset : " + dataset_name)
data = extract_data_2d(path + dataset_name)
print_2d_data(data, dataset_name=dataset_name + "_brutes",
              stop=False, save=save)

print("-----------------------------------------------------------")
print(" Mise à l'échelle")
data_scaled = scale_data(data)
print_2d_data(data_scaled, dataset_name=dataset_name + "_scaled",
              stop=False, save=save)

print("-----------------------------------------------------------")
print(" Calcul du voisinage")
n = 7
neighbors = NearestNeighbors(n_neighbors=n)
neighbors.fit(data_scaled)
distances, indices = neighbors.kneighbors(data_scaled)
# The fitted data is queried against itself, so column 0 is expected to be
# each point's distance to itself. Average the remaining n - 1 columns.
# The previous code summed x[1:n-1] (dropping the farthest neighbour) while
# still dividing by len(x) - 1, which under-estimated every mean.
distances = np.asarray(distances)[:, 1:].mean(axis=1)
distances = np.sort(distances, axis=0)
print_1d_data(distances, range(1, len(distances) + 1),
              x_name="distance_moyenne", y_name="nombre_de_points",
              dataset_name=dataset_name, method_name="DBSCAN",
              stop=False, save=save)

print("-----------------------------------------------------------")
print(" Création clusters : DBSCAN")
# Fixed eps of 0.1, min_pts swept from 1 to 29.
params = [(0.1, i) for i in range(1, 30)]
durations = []
silouettes = []
daviess = []
calinskis = []
clusters = []
noise_points = []
for (eps, min_pts) in params:
    # Run DBSCAN for this (eps, min_pts) pair.
    (model, duration) = apply_DBSCAN(data_scaled, eps, min_pts)
    cl_pred = model.labels_

    # Display the resulting clusters.
    print_2d_data(data_scaled, dataset_name=dataset_name,
                  method_name="DBSCAN-Eps=" + str(eps)
                  + "-Minpt=" + str(min_pts),
                  k=0, stop=False, save=save, c=cl_pred)

    # Score the clustering (three scores returned by ``evaluate``;
    # presumably silhouette, Davies-Bouldin and Calinski-Harabasz --
    # confirm against mydatalib).
    (silouette, davies, calinski) = evaluate(data_scaled, model)

    # Record the values for the summary plots.
    durations.append(duration)
    silouettes.append(silouette)
    daviess.append(davies)
    calinskis.append(calinski)
    # Label -1 is not counted as a cluster; it is tallied as noise instead.
    clusters.append(len(set(cl_pred)) - (1 if -1 in cl_pred else 0))
    noise_points.append(list(cl_pred).count(-1))

# Summary plots: one per recorded series, all sharing the same x axis.
params = [str(p) for p in params]
summary_plots = [
    (durations, {"y_name": "temps_de_calcul", "y_unit": "ms"}),
    (silouettes, {"y_name": "coeficient_de_silhouette"}),
    (daviess, {"y_name": "coeficient_de_Davies"}),
    (calinskis, {"y_name": "coeficient_de_Calinski"}),
    (clusters, {"y_name": "nombre_de_clusters"}),
    (noise_points, {"y_name": "points_de_bruit"}),
]
for values, axis_options in summary_plots:
    print_1d_data(params, values, x_name="(eps,min_pts)",
                  dataset_name=dataset_name, method_name="DBSCAN",
                  stop=False, save=save, **axis_options)