on analyse
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

tp3-dbscan.py 3.6KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Wed Dec 8 16:07:28 2021
  5. @author: pfaure
  6. """
  7. from sklearn.neighbors import NearestNeighbors
  8. import numpy as np
  9. from myplotlib import print_1d_data, print_2d_data
  10. from mydatalib import extract_data_2d, scale_data, apply_DBSCAN, evaluate
  # Run configuration, then dataset loading and scaling.
# `save` is forwarded to every plotting helper; `stop=False` is passed as-is.
# NOTE(review): extract_data_2d / scale_data / print_2d_data are project
# helpers not visible here; presumably they load 2-D points, rescale them and
# plot them -- confirm against mydatalib / myplotlib.
path = './artificial/'
dataset_name = "s-set1"
save = True

print("-----------------------------------------------------------")
print(f" Chargement du dataset : {dataset_name}")
data = extract_data_2d(path + dataset_name)
print_2d_data(
    data,
    dataset_name=f"{dataset_name}_brutes",
    stop=False,
    save=save,
)

print("-----------------------------------------------------------")
print(" Mise à l'échelle")
data_scaled = scale_data(data)
print_2d_data(
    data_scaled,
    dataset_name=f"{dataset_name}_scaled",
    stop=False,
    save=save,
)
  # Sorted mean k-nearest-neighbour distance curve for the scaled data.
# The curve is handed to print_1d_data; presumably it is read to choose the
# DBSCAN eps value -- confirm with the author.
print("-----------------------------------------------------------")
print(" Calcul du voisinage")
n = 7
neighbors = NearestNeighbors(n_neighbors=n)
neighbors.fit(data_scaled)
distances, indices = neighbors.kneighbors(data_scaled)
# The query set is the fitted set, so column 0 is each point's distance to
# itself. Drop it and average the remaining n - 1 neighbour distances.
# The previous expression summed x[1:n-1] (only n - 2 values) while dividing
# by n - 1, which left out the farthest neighbour and underestimated the mean.
distances = np.sort(distances[:, 1:].mean(axis=1))
print_1d_data(
    distances,
    range(1, len(distances) + 1),
    x_name="distance_moyenne",
    y_name="nombre_de_points",
    dataset_name=dataset_name,
    method_name="DBSCAN",
    stop=False,
    save=save,
)
  # DBSCAN parameter sweep: eps fixed at 0.1, min_pts from 1 to 29.
# For every (eps, min_pts) pair the clustering is run, plotted and scored;
# one value per run is appended to each of the result lists below.
print("-----------------------------------------------------------")
print(" Création clusters : DBSCAN")
params = []
for i in range(1, 30):
    params.append((0.1, i))

durations = []
silouettes = []
daviess = []
calinskis = []
clusters = []
noise_points = []

for (distance, min_pts) in params:
    # Run DBSCAN with the current parameters.
    # NOTE(review): apply_DBSCAN is a project helper; it returns a fitted
    # model and a duration (plotted later with unit "ms").
    (model, duration) = apply_DBSCAN(data_scaled, distance, min_pts)
    cl_pred = model.labels_

    # Plot the points coloured by their predicted label.
    print_2d_data(
        data_scaled,
        dataset_name=dataset_name,
        method_name=f"DBSCAN-Eps={distance}-Minpt={min_pts}",
        k=0,
        stop=False,
        save=save,
        c=cl_pred,
    )

    # Score the clustering.
    (silouette, davies, calinski) = evaluate(data_scaled, model)

    # Record the values for this run.
    durations.append(duration)
    silouettes.append(silouette)
    daviess.append(davies)
    calinskis.append(calinski)
    # Label -1 is not counted as a cluster; it is tallied as noise instead.
    clusters.append(len(set(cl_pred) - {-1}))
    noise_points.append(list(cl_pred).count(-1))
  # Plot each recorded series against the stringified (eps, min_pts) pairs.
params = list(map(str, params))

# (values, y-axis name, extra keyword arguments) for each plot; only the
# timing series passes an explicit unit, the others use the helper's default.
for series, label, extra in (
    (durations, "temps_de_calcul", {"y_unit": "ms"}),
    (silouettes, "coeficient_de_silhouette", {}),
    (daviess, "coeficient_de_Davies", {}),
    (calinskis, "coeficient_de_Calinski", {}),
    (clusters, "nombre_de_clusters", {}),
    (noise_points, "points_de_bruit", {}),
):
    print_1d_data(
        params,
        series,
        x_name="(eps,min_pts)",
        y_name=label,
        dataset_name=dataset_name,
        method_name="DBSCAN",
        stop=False,
        save=save,
        **extra,
    )