on analyse
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

tp1-kmeans.py 2.8KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677
  1. # -*- coding: utf-8 -*-
  2. """
  3. Created on Fri Nov 19 23:08:23 2021
  4. @author: pfaure
  5. """
  6. from myplotlib import print_1d_data, print_2d_data, print_3d_data
  7. from mydatalib import (extract_data_2d, extract_data_3d, scale_data,
  8. apply_kmeans, evaluate)
  9. path = './artificial/'
  10. dataset_name = "xclara"
  11. save = True
  12. print("-----------------------------------------------------------")
  13. print(" Chargement du dataset : " + dataset_name)
  14. data = extract_data_2d(path + dataset_name)
  15. print_2d_data(data, dataset_name=dataset_name+"_brute", stop=False, save=save)
  16. print("-----------------------------------------------------------")
  17. print(" Mise à l'échelle")
  18. data_scaled = scale_data(data)
  19. print_2d_data(data_scaled, dataset_name=dataset_name +
  20. "_scaled", stop=False, save=save)
  21. # Application de k-means pour plusieurs valeurs de k
  22. # et evaluation de la solution
  23. k = []
  24. durations = []
  25. silouettes = []
  26. daviess = []
  27. calinskis = []
  28. inerties = []
  29. iterations = []
  30. for i in range(2, 50):
  31. # Application de k-means
  32. (model, duration) = apply_kmeans(data_scaled, k=i, init="k-means++")
  33. # Affichage des clusters
  34. print_2d_data(data_scaled, dataset_name=dataset_name,
  35. method_name="k-means", k=i, c=model.labels_,
  36. stop=False, save=save)
  37. # Evaluation de la solution de clustering
  38. (silouette, davies, calinski) = evaluate(data_scaled, model)
  39. # Enregistrement des valeurs
  40. k += [i]
  41. durations += [duration]
  42. silouettes += [silouette]
  43. daviess += [davies]
  44. calinskis += [calinski]
  45. inerties += [model.inertia_]
  46. iterations += [model.n_iter_]
  47. # Affichage des résultats
  48. print_1d_data(k, k, x_name="k", y_name="k", dataset_name=dataset_name,
  49. method_name="k-means", stop=False, save=save)
  50. print_1d_data(k, durations, x_name="k", y_name="temps_de_calcul", y_unit="ms",
  51. dataset_name=dataset_name, method_name="k-means",
  52. stop=False, save=save)
  53. print_1d_data(k, silouettes, x_name="k", y_name="coeficient_de_silhouette",
  54. dataset_name=dataset_name, method_name="k-means",
  55. stop=False, save=save)
  56. print_1d_data(k, daviess, x_name="k", y_name="coeficient_de_Davies",
  57. dataset_name=dataset_name, method_name="k-means",
  58. stop=False, save=save)
  59. print_1d_data(k, calinskis, x_name="k", y_name="coeficient_de_Calinski",
  60. dataset_name=dataset_name, method_name="k-means",
  61. stop=False, save=save)
  62. print_1d_data(k, inerties, x_name="k", y_name="inertie",
  63. dataset_name=dataset_name, method_name="k-means",
  64. stop=False, save=save)
  65. print_1d_data(k, iterations, x_name="k", y_name="nombre_d_iterations",
  66. dataset_name=dataset_name, method_name="k-means",
  67. stop=True, save=save)