on analyse
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

tp6-preprocessing.py 1.3KB

12345678910111213141516171819202122232425262728293031323334353637383940
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Sun Jan 9 11:12:30 2022
  5. @author: pfaure
  6. """
  7. from sklearn.neighbors import NearestNeighbors
  8. import numpy as np
  9. from myplotlib import print_1d_data, print_2d_data
  10. from mydatalib import extract_data_csv, scale_data
  11. path = './new-data/'
  12. dataset_name = "pluie"
  13. save = False
  14. print("-----------------------------------------------------------")
  15. print(" Chargement du dataset : " + dataset_name)
  16. (villes, data) = extract_data_csv(path + dataset_name, 13, 13)
  17. print(data)
  18. # print_2d_data(data, dataset_name=dataset_name +
  19. # "_brutes", stop=False, save=save)
  20. print("-----------------------------------------------------------")
  21. print(" Mise à l'échelle")
  22. data_scaled = scale_data(data)
  23. # print_2d_data(data_scaled, dataset_name=dataset_name +
  24. # "_scaled", stop=False, save=save)
  25. print("-----------------------------------------------------------")
  26. print(" Calcul du voisinage")
  27. n = 5
  28. neighbors = NearestNeighbors(n_neighbors=n)
  29. neighbors.fit(data_scaled)
  30. distances, indices = neighbors.kneighbors(data_scaled)
  31. distances = list(map(lambda x: sum(x[1:n-1])/(len(x)-1), distances))
  32. distances = np.sort(distances, axis=0)
  33. print_1d_data(distances, range(1, len(distances)+1), x_name="distance_moyenne",
  34. y_name="nombre_de_points", stop=False, save=save)