tp-analyse-donnees/tp6-preprocessing.py
2022-01-09 11:28:38 +01:00

40 lines
1.3 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Jan 9 11:12:30 2022
@author: pfaure
"""
from sklearn.neighbors import NearestNeighbors
import numpy as np
from myplotlib import print_1d_data, print_2d_data
from mydatalib import extract_data_csv, scale_data
# Location of the CSV datasets and name of the one analysed by this script.
path = './new-data/'
dataset_name = "pluie"
# Forwarded to the plotting helpers as their ``save`` argument.
save = False


def mean_neighbor_distances(distances):
    """Return, for each row, the mean distance to its neighbours.

    ``distances`` is the 2-D array produced by
    ``NearestNeighbors.kneighbors`` when queried with the data it was fitted
    on: one row per point, columns sorted by increasing distance.  Column 0 is
    then the zero distance of each point to itself, so it is left out and the
    mean is taken over the remaining columns.

    Raises:
        ValueError: if ``distances`` is not 2-D or has fewer than two
            columns (no real neighbour to average over).
    """
    distances = np.asarray(distances, dtype=float)
    if distances.ndim != 2 or distances.shape[1] < 2:
        raise ValueError(
            "distances must be 2-D with at least two columns "
            "(self-distance + one neighbour)")
    # Average over *all* non-self neighbours.  The previous implementation
    # summed x[1:n-1] (dropping the farthest neighbour) while still dividing
    # by n-1, which under-estimated every mean distance.
    return distances[:, 1:].mean(axis=1)


if __name__ == "__main__":
    print("-----------------------------------------------------------")
    print(" Chargement du dataset : " + dataset_name)
    # NOTE(review): the meaning of the two ``13`` arguments depends on
    # mydatalib.extract_data_csv -- confirm against that helper.
    (villes, data) = extract_data_csv(path + dataset_name, 13, 13)
    print(data)
    # Optional plot of the raw data (disabled):
    # print_2d_data(data, dataset_name=dataset_name +
    #               "_brutes", stop=False, save=save)

    print("-----------------------------------------------------------")
    print(" Mise à l'échelle")
    data_scaled = scale_data(data)
    # Optional plot of the scaled data (disabled):
    # print_2d_data(data_scaled, dataset_name=dataset_name +
    #               "_scaled", stop=False, save=save)

    print("-----------------------------------------------------------")
    print(" Calcul du voisinage")
    # Number of neighbours requested; since the query set is the training
    # set, this count includes the point itself.
    n = 5
    neighbors = NearestNeighbors(n_neighbors=n)
    neighbors.fit(data_scaled)
    distances, indices = neighbors.kneighbors(data_scaled)
    # Sorted mean neighbour distances, plotted against the point rank.
    distances = np.sort(mean_neighbor_distances(distances))
    print_1d_data(distances, range(1, len(distances)+1), x_name="distance_moyenne",
                  y_name="nombre_de_points", stop=False, save=save)