Add dummy files
This commit is contained in:
parent
d5fe64d1a2
commit
e3c328855d
3 changed files with 282 additions and 0 deletions
94
tp4-affinity.py
Normal file
94
tp4-affinity.py
Normal file
|
@ -0,0 +1,94 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""
|
||||||
|
Created on Wed Dec 8 16:07:28 2021
|
||||||
|
|
||||||
|
@author: pfaure
|
||||||
|
"""
|
||||||
|
|
||||||
|
from sklearn.neighbors import NearestNeighbors
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from myplotlib import print_1d_data, print_2d_data
|
||||||
|
from mydatalib import extract_data_2d, scale_data, apply_DBSCAN, evaluate
|
||||||
|
|
||||||
|
# Run configuration: dataset folder, dataset name, and the `save` flag that is
# forwarded to the plotting helpers.
path = './artificial/'
dataset_name = "banana"
save = True

print("-----------------------------------------------------------")
print(f" Chargement du dataset : {dataset_name}")
# Load the dataset and plot it before any transformation.
data = extract_data_2d(path + dataset_name)
print_2d_data(
    data,
    dataset_name=f"{dataset_name}_brutes",
    stop=False,
    save=save,
)

print("-----------------------------------------------------------")
print(" Mise à l'échelle")
# Rescale the data and plot the rescaled version.
data_scaled = scale_data(data)
print_2d_data(
    data_scaled,
    dataset_name=f"{dataset_name}_scaled",
    stop=False,
    save=save,
)
|
||||||
|
|
||||||
|
print("-----------------------------------------------------------")
print(" Calcul du voisinage")
# Build the sorted curve of mean nearest-neighbour distances (one value per
# point).
n = 50
neighbors = NearestNeighbors(n_neighbors=n)
# NOTE(review): the neighbour search runs on the raw `data`, not on
# `data_scaled` — confirm this is intended.
neighbors.fit(data)
distances, indices = neighbors.kneighbors(data)
# Querying with the fitted points themselves returns each point as its own
# first neighbour (distance 0), so column 0 is skipped. The mean covers all
# n-1 remaining columns: the previous slice x[1:n-1] dropped the farthest
# neighbour while still dividing by n-1, which biased every average downward.
distances = list(np.asarray(distances)[:, 1:].mean(axis=1))
print(distances)
distances = np.sort(distances, axis=0)
print(distances)
print_1d_data(
    distances,
    range(1, len(distances) + 1),
    x_name="distance_moyenne",
    y_name="nombre_de_points",
    stop=False,
    save=False,
)
|
||||||
|
|
||||||
|
|
||||||
|
print("-----------------------------------------------------------")
print(" Création clusters : DBSCAN")
# Parameter grid: eps from 0.01 to 0.19 in steps of 0.01, min_pts fixed at 5.
params = [(step / 100, 5) for step in range(1, 20)]
# One entry per (eps, min_pts) pair, appended in the same order as `params`.
durations, silouettes, daviess = [], [], []
calinskis, clusters, noise_points = [], [], []
for distance, min_pts in params:
    # Run DBSCAN for this parameter pair.
    # NOTE(review): the clustering is fitted on `data` while the plot and the
    # evaluation below use `data_scaled` — confirm this is intended.
    model, duration = apply_DBSCAN(data, distance, min_pts)
    cl_pred = model.labels_
    # Plot the points coloured by their predicted label.
    print_2d_data(
        data_scaled,
        dataset_name=dataset_name,
        method_name=f"DBSCAN-Eps={distance}-Minpt={min_pts}",
        k=0,
        stop=False,
        save=save,
        c=cl_pred,
    )
    # Score the clustering solution.
    silouette, davies, calinski = evaluate(data_scaled, model)
    # Record the measurements for this run.
    durations.append(duration)
    silouettes.append(silouette)
    daviess.append(davies)
    calinskis.append(calinski)
    # Label -1 is not counted as a cluster; it is tallied as noise instead.
    clusters.append(len(set(cl_pred)) - int(-1 in cl_pred))
    noise_points.append(list(cl_pred).count(-1))
|
||||||
|
|
||||||
|
# Display the results: one chart per recorded series, each plotted against the
# stringified (eps, min_pts) pairs.
params = list(map(str, params))
charts = [
    (durations, "temps_de_calcul", {"y_unit": "ms"}),
    (silouettes, "coeficient_de_silhouette", {}),
    (daviess, "coeficient_de_Davies", {}),
    (calinskis, "coeficient_de_Calinski", {}),
    (clusters, "nombre_de_clusters", {}),
    (noise_points, "points_de_bruit", {}),
]
for values, label, extra in charts:
    # Only the duration chart carries an extra keyword (its unit).
    print_1d_data(
        params,
        values,
        x_name="(eps,min_pts)",
        y_name=label,
        dataset_name=dataset_name,
        method_name="DBSCAN",
        stop=False,
        save=save,
        **extra,
    )
|
94
tp5-artificial-dataset.py
Normal file
94
tp5-artificial-dataset.py
Normal file
|
@ -0,0 +1,94 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""
|
||||||
|
Created on Wed Dec 8 16:07:28 2021
|
||||||
|
|
||||||
|
@author: pfaure
|
||||||
|
"""
|
||||||
|
|
||||||
|
from sklearn.neighbors import NearestNeighbors
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from myplotlib import print_1d_data, print_2d_data
|
||||||
|
from mydatalib import extract_data_2d, scale_data, apply_DBSCAN, evaluate
|
||||||
|
|
||||||
|
# Run configuration: dataset folder, dataset name, and the `save` flag that is
# forwarded to the plotting helpers.
path = './artificial/'
dataset_name = "banana"
save = True

print("-----------------------------------------------------------")
print(f" Chargement du dataset : {dataset_name}")
# Load the dataset and plot it before any transformation.
data = extract_data_2d(path + dataset_name)
print_2d_data(
    data,
    dataset_name=f"{dataset_name}_brutes",
    stop=False,
    save=save,
)

print("-----------------------------------------------------------")
print(" Mise à l'échelle")
# Rescale the data and plot the rescaled version.
data_scaled = scale_data(data)
print_2d_data(
    data_scaled,
    dataset_name=f"{dataset_name}_scaled",
    stop=False,
    save=save,
)
|
||||||
|
|
||||||
|
print("-----------------------------------------------------------")
print(" Calcul du voisinage")
# Build the sorted curve of mean nearest-neighbour distances (one value per
# point).
n = 50
neighbors = NearestNeighbors(n_neighbors=n)
# NOTE(review): the neighbour search runs on the raw `data`, not on
# `data_scaled` — confirm this is intended.
neighbors.fit(data)
distances, indices = neighbors.kneighbors(data)
# Querying with the fitted points themselves returns each point as its own
# first neighbour (distance 0), so column 0 is skipped. The mean covers all
# n-1 remaining columns: the previous slice x[1:n-1] dropped the farthest
# neighbour while still dividing by n-1, which biased every average downward.
distances = list(np.asarray(distances)[:, 1:].mean(axis=1))
print(distances)
distances = np.sort(distances, axis=0)
print(distances)
print_1d_data(
    distances,
    range(1, len(distances) + 1),
    x_name="distance_moyenne",
    y_name="nombre_de_points",
    stop=False,
    save=False,
)
|
||||||
|
|
||||||
|
|
||||||
|
print("-----------------------------------------------------------")
print(" Création clusters : DBSCAN")
# Parameter grid: eps from 0.01 to 0.19 in steps of 0.01, min_pts fixed at 5.
params = [(step / 100, 5) for step in range(1, 20)]
# One entry per (eps, min_pts) pair, appended in the same order as `params`.
durations, silouettes, daviess = [], [], []
calinskis, clusters, noise_points = [], [], []
for distance, min_pts in params:
    # Run DBSCAN for this parameter pair.
    # NOTE(review): the clustering is fitted on `data` while the plot and the
    # evaluation below use `data_scaled` — confirm this is intended.
    model, duration = apply_DBSCAN(data, distance, min_pts)
    cl_pred = model.labels_
    # Plot the points coloured by their predicted label.
    print_2d_data(
        data_scaled,
        dataset_name=dataset_name,
        method_name=f"DBSCAN-Eps={distance}-Minpt={min_pts}",
        k=0,
        stop=False,
        save=save,
        c=cl_pred,
    )
    # Score the clustering solution.
    silouette, davies, calinski = evaluate(data_scaled, model)
    # Record the measurements for this run.
    durations.append(duration)
    silouettes.append(silouette)
    daviess.append(davies)
    calinskis.append(calinski)
    # Label -1 is not counted as a cluster; it is tallied as noise instead.
    clusters.append(len(set(cl_pred)) - int(-1 in cl_pred))
    noise_points.append(list(cl_pred).count(-1))
|
||||||
|
|
||||||
|
# Display the results: one chart per recorded series, each plotted against the
# stringified (eps, min_pts) pairs.
params = list(map(str, params))
charts = [
    (durations, "temps_de_calcul", {"y_unit": "ms"}),
    (silouettes, "coeficient_de_silhouette", {}),
    (daviess, "coeficient_de_Davies", {}),
    (calinskis, "coeficient_de_Calinski", {}),
    (clusters, "nombre_de_clusters", {}),
    (noise_points, "points_de_bruit", {}),
]
for values, label, extra in charts:
    # Only the duration chart carries an extra keyword (its unit).
    print_1d_data(
        params,
        values,
        x_name="(eps,min_pts)",
        y_name=label,
        dataset_name=dataset_name,
        method_name="DBSCAN",
        stop=False,
        save=save,
        **extra,
    )
|
94
tp6-real-dataset.py
Normal file
94
tp6-real-dataset.py
Normal file
|
@ -0,0 +1,94 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""
|
||||||
|
Created on Wed Dec 8 16:07:28 2021
|
||||||
|
|
||||||
|
@author: pfaure
|
||||||
|
"""
|
||||||
|
|
||||||
|
from sklearn.neighbors import NearestNeighbors
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from myplotlib import print_1d_data, print_2d_data
|
||||||
|
from mydatalib import extract_data_2d, scale_data, apply_DBSCAN, evaluate
|
||||||
|
|
||||||
|
# Run configuration: dataset folder, dataset name, and the `save` flag that is
# forwarded to the plotting helpers.
path = './artificial/'
dataset_name = "banana"
save = True

print("-----------------------------------------------------------")
print(f" Chargement du dataset : {dataset_name}")
# Load the dataset and plot it before any transformation.
data = extract_data_2d(path + dataset_name)
print_2d_data(
    data,
    dataset_name=f"{dataset_name}_brutes",
    stop=False,
    save=save,
)

print("-----------------------------------------------------------")
print(" Mise à l'échelle")
# Rescale the data and plot the rescaled version.
data_scaled = scale_data(data)
print_2d_data(
    data_scaled,
    dataset_name=f"{dataset_name}_scaled",
    stop=False,
    save=save,
)
|
||||||
|
|
||||||
|
print("-----------------------------------------------------------")
print(" Calcul du voisinage")
# Build the sorted curve of mean nearest-neighbour distances (one value per
# point).
n = 50
neighbors = NearestNeighbors(n_neighbors=n)
# NOTE(review): the neighbour search runs on the raw `data`, not on
# `data_scaled` — confirm this is intended.
neighbors.fit(data)
distances, indices = neighbors.kneighbors(data)
# Querying with the fitted points themselves returns each point as its own
# first neighbour (distance 0), so column 0 is skipped. The mean covers all
# n-1 remaining columns: the previous slice x[1:n-1] dropped the farthest
# neighbour while still dividing by n-1, which biased every average downward.
distances = list(np.asarray(distances)[:, 1:].mean(axis=1))
print(distances)
distances = np.sort(distances, axis=0)
print(distances)
print_1d_data(
    distances,
    range(1, len(distances) + 1),
    x_name="distance_moyenne",
    y_name="nombre_de_points",
    stop=False,
    save=False,
)
|
||||||
|
|
||||||
|
|
||||||
|
print("-----------------------------------------------------------")
print(" Création clusters : DBSCAN")
# Parameter grid: eps from 0.01 to 0.19 in steps of 0.01, min_pts fixed at 5.
params = [(step / 100, 5) for step in range(1, 20)]
# One entry per (eps, min_pts) pair, appended in the same order as `params`.
durations, silouettes, daviess = [], [], []
calinskis, clusters, noise_points = [], [], []
for distance, min_pts in params:
    # Run DBSCAN for this parameter pair.
    # NOTE(review): the clustering is fitted on `data` while the plot and the
    # evaluation below use `data_scaled` — confirm this is intended.
    model, duration = apply_DBSCAN(data, distance, min_pts)
    cl_pred = model.labels_
    # Plot the points coloured by their predicted label.
    print_2d_data(
        data_scaled,
        dataset_name=dataset_name,
        method_name=f"DBSCAN-Eps={distance}-Minpt={min_pts}",
        k=0,
        stop=False,
        save=save,
        c=cl_pred,
    )
    # Score the clustering solution.
    silouette, davies, calinski = evaluate(data_scaled, model)
    # Record the measurements for this run.
    durations.append(duration)
    silouettes.append(silouette)
    daviess.append(davies)
    calinskis.append(calinski)
    # Label -1 is not counted as a cluster; it is tallied as noise instead.
    clusters.append(len(set(cl_pred)) - int(-1 in cl_pred))
    noise_points.append(list(cl_pred).count(-1))
|
||||||
|
|
||||||
|
# Display the results: one chart per recorded series, each plotted against the
# stringified (eps, min_pts) pairs.
params = list(map(str, params))
charts = [
    (durations, "temps_de_calcul", {"y_unit": "ms"}),
    (silouettes, "coeficient_de_silhouette", {}),
    (daviess, "coeficient_de_Davies", {}),
    (calinskis, "coeficient_de_Calinski", {}),
    (clusters, "nombre_de_clusters", {}),
    (noise_points, "points_de_bruit", {}),
]
for values, label, extra in charts:
    # Only the duration chart carries an extra keyword (its unit).
    print_1d_data(
        params,
        values,
        x_name="(eps,min_pts)",
        y_name=label,
        dataset_name=dataset_name,
        method_name="DBSCAN",
        stop=False,
        save=save,
        **extra,
    )
|
Loading…
Reference in a new issue