Fin TP1 OK --> fonctionnel
This commit is contained in:
parent
5122467dfe
commit
1ac7ab4212
1 changed files with 98 additions and 20 deletions
|
@ -6,11 +6,14 @@ Created on Fri Nov 19 23:08:23 2021
|
||||||
"""
|
"""
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
|
from mpl_toolkits.mplot3d import axes3d # Fonction pour la 3D
|
||||||
import time
|
import time
|
||||||
|
|
||||||
from scipy.io import arff
|
from scipy.io import arff
|
||||||
from sklearn import cluster
|
from sklearn import cluster
|
||||||
from sklearn import metrics
|
from sklearn import metrics
|
||||||
|
from sklearn import preprocessing
|
||||||
|
from sklearn.cluster import KMeans
|
||||||
|
|
||||||
##################################################################
|
##################################################################
|
||||||
# READ a data set (arff format)
|
# READ a data set (arff format)
|
||||||
|
@ -28,28 +31,71 @@ from sklearn import metrics
|
||||||
# --> IGNORER CETTE INFORMATION ....
|
# --> IGNORER CETTE INFORMATION ....
|
||||||
# 2d-4c-no9.arff
|
# 2d-4c-no9.arff
|
||||||
|
|
||||||
|
def extract_data_2d(databrut, dim: int = 2):
    """Extract the leading numeric features of a loaded arff dataset.

    Parameters
    ----------
    databrut : tuple
        Result of ``scipy.io.arff.loadarff``: ``(records, metadata)``.
        Only the records (index 0) are read.
    dim : int, optional
        Number of leading columns to keep. Defaults to 2, which is the
        original hard-coded behavior; passing another value generalizes
        the helper to any number of features.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_samples, dim)``.
    """
    # Records are indexable by position, so slice off the first `dim` fields.
    return np.array([[row[i] for i in range(dim)] for row in databrut[0]])
|
||||||
|
|
||||||
|
def extract_data_3d(databrut):
    """Return the first three features of a loaded arff dataset as an (n, 3) array.

    `databrut` is the ``(records, metadata)`` tuple produced by
    ``scipy.io.arff.loadarff``; only the records are used.
    """
    records = databrut[0]
    triples = [[record[0], record[1], record[2]] for record in records]
    return np.array(triples)
|
||||||
|
|
||||||
|
def print_3d_data(data, stop: bool = True, c=None):
    """Scatter-plot the first three columns of `data` in a 3D figure.

    Parameters
    ----------
    data : numpy.ndarray
        Array with at least three columns; columns 0, 1, 2 are plotted.
    stop : bool, optional
        Passed to ``plt.show(block=...)`` — True blocks until the window
        is closed, False returns immediately.
    c : array-like or None, optional
        Per-point colors (e.g. cluster labels); when None, points use the
        default color.
    """
    print("---------------------------------------")
    print("Affichage données initiales ")
    f0 = data[:, 0]  # all elements of the first column
    f1 = data[:, 1]  # all elements of the second column
    f2 = data[:, 2]  # all elements of the third column
    fig = plt.figure()
    # FIX: fig.gca(projection='3d') was deprecated in Matplotlib 3.4 and
    # removed in 3.7; add_subplot is the supported way to get 3D axes.
    ax = fig.add_subplot(projection='3d')  # 3D display
    if (c is None):
        ax.scatter(f0, f1, f2, label='Courbe', marker='d')  # 3D point plot
    else:
        ax.scatter(f0, f1, f2, c=c, label='Courbe', marker='d')  # 3D point plot
    ax.set_xlabel('X')
    ax.set_ylabel('Y')
    ax.set_zlabel('Z')
    plt.tight_layout()
    plt.title("Donnees initiales")
    plt.show(block=stop)
|
||||||
|
|
||||||
|
def print_2d_data(data, stop: bool = True, c=None):
    """Scatter-plot the first two columns of `data`.

    `stop` is forwarded to ``plt.show(block=...)``; `c` gives optional
    per-point colors (e.g. cluster labels).
    """
    print("---------------------------------------")
    print("Affichage données initiales ")
    xs = data[:, 0]  # first feature
    ys = data[:, 1]  # second feature
    plt.figure()
    # Build the scatter kwargs once so both branches share one call shape.
    if c is not None:
        plt.scatter(xs, ys, c=c, s=8)
    else:
        plt.scatter(xs, ys, s=8)
    plt.title("Donnees initiales")
    plt.show(block=stop)
|
||||||
|
|
||||||
|
def print_1d_data(x, y, stop: bool = True):
    """Line-plot `y` against `x` in a fresh figure.

    `stop` is forwarded to ``plt.show(block=...)``.
    """
    plt.figure()
    plt.plot(x, y)
    plt.title("Toto")
    plt.show(block=stop)
|
||||||
|
|
||||||
|
# (model, duration) = apply_kmeans(data, k=3)
def apply_kmeans(data, k: int = 3, init="k-means++"):
    """Fit a k-means model on `data` and time the fit.

    Parameters
    ----------
    data : array-like of shape (n_samples, n_features)
        Samples to cluster.
    k : int, optional
        Number of clusters (default 3).
    init : str or array-like, optional
        Centroid initialization strategy, forwarded to
        ``cluster.KMeans`` (default "k-means++").

    Returns
    -------
    tuple
        ``(model, duration_ms)`` — the fitted ``cluster.KMeans`` model and
        the fit wall time in milliseconds, rounded to 2 decimals.
    """
    ##################################################################
    # Run clustering method for a given number of clusters
    # FIX: perf_counter() is monotonic and higher-resolution than
    # time.time(), which can jump with system clock adjustments and
    # would corrupt the measured duration.
    tps1 = time.perf_counter()
    model_km = cluster.KMeans(n_clusters=k, init=init)
    model_km.fit(data)
    tps2 = time.perf_counter()

    return (model_km, round((tps2 - tps1) * 1000, 2))
|
||||||
|
|
||||||
|
def evaluate(data, model_km):
    """Compute quality metrics for a fitted k-means model on `data`.

    Returns a ``(silhouette, inertia, n_iter)`` tuple: the euclidean
    silhouette score of the model's labels, the model's inertia, and the
    number of iterations the fit ran.
    """
    labels = model_km.labels_
    silhouette = metrics.silhouette_score(data, labels, metric='euclidean')
    return (silhouette, model_km.inertia_, model_km.n_iter_)
|
||||||
|
|
||||||
path = './artificial/'

##################################################################
# READ a 2D data set (arff format) and display it
# FIX: open the arff files with a context manager so the file handles
# are closed deterministically — the original passed an open() result
# to loadarff and never closed it.
with open(path + "s-set2.arff", 'r') as f:
    databrut_s_set2 = arff.loadarff(f)
data_s_set2 = extract_data_2d(databrut_s_set2)
print_2d_data(data_s_set2, False)

########################################################################
# AUTRES VISUALISATION DU JEU DE DONNEES
# But : essayer d'autres types de plot
########################################################################
with open(path + "golfball.arff", 'r') as f:
    databrut_golfball = arff.loadarff(f)
data_golfball = extract_data_3d(databrut_golfball)
print_3d_data(data_golfball, False)

########################################################################
# STANDARDISER ET VISUALISER
# But : comparer des méthodes de standardisation, ...
########################################################################
scaler = preprocessing.StandardScaler()
data_s_set2_scaled = scaler.fit_transform(data_s_set2)

#kmeans = KMeans(n_clusters=3000, random_state=0).fit_predict(data_s_set2_scaled)
#print_2d_data(data_s_set2_scaled, True, kmeans)

# Sweep the number of clusters and record, for each k, the fit duration
# and the evaluation metrics so they can be plotted against k below.
k = []
durations = []
silouettes = []
inerties = []
iterations = []
for i in range(2, 50):
    (model, duration) = apply_kmeans(data_s_set2_scaled, k=i, init="k-means++")
    print_2d_data(data_s_set2_scaled, False, model.labels_)
    (silouette, inertie, iteration) = evaluate(data_s_set2_scaled, model)
    k += [i]
    durations += [duration]
    silouettes += [silouette]
    inerties += [inertie]
    iterations += [iteration]

# One curve per recorded metric; only the last window blocks.
print_1d_data(k, k, False)
print_1d_data(k, durations, False)
print_1d_data(k, silouettes, False)
print_1d_data(k, inerties, False)
print_1d_data(k, iterations, True)
Loading…
Reference in a new issue