Programmes Python d'apprentissage supervisé.

This commit is contained in:
Abdel-Kader Chabi-Sika-Boni 2020-11-27 00:49:09 +01:00
parent 11b677af02
commit 1c40b662ce
32 changed files with 713 additions and 0 deletions

34
TP1_prog1.py Normal file
View file

@ -0,0 +1,34 @@
from sklearn import datasets
import matplotlib.pyplot as plt
###################
### EXERCISE 1 ###
###################
# Load the MNIST dataset from OpenML. `as_frame=False` forces NumPy arrays:
# recent scikit-learn releases return a pandas DataFrame by default, which has
# no `.reshape` and does not support the `[:, 1]` / `[0][1]` indexing used below.
mnist = datasets.fetch_openml('mnist_784', as_frame=False)

# Print a few exploratory views of the dataset (set to False to silence them).
affichage = True
if affichage:
    print(mnist)
    print(mnist.data)
    print(mnist.target)
    print(len(mnist.data))
    help(len)
    print(mnist.data.shape)
    print(mnist.target.shape)
    print(mnist.data[0])
    print(mnist.data[0][1])
    print(mnist.data[:, 1])
    print(len(mnist.data[:100]))

# View every flat row as a 28x28 image and display the first five of them.
images = mnist.data.reshape((-1, 28, 28))
for i in range(5):
    # Name the colormap explicitly: `plt.gray()` returns None and only worked
    # by changing the global default colormap as a side effect.
    plt.imshow(images[i], cmap="gray", interpolation="nearest")
    plt.show()

# Print the class label of the first image of the dataset.
targets = mnist.target
print("Classe de la première image: %s"%(targets[0]))

151
TP1_prog2.py Normal file
View file

@ -0,0 +1,151 @@
from sklearn import datasets
from sklearn.model_selection import train_test_split, KFold
from sklearn.neighbors import KNeighborsClassifier
import matplotlib.pyplot as plt
import numpy as np
from random import seed
from graphs import bars_plot2, bars_plot
import time
np.random.seed(0)  # seed NumPy's global generator so runs are repeatable
seed(0)  # seed the stdlib `random` generator as well
###################
### EXERCISE 2 ###
###################
# Load the MNIST dataset from OpenML. `as_frame=False` forces NumPy arrays:
# recent scikit-learn releases return a pandas DataFrame by default, which has
# no `.reshape` and cannot be indexed with an integer array as done below.
mnist = datasets.fetch_openml('mnist_784', as_frame=False)
# View every flat row as a 28x28 image.
images = mnist.data.reshape((-1, 28, 28))
# Class label of every image.
targets = mnist.target
# Draw a working sample of 5000 *distinct* examples. Sampling without
# replacement matters: a duplicated image could otherwise end up in both the
# training and the test split and inflate the measured scores.
indexes = np.random.choice(len(images), size=5000, replace=False)
data = images[indexes]
data_targets = targets[indexes]
def k_10_classifier_and_score():
    """Train a 10-nearest-neighbours classifier on an 80/20 split and report it.

    Splits the module-level `data` / `data_targets` sample, fits the
    classifier, then prints the true and predicted class of the fourth test
    image, the accuracy on the test split and the error rate on the training
    split.
    """
    train_fraction = 0.8
    train_images, test_images, train_labels, test_labels = train_test_split(
        data, data_targets, train_size=train_fraction
    )
    # Flatten the 28x28 images back to the 784-feature rows the classifier expects.
    train_features = train_images.reshape((-1, 784))
    test_features = test_images.reshape((-1, 784))

    # Build and fit the classifier, then predict the whole test split.
    neighbours = 10
    knn = KNeighborsClassifier(n_neighbors=neighbours)
    knn.fit(X=train_features, y=train_labels)
    predicted = knn.predict(X=test_features)

    # True class versus predicted class for the fourth test image.
    print("VERDICT IMAGE N°4\nClasse réelle: %s\nClasse prédite: %s"%(test_labels[3], predicted[3]))

    # Accuracy on the test split.
    test_accuracy = knn.score(X=test_features, y=test_labels)
    print("Score xtest = %.2f%%"%(test_accuracy*100))

    # Error rate on the training split.
    train_accuracy = knn.score(X=train_features, y=train_labels)
    print("Score d'erreur xtrain = %.2f%%"%(100*(1-train_accuracy)))
def k_variation_and_score():
    """Plot the mean 10-fold cross-validation accuracy of k-NN for k = 2..15.

    For every k, a classifier is trained and scored on each fold of a shuffled
    10-fold split of `data`; the fold accuracies are averaged, expressed as a
    percentage and drawn as a bar chart through `bars_plot2`.
    """
    fold_count = 10
    splitter = KFold(n_splits=fold_count, shuffle=True)
    mean_accuracy_by_k = {}
    for neighbours in range(2, 16):
        accuracy_total = 0
        for train_idx, test_idx in splitter.split(X=data):
            # Flatten each 28x28 image into a 784-feature row.
            train_features = data[train_idx].reshape((-1, 784))
            test_features = data[test_idx].reshape((-1, 784))
            model = KNeighborsClassifier(n_neighbors=neighbours)
            model.fit(X=train_features, y=data_targets[train_idx])
            accuracy_total += model.score(X=test_features, y=data_targets[test_idx])
        mean_accuracy_by_k[neighbours] = (accuracy_total/fold_count)*100
    bars_plot2(
        bar_labels=list(mean_accuracy_by_k.keys()),
        bar_heights=list(mean_accuracy_by_k.values()),
        xlabel="Valeur de k",
        ylabel="Score",
        fig_title="Score par valeur de k (nombre de voisins)",
        show=True,
        percent_mark=True,
        output="tp1Output/mnist_k_variation_scores.png",
    )
def train_test_percent_variation_and_score():
    """Plot 10-NN test accuracy for training fractions from 5% to 95%.

    For each fraction (in steps of 5%), `data` is split again, a
    10-nearest-neighbours classifier is fitted on the training part and scored
    on the remainder; the percentages are drawn as a bar chart.
    """
    accuracy_by_fraction = {}
    for step in range(1, 20):
        train_fraction = 0.05*step
        train_images, test_images, train_labels, test_labels = train_test_split(
            data, data_targets, train_size=train_fraction
        )
        # Flatten each 28x28 image into a 784-feature row.
        train_features = train_images.reshape((-1, 784))
        test_features = test_images.reshape((-1, 784))
        model = KNeighborsClassifier(n_neighbors=10)
        model.fit(X=train_features, y=train_labels)
        accuracy_by_fraction[train_fraction] = model.score(X=test_features, y=test_labels)*100

    fraction_labels = ["%.2f" % (fraction) for fraction in accuracy_by_fraction]
    bars_plot2(
        bar_labels=fraction_labels,
        bar_heights=list(accuracy_by_fraction.values()),
        xlabel="Pourcentage d'échantillons train",
        ylabel="Score",
        fig_title="Score par pourcentage de découpage de DATA (5000 images) en échantillons train et test",
        show=True,
        percent_mark=True,
        bar_colors="red",
        output="tp1Output/mnist_split_data_percentage_scores.png",
    )
def train_percent_variation_and_score():
    """Plot 10-NN accuracy on a fixed test set while the training set grows.

    `data` is split once into 75% training pool / 25% test set. The classifier
    is then fitted on the first 10%, 20%, ... 90% of the training pool (plus
    one example) and always scored on the same test set.
    """
    accuracy_by_fraction = {}
    train_pool, test_images, train_pool_labels, test_labels = train_test_split(
        data, data_targets, train_size=0.75
    )
    test_features = test_images.reshape((-1, 784))
    for tenth in range(1, 10):
        fraction = (10*tenth)/100.0
        # Number of leading training examples kept for this run.
        subset_size = int(fraction*len(train_pool))+1
        subset_features = train_pool[:subset_size].reshape((-1, 784))
        subset_labels = train_pool_labels[:subset_size]
        model = KNeighborsClassifier(n_neighbors=10)
        model.fit(X=subset_features, y=subset_labels)
        accuracy_by_fraction[fraction] = model.score(X=test_features, y=test_labels)*100

    title = "Score par pourcentage de découpage de train (%s images initialement) et %s images comme test (25%% de DATA)" % (
        len(train_pool), len(test_features))
    bars_plot2(
        bar_labels=["%.2f" % (fraction) for fraction in accuracy_by_fraction],
        bar_heights=list(accuracy_by_fraction.values()),
        xlabel="Pourcentage d'échantillons train",
        ylabel="Score",
        fig_title=title,
        show=True,
        percent_mark=True,
        bar_colors="magenta",
        output="tp1Output/mnist_split_various_train_percentage_scores_with_fixed_test.png",
    )
def distance_type_variation_and_score():
    """Plot 10-NN test accuracy for several distance metrics.

    Uses a single 75% / 25% split of `data`; one classifier per metric is
    fitted on the training part and scored on the test part.
    """
    train_images, test_images, train_labels, test_labels = train_test_split(
        data, data_targets, train_size=0.75
    )
    # Flatten each 28x28 image into a 784-feature row.
    train_features = train_images.reshape((-1, 784))
    test_features = test_images.reshape((-1, 784))

    accuracy_by_metric = {}
    for metric in ("minkowski", "euclidean", "hamming", "canberra", "braycurtis"):
        model = KNeighborsClassifier(n_neighbors=10, metric=metric)
        model.fit(X=train_features, y=train_labels)
        accuracy_by_metric[metric] = model.score(X=test_features, y=test_labels)*100

    bars_plot2(
        bar_labels=list(accuracy_by_metric.keys()),
        bar_heights=list(accuracy_by_metric.values()),
        xlabel="Types de distance",
        ylabel="Score",
        fig_title="Score pour différent type de distance (métrique) avec train=75% de DATA et test=25% de DATA",
        show=True,
        percent_mark=True,
        bar_colors="green",
        output="tp1Output/mnist_various_distance_metric_scores.png",
    )
def parameter_njobs_variation_and_score():
    """Plot the k-NN fitting time per distance metric for two `n_jobs` values.

    Uses a single 75% / 25% split of `data`. For `n_jobs` in (6, -1) and for
    each metric, a 10-nearest-neighbours classifier is fitted and the elapsed
    time of `fit` is recorded; one bar chart is produced per `n_jobs` value.
    """
    xtrain_distance, xtest_distance, ytrain_distance, ytest_distance = train_test_split(
        data, data_targets, train_size=0.75
    )
    # Flatten each 28x28 image into a 784-feature row.
    xtrain_distance = xtrain_distance.reshape((-1, 784))
    distances = ["minkowski", "euclidean", "hamming", "canberra", "braycurtis"]
    for n_jobs in [6, -1]:
        distance_based_training_time = {}
        for distance in distances:
            clf = KNeighborsClassifier(n_neighbors=10, metric=distance, n_jobs=n_jobs)
            # Measure elapsed (wall-clock) time: `process_time` reports CPU
            # time of the current process, which is not the right quantity
            # when comparing degrees of parallelism.
            tic = time.perf_counter()
            clf.fit(X=xtrain_distance, y=ytrain_distance)
            toc = time.perf_counter()
            distance_based_training_time[distance] = toc - tic
        bars_plot2(
            bar_labels=list(distance_based_training_time.keys()),
            bar_heights=list(distance_based_training_time.values()),
            xlabel="Types de distance",
            ylabel="Temps d'entraînement (seconde)",
            show=True,
            bar_colors="pink",
            fig_title="Temps d'entraînement pour différent type de distance avec train=75%% de DATA et test=25%% de DATA et\nN_JOBS=%s" % (
                n_jobs),
            # One file per n_jobs value; without an explicit path both figures
            # went to the default "./bars.png" and the second overwrote the first.
            output="tp1Output/mnist_training_time_by_distance_n_jobs_%s.png" % (n_jobs),
        )
# Run every experiment of exercise 2, in order.
for experiment in (
    k_10_classifier_and_score,
    k_variation_and_score,
    train_test_percent_variation_and_score,
    train_percent_variation_and_score,
    distance_type_variation_and_score,
    parameter_njobs_variation_and_score,
):
    experiment()

217
TP2_prog1.py Normal file
View file

@ -0,0 +1,217 @@
from sklearn import datasets
from sklearn.model_selection import train_test_split, KFold
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import precision_score, zero_one_loss, recall_score
import numpy as np
import matplotlib.pyplot as plt
from random import seed, randint
from graphs import bars_plot2, bars_plot
import time
np.random.seed(0)  # seed NumPy's global generator so runs are repeatable
seed(0)  # seed the stdlib `random` generator used to draw layer sizes
#######################
### TP2 PROGRAM ###
#######################
# Load the MNIST dataset from OpenML. `as_frame=False` forces NumPy arrays:
# recent scikit-learn releases return a pandas DataFrame by default, which
# rejects the `[:4900, :]` slicing and has no `.reshape`.
mnist = datasets.fetch_openml('mnist_784', as_frame=False)
# Split the first 7000 examples: rows 0..4899 for training, 4900..6999 for test.
x_train = mnist.data[:4900, :]
y_train = mnist.target[:4900]
x_test = mnist.data[4900:7000, :]
y_test = mnist.target[4900:7000]
# Make sure both feature matrices have the 784-column layout the classifier expects.
x_train = x_train.reshape((-1, 784))
x_test = x_test.reshape((-1, 784))
def modele_hidden_layer_sizes_50():
    """Train an MLP with `hidden_layer_sizes=(50)` and print its test metrics.

    Prints the classifier's own score, the true and predicted class of the
    fourth test image, and the micro-averaged precision computed with
    `precision_score`.
    """
    # Build and fit the network, then measure it with its built-in score.
    network = MLPClassifier(hidden_layer_sizes=(50))
    network.fit(X=x_train, y=y_train)
    builtin_score = network.score(X=x_test, y=y_test)
    print("MLPClassifier[hidden_layer_sizes=(50)] score = %.2f"%(100*builtin_score))

    # True class versus predicted class for the fourth test image.
    predicted = network.predict(X=x_test)
    print("VERDICT IMAGE N°4\nClasse réelle: %s\nClasse prédite: %s"%(y_test[3], predicted[3]))

    # Same measurement through sklearn.metrics.precision_score.
    micro_precision = precision_score(y_true=y_test, y_pred=predicted, average="micro")
    print("MLPClassifier[hidden_layer_sizes=(50)] score[using precision_score] = %.2f"%(100*micro_precision))
def layers_number_variation_and_scores():
    """Plot the MLP test score for 1 to 10 hidden layers of 50 neurons each.

    One network per depth is trained on the training split and scored on the
    test split; the resulting curve is saved under `tp2Output/` and shown.
    """
    score_by_depth = {}
    for depth in range(1, 11):
        hidden_layers = (50,) * depth
        print(hidden_layers)
        network = MLPClassifier(hidden_layer_sizes=hidden_layers)
        network.fit(X=x_train, y=y_train)
        score_by_depth["%s couches"%(depth)] = network.score(X=x_test, y=y_test) * 100
    print(score_by_depth)

    # Draw the score as a curve over the number of layers.
    plt.figure(figsize=(12.8, 9.6))
    plt.plot(range(1, 11), list(score_by_depth.values()))
    plt.xlabel("Nombre de couches de 50 neurones")
    plt.xlim(left=1)
    plt.ylabel("Score")
    plt.title("Courbe d'évolution du score de précession en fonction du nombre de couches cachées")
    plt.savefig("tp2Output/scores_with_hidden_layers_variation.png", dpi="figure")
    plt.show()
    plt.close()
def five_models_with_different_layers():
    """Compare fitting time and test score of five randomly sized MLPs.

    Model number `c` (0..4) has `2*c + 2` hidden layers whose sizes are drawn
    with `randint(10, 300)`. For each model the CPU time spent in `fit` and
    the test score (as a percentage) are drawn side by side in a grouped bar
    chart, which is saved under `tp2Output/` and shown.
    """
    scores = {}
    times = {}
    layers = {}
    for counter in range(5):
        hl = tuple(randint(10, 300) for _ in range(counter * 2 + 2))
        modele = MLPClassifier(hidden_layer_sizes=hl)
        tic = time.process_time()
        modele.fit(X=x_train, y=y_train)
        toc = time.process_time()
        times[counter] = toc - tic
        scores[counter] = modele.score(X=x_test, y=y_test) * 100
        layers[counter] = hl

    bar_width = 0.4
    y1 = [times[c] for c in range(5)]
    y2 = [scores[c] for c in range(5)]
    r1 = [0, 4, 8, 12, 16]
    r2 = [x + bar_width for x in r1]

    plt.figure(figsize=(12.8, 9.6))
    bar1 = plt.bar(r1, y1, width=bar_width, color=['red' for _ in y1], linewidth=2, label="temps (sec)")
    bar2 = plt.bar(r2, y2, width=bar_width, color=['green' for _ in y2], linewidth=4, label="precision (%)")
    # Put each tick midway between the two bars of its group (the bars are
    # centred on r and r + bar_width, so the group centre is r + bar_width / 2).
    plt.xticks(
        [r + 0.5 * bar_width for r in r1],
        ['%s couches\n%s'%(i*2+2, layers[i]) for i in range(5)],
        rotation=10,
    )
    plt.xlabel("Modèles")
    plt.ylabel("Valeur")
    plt.title("Evaluation du temps d'apprentissage et la précession de cinq modèles de différentes couches")
    # Write each bar's value just above it.
    for rect in bar1 + bar2:
        height = rect.get_height()
        plt.text(rect.get_x() + rect.get_width() / 2.0, height, '%.2f' % height, ha='center', va='bottom')
    plt.legend()
    plt.savefig("tp2Output/comparison_times_scores_of_methods_different_layers", dpi="figure")
    plt.show()
    # Release the figure, as the other plotting functions of this file do.
    plt.close()
def optimization_algorithms_variation():
    """Plot the MLP test score per solver for 1 to 10 random hidden layers.

    For each depth a tuple of layer sizes is drawn with `randint(10, 300)`;
    the same architecture is then trained with the "lbfgs", "sgd" and "adam"
    solvers and scored on the test split. One curve per solver is drawn.
    """
    solver_names = ["lbfgs", "sgd", "adam"]
    scores = {name: {} for name in solver_names}
    for depth in range(1, 11):
        layer_sizes = tuple([randint(10, 300) for _ in range(depth)])
        for name in solver_names:
            network = MLPClassifier(hidden_layer_sizes=layer_sizes, solver=name)
            network.fit(X=x_train, y=y_train)
            scores[name][depth] = network.score(X=x_test, y=y_test) * 100
    print(scores)

    plt.figure(figsize=(12.8, 9.6))
    depths = range(1, 11)
    for name, legend_label in zip(solver_names, ("L-BFGS", "SGD", "ADAM")):
        plt.plot(depths, list(scores[name].values()), label=legend_label)
    plt.xlabel("Nombre de couches cachées")
    plt.xlim(left=1)
    plt.ylabel("Score de précession")
    plt.title("Courbe d'évolution du score de précession en fonction du nombre de couches cachées")
    plt.legend()
    plt.savefig("tp2Output/scores_with_hidden_layers_variation_different_solvers.png", dpi="figure")
    plt.show()
    plt.close()
def activation_functions_variation():
    """Plot the MLP test score per activation for 1 to 10 random hidden layers.

    For each depth a tuple of layer sizes is drawn with `randint(10, 300)`;
    the same architecture is then trained with the "identity", "logistic",
    "tanh" and "relu" activations and scored on the test split. One curve per
    activation is drawn.
    """
    activation_names = ["identity", "logistic", "tanh", "relu"]
    scores = {name: {} for name in activation_names}
    for depth in range(1, 11):
        layer_sizes = tuple([randint(10, 300) for _ in range(depth)])
        for name in activation_names:
            network = MLPClassifier(hidden_layer_sizes=layer_sizes, activation=name)
            network.fit(X=x_train, y=y_train)
            scores[name][depth] = network.score(X=x_test, y=y_test) * 100

    plt.figure(figsize=(12.8, 9.6))
    depths = range(1, 11)
    for name in activation_names:
        plt.plot(depths, list(scores[name].values()), label=name)
    plt.xlabel("Nombre de couches cachées")
    plt.xlim(left=1)
    plt.ylabel("Score de précession")
    plt.title("Courbe d'évolution du score de précession en fonction du nombre de couches cachées")
    plt.legend()
    plt.savefig("tp2Output/scores_with_hidden_layers_variation_different_activations_with_adam.png", dpi="figure")
    plt.show()
    plt.close()
def alpha_parameter_variation():
    """Plot the MLP test score for `alpha` values from 1e-8 to 1e-1.

    A fixed ten-layer architecture is trained once per `alpha` value and
    scored on the test split; the curve is saved under `tp2Output/` and shown.
    """
    # Powers of ten in increasing order: 1e-8, 1e-7, ..., 1e-1.
    alphas = [10 ** (-exponent) for exponent in range(8, 0, -1)]
    scores = []
    for alpha in alphas:
        modele = MLPClassifier(hidden_layer_sizes=(60, 47, 179, 251, 296, 61, 191, 232, 171, 114), alpha=alpha)
        modele.fit(X=x_train, y=y_train)
        scores.append(modele.score(X=x_test, y=y_test) * 100)

    plt.figure(figsize=(12.8, 9.6))
    plt.plot(alphas, scores)
    plt.xlabel("Valeurs d'alpha")
    # The values span seven decades, so a linear axis would squash all but the
    # largest ones against the origin; use a logarithmic axis instead.
    plt.xscale("log")
    # Start the axis at the smallest alpha. The previous bound `10e-8` is
    # 1e-7, which cut the first point (alpha = 1e-8) out of the figure.
    plt.xlim(left=alphas[0])
    plt.ylabel("Score de précession")
    plt.title("Courbe d'évolution du score de précession en fonction du paramètre alpha")
    plt.savefig("tp2Output/scores_with_alpha_variation_same_hidden_layers_sizes.png", dpi="figure")
    plt.show()
    plt.close()
def times_scores_recalls_errors_comparison():
    """Compare time, score, recall and error of five randomly sized MLPs.

    Model number `c` (0..4) has `2*c + 2` hidden layers whose sizes are drawn
    with `randint(10, 300)`. For each model the CPU time spent in `fit`, the
    test score (percent), the micro-averaged recall (fraction) and the
    zero-one loss (percent) are drawn as a grouped bar chart, which is saved
    under `tp2Output/` and shown.
    """
    scores = {}
    times = {}
    recalls = {}
    errors = {}
    layers = {}
    for counter in range(5):
        hl = tuple(randint(10, 300) for _ in range(counter * 2 + 2))
        modele = MLPClassifier(hidden_layer_sizes=hl)
        tic = time.process_time()
        modele.fit(X=x_train, y=y_train)
        toc = time.process_time()
        y_pred = modele.predict(X=x_test)
        times[counter] = toc - tic
        scores[counter] = modele.score(X=x_test, y=y_test) * 100
        recalls[counter] = recall_score(y_true=y_test, y_pred=y_pred, average="micro")
        errors[counter] = zero_one_loss(y_true=y_test, y_pred=y_pred) * 100
        layers[counter] = hl

    bar_width = 0.4
    y1 = [times[c] for c in range(5)]
    y2 = [scores[c] for c in range(5)]
    y3 = [recalls[c] for c in range(5)]
    y4 = [errors[c] for c in range(5)]
    r1 = [0, 4, 8, 12, 16]
    r2 = [x + bar_width for x in r1]
    r3 = [x + 2 * bar_width for x in r1]
    r4 = [x + 3 * bar_width for x in r1]

    plt.figure(figsize=(12.8, 9.6))
    bar1 = plt.bar(r1, y1, width=bar_width, color=['red' for _ in y1], linewidth=2, label="temps (sec)")
    bar2 = plt.bar(r2, y2, width=bar_width, color=['green' for _ in y2], linewidth=4, label="precision (%)")
    # Recall gets its own colour: it used to be red like the time bars, which
    # made the two series indistinguishable in the legend.
    bar3 = plt.bar(r3, y3, width=bar_width, color=['blue' for _ in y3], linewidth=1, label="recall")
    bar4 = plt.bar(r4, y4, width=bar_width, color=['magenta' for _ in y4], linewidth=3, label="error (%)")
    # Centre each tick under its group of four bars.
    plt.xticks(
        [r + 1.5*bar_width for r in r1],
        ['%s couches\n%s' % (i * 2 + 2, layers[i]) for i in range(5)],
        rotation=10,
    )
    plt.xlabel("Modèles")
    plt.ylabel("Valeur")
    plt.title("Evaluation du temps d'apprentissage, la précession, le recall et l'erreur de cinq modèles de différentes couches")
    # Write each bar's value just above it.
    for rect in bar1 + bar2 + bar3 + bar4:
        height = rect.get_height()
        plt.text(rect.get_x() + rect.get_width() / 2.0, height, '%.2f' % height, ha='center', va='bottom')
    plt.legend()
    plt.savefig("tp2Output/comparison_times_scores_recalls_errors_of_methods_different_layers", dpi="figure")
    plt.show()
    plt.close()
# Run every experiment of TP2, in order.
for experiment in (
    modele_hidden_layer_sizes_50,
    layers_number_variation_and_scores,
    five_models_with_different_layers,
    optimization_algorithms_variation,
    activation_functions_variation,
    alpha_parameter_variation,
    times_scores_recalls_errors_comparison,
):
    experiment()

257
TP3_prog1.py Normal file
View file

@ -0,0 +1,257 @@
from sklearn import datasets
from sklearn.model_selection import train_test_split, KFold
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import precision_score, recall_score, plot_confusion_matrix, confusion_matrix, ConfusionMatrixDisplay
import numpy as np
import matplotlib.pyplot as plt
from random import seed, randint
from graphs import bars_plot2, curve_plot
from statistics import mean
import time
np.random.seed(0)  # seed NumPy's global generator so runs are repeatable
seed(0)  # seed the stdlib `random` generator as well
#######################
### TP3 PROGRAM ###
#######################
# Load the MNIST dataset from OpenML. `as_frame=False` forces NumPy arrays:
# recent scikit-learn releases return a pandas DataFrame by default, which
# rejects the `[:3500, :]` slicing and has no `.reshape`.
mnist = datasets.fetch_openml('mnist_784', as_frame=False)
# Split the first 5001 examples: rows 0..3499 for training, 3500..5000 for test.
x_train = mnist.data[:3500, :]
y_train = mnist.target[:3500]
x_test = mnist.data[3500:5001, :]
y_test = mnist.target[3500:5001]
# Make sure both feature matrices have the 784-column layout the classifiers expect.
x_train = x_train.reshape((-1, 784))
x_test = x_test.reshape((-1, 784))
def kernel_variation():
    """Print and plot the SVC test score for each of four kernels.

    One `SVC` per kernel ("linear", "poly", "rbf", "sigmoid") is fitted on the
    training split and scored on the test split; the percentages are printed
    and drawn as a bar chart through `bars_plot2`.
    """
    kernel_names = ["linear", "poly", "rbf", "sigmoid"]
    score_by_kernel = {}
    for name in kernel_names:
        classifier = SVC(kernel=name)
        classifier.fit(X=x_train, y=y_train)
        score_by_kernel[name] = classifier.score(X=x_test, y=y_test) * 100
        print("Score avec kernel=%s: %.2f"%(name, score_by_kernel[name]))
    bars_plot2(
        bar_labels=kernel_names,
        bar_heights=list(score_by_kernel.values()),
        xlabel="Kernels utilisés",
        ylabel="Score",
        fig_title="Score par kernel utilisé",
        show=True,
        percent_mark=True,
        output="tp3Output/scores_for_kernel_variation.png",
    )
def kernel_and_c_variation():
    """Plot the SVC test score against C for each of four kernels.

    For every kernel and every C in 0.1, 0.28, ... 1.0 an `SVC` is fitted on
    the training split and scored on the test split. The percentages are
    printed as they are produced and drawn as one curve per kernel.
    """
    kernels = ["linear", "poly", "rbf", "sigmoid"]
    c_values = [0.1 + 0.18 * x for x in range(6)]
    scores = {}
    for kernel in kernels:
        scores[kernel] = {}
        for c_value in c_values:
            clsvm = SVC(kernel=kernel, C=c_value)
            clsvm.fit(X=x_train, y=y_train)
            # Score once and reuse the value for the report: calling `score`
            # again would run a second full prediction pass on the test set.
            scores[kernel][c_value] = clsvm.score(X=x_test, y=y_test) * 100
            print("Score avec kernel=%s: %.2f" % (kernel, scores[kernel][c_value]))

    plt.figure(figsize=(12.8, 9.6))
    for kernel in kernels:
        plt.plot(c_values, list(scores[kernel].values()), label=kernel)
    plt.xlabel("Valeurs de C")
    plt.xlim(left=0.1)
    plt.ylabel("Score (précision)")
    plt.title("Courbe d'évolution de la précision en fonction de C")
    plt.legend()
    plt.savefig("tp3Output/scores_all_kernel_with_c_variation.png", dpi="figure")
    plt.show()
    plt.close()
def train_test_error_with_c_variation():
    """Plot SVC training and test error against C for each of four kernels.

    For every kernel and every C in 0.1, 0.28, ... 1.0 an `SVC` is fitted on
    the training split; its error percentage `(1 - score) * 100` is measured
    on both the training and the test split. All values are printed, then
    drawn as one curve per (data type, kernel) pair.
    """
    kernels = ["linear", "poly", "rbf", "sigmoid"]
    c_values = [0.1 + 0.18 * x for x in range(6)]
    data_types = ["train", "test"]
    scores = {data_type: {kernel: {} for kernel in kernels} for data_type in data_types}

    # Fit each (kernel, C) model once and measure both errors on it. The
    # previous layout looped over the data type first and therefore trained
    # every model twice.
    for kernel in kernels:
        for c_value in c_values:
            clsvm = SVC(kernel=kernel, C=c_value)
            clsvm.fit(X=x_train, y=y_train)
            scores["train"][kernel][c_value] = (1 - clsvm.score(X=x_train, y=y_train)) * 100
            scores["test"][kernel][c_value] = (1 - clsvm.score(X=x_test, y=y_test)) * 100

    # Report in the original order: all training errors, then all test errors.
    for data_type in data_types:
        for kernel in kernels:
            for c_value in c_values:
                print("Score d'erreur données=%s avec kernel=%s: %.2f" % (data_type, kernel, scores[data_type][kernel][c_value]))

    plt.figure(figsize=(12.8, 9.6))
    markers = {"train": "o", "test": "*"}
    for data_type in data_types:
        for kernel in kernels:
            plt.plot(
                c_values,
                list(scores[data_type][kernel].values()),
                label="%s %s" % (data_type, kernel),
                marker=markers[data_type],
            )
    plt.xlabel("Valeurs de C")
    plt.xlim(left=0.1)
    plt.ylabel("Error score")
    plt.title("Courbe d'évolution du pourcentage d'erreur de précision en fonction de C")
    plt.legend()
    plt.savefig("tp3Output/error_scores_train_test_with_kernel_and_c_variation.png", dpi="figure")
    plt.show()
    plt.close()
def generate_confusion_matrix():
    """Show the test-set confusion matrix of an SVC for each of four kernels.

    One `SVC` per kernel is fitted on the training split; its confusion matrix
    on the test split is drawn with `plot_confusion_matrix` and displayed.
    """
    for kernel_name in ("linear", "poly", "rbf", "sigmoid"):
        classifier = SVC(kernel=kernel_name)
        classifier.fit(X=x_train, y=y_train)
        plot_confusion_matrix(estimator=classifier, X=x_test, y_true=y_test, values_format=".1f")
        plt.show()
def time_precision_recall_error():
    """Plot SVC fitting time, score, error and recall against C per kernel.

    For every kernel and every C in 0.1, 0.28, ... 1.0 an `SVC` is fitted on
    the training split. The CPU time spent in `fit`, the test score (percent),
    the micro-averaged recall (fraction) and the error `100 - score` are
    recorded. Three figures are saved under `tp3Output/` and shown: fitting
    time, score together with error, and recall.
    """
    kernels = ["linear", "poly", "rbf", "sigmoid"]
    c_values = [0.1 + 0.18 * x for x in range(6)]
    times = {}
    scores = {}
    recalls = {}
    errors = {}
    for kernel in kernels:
        times[kernel] = {}
        scores[kernel] = {}
        recalls[kernel] = {}
        errors[kernel] = {}
        for c_value in c_values:
            clsvm = SVC(kernel=kernel, C=c_value)
            tic = time.process_time()
            clsvm.fit(X=x_train, y=y_train)
            toc = time.process_time()
            y_pred = clsvm.predict(X=x_test)
            times[kernel][c_value] = toc - tic
            # Score once and reuse the value for the report: calling `score`
            # again would run another full prediction pass on the test set.
            scores[kernel][c_value] = clsvm.score(X=x_test, y=y_test) * 100
            recalls[kernel][c_value] = recall_score(y_true=y_test, y_pred=y_pred, average="micro")
            errors[kernel][c_value] = 100 - scores[kernel][c_value]
            print("Score avec kernel=%s: %.2f" % (kernel, scores[kernel][c_value]))

    # Figure 1: fitting time per kernel.
    plt.figure(figsize=(12.8, 9.6))
    for kernel in kernels:
        plt.plot(c_values, list(times[kernel].values()), label=kernel)
    plt.xlabel("Valeurs de C")
    plt.xlim(left=0.1)
    plt.ylabel("Temps d'apprentissage (secondes)")
    plt.title("Courbe d'évolution du temps d'apprentissage en fonction de C")
    plt.legend()
    plt.savefig("tp3Output/times_all_kernel_with_c_variation.png", dpi="figure")
    plt.show()
    plt.close()

    # Figure 2: score (plain lines) and error (starred dash-dot lines) per kernel.
    plt.figure(figsize=(12.8, 9.6))
    for kernel in kernels:
        plt.plot(c_values, list(scores[kernel].values()), label="%s precision" % kernel)
    for kernel in kernels:
        plt.plot(c_values, list(errors[kernel].values()), label="%s error" % kernel, marker="*", linestyle="-.")
    plt.xlabel("Valeurs de C")
    plt.xlim(left=0.1)
    plt.ylabel("Score")
    plt.title("Courbe d'évolution de la précision et de l'erreur de classification en fonction de C")
    plt.legend()
    plt.savefig("tp3Output/scores_and_errors_all_kernel_with_c_variation.png", dpi="figure")
    plt.show()
    plt.close()

    # Figure 3: recall per kernel.
    plt.figure(figsize=(12.8, 9.6))
    for kernel in kernels:
        plt.plot(c_values, list(recalls[kernel].values()), label=kernel)
    plt.xlabel("Valeurs de C")
    plt.xlim(left=0.1)
    plt.ylabel("Recall score")
    plt.title("Courbe d'évolution du recall en fonction de C")
    plt.legend()
    plt.savefig("tp3Output/recalls_all_kernel_with_c_variation.png", dpi="figure")
    plt.show()
    plt.close()
def _evaluate_classifier(classifier):
    """Fit `classifier` on the training split and measure it on the test split.

    Also displays the classifier's confusion matrix on the test split.

    Returns a tuple `(fit_time, score, recall, error)` where `fit_time` is the
    CPU time spent in `fit` (seconds), `score` the test score in percent,
    `recall` the micro-averaged recall as a fraction and `error` the value
    `100 - score`.
    """
    tic = time.process_time()
    classifier.fit(X=x_train, y=y_train)
    toc = time.process_time()
    # Predict with the classifier under evaluation and score it against the
    # true labels, so each method is measured on its own output.
    y_pred = classifier.predict(X=x_test)
    score = classifier.score(X=x_test, y=y_test) * 100
    recall = recall_score(y_true=y_test, y_pred=y_pred, average="micro")
    # NOTE(review): `plot_confusion_matrix` no longer exists in recent
    # scikit-learn releases; `ConfusionMatrixDisplay.from_estimator` is its
    # replacement - confirm against the installed version.
    plot_confusion_matrix(estimator=classifier, X=x_test, y_true=y_test, values_format=".1f")
    plt.show()
    return toc - tic, score, recall, 100 - score


def methods_comparison():
    """Compare k-NN, MLP and SVM on fitting time, score, recall and error.

    Each classifier is fitted on the training split and evaluated on the test
    split; the four metrics are drawn as a grouped bar chart which is saved
    under `tp3Output/` and then shown.
    """
    classifiers = {
        "knn": KNeighborsClassifier(n_neighbors=10),
        "mlp": MLPClassifier(hidden_layer_sizes=(50)),
        "svc": SVC(kernel="rbf", C=1),
    }
    times = {}
    scores = {}
    recalls = {}
    errors = {}
    for name, classifier in classifiers.items():
        times[name], scores[name], recalls[name], errors[name] = _evaluate_classifier(classifier)

    # Grouped bar chart: one group per method, one bar per metric.
    bar_width = 0.5
    order = ["knn", "mlp", "svc"]
    y1 = [times[name] for name in order]
    y2 = [scores[name] for name in order]
    y3 = [recalls[name] for name in order]
    y4 = [errors[name] for name in order]
    r1 = [0, 3, 6]
    r2 = [x + bar_width for x in r1]
    r3 = [x + 2 * bar_width for x in r1]
    r4 = [x + 3 * bar_width for x in r1]

    plt.figure(figsize=(12.8, 9.6))
    bar1 = plt.bar(r1, y1, width=bar_width, color=['green' for _ in y1], linewidth=2, label="temps (sec)")
    bar2 = plt.bar(r2, y2, width=bar_width, color=['yellow' for _ in y2], linewidth=4, label="precision (%)")
    bar3 = plt.bar(r3, y3, width=bar_width, color=['red' for _ in y3], linewidth=1, label="recall")
    bar4 = plt.bar(r4, y4, width=bar_width, color=['magenta' for _ in y4], linewidth=3, label="error (%)")
    # Centre each tick under its group of four bars.
    plt.xticks([r + 1.5 * bar_width for r in r1], ['Modèle K-nn', 'Modèle MLP', 'Modèle SVM'])
    plt.xlabel("Méthodes")
    plt.ylabel("Valeur")
    plt.title("Evaluation de différentes métriques de performance pour chacune des méthodes vues")
    # Write each bar's value just above it.
    for rect in bar1 + bar2 + bar3 + bar4:
        height = rect.get_height()
        plt.text(rect.get_x() + rect.get_width() / 2.0, height, '%.2f' % height, ha='center', va='bottom')
    plt.legend()
    # Save before showing: once the window opened by `show` is closed the
    # figure is gone and `savefig` would write an empty image.
    plt.savefig("tp3Output/global_comparison_of_methods_different_metrics", dpi="figure")
    plt.show()
    plt.close()
# Run every experiment of TP3, in order.
for experiment in (
    kernel_variation,
    kernel_and_c_variation,
    train_test_error_with_c_variation,
    generate_confusion_matrix,
    time_precision_recall_error,
    methods_comparison,
):
    experiment()

54
graphs.py Normal file
View file

@ -0,0 +1,54 @@
def bars_plot(bar_labels=None, bar_heights=None, bar_colors="blue", bar_width=0.25, xlabel="xlabel", ylabel="ylabel", fig_title="title", figsize=(12.8, 9.6), output="./bars.png", show=False):
    """Draw a bar chart, save it to `output` and optionally display it.

    Parameters:
        bar_labels: one label per bar; each is converted with `str`.
            `None` is treated as no bars.
        bar_heights: one height per bar. `None` is treated as no bars.
        bar_colors: colour (or colours) passed to `plt.bar`.
        bar_width: width passed to `plt.bar`.
        xlabel, ylabel, fig_title: axis labels and figure title.
        figsize: figure size passed to `plt.figure`.
        output: destination passed to `plt.savefig`.
        show: when true, call `plt.show()` after saving.
    """
    import os

    import matplotlib.pyplot as plt

    # Calling with the `None` defaults yields an empty chart instead of a TypeError.
    bar_labels = [] if bar_labels is None else bar_labels
    bar_heights = [] if bar_heights is None else bar_heights

    plt.figure(figsize=figsize)
    plt.bar(x=list(map(str, bar_labels)), height=bar_heights, width=bar_width, color=bar_colors)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.title(fig_title)
    # Create the destination directory on demand so saving does not fail when
    # it is missing. Only done for path-like destinations.
    if isinstance(output, (str, os.PathLike)):
        os.makedirs(os.path.dirname(os.fspath(output)) or ".", exist_ok=True)
    plt.savefig(output, dpi="figure")
    if show:
        plt.show()
    plt.close()
def clustering_plot(abcissas=None, ordinates=None, predictions=None, marker="o", xlabel="xlabel", ylabel="ylabel", fig_title="title", figsize=(12.8, 9.6), output="./bars.png", show=False):
    """Draw a scatter plot, save it to `output` and optionally display it.

    Parameters:
        abcissas, ordinates: x and y coordinates of the points. `None` is
            treated as no points.
        predictions: passed to `plt.scatter` as the `c` (colour) argument.
        marker: marker style passed to `plt.scatter`.
        xlabel, ylabel, fig_title: axis labels and figure title.
        figsize: figure size passed to `plt.figure`.
        output: destination passed to `plt.savefig`.
        show: when true, call `plt.show()` after saving.
    """
    import os

    import matplotlib.pyplot as plt

    # Calling with the `None` defaults yields an empty plot instead of an error.
    abcissas = [] if abcissas is None else abcissas
    ordinates = [] if ordinates is None else ordinates

    plt.figure(figsize=figsize)
    plt.scatter(x=abcissas, y=ordinates, c=predictions, marker=marker)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.title(fig_title)
    # Create the destination directory on demand so saving does not fail when
    # it is missing. Only done for path-like destinations.
    if isinstance(output, (str, os.PathLike)):
        os.makedirs(os.path.dirname(os.fspath(output)) or ".", exist_ok=True)
    plt.savefig(output, dpi="figure")
    if show:
        plt.show()
    plt.close()
def bars_plot2(bar_labels=None, bar_heights=None, bar_colors="blue", bar_width=0.25, xlabel="xlabel", ylabel="ylabel", fig_title="title", figsize=(12.8, 9.6), output="./bars.png", show=False, show_heights=True, percent_mark=False):
    """Draw a bar chart with optional value labels, save it and optionally show it.

    Parameters:
        bar_labels: one label per bar; each is converted with `str`.
            `None` is treated as no bars.
        bar_heights: one height per bar. `None` is treated as no bars.
        bar_colors: colour (or colours) passed to `plt.bar`.
        bar_width: width passed to `plt.bar`.
        xlabel, ylabel, fig_title: axis labels and figure title.
        figsize: figure size passed to `plt.figure`.
        output: destination passed to `plt.savefig`.
        show: when true, call `plt.show()` after saving.
        show_heights: when true, write each bar's height above it with two
            decimals.
        percent_mark: when true, append a "%" sign to the written heights.
    """
    import os

    import matplotlib.pyplot as plt

    # Calling with the `None` defaults yields an empty chart instead of a TypeError.
    bar_labels = [] if bar_labels is None else bar_labels
    bar_heights = [] if bar_heights is None else bar_heights

    plt.figure(figsize=figsize)
    bars = plt.bar(x=list(map(str, bar_labels)), height=bar_heights, width=bar_width, color=bar_colors)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.title(fig_title)
    if show_heights:
        height_format = '%.2f%%' if percent_mark else '%.2f'
        for rect in bars:
            height = rect.get_height()
            # Centre the text horizontally on the bar, just above its top.
            plt.text(rect.get_x() + rect.get_width() / 2.0, height, height_format % height, ha='center', va='bottom')
    # Create the destination directory on demand so saving does not fail when
    # it is missing. Only done for path-like destinations.
    if isinstance(output, (str, os.PathLike)):
        os.makedirs(os.path.dirname(os.fspath(output)) or ".", exist_ok=True)
    plt.savefig(output, dpi="figure")
    if show:
        plt.show()
    plt.close()
def curve_plot(abcissas=None, ordinates=None, xlabel="xlabel", ylabel="ylabel", fig_title="title", figsize=(12.8, 9.6), output="./bars.png", show=False):
    """Draw a line plot, save it to `output` and optionally display it.

    Parameters:
        abcissas, ordinates: x and y coordinates of the curve. `None` is
            treated as an empty curve.
        xlabel, ylabel, fig_title: axis labels and figure title.
        figsize: figure size passed to `plt.figure`.
        output: destination passed to `plt.savefig`.
        show: when true, call `plt.show()` after saving.
    """
    import os

    import matplotlib.pyplot as plt

    # Calling with the `None` defaults yields an empty plot instead of an error.
    abcissas = [] if abcissas is None else abcissas
    ordinates = [] if ordinates is None else ordinates

    plt.figure(figsize=figsize)
    plt.plot(abcissas, ordinates)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.title(fig_title)
    # Create the destination directory on demand so saving does not fail when
    # it is missing. Only done for path-like destinations.
    if isinstance(output, (str, os.PathLike)):
        os.makedirs(os.path.dirname(os.fspath(output)) or ".", exist_ok=True)
    plt.savefig(output, dpi="figure")
    if show:
        plt.show()
    plt.close()

Binary file not shown.

After

Width:  |  Height:  |  Size: 30 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 49 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 49 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 44 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 46 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 46 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 36 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 68 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 77 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 36 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 46 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 54 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 71 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 41 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 48 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 41 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 47 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 45 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 42 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 50 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 87 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 38 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 54 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 54 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 61 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 24 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 62 KiB