diff --git a/TP1_prog1.py b/TP1_prog1.py
new file mode 100644
index 0000000..b75b2c5
--- /dev/null
+++ b/TP1_prog1.py
@@ -0,0 +1,34 @@
+from sklearn import datasets
+import matplotlib.pyplot as plt
+
+
+###################
+### EXERCISE 1 ###
+###################
+# Load the MNIST dataset (as_frame=False so that mnist.data is a NumPy array, as the indexing below expects)
+mnist = datasets.fetch_openml('mnist_784', as_frame=False)
+
+# Print a few exploratory commands
+affichage = True
+if affichage:
+    print(mnist)
+    print(mnist.data)
+    print(mnist.target)
+    print(len(mnist.data))
+    help(len)
+    print(mnist.data.shape)
+    print(mnist.target.shape)
+    print(mnist.data[0])
+    print(mnist.data[0][1])
+    print(mnist.data[:, 1])
+    print(len(mnist.data[:100]))
+
+# Extract and display the first five images of the dataset
+images = mnist.data.reshape((-1, 28, 28))
+for i in range(5):
+    plt.imshow(images[i], cmap="gray", interpolation="nearest")
+    plt.show()
+
+# Print the class of the first image of the dataset
+targets = mnist.target
+print("Classe de la première image: %s" % (targets[0]))
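Note on the change above: recent scikit-learn releases return a pandas DataFrame/Series from fetch_openml by default, which breaks the NumPy-style reshape and slicing used in these scripts, hence as_frame=False. A minimal sketch of the equivalent conversion if the DataFrame form is kept (assumes pandas is available; not part of the original TP):

from sklearn import datasets

mnist = datasets.fetch_openml('mnist_784')   # DataFrame/Series on recent scikit-learn
X = mnist.data.to_numpy()                    # shape (70000, 784)
y = mnist.target.to_numpy()                  # string labels '0'..'9'
images = X.reshape((-1, 28, 28))             # same array the scripts build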
diff --git a/TP1_prog2.py b/TP1_prog2.py
new file mode 100644
index 0000000..54e536d
--- /dev/null
+++ b/TP1_prog2.py
@@ -0,0 +1,151 @@
+from sklearn import datasets
+from sklearn.model_selection import train_test_split, KFold
+from sklearn.neighbors import KNeighborsClassifier
+import matplotlib.pyplot as plt
+import numpy as np
+from random import seed
+from graphs import bars_plot2, bars_plot
+import time
+
+np.random.seed(0)  # seed the random generators with seed=0
+seed(0)
+
+###################
+### EXERCISE 2 ###
+###################
+# Load the MNIST dataset (as_frame=False so that mnist.data is a NumPy array)
+mnist = datasets.fetch_openml('mnist_784', as_frame=False)
+
+# Extract the images of the dataset
+images = mnist.data.reshape((-1, 28, 28))
+
+# Extract the image classes of the dataset
+targets = mnist.target
+
+# Draw a sample of 5000 examples
+indexes = np.random.randint(70000, size=5000)
+data = images[indexes, :]
+data_targets = targets[indexes]
+
+# Split the dataset into training and test sets
+def k_10_classifier_and_score():
+    pourcentage_donnees = 0.8
+    xtrain, xtest, ytrain, ytest = train_test_split(data, data_targets, train_size=pourcentage_donnees)
+    # Reshape xtrain and xtest to the dimensions expected by the classifier
+    xtrain = xtrain.reshape((-1, 784))
+    xtest = xtest.reshape((-1, 784))
+    # Instantiate and train the classifier
+    k = 10
+    clf = KNeighborsClassifier(n_neighbors=k)
+    clf.fit(X=xtrain, y=ytrain)
+    predictions = clf.predict(X=xtest)
+    # Actual class of test image 4 and its predicted class
+    print("VERDICT IMAGE N°4\nClasse réelle: %s\nClasse prédite: %s" % (ytest[3], predictions[3]))
+    # Score on the test sample
+    score_xtest = clf.score(X=xtest, y=ytest)
+    print("Score xtest = %.2f%%" % (score_xtest*100))
+    # Error rate on the training data
+    score_xtrain = clf.score(X=xtrain, y=ytrain)
+    print("Score d'erreur xtrain = %.2f%%" % (100*(1-score_xtrain)))
+
+def k_variation_and_score():
+    # Vary the number k of neighbours and record the resulting score
+    n_folds = 10
+    kf = KFold(n_splits=n_folds, shuffle=True)
+    k_score = {}
+    for k in range(2, 16):
+        sum_score = 0
+        for train_index, test_index in kf.split(X=data):
+            x_train, x_test = data[train_index], data[test_index]
+            y_train, y_test = data_targets[train_index], data_targets[test_index]
+            x_train = x_train.reshape((-1, 784))
+            x_test = x_test.reshape((-1, 784))
+            clf = KNeighborsClassifier(n_neighbors=k)
+            clf.fit(X=x_train, y=y_train)
+            sum_score += clf.score(X=x_test, y=y_test)
+        k_score[k] = (sum_score/n_folds)*100
+    bars_plot2(bar_labels=list(k_score.keys()), bar_heights=list(k_score.values()), xlabel="Valeur de k",
+               ylabel="Score", fig_title="Score par valeur de k (nombre de voisins)", show=True, percent_mark=True,
+               output="tp1Output/mnist_k_variation_scores.png")
+
+def train_test_percent_variation_and_score():
+    # Vary the train/test split percentage and plot the resulting scores
+    k_score_percent = {}
+    for pourcentage in [0.05*i for i in range(1, 20)]:
+        xtrain_percent, xtest_percent, ytrain_percent, ytest_percent = train_test_split(data, data_targets, train_size=pourcentage)
+        xtrain_percent = xtrain_percent.reshape((-1, 784))
+        xtest_percent = xtest_percent.reshape((-1, 784))
+        clf = KNeighborsClassifier(n_neighbors=10)
+        clf.fit(X=xtrain_percent, y=ytrain_percent)
+        k_score_percent[pourcentage] = clf.score(X=xtest_percent, y=ytest_percent)*100
+    bars_plot2(bar_labels=["%.2f" % (percent) for percent in list(k_score_percent.keys())],
+               bar_heights=list(k_score_percent.values()), xlabel="Pourcentage d'échantillons train",
+               ylabel="Score",
+               fig_title="Score par pourcentage de découpage de DATA (5000 images) en échantillons train et test",
+               show=True, percent_mark=True, bar_colors="red",
+               output="tp1Output/mnist_split_data_percentage_scores.png")
+
+def train_percent_variation_and_score():
+    # Fix the test set size (25% of DATA), vary the training set size and plot the scores
+    k_score_train_variation = {}
+    xtrain_global, xtest_25_percent, ytrain_global, ytest_25_percent = train_test_split(data, data_targets, train_size=0.75)
+    xtest_25_percent = xtest_25_percent.reshape((-1, 784))
+    for pourcentage in [(10*j)/100.0 for j in range(1, 10)]:
+        xtrain_echantillon = xtrain_global[:int(pourcentage*len(xtrain_global))+1]
+        xtrain_echantillon = xtrain_echantillon.reshape((-1, 784))
+        ytrain_echantillon = ytrain_global[:int(pourcentage*len(xtrain_global))+1]
+        clf = KNeighborsClassifier(n_neighbors=10)
+        clf.fit(X=xtrain_echantillon, y=ytrain_echantillon)
+        k_score_train_variation[pourcentage] = clf.score(X=xtest_25_percent, y=ytest_25_percent)*100
+    bars_plot2(bar_labels=["%.2f" % (percent) for percent in list(k_score_train_variation.keys())],
+               bar_heights=list(k_score_train_variation.values()),
+               xlabel="Pourcentage d'échantillons train", ylabel="Score",
+               fig_title="Score par pourcentage de découpage de train (%s images initialement) et %s images comme test (25%% de DATA)" % (
+                   len(xtrain_global), len(xtest_25_percent)),
+               show=True, percent_mark=True, bar_colors="magenta",
+               output="tp1Output/mnist_split_various_train_percentage_scores_with_fixed_test.png")
+
+def distance_type_variation_and_score():
+    # Vary the distance metric with train=75% of DATA and test=25% of DATA, then plot the scores
+    xtrain_distance, xtest_distance, ytrain_distance, ytest_distance = train_test_split(data, data_targets, train_size=0.75)
+    xtrain_distance = xtrain_distance.reshape((-1, 784))
+    xtest_distance = xtest_distance.reshape((-1, 784))
+    distances = ["minkowski", "euclidean", "hamming", "canberra", "braycurtis"]
+    distance_score = {}
+    for distance in distances:
+        clf = KNeighborsClassifier(n_neighbors=10, metric=distance)
+        clf.fit(X=xtrain_distance, y=ytrain_distance)
+        distance_score[distance] = clf.score(X=xtest_distance, y=ytest_distance)*100
+    bars_plot2(bar_labels=list(distance_score.keys()), bar_heights=list(distance_score.values()),
+               xlabel="Types de distance", ylabel="Score",
+               fig_title="Score pour différents types de distance (métrique) avec train=75% de DATA et test=25% de DATA",
+               show=True, percent_mark=True, bar_colors="green",
+               output="tp1Output/mnist_various_distance_metric_scores.png")
+
+
+def parameter_njobs_variation_and_score():
+    xtrain_distance, xtest_distance, ytrain_distance, ytest_distance = train_test_split(data, data_targets, train_size=0.75)
+    xtrain_distance = xtrain_distance.reshape((-1, 784))
+    xtest_distance = xtest_distance.reshape((-1, 784))
+    distances = ["minkowski", "euclidean", "hamming", "canberra", "braycurtis"]
+    for n_jobs in [6, -1]:
+        distance_based_training_time = {}
+        for distance in distances:
+            clf = KNeighborsClassifier(n_neighbors=10, metric=distance, n_jobs=n_jobs)
+            tic = time.process_time()
+            clf.fit(X=xtrain_distance, y=ytrain_distance)
+            toc = time.process_time()
+            distance_based_training_time[distance] = (toc-tic)
+        bars_plot2(bar_labels=list(distance_based_training_time.keys()),
+                   bar_heights=list(distance_based_training_time.values()),
+                   xlabel="Types de distance", ylabel="Temps d'entraînement (seconde)", show=True, bar_colors="pink",
+                   fig_title="Temps d'entraînement pour différents types de distance avec train=75%% de DATA et test=25%% de DATA et\nN_JOBS=%s" % (
+                       n_jobs))
+
+
+k_10_classifier_and_score()
+k_variation_and_score()
+train_test_percent_variation_and_score()
+train_percent_variation_and_score()
+distance_type_variation_and_score()
+parameter_njobs_variation_and_score()
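The K-fold loop in k_variation_and_score() can also be cross-checked with scikit-learn's built-in helper cross_val_score, which runs the same fold loop in one call. A sketch under the assumption that the same data/data_targets arrays as above are in scope (not part of the original TP):

from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier

X_flat = data.reshape((-1, 784))   # flatten the 28x28 images
for k in range(2, 16):
    scores = cross_val_score(KNeighborsClassifier(n_neighbors=k), X_flat, data_targets, cv=10)
    print("k=%2d : score moyen = %.2f%%" % (k, scores.mean() * 100))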
diff --git a/TP2_prog1.py b/TP2_prog1.py
new file mode 100644
index 0000000..dfcbd1f
--- /dev/null
+++ b/TP2_prog1.py
@@ -0,0 +1,217 @@
+from sklearn import datasets
+from sklearn.model_selection import train_test_split, KFold
+from sklearn.neural_network import MLPClassifier
+from sklearn.metrics import precision_score, zero_one_loss, recall_score
+import numpy as np
+import matplotlib.pyplot as plt
+from random import seed, randint
+from graphs import bars_plot2, bars_plot
+import time
+
+
+np.random.seed(0)  # seed the random generators with seed=0
+seed(0)
+
+#######################
+###  TP2 PROGRAM   ###
+#######################
+# Load the MNIST dataset (as_frame=False so that mnist.data is a NumPy array)
+mnist = datasets.fetch_openml('mnist_784', as_frame=False)
+
+# Split the dataset
+x_train = mnist.data[:4900, :]  ## training
+y_train = mnist.target[:4900]
+x_test = mnist.data[4900:7000, :]  ## test
+y_test = mnist.target[4900:7000]
+
+# Reshape x_train and x_test to the dimensions expected by the classifier
+x_train = x_train.reshape((-1, 784))
+x_test = x_test.reshape((-1, 784))
+
+def modele_hidden_layer_sizes_50():
+    # Build the model, train it and compute its precision
+    modele = MLPClassifier(hidden_layer_sizes=(50))
+    modele.fit(X=x_train, y=y_train)
+    score = modele.score(X=x_test, y=y_test)
+    print("MLPClassifier[hidden_layer_sizes=(50)] score = %.2f" % (100*score))  # about 95.19% on one run
+    # Actual class of test image 4 and its predicted class
+    y_pred = modele.predict(X=x_test)
+    print("VERDICT IMAGE N°4\nClasse réelle: %s\nClasse prédite: %s" % (y_test[3], y_pred[3]))
+    # Compute the precision with sklearn's precision_score
+    score_package = precision_score(y_true=y_test, y_pred=y_pred, average="micro")
+    print("MLPClassifier[hidden_layer_sizes=(50)] score[using precision_score] = %.2f" % (100*score_package))
+
+def layers_number_variation_and_scores():
+    # Vary the number of hidden layers and record the resulting scores
+    scores_dictionary = {}
+    layers = []
+    for iteration in range(1, 11):
+        layers.append(50)
+        hidden_layers = tuple(layers)
+        print(hidden_layers)
+        modele_iter = MLPClassifier(hidden_layer_sizes=hidden_layers)
+        modele_iter.fit(X=x_train, y=y_train)
+        scores_dictionary["%s couches" % (iteration)] = modele_iter.score(X=x_test, y=y_test) * 100
+    # Plot the scores as a curve
+    print(scores_dictionary)
+    plt.figure(figsize=(12.8, 9.6))
+    plt.plot(range(1, 11), list(scores_dictionary.values()))
+    plt.xlabel("Nombre de couches de 50 neurones")
+    plt.xlim(left=1)
+    plt.ylabel("Score")
+    plt.title("Courbe d'évolution du score de précision en fonction du nombre de couches cachées")
+    plt.savefig("tp2Output/scores_with_hidden_layers_variation.png", dpi="figure")
+    plt.show(); plt.close()
+
+def five_models_with_different_layers():
+    scores = {}
+    times = {}
+    layers = {}
+    for counter in range(5):
+        hl = tuple([randint(10, 300) for i in range(counter*2 + 2)])
+        modele = MLPClassifier(hidden_layer_sizes=hl)
+        tic = time.process_time()
+        modele.fit(X=x_train, y=y_train)
+        toc = time.process_time()
+        times[counter] = toc - tic
+        scores[counter] = modele.score(X=x_test, y=y_test) * 100
+        layers[counter] = hl
+    barWidth = 0.4
+    y1 = [times[c] for c in range(5)]
+    y2 = [scores[c] for c in range(5)]
+    r1 = [0, 4, 8, 12, 16]
+    r2 = [x + barWidth for x in r1]
+    plt.figure(figsize=(12.8, 9.6))
+    bar1 = plt.bar(r1, y1, width=barWidth, color=['red' for i in y1], linewidth=2, label="temps (sec)")
+    bar2 = plt.bar(r2, y2, width=barWidth, color=['green' for i in y2], linewidth=4, label="precision (%)")
+    plt.xticks([r + barWidth for r in r1], ['%s couches\n%s' % (i*2+2, layers[i]) for i in range(5)], rotation=10)
+    plt.xlabel("Modèles")
+    plt.ylabel("Valeur")
+    plt.title("Evaluation du temps d'apprentissage et de la précision de cinq modèles de différentes couches")
+    for rect in bar1 + bar2:
+        height = rect.get_height()
+        plt.text(rect.get_x() + rect.get_width() / 2.0, height, '%.2f' % height, ha='center', va='bottom')
+    plt.legend()
+    plt.savefig("tp2Output/comparison_times_scores_of_methods_different_layers.png", dpi="figure")
+    plt.show()
+
+def optimization_algorithms_variation():
+    solvers = ["lbfgs", "sgd", "adam"]
+    scores = {}
+    scores["lbfgs"] = {}
+    scores["sgd"] = {}
+    scores["adam"] = {}
+    for counter in range(1, 11):
+        hl = tuple([randint(10, 300) for i in range(counter)])
+        for solver in solvers:
+            modele = MLPClassifier(hidden_layer_sizes=hl, solver=solver)
+            modele.fit(X=x_train, y=y_train)
+            scores[solver][counter] = modele.score(X=x_test, y=y_test) * 100
+    print(scores)
+    plt.figure(figsize=(12.8, 9.6))
+    plt.plot(range(1, 11), list(scores["lbfgs"].values()), label="L-BFGS")
+    plt.plot(range(1, 11), list(scores["sgd"].values()), label="SGD")
+    plt.plot(range(1, 11), list(scores["adam"].values()), label="ADAM")
+    plt.xlabel("Nombre de couches cachées")
+    plt.xlim(left=1)
+    plt.ylabel("Score de précision")
+    plt.title("Courbe d'évolution du score de précision en fonction du nombre de couches cachées")
+    plt.legend()
+    plt.savefig("tp2Output/scores_with_hidden_layers_variation_different_solvers.png", dpi="figure")
+    plt.show(); plt.close()
+
+def activation_functions_variation():
+    activations = ["identity", "logistic", "tanh", "relu"]
+    scores = {}
+    scores["identity"] = {}
+    scores["logistic"] = {}
+    scores["tanh"] = {}
+    scores["relu"] = {}
+    for counter in range(1, 11):
+        hl = tuple([randint(10, 300) for i in range(counter)])
+        for activation in activations:
+            modele = MLPClassifier(hidden_layer_sizes=hl, activation=activation)
+            modele.fit(X=x_train, y=y_train)
+            scores[activation][counter] = modele.score(X=x_test, y=y_test) * 100
+    plt.figure(figsize=(12.8, 9.6))
+    plt.plot(range(1, 11), list(scores["identity"].values()), label="identity")
+    plt.plot(range(1, 11), list(scores["logistic"].values()), label="logistic")
+    plt.plot(range(1, 11), list(scores["tanh"].values()), label="tanh")
+    plt.plot(range(1, 11), list(scores["relu"].values()), label="relu")
+    plt.xlabel("Nombre de couches cachées")
+    plt.xlim(left=1)
+    plt.ylabel("Score de précision")
+    plt.title("Courbe d'évolution du score de précision en fonction du nombre de couches cachées")
+    plt.legend()
+    plt.savefig("tp2Output/scores_with_hidden_layers_variation_different_activations_with_adam.png", dpi="figure")
+    plt.show(); plt.close()
+
+def alpha_parameter_variation():
+    scores = []
+    alphas = [10**(-i) for i in range(1, 9)]
+    alphas = alphas[::-1]
+    for alpha in alphas:
+        # hl = tuple([randint(10, 300) for i in range(counter)])
+        modele = MLPClassifier(hidden_layer_sizes=(60, 47, 179, 251, 296, 61, 191, 232, 171, 114), alpha=alpha)
+        modele.fit(X=x_train, y=y_train)
+        scores.append(modele.score(X=x_test, y=y_test) * 100)
+    plt.figure(figsize=(12.8, 9.6))
+    plt.plot(alphas, scores)
+    plt.xlabel("Valeurs d'alpha")
+    plt.xlim(left=1e-8)
+    plt.ylabel("Score de précision")
+    plt.title("Courbe d'évolution du score de précision en fonction du paramètre alpha")
+    plt.savefig("tp2Output/scores_with_alpha_variation_same_hidden_layers_sizes.png", dpi="figure")
+    plt.show(); plt.close()
+
+def times_scores_recalls_errors_comparison():
+    scores = {}
+    times = {}
+    recalls = {}
+    errors = {}
+    layers = {}
+    for counter in range(5):
+        hl = tuple([randint(10, 300) for i in range(counter * 2 + 2)])
+        modele = MLPClassifier(hidden_layer_sizes=hl)
+        tic = time.process_time()
+        modele.fit(X=x_train, y=y_train)
+        toc = time.process_time()
+        y_pred = modele.predict(X=x_test)
+        times[counter] = toc - tic
+        scores[counter] = modele.score(X=x_test, y=y_test) * 100
+        recalls[counter] = recall_score(y_true=y_test, y_pred=y_pred, average="micro")
+        errors[counter] = zero_one_loss(y_true=y_test, y_pred=y_pred) * 100
+        layers[counter] = hl
+    barWidth = 0.4
+    y1 = [times[c] for c in range(5)]
+    y2 = [scores[c] for c in range(5)]
+    y3 = [recalls[c] for c in range(5)]
+    y4 = [errors[c] for c in range(5)]
+    r1 = [0, 4, 8, 12, 16]
+    r2 = [x + barWidth for x in r1]
+    r3 = [x + 2 * barWidth for x in r1]
+    r4 = [x + 3 * barWidth for x in r1]
+    plt.figure(figsize=(12.8, 9.6))
+    bar1 = plt.bar(r1, y1, width=barWidth, color=['red' for i in y1], linewidth=2, label="temps (sec)")
+    bar2 = plt.bar(r2, y2, width=barWidth, color=['green' for i in y2], linewidth=4, label="precision (%)")
+    bar3 = plt.bar(r3, y3, width=barWidth, color=['red' for i in y3], linewidth=1, label="recall")
+    bar4 = plt.bar(r4, y4, width=barWidth, color=['magenta' for i in y4], linewidth=3, label="error (%)")
+    plt.xticks([r + 1.5*barWidth for r in r1], ['%s couches\n%s' % (i * 2 + 2, layers[i]) for i in range(5)], rotation=10)
+    plt.xlabel("Modèles")
+    plt.ylabel("Valeur")
+    plt.title("Evaluation du temps d'apprentissage, de la précision, du recall et de l'erreur de cinq modèles de différentes couches")
+    for rect in bar1 + bar2 + bar3 + bar4:
+        height = rect.get_height()
+        plt.text(rect.get_x() + rect.get_width() / 2.0, height, '%.2f' % height, ha='center', va='bottom')
+    plt.legend()
+    plt.savefig("tp2Output/comparison_times_scores_recalls_errors_of_methods_different_layers.png", dpi="figure")
+    plt.show(); plt.close()
+
+
+modele_hidden_layer_sizes_50()
+layers_number_variation_and_scores()
+five_models_with_different_layers()
+optimization_algorithms_variation()
+activation_functions_variation()
+alpha_parameter_variation()
+times_scores_recalls_errors_comparison()
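One caveat for TP2_prog1.py: MLPClassifier is sensitive to feature scale, and the raw 0-255 pixel values are fed in unscaled. A sketch of the usual remedy, a pipeline with a scaler in front of the network (make_pipeline and MinMaxScaler are standard scikit-learn utilities; the hidden-layer size and max_iter value here are illustrative, not taken from the TP):

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.neural_network import MLPClassifier

model = make_pipeline(MinMaxScaler(), MLPClassifier(hidden_layer_sizes=(50,), max_iter=300))
model.fit(x_train, y_train)   # same x_train/y_train split as TP2_prog1.py
print("MLP avec mise à l'échelle: %.2f%%" % (model.score(x_test, y_test) * 100))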
plt.savefig("tp2Output/comparison_times_scores_recalls_errors_of_methods_different_layers", dpi="figure") + plt.show(); plt.close() + + +modele_hidden_layer_sizes_50() +layers_number_variation_and_scores() +five_models_with_different_layers() +optimization_algorithms_variation() +activation_functions_variation() +alpha_parameter_variation() +times_scores_recalls_errors_comparison() diff --git a/TP3_prog1.py b/TP3_prog1.py new file mode 100644 index 0000000..935b5ab --- /dev/null +++ b/TP3_prog1.py @@ -0,0 +1,257 @@ +from sklearn import datasets +from sklearn.model_selection import train_test_split, KFold +from sklearn.neural_network import MLPClassifier +from sklearn.svm import SVC +from sklearn.neighbors import KNeighborsClassifier +from sklearn.metrics import precision_score, recall_score, plot_confusion_matrix, confusion_matrix, ConfusionMatrixDisplay +import numpy as np +import matplotlib.pyplot as plt +from random import seed, randint +from graphs import bars_plot2, curve_plot +from statistics import mean +import time + + +np.random.seed(0) # initialise le generateur aletoire avec seed=0 +seed(0) + +####################### +### TP3 PROGRAMME ### +####################### +# Chargement de la base de données MNIST +mnist = datasets.fetch_openml('mnist_784') + +# Division de la base de données +x_train = mnist.data[:3500,:] ## training +y_train = mnist.target[:3500] +x_test = mnist.data[3500:5001,:] ## test +y_test = mnist.target[3500:5001] + +# print(x_train.shape,x_test.shape) # + +# Mise en conformité des dimensions de xtrain et xtest aux dimensions attendues par le classifeur +x_train = x_train.reshape((-1, 784)) +x_test = x_test.reshape((-1, 784)) + +def kernel_variation(): + kernels = ["linear","poly","rbf","sigmoid"] + scores = {} + for kernel in kernels: + clsvm = SVC(kernel=kernel) + clsvm.fit(X=x_train, y=y_train) + scores[kernel] = clsvm.score(X=x_test, y=y_test) * 100 + print("Score avec kernel=%s: %.2f"%(kernel, scores[kernel])) + bars_plot2(bar_labels=kernels, bar_heights=list(scores.values()), xlabel="Kernels utilisés", + ylabel="Score", fig_title="Score par kernel utilisé", show=True, percent_mark=True, + output="tp3Output/scores_for_kernel_variation.png") + + +def kernel_and_c_variation(): + kernels = ["linear", "poly", "rbf", "sigmoid"] + scores = {} + c_values = [0.1 + 0.18 * x for x in range(6)] + for kernel in kernels: + scores[kernel] = {} + for c_value in c_values: + clsvm = SVC(kernel=kernel, C=c_value) + clsvm.fit(X=x_train, y=y_train) + scores[kernel][c_value] = clsvm.score(X=x_test, y=y_test) * 100 + print("Score avec kernel=%s: %.2f" % (kernel, clsvm.score(X=x_test, y=y_test) * 100)) + plt.figure(figsize=(12.8, 9.6)) + plt.plot(c_values, list(scores["linear"].values()), label="linear") + plt.plot(c_values, list(scores["poly"].values()), label="poly") + plt.plot(c_values, list(scores["rbf"].values()), label="rbf") + plt.plot(c_values, list(scores["sigmoid"].values()), label="sigmoid") + plt.xlabel("Valeurs de C") + plt.xlim(left=0.1) + plt.ylabel("Score (précision)") + plt.title("Courbe d'évolution de la précision en fonction de C") + plt.legend() + plt.savefig("tp3Output/scores_all_kernel_with_c_variation.png", dpi="figure") + plt.show() + plt.close() + +def train_test_error_with_c_variation(): + kernels = ["linear", "poly", "rbf", "sigmoid"] + scores = {} + c_values = [0.1 + 0.18 * x for x in range(6)] + data_types = ["train", "test"] + for data_type in data_types: + scores[data_type] = {} + for kernel in kernels: + scores[data_type][kernel] = {} + for c_value 
in c_values: + clsvm = SVC(kernel=kernel, C=c_value) + clsvm.fit(X=x_train, y=y_train) + if data_type=="train": + scores[data_type][kernel][c_value] = (1 - clsvm.score(X=x_train, y=y_train)) * 100 + else: + scores[data_type][kernel][c_value] = (1 - clsvm.score(X=x_test, y=y_test)) * 100 + print("Score d'erreur données=%s avec kernel=%s: %.2f" % (data_type, kernel, scores[data_type][kernel][c_value])) + plt.figure(figsize=(12.8, 9.6)) + plt.plot(c_values, list(scores["train"]["linear"].values()), label="train linear", marker="o") + plt.plot(c_values, list(scores["train"]["poly"].values()), label="train poly", marker="o") + plt.plot(c_values, list(scores["train"]["rbf"].values()), label="train rbf", marker="o") + plt.plot(c_values, list(scores["train"]["sigmoid"].values()), label="train sigmoid", marker="o") + plt.plot(c_values, list(scores["test"]["linear"].values()), label="test linear", marker="*") + plt.plot(c_values, list(scores["test"]["poly"].values()), label="test poly", marker="*") + plt.plot(c_values, list(scores["test"]["rbf"].values()), label="test rbf", marker="*") + plt.plot(c_values, list(scores["test"]["sigmoid"].values()), label="test sigmoid", marker="*") + plt.xlabel("Valeurs de C") + plt.xlim(left=0.1) + plt.ylabel("Error score") + plt.title("Courbe d'évolution du pourcentage d'erreur de précision en fonction de C") + plt.legend() + plt.savefig("tp3Output/error_scores_train_test_with_kernel_and_c_variation.png", dpi="figure") + plt.show() + plt.close() + +def generate_confusion_matrix(): + kernels = ["linear", "poly", "rbf", "sigmoid"] + for kernel in kernels: + clsvm = SVC(kernel=kernel) + clsvm.fit(X=x_train, y=y_train) + plot_confusion_matrix(estimator=clsvm, X=x_test, y_true=y_test, values_format=".1f") + plt.show() + +def time_precision_recall_error(): + kernels = ["linear", "poly", "rbf", "sigmoid"] + times = {} + scores = {} + recalls = {} + errors = {} + c_values = [0.1 + 0.18 * x for x in range(6)] + for kernel in kernels: + times[kernel] = {} + scores[kernel] = {} + recalls[kernel] = {} + errors[kernel] = {} + for c_value in c_values: + clsvm = SVC(kernel=kernel, C=c_value) + tic = time.process_time() + clsvm.fit(X=x_train, y=y_train) + toc = time.process_time() + y_pred = clsvm.predict(X=x_test) + times[kernel][c_value] = toc - tic + scores[kernel][c_value] = clsvm.score(X=x_test, y=y_test) * 100 + recalls[kernel][c_value] = recall_score(y_true=y_test, y_pred=y_pred, average="micro") + errors[kernel][c_value] = 100 - scores[kernel][c_value] + print("Score avec kernel=%s: %.2f" % (kernel, clsvm.score(X=x_test, y=y_test) * 100)) + plt.figure(figsize=(12.8, 9.6)) + plt.plot(c_values, list(times["linear"].values()), label="linear") + plt.plot(c_values, list(times["poly"].values()), label="poly") + plt.plot(c_values, list(times["rbf"].values()), label="rbf") + plt.plot(c_values, list(times["sigmoid"].values()), label="sigmoid") + plt.xlabel("Valeurs de C") + plt.xlim(left=0.1) + plt.ylabel("Temps d'apprentissage (secondes)") + plt.title("Courbe d'évolution du temps d'apprentissage en fonction de C") + plt.legend() + plt.savefig("tp3Output/times_all_kernel_with_c_variation.png", dpi="figure") + plt.show(); plt.close() + ### + plt.figure(figsize=(12.8, 9.6)) + plt.plot(c_values, list(scores["linear"].values()), label="linear precision") + plt.plot(c_values, list(scores["poly"].values()), label="poly precision") + plt.plot(c_values, list(scores["rbf"].values()), label="rbf precision") + plt.plot(c_values, list(scores["sigmoid"].values()), label="sigmoid precision") + 
plt.plot(c_values, list(errors["linear"].values()), label="linear error", marker="*", linestyle="-.") + plt.plot(c_values, list(errors["poly"].values()), label="poly error", marker="*", linestyle="-.") + plt.plot(c_values, list(errors["rbf"].values()), label="rbf error", marker="*", linestyle="-.") + plt.plot(c_values, list(errors["sigmoid"].values()), label="sigmoid error", marker="*", linestyle="-.") + plt.xlabel("Valeurs de C") + plt.xlim(left=0.1) + plt.ylabel("Score") + plt.title("Courbe d'évolution de la précision et de l'erreur de classification en fonction de C") + plt.legend() + plt.savefig("tp3Output/scores_and_errors_all_kernel_with_c_variation.png", dpi="figure") + plt.show(); plt.close() + ### + plt.figure(figsize=(12.8, 9.6)) + plt.plot(c_values, list(recalls["linear"].values()), label="linear") + plt.plot(c_values, list(recalls["poly"].values()), label="poly") + plt.plot(c_values, list(recalls["rbf"].values()), label="rbf") + plt.plot(c_values, list(recalls["sigmoid"].values()), label="sigmoid") + plt.xlabel("Valeurs de C") + plt.xlim(left=0.1) + plt.ylabel("Recall score") + plt.title("Courbe d'évolution du recall en fonction de C") + plt.legend() + plt.savefig("tp3Output/recalls_all_kernel_with_c_variation.png", dpi="figure") + plt.show(); plt.close() + +def methods_comparison(): + times = {} + scores = {} + recalls = {} + errors = {} + matrices = {} + ### Méthode 1 + knn = KNeighborsClassifier(n_neighbors=10) + tic = time.process_time() + knn.fit(X=x_train, y=y_train) + toc = time.process_time() + y_pred = knn.predict(X=x_test) + times["knn"] = toc - tic + scores["knn"] = knn.score(X=x_test, y=y_pred) * 100 + recalls["knn"] = recall_score(y_true=y_test, y_pred=y_pred, average="micro") + errors["knn"] = 100 - scores["knn"] + # matrices["knn"] = confusion_matrix(y_true=y_test,y_pred=y_pred) + plot_confusion_matrix(estimator=knn, X=x_test, y_true=y_test, values_format=".1f"); plt.show() + ### Méthode 2 + mlp = MLPClassifier(hidden_layer_sizes=(50)) + tic = time.process_time() + mlp.fit(X=x_train, y=y_train) + toc = time.process_time() + y_pred = knn.predict(X=x_test) + times["mlp"] = toc - tic + scores["mlp"] = mlp.score(X=x_test, y=y_pred) * 100 + recalls["mlp"] = recall_score(y_true=y_test, y_pred=y_pred, average="micro") + errors["mlp"] = 100 - scores["mlp"] + # matrices["mlp"] = confusion_matrix(y_true=y_test, y_pred=y_pred) + plot_confusion_matrix(estimator=mlp, X=x_test, y_true=y_test, values_format=".1f"); plt.show() + ### Méthode 3 + svc = SVC(kernel="rbf", C=1) + tic = time.process_time() + svc.fit(X=x_train, y=y_train) + toc = time.process_time() + y_pred = knn.predict(X=x_test) + times["svc"] = toc - tic + scores["svc"] = svc.score(X=x_test, y=y_pred) * 100 + recalls["svc"] = recall_score(y_true=y_test, y_pred=y_pred, average="micro") + errors["svc"] = 100 - scores["svc"] + # matrices["svc"] = confusion_matrix(y_true=y_test, y_pred=y_pred) + plot_confusion_matrix(estimator=svc, X=x_test, y_true=y_test, values_format=".1f"); plt.show() + ### Plotting + barWidth = 0.5 + y1 = [times["knn"], times["mlp"], times["svc"]] + y2 = [scores["knn"], scores["mlp"], scores["svc"]] + y3 = [recalls["knn"], recalls["mlp"], recalls["svc"]] + y4 = [errors["knn"], errors["mlp"], errors["svc"]] + r1 = [0, 3, 6] + r2 = [x + barWidth for x in r1] + r3 = [x + 2 * barWidth for x in r1] + r4 = [x + 3 * barWidth for x in r1] + plt.figure(figsize=(12.8, 9.6)) + bar1 = plt.bar(r1, y1, width=barWidth, color=['green' for i in y1], linewidth=2, label="temps (sec)") + bar2 = plt.bar(r2, y2, 
diff --git a/graphs.py b/graphs.py
new file mode 100644
index 0000000..db4e44d
--- /dev/null
+++ b/graphs.py
@@ -0,0 +1,54 @@
+def bars_plot(bar_labels=None, bar_heights=None, bar_colors="blue", bar_width=0.25, xlabel="xlabel", ylabel="ylabel", fig_title="title", figsize=(12.8, 9.6), output="./bars.png", show=False):
+    import matplotlib.pyplot as plt
+    plt.figure(figsize=figsize)
+    plt.bar(x=list(map(str, bar_labels)), height=bar_heights, width=bar_width, color=bar_colors)
+    plt.xlabel(xlabel)
+    plt.ylabel(ylabel)
+    plt.title(fig_title)
+    plt.savefig(output, dpi="figure")
+    if show:
+        plt.show()
+    plt.close()
+
+def clustering_plot(abcissas=None, ordinates=None, predictions=None, marker="o", xlabel="xlabel", ylabel="ylabel", fig_title="title", figsize=(12.8, 9.6), output="./bars.png", show=False):
+    import matplotlib.pyplot as plt
+    plt.figure(figsize=figsize)
+    plt.scatter(x=abcissas, y=ordinates, c=predictions, marker=marker)
+    plt.xlabel(xlabel)
+    plt.ylabel(ylabel)
+    plt.title(fig_title)
+    plt.savefig(output, dpi="figure")
+    if show:
+        plt.show()
+    plt.close()
+
+def bars_plot2(bar_labels=None, bar_heights=None, bar_colors="blue", bar_width=0.25, xlabel="xlabel", ylabel="ylabel", fig_title="title", figsize=(12.8, 9.6), output="./bars.png", show=False, show_heights=True, percent_mark=False):
+    import matplotlib.pyplot as plt
+    plt.figure(figsize=figsize)
+    bar = plt.bar(x=list(map(str, bar_labels)), height=bar_heights, width=bar_width, color=bar_colors)
+    plt.xlabel(xlabel)
+    plt.ylabel(ylabel)
+    plt.title(fig_title)
+    if show_heights:
+        for rect in bar:
+            height = rect.get_height()
+            if percent_mark:
+                plt.text(rect.get_x() + rect.get_width() / 2.0, height, '%.2f%%' % height, ha='center', va='bottom')
+            else:
+                plt.text(rect.get_x() + rect.get_width() / 2.0, height, '%.2f' % height, ha='center', va='bottom')
+    plt.savefig(output, dpi="figure")
+    if show:
+        plt.show()
+    plt.close()
+
+def curve_plot(abcissas=None, ordinates=None, xlabel="xlabel", ylabel="ylabel", fig_title="title", figsize=(12.8, 9.6), output="./bars.png", show=False):
+    import matplotlib.pyplot as plt
+    plt.figure(figsize=figsize)
+    plt.plot(abcissas, ordinates)
+    plt.xlabel(xlabel)
+    plt.ylabel(ylabel)
+    plt.title(fig_title)
+    plt.savefig(output, dpi="figure")
+    if show:
+        plt.show()
+    plt.close()
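For reference, a minimal usage example of graphs.bars_plot2 (the labels, values and output path below are made up for illustration):

from graphs import bars_plot2

bars_plot2(bar_labels=["knn", "mlp", "svm"], bar_heights=[94.2, 95.1, 96.3],
           xlabel="Modèle", ylabel="Score", fig_title="Exemple d'utilisation",
           show=False, percent_mark=True, output="./example_bars.png")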
diff --git a/tp1Output/mnist_k_fold_scores.png b/tp1Output/mnist_k_fold_scores.png
new file mode 100644
index 0000000..29b4c42
Binary files /dev/null and b/tp1Output/mnist_k_fold_scores.png differ
diff --git a/tp1Output/mnist_split_data_percentage_scores.png b/tp1Output/mnist_split_data_percentage_scores.png
new file mode 100644
index 0000000..fcace7a
Binary files /dev/null and b/tp1Output/mnist_split_data_percentage_scores.png differ
diff --git a/tp1Output/mnist_split_data_percentage_scores1.png b/tp1Output/mnist_split_data_percentage_scores1.png
new file mode 100644
index 0000000..967738f
Binary files /dev/null and b/tp1Output/mnist_split_data_percentage_scores1.png differ
diff --git a/tp1Output/mnist_split_various_train_percentage_scores_with_fixed_test.png b/tp1Output/mnist_split_various_train_percentage_scores_with_fixed_test.png
new file mode 100644
index 0000000..4869458
Binary files /dev/null and b/tp1Output/mnist_split_various_train_percentage_scores_with_fixed_test.png differ
diff --git a/tp1Output/mnist_various_distance_metric_njobs_at_1_timesX.png b/tp1Output/mnist_various_distance_metric_njobs_at_1_timesX.png
new file mode 100644
index 0000000..20eac53
Binary files /dev/null and b/tp1Output/mnist_various_distance_metric_njobs_at_1_timesX.png differ
diff --git a/tp1Output/mnist_various_distance_metric_njobs_at_less_1_timesX.png b/tp1Output/mnist_various_distance_metric_njobs_at_less_1_timesX.png
new file mode 100644
index 0000000..3f666e9
Binary files /dev/null and b/tp1Output/mnist_various_distance_metric_njobs_at_less_1_timesX.png differ
diff --git a/tp1Output/mnist_various_distance_metric_scores.png b/tp1Output/mnist_various_distance_metric_scores.png
new file mode 100644
index 0000000..55a3c5c
Binary files /dev/null and b/tp1Output/mnist_various_distance_metric_scores.png differ
diff --git a/tp2Output/comparison_times_scores_of_methods_different_layers.png b/tp2Output/comparison_times_scores_of_methods_different_layers.png
new file mode 100644
index 0000000..930ab5f
Binary files /dev/null and b/tp2Output/comparison_times_scores_of_methods_different_layers.png differ
diff --git a/tp2Output/comparison_times_scores_recalls_errors_of_methods_different_layers.png b/tp2Output/comparison_times_scores_recalls_errors_of_methods_different_layers.png
new file mode 100644
index 0000000..f7d419b
Binary files /dev/null and b/tp2Output/comparison_times_scores_recalls_errors_of_methods_different_layers.png differ
diff --git a/tp2Output/scores_with_alpha_variation_same_hidden_layers_sizes.png b/tp2Output/scores_with_alpha_variation_same_hidden_layers_sizes.png
new file mode 100644
index 0000000..b0ed9c3
Binary files /dev/null and b/tp2Output/scores_with_alpha_variation_same_hidden_layers_sizes.png differ
diff --git a/tp2Output/scores_with_hidden_layers_variation.png b/tp2Output/scores_with_hidden_layers_variation.png
new file mode 100644
index 0000000..a4df944
Binary files /dev/null and b/tp2Output/scores_with_hidden_layers_variation.png differ
diff --git a/tp2Output/scores_with_hidden_layers_variation_different_activations_with_adam.png b/tp2Output/scores_with_hidden_layers_variation_different_activations_with_adam.png
new file mode 100644
index 0000000..fa17636
Binary files /dev/null and b/tp2Output/scores_with_hidden_layers_variation_different_activations_with_adam.png differ
diff --git a/tp2Output/scores_with_hidden_layers_variation_different_solvers.png b/tp2Output/scores_with_hidden_layers_variation_different_solvers.png
new file mode 100644
index 0000000..7822c9a
Binary files /dev/null and b/tp2Output/scores_with_hidden_layers_variation_different_solvers.png differ
diff --git a/tp3Output/comparison_knn_confusion_matrix.png b/tp3Output/comparison_knn_confusion_matrix.png
new file mode 100644
index 0000000..b7904b1
Binary files /dev/null and b/tp3Output/comparison_knn_confusion_matrix.png differ
diff --git a/tp3Output/comparison_mlp_confusion_matrix.png b/tp3Output/comparison_mlp_confusion_matrix.png
new file mode 100644
index 0000000..ef9909f
Binary files /dev/null and b/tp3Output/comparison_mlp_confusion_matrix.png differ
diff --git a/tp3Output/comparison_svc_confusion_matrix.png b/tp3Output/comparison_svc_confusion_matrix.png
new file mode 100644
index 0000000..3917fd7
Binary files /dev/null and b/tp3Output/comparison_svc_confusion_matrix.png differ
diff --git a/tp3Output/confusion_matrix_with_linear_as_kernel.png b/tp3Output/confusion_matrix_with_linear_as_kernel.png
new file mode 100644
index 0000000..eb303e7
Binary files /dev/null and b/tp3Output/confusion_matrix_with_linear_as_kernel.png differ
diff --git a/tp3Output/confusion_matrix_with_poly_as_kernel.png b/tp3Output/confusion_matrix_with_poly_as_kernel.png
new file mode 100644
index 0000000..2ed3b6d
Binary files /dev/null and b/tp3Output/confusion_matrix_with_poly_as_kernel.png differ
diff --git a/tp3Output/confusion_matrix_with_rbf_as_kernel.png b/tp3Output/confusion_matrix_with_rbf_as_kernel.png
new file mode 100644
index 0000000..667cfb7
Binary files /dev/null and b/tp3Output/confusion_matrix_with_rbf_as_kernel.png differ
diff --git a/tp3Output/confusion_matrix_with_sigmoid_as_kernel.png b/tp3Output/confusion_matrix_with_sigmoid_as_kernel.png
new file mode 100644
index 0000000..f6158ca
Binary files /dev/null and b/tp3Output/confusion_matrix_with_sigmoid_as_kernel.png differ
diff --git a/tp3Output/error_scores_train_test_with_kernel_and_c_variation.png b/tp3Output/error_scores_train_test_with_kernel_and_c_variation.png
new file mode 100644
index 0000000..601a5ca
Binary files /dev/null and b/tp3Output/error_scores_train_test_with_kernel_and_c_variation.png differ
diff --git a/tp3Output/global_comparison_of_methods_different_metrics.png b/tp3Output/global_comparison_of_methods_different_metrics.png
new file mode 100644
index 0000000..9e25f68
Binary files /dev/null and b/tp3Output/global_comparison_of_methods_different_metrics.png differ
diff --git a/tp3Output/recalls_all_kernel_with_c_variation.png b/tp3Output/recalls_all_kernel_with_c_variation.png
new file mode 100644
index 0000000..eacfbfb
Binary files /dev/null and b/tp3Output/recalls_all_kernel_with_c_variation.png differ
diff --git a/tp3Output/scores_all_kernel_with_c_variation.png b/tp3Output/scores_all_kernel_with_c_variation.png
new file mode 100644
index 0000000..cb68536
Binary files /dev/null and b/tp3Output/scores_all_kernel_with_c_variation.png differ
diff --git a/tp3Output/scores_and_errors_all_kernel_with_c_variation.png b/tp3Output/scores_and_errors_all_kernel_with_c_variation.png
new file mode 100644
index 0000000..6bd8e6b
Binary files /dev/null and b/tp3Output/scores_and_errors_all_kernel_with_c_variation.png differ
diff --git a/tp3Output/scores_for_kernel_variation.png b/tp3Output/scores_for_kernel_variation.png
new file mode 100644
index 0000000..f8a35dc
Binary files /dev/null and b/tp3Output/scores_for_kernel_variation.png differ
diff --git a/tp3Output/times_all_kernel_with_c_variation.png b/tp3Output/times_all_kernel_with_c_variation.png
new file mode 100644
index 0000000..fde2e44
Binary files /dev/null and b/tp3Output/times_all_kernel_with_c_variation.png differ