
Python programs for supervised learning.

Abdel-Kader Chabi-Sika-Boni, 3 years ago
commit 1c40b662ce
32 changed files with 713 additions and 0 deletions

  1. TP1_prog1.py (+34, -0)
  2. TP1_prog2.py (+151, -0)
  3. TP2_prog1.py (+217, -0)
  4. TP3_prog1.py (+257, -0)
  5. graphs.py (+54, -0)
  6. tp1Output/mnist_k_fold_scores.png (BIN)
  7. tp1Output/mnist_split_data_percentage_scores.png (BIN)
  8. tp1Output/mnist_split_data_percentage_scores1.png (BIN)
  9. tp1Output/mnist_split_various_train_percentage_scores_with_fixed_test.png (BIN)
  10. tp1Output/mnist_various_distance_metric_njobs_at_1_timesX.png (BIN)
  11. tp1Output/mnist_various_distance_metric_njobs_at_less_1_timesX.png (BIN)
  12. tp1Output/mnist_various_distance_metric_scores.png (BIN)
  13. tp2Output/comparison_times_scores_of_methods_different_layers.png (BIN)
  14. tp2Output/comparison_times_scores_recalls_errors_of_methods_different_layers.png (BIN)
  15. tp2Output/scores_with_alpha_variation_same_hidden_layers_sizes.png (BIN)
  16. tp2Output/scores_with_hidden_layers_variation.png (BIN)
  17. tp2Output/scores_with_hidden_layers_variation_different_activations_with_adam.png (BIN)
  18. tp2Output/scores_with_hidden_layers_variation_different_solvers.png (BIN)
  19. tp3Output/comparison_knn_confusion_matrix.png (BIN)
  20. tp3Output/comparison_mlp_confusion_matrix.png (BIN)
  21. tp3Output/comparison_svc_confusion_matrix.png (BIN)
  22. tp3Output/confusion_matrix_with_linear_as_kernel.png (BIN)
  23. tp3Output/confusion_matrix_with_poly_as_kernel.png (BIN)
  24. tp3Output/confusion_matrix_with_rbf_as_kernel.png (BIN)
  25. tp3Output/confusion_matrix_with_sigmoid_as_kernel.png (BIN)
  26. tp3Output/error_scores_train_test_with_kernel_and_c_variation.png (BIN)
  27. tp3Output/global_comparison_of_methods_different_metrics.png (BIN)
  28. tp3Output/recalls_all_kernel_with_c_variation.png (BIN)
  29. tp3Output/scores_all_kernel_with_c_variation.png (BIN)
  30. tp3Output/scores_and_errors_all_kernel_with_c_variation.png (BIN)
  31. tp3Output/scores_for_kernel_variation.png (BIN)
  32. tp3Output/times_all_kernel_with_c_variation.png (BIN)

TP1_prog1.py (+34, -0)

@@ -0,0 +1,34 @@
from sklearn import datasets
import matplotlib.pyplot as plt


###################
###  EXERCISE 1 ###
###################
# Load the MNIST database
# (as_frame=False keeps the data as NumPy arrays; on scikit-learn >= 0.24 the
#  default returns a pandas DataFrame, which would break the reshape below)
mnist = datasets.fetch_openml('mnist_784', as_frame=False)

# A few exploratory commands
affichage = True
if affichage:
    print(mnist)
    print(mnist.data)
    print(mnist.target)
    print(len(mnist.data))
    help(len)
    print(mnist.data.shape)
    print(mnist.target.shape)
    print(mnist.data[0])
    print(mnist.data[0][1])
    print(mnist.data[:, 1])
    print(len(mnist.data[:100]))

# Extract and display the first five images of the database
images = mnist.data.reshape((-1, 28, 28))
for i in range(5):
    plt.imshow(images[i], cmap="gray", interpolation="nearest")
    plt.show()

# Print the class of the first image of the database
targets = mnist.target
print("Classe de la première image: %s" % (targets[0]))

TP1_prog2.py (+151, -0)

@@ -0,0 +1,151 @@
from sklearn import datasets
from sklearn.model_selection import train_test_split, KFold
from sklearn.neighbors import KNeighborsClassifier
import matplotlib.pyplot as plt
import numpy as np
from random import seed
from graphs import bars_plot2, bars_plot
import time

np.random.seed(0)  # seed the NumPy random generator (seed=0) for reproducibility
seed(0)

###################
###  EXERCISE 2 ###
###################
# Load the MNIST database (as NumPy arrays, see TP1_prog1.py)
mnist = datasets.fetch_openml('mnist_784', as_frame=False)

# Extract the images of the database
images = mnist.data.reshape((-1, 28, 28))

# Extract the image classes
targets = mnist.target

# Draw a sample of 5000 examples
# (np.random.randint samples with replacement, so a few duplicates are possible)
indexes = np.random.randint(70000, size=5000)
data = images[indexes, :]
data_targets = targets[indexes]

# Split the database into training and test sets
def k_10_classifier_and_score():
    pourcentage_donnees = 0.8
    xtrain, xtest, ytrain, ytest = train_test_split(data, data_targets, train_size=pourcentage_donnees)
    # Reshape xtrain and xtest to the dimensions expected by the classifier
    xtrain = xtrain.reshape((-1, 784))
    xtest = xtest.reshape((-1, 784))
    # Instantiate and train the classifier
    k = 10
    clf = KNeighborsClassifier(n_neighbors=k)
    clf.fit(X=xtrain, y=ytrain)
    predictions = clf.predict(X=xtest)
    # Actual and predicted class of image no. 4
    print("VERDICT IMAGE N°4\nClasse réelle: %s\nClasse prédite: %s" % (ytest[3], predictions[3]))
    # Score on the test set
    score_xtest = clf.score(X=xtest, y=ytest)
    print("Score xtest = %.2f%%" % (score_xtest * 100))
    # Error rate on the training data
    score_xtrain = clf.score(X=xtrain, y=ytrain)
    print("Score d'erreur xtrain = %.2f%%" % (100 * (1 - score_xtrain)))

def k_variation_and_score():
    # Vary the number k of neighbors and plot the 10-fold cross-validated score
    n_folds = 10
    kf = KFold(n_splits=n_folds, shuffle=True)
    k_score = {}
    for k in range(2, 16):
        sum_score = 0
        for train_index, test_index in kf.split(X=data):
            x_train, x_test = data[train_index], data[test_index]
            y_train, y_test = data_targets[train_index], data_targets[test_index]
            x_train = x_train.reshape((-1, 784))
            x_test = x_test.reshape((-1, 784))
            clf = KNeighborsClassifier(n_neighbors=k)
            clf.fit(X=x_train, y=y_train)
            sum_score += clf.score(X=x_test, y=y_test)
        k_score[k] = (sum_score / n_folds) * 100
    bars_plot2(bar_labels=list(k_score.keys()), bar_heights=list(k_score.values()), xlabel="Valeur de k",
               ylabel="Score", fig_title="Score par valeur de k (nombre de voisins)", show=True, percent_mark=True,
               output="tp1Output/mnist_k_variation_scores.png")

def train_test_percent_variation_and_score():
    # Vary the train/test split percentage and plot the resulting scores
    k_score_percent = {}
    for pourcentage in [0.05 * i for i in range(1, 20)]:
        xtrain_percent, xtest_percent, ytrain_percent, ytest_percent = train_test_split(data, data_targets, train_size=pourcentage)
        xtrain_percent = xtrain_percent.reshape((-1, 784))
        xtest_percent = xtest_percent.reshape((-1, 784))
        clf = KNeighborsClassifier(n_neighbors=10)
        clf.fit(X=xtrain_percent, y=ytrain_percent)
        k_score_percent[pourcentage] = clf.score(X=xtest_percent, y=ytest_percent) * 100
    bars_plot2(bar_labels=["%.2f" % (percent) for percent in list(k_score_percent.keys())],
               bar_heights=list(k_score_percent.values()), xlabel="Pourcentage d'échantillons train",
               ylabel="Score",
               fig_title="Score par pourcentage de découpage de DATA (5000 images) en échantillons train et test",
               show=True, percent_mark=True, bar_colors="red",
               output="tp1Output/mnist_split_data_percentage_scores.png")

def train_percent_variation_and_score():
    # Fix the test set size (25% of DATA) and vary the training set size, plotting the score
    k_score_train_variation = {}
    xtrain_global, xtest_25_percent, ytrain_global, ytest_25_percent = train_test_split(data, data_targets, train_size=0.75)
    xtest_25_percent = xtest_25_percent.reshape((-1, 784))
    for pourcentage in [(10 * j) / 100.0 for j in range(1, 10)]:
        xtrain_echantillon = xtrain_global[:int(pourcentage * len(xtrain_global)) + 1]
        xtrain_echantillon = xtrain_echantillon.reshape((-1, 784))
        ytrain_echantillon = ytrain_global[:int(pourcentage * len(xtrain_global)) + 1]
        clf = KNeighborsClassifier(n_neighbors=10)
        clf.fit(X=xtrain_echantillon, y=ytrain_echantillon)
        k_score_train_variation[pourcentage] = clf.score(X=xtest_25_percent, y=ytest_25_percent) * 100
    bars_plot2(bar_labels=["%.2f" % (percent) for percent in list(k_score_train_variation.keys())],
               bar_heights=list(k_score_train_variation.values()),
               xlabel="Pourcentage d'échantillons train", ylabel="Score",
               fig_title="Score par pourcentage de découpage de train (%s images initialement) et %s images comme test (25%% de DATA)" % (
               len(xtrain_global), len(xtest_25_percent)),
               show=True, percent_mark=True, bar_colors="magenta",
               output="tp1Output/mnist_split_various_train_percentage_scores_with_fixed_test.png")

def distance_type_variation_and_score():
    # Vary the distance metric with train=75% / test=25% of DATA and plot the score
    xtrain_distance, xtest_distance, ytrain_distance, ytest_distance = train_test_split(data, data_targets, train_size=0.75)
    xtrain_distance = xtrain_distance.reshape((-1, 784))
    xtest_distance = xtest_distance.reshape((-1, 784))
    distances = ["minkowski", "euclidean", "hamming", "canberra", "braycurtis"]
    distance_score = {}
    for distance in distances:
        clf = KNeighborsClassifier(n_neighbors=10, metric=distance)
        clf.fit(X=xtrain_distance, y=ytrain_distance)
        distance_score[distance] = clf.score(X=xtest_distance, y=ytest_distance) * 100
    bars_plot2(bar_labels=list(distance_score.keys()), bar_heights=list(distance_score.values()),
               xlabel="Types de distance", ylabel="Score",
               fig_title="Score pour différent type de distance (métrique) avec train=75% de DATA et test=25% de DATA",
               show=True, percent_mark=True, bar_colors="green",
               output="tp1Output/mnist_various_distance_metric_scores.png")


def parameter_njobs_variation_and_score():
    # Vary n_jobs and measure the training time for each distance metric
    xtrain_distance, xtest_distance, ytrain_distance, ytest_distance = train_test_split(data, data_targets, train_size=0.75)
    xtrain_distance = xtrain_distance.reshape((-1, 784))
    xtest_distance = xtest_distance.reshape((-1, 784))
    distances = ["minkowski", "euclidean", "hamming", "canberra", "braycurtis"]
    for n_jobs in [6, -1]:
        distance_based_training_time = {}
        for distance in distances:
            clf = KNeighborsClassifier(n_neighbors=10, metric=distance, n_jobs=n_jobs)
            tic = time.process_time()
            clf.fit(X=xtrain_distance, y=ytrain_distance)
            toc = time.process_time()
            distance_based_training_time[distance] = (toc - tic)
        # no output path is passed here, so bars_plot2 saves to its default "./bars.png"
        bars_plot2(bar_labels=list(distance_based_training_time.keys()),
                   bar_heights=list(distance_based_training_time.values()),
                   xlabel="Types de distance", ylabel="Temps d'entraînement (seconde)", show=True, bar_colors="pink",
                   fig_title="Temps d'entraînement pour différent type de distance avec train=75%% de DATA et test=25%% de DATA et\nN_JOBS=%s" % (
                       n_jobs))


k_10_classifier_and_score()
k_variation_and_score()
train_test_percent_variation_and_score()
train_percent_variation_and_score()
distance_type_variation_and_score()
parameter_njobs_variation_and_score()
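
Side note: the manual KFold loop in k_variation_and_score can be written more compactly with scikit-learn's cross_val_score, which does the splitting and averaging itself. A minimal sketch, assuming the data and data_targets arrays defined above (an equivalent formulation, not the original code):

from sklearn.model_selection import cross_val_score

X = data.reshape((-1, 784))
k_scores = {}
for k in range(2, 16):
    clf = KNeighborsClassifier(n_neighbors=k)
    # 10-fold cross-validation; cross_val_score returns one accuracy per fold
    k_scores[k] = cross_val_score(clf, X, data_targets, cv=10).mean() * 100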

TP2_prog1.py (+217, -0)

@@ -0,0 +1,217 @@
from sklearn import datasets
from sklearn.model_selection import train_test_split, KFold
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import precision_score, zero_one_loss, recall_score
import numpy as np
import matplotlib.pyplot as plt
from random import seed, randint
from graphs import bars_plot2, bars_plot
import time


np.random.seed(0)  # seed the NumPy random generator (seed=0) for reproducibility
seed(0)

#####################
###  TP2 PROGRAM  ###
#####################
# Load the MNIST database (as NumPy arrays, see TP1_prog1.py)
mnist = datasets.fetch_openml('mnist_784', as_frame=False)

# Split the database
x_train = mnist.data[:4900, :]  ## training
y_train = mnist.target[:4900]
x_test = mnist.data[4900:7000, :]  ## test
y_test = mnist.target[4900:7000]

# Reshape x_train and x_test to the dimensions expected by the classifier
x_train = x_train.reshape((-1, 784))
x_test = x_test.reshape((-1, 784))

def modele_hidden_layer_sizes_50():
    # Build the model, train it and compute its precision
    modele = MLPClassifier(hidden_layer_sizes=(50,))
    modele.fit(X=x_train, y=y_train)
    score = modele.score(X=x_test, y=y_test)
    print("MLPClassifier[hidden_layer_sizes=(50,)] score = %.2f" % (100 * score))  # 95.19% on one run
    # Actual and predicted class of image no. 4
    y_pred = modele.predict(X=x_test)
    print("VERDICT IMAGE N°4\nClasse réelle: %s\nClasse prédite: %s" % (y_test[3], y_pred[3]))
    # Compute the precision through precision_score
    score_package = precision_score(y_true=y_test, y_pred=y_pred, average="micro")
    print("MLPClassifier[hidden_layer_sizes=(50,)] score[using precision_score] = %.2f" % (100 * score_package))

def layers_number_variation_and_scores():
    # Vary the number of hidden layers and plot the resulting scores
    scores_dictionary = {}
    layers = []
    for iteration in range(1, 11):
        layers.append(50)
        hidden_layers = tuple(layers)
        print(hidden_layers)
        modele_iter = MLPClassifier(hidden_layer_sizes=hidden_layers)
        modele_iter.fit(X=x_train, y=y_train)
        scores_dictionary["%s couches" % (iteration)] = modele_iter.score(X=x_test, y=y_test) * 100
    # Plot the scores as a curve
    print(scores_dictionary)
    plt.figure(figsize=(12.8, 9.6))
    plt.plot(range(1, 11), list(scores_dictionary.values()))
    plt.xlabel("Nombre de couches de 50 neurones")
    plt.xlim(left=1)
    plt.ylabel("Score")
    plt.title("Courbe d'évolution du score de précision en fonction du nombre de couches cachées")
    plt.savefig("tp2Output/scores_with_hidden_layers_variation.png", dpi="figure")
    plt.show(); plt.close()

def five_models_with_different_layers():
    scores = {}
    times = {}
    layers = {}
    for counter in range(5):
        hl = tuple([randint(10, 300) for i in range(counter * 2 + 2)])
        modele = MLPClassifier(hidden_layer_sizes=hl)
        tic = time.process_time()
        modele.fit(X=x_train, y=y_train)
        toc = time.process_time()
        times[counter] = toc - tic
        scores[counter] = modele.score(X=x_test, y=y_test) * 100
        layers[counter] = hl
    barWidth = 0.4
    y1 = [times[c] for c in range(5)]
    y2 = [scores[c] for c in range(5)]
    r1 = [0, 4, 8, 12, 16]
    r2 = [x + barWidth for x in r1]
    plt.figure(figsize=(12.8, 9.6))
    bar1 = plt.bar(r1, y1, width=barWidth, color=['red' for i in y1], linewidth=2, label="temps (sec)")
    bar2 = plt.bar(r2, y2, width=barWidth, color=['green' for i in y2], linewidth=4, label="precision (%)")
    plt.xticks([r + barWidth for r in r1], ['%s couches\n%s' % (i * 2 + 2, layers[i]) for i in range(5)], rotation=10)
    plt.xlabel("Modèles")
    plt.ylabel("Valeur")
    plt.title("Evaluation du temps d'apprentissage et la précision de cinq modèles de différentes couches")
    for rect in bar1 + bar2:
        height = rect.get_height()
        plt.text(rect.get_x() + rect.get_width() / 2.0, height, '%.2f' % height, ha='center', va='bottom')
    plt.legend()
    plt.savefig("tp2Output/comparison_times_scores_of_methods_different_layers.png", dpi="figure")
    plt.show()

def optimization_algorithms_variation():
    solvers = ["lbfgs", "sgd", "adam"]
    scores = {"lbfgs": {}, "sgd": {}, "adam": {}}
    for counter in range(1, 11):
        hl = tuple([randint(10, 300) for i in range(counter)])
        for solver in solvers:
            modele = MLPClassifier(hidden_layer_sizes=hl, solver=solver)
            modele.fit(X=x_train, y=y_train)
            scores[solver][counter] = modele.score(X=x_test, y=y_test) * 100
    print(scores)
    plt.figure(figsize=(12.8, 9.6))
    plt.plot(range(1, 11), list(scores["lbfgs"].values()), label="L-BFGS")
    plt.plot(range(1, 11), list(scores["sgd"].values()), label="SGD")
    plt.plot(range(1, 11), list(scores["adam"].values()), label="ADAM")
    plt.xlabel("Nombre de couches cachées")
    plt.xlim(left=1)
    plt.ylabel("Score de précision")
    plt.title("Courbe d'évolution du score de précision en fonction du nombre de couches cachées")
    plt.legend()
    plt.savefig("tp2Output/scores_with_hidden_layers_variation_different_solvers.png", dpi="figure")
    plt.show(); plt.close()

def activation_functions_variation():
    activations = ["identity", "logistic", "tanh", "relu"]
    scores = {"identity": {}, "logistic": {}, "tanh": {}, "relu": {}}
    for counter in range(1, 11):
        hl = tuple([randint(10, 300) for i in range(counter)])
        for activation in activations:
            modele = MLPClassifier(hidden_layer_sizes=hl, activation=activation)
            modele.fit(X=x_train, y=y_train)
            scores[activation][counter] = modele.score(X=x_test, y=y_test) * 100
    plt.figure(figsize=(12.8, 9.6))
    plt.plot(range(1, 11), list(scores["identity"].values()), label="identity")
    plt.plot(range(1, 11), list(scores["logistic"].values()), label="logistic")
    plt.plot(range(1, 11), list(scores["tanh"].values()), label="tanh")
    plt.plot(range(1, 11), list(scores["relu"].values()), label="relu")
    plt.xlabel("Nombre de couches cachées")
    plt.xlim(left=1)
    plt.ylabel("Score de précision")
    plt.title("Courbe d'évolution du score de précision en fonction du nombre de couches cachées")
    plt.legend()
    plt.savefig("tp2Output/scores_with_hidden_layers_variation_different_activations_with_adam.png", dpi="figure")
    plt.show(); plt.close()

def alpha_parameter_variation():
    scores = []
    alphas = [10 ** (-i) for i in range(1, 9)]
    alphas = alphas[::-1]  # ascending order, from 1e-8 up to 1e-1
    for alpha in alphas:
        # hl = tuple([randint(10, 300) for i in range(counter)])
        modele = MLPClassifier(hidden_layer_sizes=(60, 47, 179, 251, 296, 61, 191, 232, 171, 114), alpha=alpha)
        modele.fit(X=x_train, y=y_train)
        scores.append(modele.score(X=x_test, y=y_test) * 100)
    plt.figure(figsize=(12.8, 9.6))
    plt.plot(alphas, scores)
    plt.xlabel("Valeurs d'alpha")
    plt.xlim(left=1e-8)  # smallest alpha tested; the original 10e-8 (= 1e-7) clipped the first point
    plt.ylabel("Score de précision")
    plt.title("Courbe d'évolution du score de précision en fonction du paramètre alpha")
    plt.savefig("tp2Output/scores_with_alpha_variation_same_hidden_layers_sizes.png", dpi="figure")
    plt.show(); plt.close()

def times_scores_recalls_errors_comparison():
    scores = {}
    times = {}
    recalls = {}
    errors = {}
    layers = {}
    for counter in range(5):
        hl = tuple([randint(10, 300) for i in range(counter * 2 + 2)])
        modele = MLPClassifier(hidden_layer_sizes=hl)
        tic = time.process_time()
        modele.fit(X=x_train, y=y_train)
        toc = time.process_time()
        y_pred = modele.predict(X=x_test)
        times[counter] = toc - tic
        scores[counter] = modele.score(X=x_test, y=y_test) * 100
        recalls[counter] = recall_score(y_true=y_test, y_pred=y_pred, average="micro")
        errors[counter] = zero_one_loss(y_true=y_test, y_pred=y_pred) * 100
        layers[counter] = hl
    barWidth = 0.4
    y1 = [times[c] for c in range(5)]
    y2 = [scores[c] for c in range(5)]
    y3 = [recalls[c] for c in range(5)]
    y4 = [errors[c] for c in range(5)]
    r1 = [0, 4, 8, 12, 16]
    r2 = [x + barWidth for x in r1]
    r3 = [x + 2 * barWidth for x in r1]
    r4 = [x + 3 * barWidth for x in r1]
    plt.figure(figsize=(12.8, 9.6))
    bar1 = plt.bar(r1, y1, width=barWidth, color=['red' for i in y1], linewidth=2, label="temps (sec)")
    bar2 = plt.bar(r2, y2, width=barWidth, color=['green' for i in y2], linewidth=4, label="precision (%)")
    bar3 = plt.bar(r3, y3, width=barWidth, color=['red' for i in y3], linewidth=1, label="recall")
    bar4 = plt.bar(r4, y4, width=barWidth, color=['magenta' for i in y4], linewidth=3, label="error (%)")
    plt.xticks([r + 1.5 * barWidth for r in r1], ['%s couches\n%s' % (i * 2 + 2, layers[i]) for i in range(5)], rotation=10)
    plt.xlabel("Modèles")
    plt.ylabel("Valeur")
    plt.title("Evaluation du temps d'apprentissage, la précision, le recall et l'erreur de cinq modèles de différentes couches")
    for rect in bar1 + bar2 + bar3 + bar4:
        height = rect.get_height()
        plt.text(rect.get_x() + rect.get_width() / 2.0, height, '%.2f' % height, ha='center', va='bottom')
    plt.legend()
    plt.savefig("tp2Output/comparison_times_scores_recalls_errors_of_methods_different_layers.png", dpi="figure")
    plt.show(); plt.close()


modele_hidden_layer_sizes_50()
layers_number_variation_and_scores()
five_models_with_different_layers()
optimization_algorithms_variation()
activation_functions_variation()
alpha_parameter_variation()
times_scores_recalls_errors_comparison()
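
Side note: the solver, activation and alpha sweeps above are hand-rolled grid searches. scikit-learn's GridSearchCV runs the same kind of exploration with built-in cross-validation. A minimal sketch, assuming the x_train/y_train arrays defined above (the parameter grid here is illustrative, not the one used in this TP):

from sklearn.model_selection import GridSearchCV

param_grid = {
    "solver": ["lbfgs", "sgd", "adam"],
    "alpha": [1e-8, 1e-5, 1e-1],
}
# 3-fold cross-validated search over all solver/alpha combinations
search = GridSearchCV(MLPClassifier(hidden_layer_sizes=(50,)), param_grid, cv=3)
search.fit(x_train, y_train)
print(search.best_params_, search.best_score_)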

TP3_prog1.py (+257, -0)

@@ -0,0 +1,257 @@
from sklearn import datasets
from sklearn.model_selection import train_test_split, KFold
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
# plot_confusion_matrix was deprecated in scikit-learn 1.0 and removed in 1.2;
# on recent versions use ConfusionMatrixDisplay.from_estimator (see the note after this file)
from sklearn.metrics import precision_score, recall_score, plot_confusion_matrix, confusion_matrix, ConfusionMatrixDisplay
import numpy as np
import matplotlib.pyplot as plt
from random import seed, randint
from graphs import bars_plot2, curve_plot
from statistics import mean
import time


np.random.seed(0)  # seed the NumPy random generator (seed=0) for reproducibility
seed(0)

#####################
###  TP3 PROGRAM  ###
#####################
# Load the MNIST database (as NumPy arrays, see TP1_prog1.py)
mnist = datasets.fetch_openml('mnist_784', as_frame=False)

# Split the database
x_train = mnist.data[:3500, :]  ## training
y_train = mnist.target[:3500]
x_test = mnist.data[3500:5001, :]  ## test
y_test = mnist.target[3500:5001]

# print(x_train.shape, x_test.shape)

# Reshape x_train and x_test to the dimensions expected by the classifier
x_train = x_train.reshape((-1, 784))
x_test = x_test.reshape((-1, 784))

def kernel_variation():
    kernels = ["linear", "poly", "rbf", "sigmoid"]
    scores = {}
    for kernel in kernels:
        clsvm = SVC(kernel=kernel)
        clsvm.fit(X=x_train, y=y_train)
        scores[kernel] = clsvm.score(X=x_test, y=y_test) * 100
        print("Score avec kernel=%s: %.2f" % (kernel, scores[kernel]))
    bars_plot2(bar_labels=kernels, bar_heights=list(scores.values()), xlabel="Kernels utilisés",
               ylabel="Score", fig_title="Score par kernel utilisé", show=True, percent_mark=True,
               output="tp3Output/scores_for_kernel_variation.png")


def kernel_and_c_variation():
    kernels = ["linear", "poly", "rbf", "sigmoid"]
    scores = {}
    c_values = [0.1 + 0.18 * x for x in range(6)]
    for kernel in kernels:
        scores[kernel] = {}
        for c_value in c_values:
            clsvm = SVC(kernel=kernel, C=c_value)
            clsvm.fit(X=x_train, y=y_train)
            scores[kernel][c_value] = clsvm.score(X=x_test, y=y_test) * 100
            print("Score avec kernel=%s: %.2f" % (kernel, scores[kernel][c_value]))
    plt.figure(figsize=(12.8, 9.6))
    plt.plot(c_values, list(scores["linear"].values()), label="linear")
    plt.plot(c_values, list(scores["poly"].values()), label="poly")
    plt.plot(c_values, list(scores["rbf"].values()), label="rbf")
    plt.plot(c_values, list(scores["sigmoid"].values()), label="sigmoid")
    plt.xlabel("Valeurs de C")
    plt.xlim(left=0.1)
    plt.ylabel("Score (précision)")
    plt.title("Courbe d'évolution de la précision en fonction de C")
    plt.legend()
    plt.savefig("tp3Output/scores_all_kernel_with_c_variation.png", dpi="figure")
    plt.show()
    plt.close()

def train_test_error_with_c_variation():
    kernels = ["linear", "poly", "rbf", "sigmoid"]
    scores = {}
    c_values = [0.1 + 0.18 * x for x in range(6)]
    data_types = ["train", "test"]
    for data_type in data_types:
        scores[data_type] = {}
        for kernel in kernels:
            scores[data_type][kernel] = {}
            for c_value in c_values:
                clsvm = SVC(kernel=kernel, C=c_value)
                clsvm.fit(X=x_train, y=y_train)
                if data_type == "train":
                    scores[data_type][kernel][c_value] = (1 - clsvm.score(X=x_train, y=y_train)) * 100
                else:
                    scores[data_type][kernel][c_value] = (1 - clsvm.score(X=x_test, y=y_test)) * 100
                print("Score d'erreur données=%s avec kernel=%s: %.2f" % (data_type, kernel, scores[data_type][kernel][c_value]))
    plt.figure(figsize=(12.8, 9.6))
    plt.plot(c_values, list(scores["train"]["linear"].values()), label="train linear", marker="o")
    plt.plot(c_values, list(scores["train"]["poly"].values()), label="train poly", marker="o")
    plt.plot(c_values, list(scores["train"]["rbf"].values()), label="train rbf", marker="o")
    plt.plot(c_values, list(scores["train"]["sigmoid"].values()), label="train sigmoid", marker="o")
    plt.plot(c_values, list(scores["test"]["linear"].values()), label="test linear", marker="*")
    plt.plot(c_values, list(scores["test"]["poly"].values()), label="test poly", marker="*")
    plt.plot(c_values, list(scores["test"]["rbf"].values()), label="test rbf", marker="*")
    plt.plot(c_values, list(scores["test"]["sigmoid"].values()), label="test sigmoid", marker="*")
    plt.xlabel("Valeurs de C")
    plt.xlim(left=0.1)
    plt.ylabel("Error score")
    plt.title("Courbe d'évolution du pourcentage d'erreur de précision en fonction de C")
    plt.legend()
    plt.savefig("tp3Output/error_scores_train_test_with_kernel_and_c_variation.png", dpi="figure")
    plt.show()
    plt.close()

def generate_confusion_matrix():
    kernels = ["linear", "poly", "rbf", "sigmoid"]
    for kernel in kernels:
        clsvm = SVC(kernel=kernel)
        clsvm.fit(X=x_train, y=y_train)
        plot_confusion_matrix(estimator=clsvm, X=x_test, y_true=y_test, values_format=".1f")
        plt.show()

def time_precision_recall_error():
    kernels = ["linear", "poly", "rbf", "sigmoid"]
    times = {}
    scores = {}
    recalls = {}
    errors = {}
    c_values = [0.1 + 0.18 * x for x in range(6)]
    for kernel in kernels:
        times[kernel] = {}
        scores[kernel] = {}
        recalls[kernel] = {}
        errors[kernel] = {}
        for c_value in c_values:
            clsvm = SVC(kernel=kernel, C=c_value)
            tic = time.process_time()
            clsvm.fit(X=x_train, y=y_train)
            toc = time.process_time()
            y_pred = clsvm.predict(X=x_test)
            times[kernel][c_value] = toc - tic
            scores[kernel][c_value] = clsvm.score(X=x_test, y=y_test) * 100
            recalls[kernel][c_value] = recall_score(y_true=y_test, y_pred=y_pred, average="micro")
            errors[kernel][c_value] = 100 - scores[kernel][c_value]
            print("Score avec kernel=%s: %.2f" % (kernel, scores[kernel][c_value]))
    plt.figure(figsize=(12.8, 9.6))
    plt.plot(c_values, list(times["linear"].values()), label="linear")
    plt.plot(c_values, list(times["poly"].values()), label="poly")
    plt.plot(c_values, list(times["rbf"].values()), label="rbf")
    plt.plot(c_values, list(times["sigmoid"].values()), label="sigmoid")
    plt.xlabel("Valeurs de C")
    plt.xlim(left=0.1)
    plt.ylabel("Temps d'apprentissage (secondes)")
    plt.title("Courbe d'évolution du temps d'apprentissage en fonction de C")
    plt.legend()
    plt.savefig("tp3Output/times_all_kernel_with_c_variation.png", dpi="figure")
    plt.show(); plt.close()
    ###
    plt.figure(figsize=(12.8, 9.6))
    plt.plot(c_values, list(scores["linear"].values()), label="linear precision")
    plt.plot(c_values, list(scores["poly"].values()), label="poly precision")
    plt.plot(c_values, list(scores["rbf"].values()), label="rbf precision")
    plt.plot(c_values, list(scores["sigmoid"].values()), label="sigmoid precision")
    plt.plot(c_values, list(errors["linear"].values()), label="linear error", marker="*", linestyle="-.")
    plt.plot(c_values, list(errors["poly"].values()), label="poly error", marker="*", linestyle="-.")
    plt.plot(c_values, list(errors["rbf"].values()), label="rbf error", marker="*", linestyle="-.")
    plt.plot(c_values, list(errors["sigmoid"].values()), label="sigmoid error", marker="*", linestyle="-.")
    plt.xlabel("Valeurs de C")
    plt.xlim(left=0.1)
    plt.ylabel("Score")
    plt.title("Courbe d'évolution de la précision et de l'erreur de classification en fonction de C")
    plt.legend()
    plt.savefig("tp3Output/scores_and_errors_all_kernel_with_c_variation.png", dpi="figure")
    plt.show(); plt.close()
    ###
    plt.figure(figsize=(12.8, 9.6))
    plt.plot(c_values, list(recalls["linear"].values()), label="linear")
    plt.plot(c_values, list(recalls["poly"].values()), label="poly")
    plt.plot(c_values, list(recalls["rbf"].values()), label="rbf")
    plt.plot(c_values, list(recalls["sigmoid"].values()), label="sigmoid")
    plt.xlabel("Valeurs de C")
    plt.xlim(left=0.1)
    plt.ylabel("Recall score")
    plt.title("Courbe d'évolution du recall en fonction de C")
    plt.legend()
    plt.savefig("tp3Output/recalls_all_kernel_with_c_variation.png", dpi="figure")
    plt.show(); plt.close()

def methods_comparison():
    times = {}
    scores = {}
    recalls = {}
    errors = {}
    matrices = {}
    ### Method 1: k-NN
    knn = KNeighborsClassifier(n_neighbors=10)
    tic = time.process_time()
    knn.fit(X=x_train, y=y_train)
    toc = time.process_time()
    y_pred = knn.predict(X=x_test)
    times["knn"] = toc - tic
    # score against the true labels (the original scored against y_pred, which always yields 100%)
    scores["knn"] = knn.score(X=x_test, y=y_test) * 100
    recalls["knn"] = recall_score(y_true=y_test, y_pred=y_pred, average="micro")
    errors["knn"] = 100 - scores["knn"]
    # matrices["knn"] = confusion_matrix(y_true=y_test, y_pred=y_pred)
    plot_confusion_matrix(estimator=knn, X=x_test, y_true=y_test, values_format=".1f"); plt.show()
    ### Method 2: MLP
    mlp = MLPClassifier(hidden_layer_sizes=(50,))
    tic = time.process_time()
    mlp.fit(X=x_train, y=y_train)
    toc = time.process_time()
    y_pred = mlp.predict(X=x_test)  # predict with the MLP (the original mistakenly reused knn here)
    times["mlp"] = toc - tic
    scores["mlp"] = mlp.score(X=x_test, y=y_test) * 100
    recalls["mlp"] = recall_score(y_true=y_test, y_pred=y_pred, average="micro")
    errors["mlp"] = 100 - scores["mlp"]
    # matrices["mlp"] = confusion_matrix(y_true=y_test, y_pred=y_pred)
    plot_confusion_matrix(estimator=mlp, X=x_test, y_true=y_test, values_format=".1f"); plt.show()
    ### Method 3: SVM
    svc = SVC(kernel="rbf", C=1)
    tic = time.process_time()
    svc.fit(X=x_train, y=y_train)
    toc = time.process_time()
    y_pred = svc.predict(X=x_test)  # predict with the SVC (the original mistakenly reused knn here)
    times["svc"] = toc - tic
    scores["svc"] = svc.score(X=x_test, y=y_test) * 100
    recalls["svc"] = recall_score(y_true=y_test, y_pred=y_pred, average="micro")
    errors["svc"] = 100 - scores["svc"]
    # matrices["svc"] = confusion_matrix(y_true=y_test, y_pred=y_pred)
    plot_confusion_matrix(estimator=svc, X=x_test, y_true=y_test, values_format=".1f"); plt.show()
    ### Plotting
    barWidth = 0.5
    y1 = [times["knn"], times["mlp"], times["svc"]]
    y2 = [scores["knn"], scores["mlp"], scores["svc"]]
    y3 = [recalls["knn"], recalls["mlp"], recalls["svc"]]
    y4 = [errors["knn"], errors["mlp"], errors["svc"]]
    r1 = [0, 3, 6]
    r2 = [x + barWidth for x in r1]
    r3 = [x + 2 * barWidth for x in r1]
    r4 = [x + 3 * barWidth for x in r1]
    plt.figure(figsize=(12.8, 9.6))
    bar1 = plt.bar(r1, y1, width=barWidth, color=['green' for i in y1], linewidth=2, label="temps (sec)")
    bar2 = plt.bar(r2, y2, width=barWidth, color=['yellow' for i in y2], linewidth=4, label="precision (%)")
    bar3 = plt.bar(r3, y3, width=barWidth, color=['red' for i in y3], linewidth=1, label="recall")
    bar4 = plt.bar(r4, y4, width=barWidth, color=['magenta' for i in y4], linewidth=3, label="error (%)")
    plt.xticks([r + 1.5 * barWidth for r in r1], ['Modèle K-nn', 'Modèle MLP', 'Modèle SVM'])
    plt.xlabel("Méthodes")
    plt.ylabel("Valeur")
    plt.title("Evaluation de différentes métriques de performance pour chacune des méthodes vues")
    for rect in bar1 + bar2 + bar3 + bar4:
        height = rect.get_height()
        plt.text(rect.get_x() + rect.get_width() / 2.0, height, '%.2f' % height, ha='center', va='bottom')
    plt.legend()
    # save before show: once the figure window is closed, savefig would write a blank image
    plt.savefig("tp3Output/global_comparison_of_methods_different_metrics.png", dpi="figure")
    plt.show()


kernel_variation()
kernel_and_c_variation()
train_test_error_with_c_variation()
generate_confusion_matrix()
time_precision_recall_error()
methods_comparison()
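
Compatibility note: plot_confusion_matrix was deprecated in scikit-learn 1.0 and removed in 1.2, so this file only runs as-is on older versions. On recent versions each call can be replaced with ConfusionMatrixDisplay.from_estimator; a minimal sketch, assuming a fitted classifier clf and the x_test/y_test arrays defined above:

from sklearn.metrics import ConfusionMatrixDisplay
import matplotlib.pyplot as plt

# draws the confusion matrix from the estimator's predictions on x_test
ConfusionMatrixDisplay.from_estimator(clf, x_test, y_test, values_format=".1f")
plt.show()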

graphs.py (+54, -0)

@@ -0,0 +1,54 @@
import matplotlib.pyplot as plt


def bars_plot(bar_labels=None, bar_heights=None, bar_colors="blue", bar_width=0.25, xlabel="xlabel", ylabel="ylabel", fig_title="title", figsize=(12.8, 9.6), output="./bars.png", show=False):
    plt.figure(figsize=figsize)
    plt.bar(x=list(map(str, bar_labels)), height=bar_heights, width=bar_width, color=bar_colors)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.title(fig_title)
    plt.savefig(output, dpi="figure")
    if show:
        plt.show()
    plt.close()

def clustering_plot(abcissas=None, ordinates=None, predictions=None, marker="o", xlabel="xlabel", ylabel="ylabel", fig_title="title", figsize=(12.8, 9.6), output="./bars.png", show=False):
    plt.figure(figsize=figsize)
    plt.scatter(x=abcissas, y=ordinates, c=predictions, marker=marker)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.title(fig_title)
    plt.savefig(output, dpi="figure")
    if show:
        plt.show()
    plt.close()

def bars_plot2(bar_labels=None, bar_heights=None, bar_colors="blue", bar_width=0.25, xlabel="xlabel", ylabel="ylabel", fig_title="title", figsize=(12.8, 9.6), output="./bars.png", show=False, show_heights=True, percent_mark=False):
    plt.figure(figsize=figsize)
    bar = plt.bar(x=list(map(str, bar_labels)), height=bar_heights, width=bar_width, color=bar_colors)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.title(fig_title)
    if show_heights:
        for rect in bar:
            height = rect.get_height()
            if percent_mark:
                plt.text(rect.get_x() + rect.get_width() / 2.0, height, '%.2f%%' % height, ha='center', va='bottom')
            else:
                plt.text(rect.get_x() + rect.get_width() / 2.0, height, '%.2f' % height, ha='center', va='bottom')
    plt.savefig(output, dpi="figure")
    if show:
        plt.show()
    plt.close()

def curve_plot(abcissas=None, ordinates=None, xlabel="xlabel", ylabel="ylabel", fig_title="title", figsize=(12.8, 9.6), output="./bars.png", show=False):
    plt.figure(figsize=figsize)
    plt.plot(abcissas, ordinates)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.title(fig_title)
    plt.savefig(output, dpi="figure")
    if show:
        plt.show()
    plt.close()
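
For reference, a typical call to bars_plot2 as the TP scripts use it (the labels and values here are illustrative, not measured results):

bars_plot2(bar_labels=["linear", "poly", "rbf"],
           bar_heights=[91.2, 93.5, 95.1],
           xlabel="Kernel", ylabel="Score",
           fig_title="exemple", show=True, percent_mark=True,
           output="./example.png")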

BIN  tp1Output/mnist_k_fold_scores.png
BIN  tp1Output/mnist_split_data_percentage_scores.png
BIN  tp1Output/mnist_split_data_percentage_scores1.png
BIN  tp1Output/mnist_split_various_train_percentage_scores_with_fixed_test.png
BIN  tp1Output/mnist_various_distance_metric_njobs_at_1_timesX.png
BIN  tp1Output/mnist_various_distance_metric_njobs_at_less_1_timesX.png
BIN  tp1Output/mnist_various_distance_metric_scores.png
BIN  tp2Output/comparison_times_scores_of_methods_different_layers.png
BIN  tp2Output/comparison_times_scores_recalls_errors_of_methods_different_layers.png
BIN  tp2Output/scores_with_alpha_variation_same_hidden_layers_sizes.png
BIN  tp2Output/scores_with_hidden_layers_variation.png
BIN  tp2Output/scores_with_hidden_layers_variation_different_activations_with_adam.png
BIN  tp2Output/scores_with_hidden_layers_variation_different_solvers.png
BIN  tp3Output/comparison_knn_confusion_matrix.png
BIN  tp3Output/comparison_mlp_confusion_matrix.png
BIN  tp3Output/comparison_svc_confusion_matrix.png
BIN  tp3Output/confusion_matrix_with_linear_as_kernel.png
BIN  tp3Output/confusion_matrix_with_poly_as_kernel.png
BIN  tp3Output/confusion_matrix_with_rbf_as_kernel.png
BIN  tp3Output/confusion_matrix_with_sigmoid_as_kernel.png
BIN  tp3Output/error_scores_train_test_with_kernel_and_c_variation.png
BIN  tp3Output/global_comparison_of_methods_different_metrics.png
BIN  tp3Output/recalls_all_kernel_with_c_variation.png
BIN  tp3Output/scores_all_kernel_with_c_variation.png
BIN  tp3Output/scores_and_errors_all_kernel_with_c_variation.png
BIN  tp3Output/scores_for_kernel_variation.png
BIN  tp3Output/times_all_kernel_with_c_variation.png
