Added training precision and zero-one loss to the plot with varying tolerance

Titouan Labourdette 2021-11-27 17:00:09 +01:00
parent 13bac40fa4
commit d286bedb4d
6 changed files with 413 additions and 251 deletions
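Both notebook cells below index into `mnist.data` and `mnist.target`, but the cell that loads MNIST sits outside the displayed hunks. Purely as a point of reference, a loader compatible with that usage might look like the sketch below; fetching "mnist_784" from OpenML is an assumption, since the actual loading cell is not part of this diff.

    # Assumed loader (the real loading cell is not shown in this diff):
    # fetch MNIST as plain numpy arrays so mnist.data[rand_indexes] works.
    from sklearn.datasets import fetch_openml

    mnist = fetch_openml("mnist_784", version=1, as_frame=False)
    print(mnist.data.shape)    # (70000, 784), matching randint(70000, ...)
    print(mnist.target.shape)  # (70000,)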

View file

@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 1,
"id": "530f620c",
"metadata": {},
"outputs": [],
@@ -22,7 +22,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 2,
"id": "68b6a517",
"metadata": {},
"outputs": [],
@@ -864,7 +864,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 26,
"id": "98107e41",
"metadata": {},
"outputs": [
@@ -872,37 +872,59 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Matrice de confusion K-NN :\n",
" [[51 0 0 0 0 1 0 0 0 0]\n",
" [ 0 56 0 0 0 0 0 0 0 0]\n",
" [ 3 1 45 1 0 0 1 1 0 0]\n",
" [ 0 1 1 35 0 1 0 1 1 1]\n",
" [ 0 3 0 0 48 0 0 0 0 2]\n",
" [ 0 1 0 1 0 38 0 0 0 0]\n",
" [ 0 0 0 0 0 2 44 0 0 0]\n",
" [ 0 2 0 0 3 0 0 47 0 0]\n",
" [ 2 0 0 0 0 3 1 0 42 2]\n",
" [ 0 0 0 0 4 1 0 1 2 50]]\n"
"Métriques pour K-NN :\n",
"Paramètres : (n_neighbors=3,p=2,n_jobs=1)\n",
"Taille de l'échantillon : 10000\n",
"Proportion des datasets : 90%\n",
"Temps d'entraînement (secondes) : 0.01596\n",
"Temps de prédiction (secondes) : 0.30718\n",
"Précision pour chaque classe : [0.942, 0.891, 0.962, 0.959, 0.988, 0.944, 0.961, 0.97, 0.989, 0.918]\n",
"Précision : 0.95\n",
"Erreur : 0.05\n",
"Matrice de confusion :\n",
" [[ 98 0 1 0 0 0 1 0 1 0]\n",
" [ 0 114 0 0 0 0 0 0 0 0]\n",
" [ 2 2 102 0 0 0 0 0 0 0]\n",
" [ 0 1 1 93 0 3 0 1 0 0]\n",
" [ 1 5 0 0 82 0 0 0 0 5]\n",
" [ 0 1 1 1 0 84 3 0 0 1]\n",
" [ 0 0 0 0 0 0 99 0 0 0]\n",
" [ 0 3 0 0 0 0 0 97 0 2]\n",
" [ 2 1 1 3 1 2 0 0 92 0]\n",
" [ 1 1 0 0 0 0 0 2 0 89]]\n"
]
}
],
"source": [
"### Create vector of 5000 random indexes\n",
"rand_indexes = np.random.randint(70000, size=5000)\n",
"rand_indexes = np.random.randint(70000, size=10000)\n",
"### Load data with the previous vector\n",
"data = mnist.data[rand_indexes]\n",
"# print(\"Dataset : \", data)\n",
"target = mnist.target[rand_indexes]\n",
"\n",
"# Split the dataset\n",
"xtrain, xtest, ytrain, ytest = model_selection.train_test_split(data, target,train_size=0.9)\n",
"\n",
"# Training on xtrain,ytrain\n",
"clf = neighbors.KNeighborsClassifier(n_neighbors=3,p=2,n_jobs=1)\n",
"# Training on xtrain,ytrain\n",
"t1 = time.time()\n",
"clf.fit(xtrain, ytrain)\n",
"t2 = time.time()\n",
"# Predicting on xtest\n",
"pred = clf.predict(xtest)\n",
"print(\"Matrice de confusion K-NN :\\n\", metrics.confusion_matrix(ytest, pred))"
"t3 = time.time()\n",
"#Calcul de différentes metrics\n",
"precisions = [round(i,3) for i in metrics.precision_score(ytest, pred,average=None)]\n",
"\n",
"print(\"Métriques pour K-NN :\")\n",
"print(\"Paramètres : (n_neighbors=3,p=2,n_jobs=1)\")\n",
"print(\"Taille de l'échantillon :\", 10000)\n",
"print(\"Proportion des datasets :\", \"90%\")\n",
"print(\"Temps d'entraînement (secondes) :\", round(t2-t1,5))\n",
"print(\"Temps de prédiction (secondes) :\", round(t3-t2,5))\n",
"print(\"Précision pour chaque classe :\", precisions)\n",
"print(\"Précision :\", clf.score(xtest, ytest))\n",
"print(\"Erreur :\", round(metrics.zero_one_loss(ytest, pred),5))\n",
"print(\"Matrice de confusion :\\n\", metrics.confusion_matrix(ytest, pred))"
]
},
{
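The K-NN cell now reports three related numbers: per-class precision from `metrics.precision_score(..., average=None)`, overall accuracy from `clf.score`, and the error from `metrics.zero_one_loss`. For a classifier, `clf.score` is mean accuracy, so accuracy and zero-one loss always sum to one. A minimal sketch with a toy label vector (the toy data is illustrative, not the notebook's MNIST sample):

    import numpy as np
    from sklearn import metrics

    ytest = np.array(["0", "1", "1", "2", "2", "2"])  # toy labels, not MNIST
    pred  = np.array(["0", "1", "2", "2", "2", "2"])  # one mistake on a "1"

    acc  = metrics.accuracy_score(ytest, pred)   # 5/6, same value as clf.score
    loss = metrics.zero_one_loss(ytest, pred)    # 1/6, the reported error
    per_class = metrics.precision_score(ytest, pred, average=None)

    assert np.isclose(acc + loss, 1.0)           # complements by definition
    print("Accuracy:", round(acc, 3))            # 0.833
    print("Zero-one loss:", round(loss, 3))      # 0.167
    print("Per-class precision:", [round(p, 3) for p in per_class])  # [1.0, 1.0, 0.75]

This is why the printed "Accuracy" and "Zero-one loss" above (0.95 and 0.05) mirror each other exactly.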

View file

@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 1,
"id": "3eb7a65b",
"metadata": {},
"outputs": [],
@@ -22,7 +22,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 2,
"id": "a8812842",
"metadata": {},
"outputs": [],
@@ -1233,7 +1233,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 9,
"id": "abb0fcf1",
"metadata": {},
"outputs": [
@@ -1241,23 +1241,33 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Matrice de confusion A-NN :\n",
" [[59 0 0 0 0 0 0 0 0 0]\n",
" [ 0 60 0 0 0 0 0 1 0 0]\n",
" [ 0 0 42 0 0 1 2 1 2 0]\n",
" [ 0 0 1 44 0 1 0 0 0 0]\n",
" [ 0 0 0 0 46 0 1 0 0 4]\n",
" [ 0 0 0 0 0 31 0 0 1 0]\n",
" [ 0 0 0 0 0 0 48 0 0 0]\n",
" [ 1 0 0 1 0 0 0 49 0 0]\n",
" [ 0 1 1 5 0 1 0 0 48 0]\n",
" [ 2 0 0 1 1 2 0 0 2 40]]\n"
"Métriques pour A-NN\n",
"Paramètres : (random_state=1, max_iter=300, hidden_layer_sizes=((85,)*15),\n",
"solver=adam, activation=relu, alpha= 0.0000001)\n",
"Taille de l'échantillon : 10000\n",
"Proportion des datasets : 90%\n",
"Temps d'entraînement (secondes) : 27.9214\n",
"Temps de prédiction (secondes) : 0.01396\n",
"Précision pour chaque classe : [0.972, 0.974, 0.926, 0.97, 0.919, 0.939, 0.971, 0.967, 0.951, 0.927]\n",
"Précision : 0.952\n",
"Erreur : 0.048\n",
"Matrice de confusion :\n",
" [[103 0 1 0 0 0 1 0 0 0]\n",
" [ 0 114 0 0 1 0 0 0 0 0]\n",
" [ 0 0 100 0 2 1 1 1 1 0]\n",
" [ 0 1 3 97 0 1 0 0 1 1]\n",
" [ 0 0 0 0 79 0 0 0 0 4]\n",
" [ 2 0 0 3 0 93 1 0 1 0]\n",
" [ 0 0 0 0 0 3 99 0 1 0]\n",
" [ 1 1 1 0 1 0 0 89 0 1]\n",
" [ 0 1 3 0 0 1 0 0 77 2]\n",
" [ 0 0 0 0 3 0 0 2 0 101]]\n"
]
}
],
"source": [
"### Create vector of 5000 random indexes\n",
"rand_indexes = np.random.randint(70000, size=5000)\n",
"rand_indexes = np.random.randint(70000, size=10000)\n",
"### Load data with the previous vector\n",
"data = mnist.data[rand_indexes]\n",
"# print(\"Dataset : \", data)\n",
@@ -1266,10 +1276,6 @@
"# Split the dataset\n",
"xtrain, xtest, ytrain, ytest = model_selection.train_test_split(data, target,train_size=0.9)\n",
"\n",
"best_training_time = 0\n",
"best_precision_score = 0\n",
"best_zero_one_loss = 0\n",
"\n",
"r = 1\n",
"max_i = 300\n",
"nb_hl = 15\n",
@@ -1281,31 +1287,27 @@
"\n",
"#Entraîne le classifier\n",
"clf = neural_network.MLPClassifier(random_state=r, max_iter=max_i, hidden_layer_sizes=hl, solver=sol, activation=act, alpha=a, verbose=False)\n",
"t1 = round(time.time(),5)\n",
"t1 = time.time()\n",
"clf.fit(xtrain, ytrain)\n",
"t2 = round(time.time(),5)\n",
"t2 = time.time()\n",
"#Prédiction sur le jeu de tests\n",
"pred = clf.predict(xtest)\n",
"# Probabilités des prédictions sur xtest\n",
"pred_proba = clf.predict_proba(xtest)\n",
"# On sauvegarde le temps de calcul, la précision et \n",
"# les taux d'erreurs par classe\n",
"best_training_time = t2-t1\n",
"best_precision_score = clf.score(xtest, ytest)\n",
"best_zero_one_loss = metrics.zero_one_loss(ytest, pred)\n",
"t3 = time.time()\n",
"\n",
"# print(\"Paramètre :\\n\")\n",
"# print(\"random_state = \", r)\n",
"# print(\"max_iter = \", max_i)\n",
"# print(\"nb_hidden_layer = \", nb_hl)\n",
"# print(\"hidden_layer_size = \", hl_size)\n",
"# print(\"solver = \", sol)\n",
"# print(\"activation = \", act)\n",
"# print(\"alpha = \", a)\n",
"# print(\"Temps d'entraînement : \", best_training_time)\n",
"# print(\"Score : \", best_precision_score)\n",
"# print(\"Zero-one loss : \", best_zero_one_loss)\n",
"print(\"Matrice de confusion A-NN :\\n\", metrics.confusion_matrix(ytest, pred))"
"#Calcul de différentes metrics\n",
"precisions = [round(i,3) for i in metrics.precision_score(ytest, pred,average=None)]\n",
"\n",
"print(\"Métriques pour A-NN\")\n",
"print(\"Paramètres : (random_state=1, max_iter=300, hidden_layer_sizes=((85,)*15),\")\n",
"print(\"solver=adam, activation=relu, alpha= 0.0000001)\")\n",
"print(\"Taille de l'échantillon :\", 10000)\n",
"print(\"Proportion des datasets :\", \"90%\")\n",
"print(\"Temps d'entraînement (secondes) :\", round(t2-t1,5))\n",
"print(\"Temps de prédiction (secondes) :\", round(t3-t2,5))\n",
"print(\"Précision pour chaque classe :\", precisions)\n",
"print(\"Précision :\", clf.score(xtest, ytest))\n",
"print(\"Erreur :\", round(metrics.zero_one_loss(ytest, pred),5))\n",
"print(\"Matrice de confusion :\\n\", metrics.confusion_matrix(ytest, pred))"
]
},
{
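The K-NN and A-NN cells now end with the same timing-and-reporting block. A small helper could factor it out; `report_metrics` below is a hypothetical name, not part of the repository, and only a sketch of that refactor:

    import time
    from sklearn import metrics

    def report_metrics(name, params, clf, xtrain, ytrain, xtest, ytest):
        # Fit, predict, and print the same metrics as the two cells above.
        t1 = time.time()
        clf.fit(xtrain, ytrain)
        t2 = time.time()
        pred = clf.predict(xtest)
        t3 = time.time()
        per_class = [round(p, 3) for p in
                     metrics.precision_score(ytest, pred, average=None)]
        print("Metrics for " + name + ":")
        print("Parameters:", params)
        print("Sample size:", len(xtrain) + len(xtest))
        print("Training time (seconds):", round(t2 - t1, 5))
        print("Prediction time (seconds):", round(t3 - t2, 5))
        print("Per-class precision:", per_class)
        print("Accuracy:", clf.score(xtest, ytest))
        print("Zero-one loss:", round(metrics.zero_one_loss(ytest, pred), 5))
        print("Confusion matrix:\n", metrics.confusion_matrix(ytest, pred))

    # e.g. report_metrics("K-NN", "(n_neighbors=3, p=2, n_jobs=1)",
    #                     neighbors.KNeighborsClassifier(n_neighbors=3, p=2, n_jobs=1),
    #                     xtrain, ytrain, xtest, ytest)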

File diff suppressed because one or more lines are too long
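The remaining file's diff is suppressed, so the plot named in the commit message ("training precision and zero-one loss with varying tolerance") is not visible here. Purely as a sketch of that idea, and not the committed code: the tolerance values, the use of `MLPClassifier`'s `tol` parameter, and the reading of "training precision" as training accuracy are all assumptions.

    import matplotlib.pyplot as plt
    from sklearn import metrics, neural_network

    # Assumes xtrain, ytrain, xtest, ytest from the split in the cells above.
    tolerances = [1e-2, 1e-3, 1e-4, 1e-5]   # assumed sweep values
    train_acc, test_loss = [], []
    for tol in tolerances:
        clf = neural_network.MLPClassifier(random_state=1, max_iter=300,
                                           hidden_layer_sizes=(85,)*15,
                                           tol=tol)
        clf.fit(xtrain, ytrain)
        train_acc.append(clf.score(xtrain, ytrain))
        test_loss.append(metrics.zero_one_loss(ytest, clf.predict(xtest)))

    plt.semilogx(tolerances, train_acc, marker="o", label="training accuracy")
    plt.semilogx(tolerances, test_loss, marker="o", label="test zero-one loss")
    plt.xlabel("tol (MLPClassifier tolerance)")
    plt.legend()
    plt.show()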
