Added training precision and zero-one loss to the plot with varying tolerance
parent 13bac40fa4
commit d286bedb4d
6 changed files with 413 additions and 251 deletions
@@ -2,7 +2,7 @@
 "cells": [
 {
 "cell_type": "code",
-"execution_count": 8,
+"execution_count": 1,
 "id": "530f620c",
 "metadata": {},
 "outputs": [],
@@ -22,7 +22,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 3,
+"execution_count": 2,
 "id": "68b6a517",
 "metadata": {},
 "outputs": [],
@@ -864,7 +864,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 11,
+"execution_count": 26,
 "id": "98107e41",
 "metadata": {},
 "outputs": [
@@ -872,37 +872,59 @@
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"Matrice de confusion K-NN :\n",
-" [[51 0 0 0 0 1 0 0 0 0]\n",
-" [ 0 56 0 0 0 0 0 0 0 0]\n",
-" [ 3 1 45 1 0 0 1 1 0 0]\n",
-" [ 0 1 1 35 0 1 0 1 1 1]\n",
-" [ 0 3 0 0 48 0 0 0 0 2]\n",
-" [ 0 1 0 1 0 38 0 0 0 0]\n",
-" [ 0 0 0 0 0 2 44 0 0 0]\n",
-" [ 0 2 0 0 3 0 0 47 0 0]\n",
-" [ 2 0 0 0 0 3 1 0 42 2]\n",
-" [ 0 0 0 0 4 1 0 1 2 50]]\n"
+"Métriques pour K-NN :\n",
+"Paramètres : (n_neighbors=3,p=2,n_jobs=1)\n",
+"Taille de l'échantillon : 10000\n",
+"Proportion des datasets : 90%\n",
+"Temps d'entraînement (secondes) : 0.01596\n",
+"Temps de prédiction (secondes) : 0.30718\n",
+"Précision pour chaque classe : [0.942, 0.891, 0.962, 0.959, 0.988, 0.944, 0.961, 0.97, 0.989, 0.918]\n",
+"Précision : 0.95\n",
+"Erreur : 0.05\n",
+"Matrice de confusion :\n",
+" [[ 98 0 1 0 0 0 1 0 1 0]\n",
+" [ 0 114 0 0 0 0 0 0 0 0]\n",
+" [ 2 2 102 0 0 0 0 0 0 0]\n",
+" [ 0 1 1 93 0 3 0 1 0 0]\n",
+" [ 1 5 0 0 82 0 0 0 0 5]\n",
+" [ 0 1 1 1 0 84 3 0 0 1]\n",
+" [ 0 0 0 0 0 0 99 0 0 0]\n",
+" [ 0 3 0 0 0 0 0 97 0 2]\n",
+" [ 2 1 1 3 1 2 0 0 92 0]\n",
+" [ 1 1 0 0 0 0 0 2 0 89]]\n"
 ]
 }
 ],
 "source": [
 "### Create vector of 5000 random indexes\n",
-"rand_indexes = np.random.randint(70000, size=5000)\n",
+"rand_indexes = np.random.randint(70000, size=10000)\n",
 "### Load data with the previous vector\n",
 "data = mnist.data[rand_indexes]\n",
-"# print(\"Dataset : \", data)\n",
 "target = mnist.target[rand_indexes]\n",
-"\n",
 "# Split the dataset\n",
 "xtrain, xtest, ytrain, ytest = model_selection.train_test_split(data, target,train_size=0.9)\n",
 "\n",
-"# Training on xtrain,ytrain\n",
 "clf = neighbors.KNeighborsClassifier(n_neighbors=3,p=2,n_jobs=1)\n",
+"# Training on xtrain,ytrain\n",
+"t1 = time.time()\n",
 "clf.fit(xtrain, ytrain)\n",
+"t2 = time.time()\n",
 "# Predicting on xtest\n",
 "pred = clf.predict(xtest)\n",
-"print(\"Matrice de confusion K-NN :\\n\", metrics.confusion_matrix(ytest, pred))"
+"t3 = time.time()\n",
+"#Calcul de différentes metrics\n",
+"precisions = [round(i,3) for i in metrics.precision_score(ytest, pred,average=None)]\n",
+"\n",
+"print(\"Métriques pour K-NN :\")\n",
+"print(\"Paramètres : (n_neighbors=3,p=2,n_jobs=1)\")\n",
+"print(\"Taille de l'échantillon :\", 10000)\n",
+"print(\"Proportion des datasets :\", \"90%\")\n",
+"print(\"Temps d'entraînement (secondes) :\", round(t2-t1,5))\n",
+"print(\"Temps de prédiction (secondes) :\", round(t3-t2,5))\n",
+"print(\"Précision pour chaque classe :\", precisions)\n",
+"print(\"Précision :\", clf.score(xtest, ytest))\n",
+"print(\"Erreur :\", round(metrics.zero_one_loss(ytest, pred),5))\n",
+"print(\"Matrice de confusion :\\n\", metrics.confusion_matrix(ytest, pred))"
 ]
 },
 {
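For readability, here is the updated K-NN cell from the hunk above, flattened out of the escaped notebook JSON into a plain Python script. This is a sketch, not the repository's code: the notebook defines its imports and the `mnist` object in earlier cells that this diff does not show, so the `fetch_openml` loading below is an assumption.

import time
import numpy as np
from sklearn import metrics, model_selection, neighbors
from sklearn.datasets import fetch_openml

# Assumption: how the earlier (unshown) cell obtains `mnist`.
mnist = fetch_openml("mnist_784", as_frame=False)

# Sample 10000 of the 70000 MNIST images (the "5000" in the comment is stale)
rand_indexes = np.random.randint(70000, size=10000)
data = mnist.data[rand_indexes]
target = mnist.target[rand_indexes]

# 90% train / 10% test split
xtrain, xtest, ytrain, ytest = model_selection.train_test_split(
    data, target, train_size=0.9)

clf = neighbors.KNeighborsClassifier(n_neighbors=3, p=2, n_jobs=1)

# Time fit and predict separately
t1 = time.time()
clf.fit(xtrain, ytrain)
t2 = time.time()
pred = clf.predict(xtest)
t3 = time.time()

# Per-class precision, accuracy, zero-one loss, confusion matrix
precisions = [round(p, 3) for p in metrics.precision_score(ytest, pred, average=None)]
print("Temps d'entraînement (secondes) :", round(t2 - t1, 5))
print("Temps de prédiction (secondes) :", round(t3 - t2, 5))
print("Précision pour chaque classe :", precisions)
print("Précision :", clf.score(xtest, ytest))
print("Erreur :", round(metrics.zero_one_loss(ytest, pred), 5))
print("Matrice de confusion :\n", metrics.confusion_matrix(ytest, pred))

Timing fit and predict separately is what makes the new output meaningful for K-NN: fit merely stores the training set (0.016 s in the output above) while predict performs the neighbor search (0.307 s).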
@@ -2,7 +2,7 @@
 "cells": [
 {
 "cell_type": "code",
-"execution_count": 2,
+"execution_count": 1,
 "id": "3eb7a65b",
 "metadata": {},
 "outputs": [],
@@ -22,7 +22,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 3,
+"execution_count": 2,
 "id": "a8812842",
 "metadata": {},
 "outputs": [],
@@ -1233,7 +1233,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 7,
+"execution_count": 9,
 "id": "abb0fcf1",
 "metadata": {},
 "outputs": [
@@ -1241,23 +1241,33 @@
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"Matrice de confusion A-NN :\n",
-" [[59 0 0 0 0 0 0 0 0 0]\n",
-" [ 0 60 0 0 0 0 0 1 0 0]\n",
-" [ 0 0 42 0 0 1 2 1 2 0]\n",
-" [ 0 0 1 44 0 1 0 0 0 0]\n",
-" [ 0 0 0 0 46 0 1 0 0 4]\n",
-" [ 0 0 0 0 0 31 0 0 1 0]\n",
-" [ 0 0 0 0 0 0 48 0 0 0]\n",
-" [ 1 0 0 1 0 0 0 49 0 0]\n",
-" [ 0 1 1 5 0 1 0 0 48 0]\n",
-" [ 2 0 0 1 1 2 0 0 2 40]]\n"
+"Métriques pour A-NN\n",
+"Paramètres : (random_state=1, max_iter=300, hidden_layer_sizes=((85,)*15),\n",
+"solver=adam, activation=relu, alpha= 0.0000001)\n",
+"Taille de l'échantillon : 10000\n",
+"Proportion des datasets : 90%\n",
+"Temps d'entraînement (secondes) : 27.9214\n",
+"Temps de prédiction (secondes) : 0.01396\n",
+"Précision pour chaque classe : [0.972, 0.974, 0.926, 0.97, 0.919, 0.939, 0.971, 0.967, 0.951, 0.927]\n",
+"Précision : 0.952\n",
+"Erreur : 0.048\n",
+"Matrice de confusion :\n",
+" [[103 0 1 0 0 0 1 0 0 0]\n",
+" [ 0 114 0 0 1 0 0 0 0 0]\n",
+" [ 0 0 100 0 2 1 1 1 1 0]\n",
+" [ 0 1 3 97 0 1 0 0 1 1]\n",
+" [ 0 0 0 0 79 0 0 0 0 4]\n",
+" [ 2 0 0 3 0 93 1 0 1 0]\n",
+" [ 0 0 0 0 0 3 99 0 1 0]\n",
+" [ 1 1 1 0 1 0 0 89 0 1]\n",
+" [ 0 1 3 0 0 1 0 0 77 2]\n",
+" [ 0 0 0 0 3 0 0 2 0 101]]\n"
 ]
 }
 ],
 "source": [
 "### Create vector of 5000 random indexes\n",
-"rand_indexes = np.random.randint(70000, size=5000)\n",
+"rand_indexes = np.random.randint(70000, size=10000)\n",
 "### Load data with the previous vector\n",
 "data = mnist.data[rand_indexes]\n",
 "# print(\"Dataset : \", data)\n",
@@ -1266,10 +1276,6 @@
 "# Split the dataset\n",
 "xtrain, xtest, ytrain, ytest = model_selection.train_test_split(data, target,train_size=0.9)\n",
 "\n",
-"best_training_time = 0\n",
-"best_precision_score = 0\n",
-"best_zero_one_loss = 0\n",
-"\n",
 "r = 1\n",
 "max_i = 300\n",
 "nb_hl = 15\n",
@@ -1281,31 +1287,27 @@
 "\n",
 "#Entraîne le classifier\n",
 "clf = neural_network.MLPClassifier(random_state=r, max_iter=max_i, hidden_layer_sizes=hl, solver=sol, activation=act, alpha=a, verbose=False)\n",
-"t1 = round(time.time(),5)\n",
+"t1 = time.time()\n",
 "clf.fit(xtrain, ytrain)\n",
-"t2 = round(time.time(),5)\n",
+"t2 = time.time()\n",
 "#Prédiction sur le jeu de tests\n",
 "pred = clf.predict(xtest)\n",
-"# Probabilités des prédictions sur xtest\n",
-"pred_proba = clf.predict_proba(xtest)\n",
-"# On sauvegarde le temps de calcul, la précision et \n",
-"# les taux d'erreurs par classe\n",
-"best_training_time = t2-t1\n",
-"best_precision_score = clf.score(xtest, ytest)\n",
-"best_zero_one_loss = metrics.zero_one_loss(ytest, pred)\n",
+"t3 = time.time()\n",
 "\n",
-"# print(\"Paramètre :\\n\")\n",
-"# print(\"random_state = \", r)\n",
-"# print(\"max_iter = \", max_i)\n",
-"# print(\"nb_hidden_layer = \", nb_hl)\n",
-"# print(\"hidden_layer_size = \", hl_size)\n",
-"# print(\"solver = \", sol)\n",
-"# print(\"activation = \", act)\n",
-"# print(\"alpha = \", a)\n",
-"# print(\"Temps d'entraînement : \", best_training_time)\n",
-"# print(\"Score : \", best_precision_score)\n",
-"# print(\"Zero-one loss : \", best_zero_one_loss)\n",
-"print(\"Matrice de confusion A-NN :\\n\", metrics.confusion_matrix(ytest, pred))"
+"#Calcul de différentes metrics\n",
+"precisions = [round(i,3) for i in metrics.precision_score(ytest, pred,average=None)]\n",
+"\n",
+"print(\"Métriques pour A-NN\")\n",
+"print(\"Paramètres : (random_state=1, max_iter=300, hidden_layer_sizes=((85,)*15),\")\n",
+"print(\"solver=adam, activation=relu, alpha= 0.0000001)\")\n",
+"print(\"Taille de l'échantillon :\", 10000)\n",
+"print(\"Proportion des datasets :\", \"90%\")\n",
+"print(\"Temps d'entraînement (secondes) :\", round(t2-t1,5))\n",
+"print(\"Temps de prédiction (secondes) :\", round(t3-t2,5))\n",
+"print(\"Précision pour chaque classe :\", precisions)\n",
+"print(\"Précision :\", clf.score(xtest, ytest))\n",
+"print(\"Erreur :\", round(metrics.zero_one_loss(ytest, pred),5))\n",
+"print(\"Matrice de confusion :\\n\", metrics.confusion_matrix(ytest, pred))"
 ]
 },
 {
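The A-NN (MLP) cell gets the same flattening below. The hyperparameter variables (r, max_i, nb_hl, hl_size, hl, sol, act, a) are defined between the hunks shown, so their values here are inferred from the printed "Paramètres" line in the new output; treat this as a sketch under those assumptions.

import time
import numpy as np
from sklearn import metrics, model_selection, neural_network
from sklearn.datasets import fetch_openml

# Assumption: same data loading and split as in the K-NN sketch above.
mnist = fetch_openml("mnist_784", as_frame=False)
rand_indexes = np.random.randint(70000, size=10000)
data = mnist.data[rand_indexes]
target = mnist.target[rand_indexes]
xtrain, xtest, ytrain, ytest = model_selection.train_test_split(
    data, target, train_size=0.9)

# Values inferred from the printed parameter line, not visible in the hunks
r, max_i = 1, 300
nb_hl, hl_size = 15, 85
hl = (hl_size,) * nb_hl          # 15 hidden layers of 85 units each
sol, act, a = "adam", "relu", 1e-7

clf = neural_network.MLPClassifier(random_state=r, max_iter=max_i,
                                   hidden_layer_sizes=hl, solver=sol,
                                   activation=act, alpha=a, verbose=False)

t1 = time.time()
clf.fit(xtrain, ytrain)
t2 = time.time()
pred = clf.predict(xtest)
t3 = time.time()

precisions = [round(p, 3) for p in metrics.precision_score(ytest, pred, average=None)]
print("Temps d'entraînement (secondes) :", round(t2 - t1, 5))
print("Temps de prédiction (secondes) :", round(t3 - t2, 5))
print("Précision pour chaque classe :", precisions)
print("Précision :", clf.score(xtest, ytest))
print("Erreur :", round(metrics.zero_one_loss(ytest, pred), 5))
print("Matrice de confusion :\n", metrics.confusion_matrix(ytest, pred))

Note how the timing profile inverts relative to K-NN: per the outputs above, the MLP spends ~28 s in fit and ~0.014 s in predict.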
File diff suppressed because one or more lines are too long
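The suppressed diff presumably contains the plot named in the commit message (training precision and zero-one loss with varying tolerance). Since that code is not visible here, the following is purely a hypothetical sketch of such a plot: the tolerance grid and variable names are guesses, and it reuses xtrain/xtest/ytrain/ytest from the sketches above.

import matplotlib.pyplot as plt
from sklearn import metrics, neural_network

tols = [1e-2, 1e-3, 1e-4, 1e-5]   # assumed tolerance grid
train_precisions, zero_one_losses = [], []
for tol in tols:
    clf = neural_network.MLPClassifier(random_state=1, max_iter=300,
                                       hidden_layer_sizes=(85,) * 15,
                                       tol=tol)
    clf.fit(xtrain, ytrain)                                # data from above
    train_precisions.append(clf.score(xtrain, ytrain))     # training precision
    zero_one_losses.append(metrics.zero_one_loss(ytest, clf.predict(xtest)))

plt.semilogx(tols, train_precisions, marker="o", label="training precision")
plt.semilogx(tols, zero_one_losses, marker="s", label="zero-one loss (test)")
plt.xlabel("tol")
plt.legend()
plt.show()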