tp-apprentissage-supervise/TP1_prog2.py.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "530f620c",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.datasets import fetch_openml\n",
    "from sklearn import model_selection\n",
    "from sklearn import neighbors\n",
    "import sklearn\n",
    "import numpy as np\n",
    "\n",
    "mnist = fetch_openml('mnist_784',as_frame=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "eb2c4496",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Dataset :  [[0. 0. 0. ... 0. 0. 0.]\n",
      " [0. 0. 0. ... 0. 0. 0.]\n",
      " [0. 0. 0. ... 0. 0. 0.]\n",
      " ...\n",
      " [0. 0. 0. ... 0. 0. 0.]\n",
      " [0. 0. 0. ... 0. 0. 0.]\n",
      " [0. 0. 0. ... 0. 0. 0.]]\n",
      "Etiquettes :  ['1' '3' '4' ... '5' '1' '2']\n",
      "Prédiction :  ['6' '7' '1' '4' '2' '7' '6' '6' '4' '9' '8' '4' '0' '0' '6' '8' '5' '0'\n",
      " '9' '6' '5' '0' '7' '7' '0' '7' '6' '1' '0' '1' '6' '6' '5' '8' '5' '6'\n",
      " '6' '5' '0' '7' '7' '5' '2' '7' '3' '2' '2' '6' '0' '0' '5' '8' '2' '4'\n",
      " '1' '0' '9' '6' '3' '7' '6' '3' '9' '4' '0' '0' '8' '8' '0' '6' '7' '1'\n",
      " '8' '3' '1' '6' '9' '1' '8' '0' '2' '0' '4' '5' '9' '3' '4' '3' '6' '3'\n",
      " '2' '3' '8' '0' '8' '6' '1' '7' '3' '8' '4' '2' '0' '7' '9' '4' '0' '2'\n",
      " '2' '0' '2' '2' '3' '0' '0' '0' '6' '8' '2' '4' '3' '7' '2' '6' '8' '4'\n",
      " '3' '8' '8' '0' '4' '6' '1' '0' '4' '6' '6' '0' '0' '6' '1' '6' '5' '5'\n",
      " '1' '5' '8' '2' '6' '4' '7' '5' '3' '2' '5' '8' '5' '2' '2' '3' '0' '3'\n",
      " '6' '1' '4' '8' '1' '7' '7' '5' '9' '1' '3' '5' '0' '7' '8' '6' '5' '0'\n",
      " '6' '6' '8' '5' '9' '5' '3' '9' '7' '4' '9' '0' '1' '5' '3' '3' '6' '1'\n",
      " '1' '1' '8' '7' '7' '1' '7' '4' '1' '1' '3' '8' '4' '4' '3' '9' '8' '4'\n",
      " '0' '4' '4' '9' '6' '0' '6' '0' '3' '8' '8' '0' '9' '1' '4' '4' '2' '1'\n",
      " '5' '7' '5' '0' '7' '6' '0' '4' '5' '7' '5' '9' '4' '3' '4' '4' '0' '5'\n",
      " '0' '0' '1' '9' '1' '7' '3' '4' '6' '0' '5' '9' '6' '1' '1' '5' '6' '5'\n",
      " '2' '9' '4' '3' '4' '1' '0' '0' '4' '2' '1' '7' '1' '4' '1' '3' '9' '2'\n",
      " '0' '8' '7' '7' '4' '4' '7' '1' '8' '7' '1' '4' '6' '9' '2' '7' '1' '4'\n",
      " '5' '1' '1' '4' '2' '7' '3' '8' '5' '8' '3' '3' '4' '7' '2' '1' '4' '9'\n",
      " '9' '4' '7' '9' '3' '4' '9' '7' '1' '0' '7' '7' '3' '8' '4' '6' '1' '3'\n",
      " '5' '5' '4' '9' '6' '0' '1' '1' '0' '0' '0' '3' '2' '7' '9' '8' '0' '3'\n",
      " '6' '1' '9' '4' '0' '1' '0' '0' '1' '6' '9' '6' '3' '8' '2' '5' '9' '5'\n",
      " '1' '3' '7' '0' '9' '3' '2' '6' '8' '5' '1' '5' '4' '1' '4' '1' '1' '3'\n",
      " '1' '5' '7' '2' '3' '2' '6' '1' '2' '6' '3' '8' '7' '3' '3' '9' '8' '0'\n",
      " '4' '3' '7' '7' '9' '3' '9' '8' '7' '8' '0' '4' '8' '8' '0' '4' '1' '5'\n",
      " '1' '2' '1' '3' '5' '4' '9' '8' '1' '3' '1' '5' '8' '4' '8' '2' '9' '8'\n",
      " '2' '3' '6' '3' '5' '2' '4' '0' '1' '0' '1' '8' '9' '9' '6' '2' '4' '1'\n",
      " '5' '6' '7' '7' '1' '5' '0' '2' '6' '5' '0' '3' '2' '8' '8' '9' '7' '9'\n",
      " '4' '4' '1' '9' '7' '8' '2' '1' '9' '6' '2' '4' '8' '7' '8' '9' '9' '4'\n",
      " '6' '9' '9' '5' '6' '9' '9' '8' '5' '5' '6' '4' '6' '8' '8' '7' '6' '0'\n",
      " '0' '9' '2' '3' '7' '7' '1' '5' '9' '1' '9' '9' '1' '4' '1' '9' '6' '9'\n",
      " '0' '9' '4' '6' '1' '0' '7' '0' '8' '9' '7' '3' '8' '2' '3' '0' '2' '8'\n",
      " '3' '1' '7' '0' '2' '1' '0' '4' '2' '0' '8' '1' '5' '2' '4' '5' '0' '9'\n",
      " '8' '1' '3' '9' '8' '7' '2' '4' '6' '2' '3' '9' '1' '8' '2' '1' '9' '0'\n",
      " '2' '4' '0' '9' '1' '4' '1' '3' '2' '4' '9' '5' '0' '2' '2' '1' '1' '7'\n",
      " '6' '8' '4' '9' '7' '7' '9' '4' '2' '3' '8' '1' '3' '5' '7' '9' '2' '0'\n",
      " '4' '8' '1' '6' '1' '7' '9' '6' '3' '6' '0' '0' '4' '7' '1' '1' '1' '4'\n",
      " '5' '6' '6' '1' '7' '6' '1' '7' '6' '1' '1' '2' '0' '8' '6' '1' '4' '3'\n",
      " '3' '6' '8' '7' '1' '1' '1' '4' '3' '3' '2' '6' '3' '3' '8' '8' '3' '1'\n",
      " '8' '6' '6' '8' '8' '9' '6' '7' '6' '7' '8' '9' '1' '8' '3' '9' '5' '0'\n",
      " '6' '6' '9' '3' '1' '2' '5' '5' '0' '9' '5' '9' '0' '0' '6' '1' '8' '5'\n",
      " '0' '2' '2' '8' '3' '9' '7' '2' '7' '6' '2' '8' '6' '8' '8' '0' '2' '0'\n",
      " '6' '2' '7' '7' '3' '7' '2' '7' '1' '7' '9' '3' '4' '7' '7' '9' '9' '2'\n",
      " '5' '8' '3' '7' '7' '2' '1' '7' '1' '1' '9' '9' '3' '0' '9' '4' '9' '0'\n",
      " '7' '6' '7' '7' '7' '7' '9' '7' '8' '1' '1' '6' '2' '6' '3' '8' '2' '8'\n",
      " '1' '5' '7' '0' '8' '3' '2' '7' '5' '1' '5' '3' '5' '2' '1' '7' '6' '0'\n",
      " '2' '6' '3' '2' '6' '0' '6' '2' '3' '9' '8' '6' '4' '9' '1' '3' '0' '4'\n",
      " '2' '3' '8' '1' '9' '0' '3' '5' '4' '5' '3' '2' '5' '0' '1' '1' '8' '3'\n",
      " '5' '6' '2' '1' '9' '3' '0' '4' '5' '9' '7' '2' '2' '1' '2' '1' '1' '5'\n",
      " '0' '9' '3' '7' '1' '9' '6' '5' '1' '6' '0' '1' '1' '6' '5' '8' '2' '2'\n",
      " '1' '8' '9' '7' '6' '8' '4' '5' '2' '3' '0' '7' '6' '0' '6' '6' '6' '0'\n",
      " '8' '8' '3' '4' '0' '9' '7' '5' '1' '1' '1' '4' '6' '7' '9' '6' '3' '9'\n",
      " '3' '9' '1' '9' '6' '4' '5' '4' '7' '0' '1' '9' '4' '8' '4' '6' '1' '8'\n",
      " '5' '6' '5' '1' '2' '7' '9' '5' '8' '0' '8' '8' '3' '2' '9' '4' '4' '8'\n",
      " '3' '0' '6' '5' '9' '7' '0' '0' '9' '7' '0' '3' '2' '1' '0' '5' '6' '4'\n",
      " '0' '4' '6' '9' '3' '0' '4' '1' '5' '6' '3' '6' '9' '1' '5' '6' '3' '0'\n",
      " '1' '6' '1' '0' '6' '2' '1' '7' '1' '9']\n",
      "Probabilités :  [[0.  0.  0.  ... 0.  0.  0. ]\n",
      " [0.  0.  0.  ... 1.  0.  0. ]\n",
      " [0.  1.  0.  ... 0.  0.  0. ]\n",
      " ...\n",
      " [0.  0.  0.  ... 1.  0.  0. ]\n",
      " [0.  0.4 0.  ... 0.1 0.  0.3]\n",
      " [0.  0.  0.  ... 0.1 0.  0.9]]\n",
      "Classe image 4 :  9\n",
      "Classe prédite image 4 :  4\n",
      "Score échantillon de test :  0.912\n",
      "Score données apprentissage :  0.94325\n"
     ]
    }
   ],
   "source": [
    "rand_indexes = np.random.randint(70000, size=5000)\n",
    "\n",
    "data = mnist.data[rand_indexes]\n",
    "print(\"Dataset : \", data)\n",
    "target = mnist.target[rand_indexes]\n",
    "print(\"Etiquettes : \", target)\n",
    "\n",
    "# xtrain data set d'entraînement et ytrain étiquettes de xtrain\n",
    "# xtest dataset de prédiction et ytest étiquettes de xtest\n",
    "xtrain, xtest, ytrain, ytest = model_selection.train_test_split(data, target,train_size=0.8)\n",
    "\n",
    "n_neighbors = 10\n",
    "clf = neighbors.KNeighborsClassifier(n_neighbors)\n",
    "# On entraîne l'algorithme sur xtrain et ytrain\n",
    "clf.fit(xtrain, ytrain)\n",
    "# On prédit sur xtest\n",
    "pred = clf.predict(xtest)\n",
    "print(\"Prédiction : \", pred)\n",
    "# Probabilités des prédictions sur xtest\n",
    "pred_proba = clf.predict_proba(xtest)\n",
    "print(\"Probabilités : \", pred_proba)\n",
    "# On calcule le score obtenu sur xtest avec les étiquettes ytest\n",
    "score = clf.score(xtest, ytest)\n",
    "print(\"Classe image 4 : \", target[3])\n",
    "print(\"Classe prédite image 4 : \", pred[3])\n",
    "print(\"Score échantillon de test : \", score)\n",
    "\n",
    "scoreApp = clf.score(xtrain, ytrain)\n",
    "print(\"Score données apprentissage : \", scoreApp)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "90db6e29",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Dataset :  [[0. 0. 0. ... 0. 0. 0.]\n",
      " [0. 0. 0. ... 0. 0. 0.]\n",
      " [0. 0. 0. ... 0. 0. 0.]\n",
      " ...\n",
      " [0. 0. 0. ... 0. 0. 0.]\n",
      " [0. 0. 0. ... 0. 0. 0.]\n",
      " [0. 0. 0. ... 0. 0. 0.]]\n",
      "Etiquettes :  ['9' '9' '8' ... '9' '4' '6']\n",
      "[0.92, 0.922, 0.93, 0.966, 0.924, 0.922, 0.922, 0.896, 0.92, 0.91, 0.916, 0.94, 0.938, 0.938, 0.926, 0.936, 0.932, 0.932, 0.934, 0.938, 0.922, 0.934, 0.96, 0.926, 0.942, 0.934, 0.908, 0.926, 0.92, 0.936, 0.932, 0.924, 0.922, 0.938, 0.938, 0.916, 0.932, 0.96, 0.942, 0.922, 0.926, 0.938, 0.936, 0.924, 0.938, 0.946, 0.922, 0.928, 0.912, 0.908, 0.916, 0.932, 0.932, 0.93, 0.92, 0.928, 0.908, 0.932, 0.918, 0.938, 0.92, 0.93, 0.938, 0.924, 0.924, 0.932, 0.916, 0.916, 0.934, 0.928, 0.924, 0.94, 0.942, 0.926, 0.924, 0.912, 0.93, 0.906, 0.894, 0.922, 0.924, 0.912, 0.906, 0.942, 0.95, 0.924, 0.926, 0.92, 0.92, 0.9, 0.918, 0.908, 0.93, 0.942, 0.916, 0.934, 0.916, 0.92, 0.91, 0.918, 0.93, 0.918, 0.916, 0.894, 0.934, 0.926, 0.934, 0.91, 0.9, 0.914, 0.928, 0.918, 0.924, 0.916, 0.908, 0.904, 0.922, 0.912, 0.92, 0.914, 0.926, 0.906, 0.902, 0.914, 0.9, 0.936, 0.906, 0.942, 0.922, 0.906]\n"
     ]
    }
   ],
   "source": [
    "from sklearn.model_selection import KFold\n",
    "\n",
    "rand_indexes = np.random.randint(70000, size=5000)\n",
    "\n",
    "data = mnist.data[rand_indexes]\n",
    "print(\"Dataset : \", data)\n",
    "target = mnist.target[rand_indexes]\n",
    "print(\"Etiquettes : \", target)\n",
    "\n",
    "# xtrain data set d'entraînement et ytrain étiquettes de xtrain\n",
    "# xtest dataset de prédiction et ytest étiquettes de xtest\n",
    "# xtrain, xtest, ytrain, ytest = model_selection.train_test_split(data, target,train_size=0.8)\n",
    "\n",
    "kf = KFold(n_splits=10, random_state=None, shuffle=True)\n",
    "scores = []\n",
    "\n",
    "for k in range(2,15):\n",
    "    \n",
    "    for train_index, test_index in kf.split(data):\n",
    "#         print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n",
    "        X_train, X_test = data[train_index], data[test_index]\n",
    "        y_train, y_test = target[train_index], target[test_index]\n",
    "        \n",
    "        clf = neighbors.KNeighborsClassifier(k)\n",
    "        # On entraîne l'algorithme sur xtrain et ytrain\n",
    "        clf.fit(X_train, y_train)\n",
    "        # On prédit sur xtest\n",
    "        pred = clf.predict(X_test)\n",
    "#         print(\"Prédiction : \", pred)\n",
    "        # Probabilités des prédictions sur xtest\n",
    "        pred_proba = clf.predict_proba(X_test)\n",
    "#         print(\"Probabilités : \", pred_proba)\n",
    "        # On calcule le score obtenu sur xtest avec les étiquettes ytest\n",
    "        score = clf.score(X_test, y_test)\n",
    "        scores += [score]\n",
    "#         print(\"Classe image 4 : \", target[3])\n",
    "#         print(\"Classe prédite image 4 : \", pred[3])\n",
    "#         print(\"Score échantillon de test : \", score)\n",
    "        scoreApp = clf.score(X_train, y_train)\n",
    "#         print(\"Score données apprentissage : \", scoreApp)\n",
    "print(scores)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "bf91b914",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2  :  0.9232000000000001\n",
      "3  :  0.933\n",
      "4  :  0.9308\n",
      "5  :  0.9326000000000001\n",
      "6  :  0.9300000000000002\n",
      "7  :  0.922888888888889\n",
      "8  :  0.9266666666666666\n",
      "9  :  0.9273333333333333\n",
      "10  :  0.9206666666666666\n",
      "11  :  0.9208888888888889\n",
      "12  :  0.9197777777777778\n",
      "13  :  0.9175555555555555\n",
      "14  :  0.9162222222222223\n",
      "15  :  0.9148888888888889\n"
     ]
    }
   ],
   "source": [
    "nice_scores = np.array_split(scores, 14)\n",
    "for i in range (0,14):\n",
    "    print (i+2, \" : \", nice_scores[i].mean())\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "cc24e898",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Dataset :  [[0. 0. 0. ... 0. 0. 0.]\n",
      " [0. 0. 0. ... 0. 0. 0.]\n",
      " [0. 0. 0. ... 0. 0. 0.]\n",
      " ...\n",
      " [0. 0. 0. ... 0. 0. 0.]\n",
      " [0. 0. 0. ... 0. 0. 0.]\n",
      " [0. 0. 0. ... 0. 0. 0.]]\n",
      "Etiquettes :  ['0' '0' '5' ... '9' '8' '6']\n",
      "Temps d'entraînement :  0.002\n",
      "Temps de prédiction :  0.338\n",
      "Temps total :  0.34\n",
      "Temps d'entraînement :  0.003\n",
      "Temps de prédiction :  0.31\n",
      "Temps total :  0.313\n",
      "Temps d'entraînement :  0.002\n",
      "Temps de prédiction :  0.328\n",
      "Temps total :  0.33\n",
      "Temps d'entraînement :  0.003\n",
      "Temps de prédiction :  0.305\n",
      "Temps total :  0.308\n",
      "Temps d'entraînement :  0.003\n",
      "Temps de prédiction :  0.254\n",
      "Temps total :  0.257\n",
      "Temps d'entraînement :  0.003\n",
      "Temps de prédiction :  0.244\n",
      "Temps total :  0.247\n",
      "Temps d'entraînement :  0.004\n",
      "Temps de prédiction :  0.203\n",
      "Temps total :  0.207\n",
      "3  :  0.9045714285714286\n",
      "4  :  0.91\n",
      "5  :  0.9168\n",
      "6  :  0.925\n",
      "7  :  0.934\n",
      "8  :  0.922\n",
      "9  :  0.952\n"
     ]
    }
   ],
   "source": [
    "from sklearn.model_selection import KFold\n",
    "import time\n",
    "\n",
    "rand_indexes = np.random.randint(70000, size=5000)\n",
    "\n",
    "data = mnist.data[rand_indexes]\n",
    "print(\"Dataset : \", data)\n",
    "target = mnist.target[rand_indexes]\n",
    "print(\"Etiquettes : \", target)\n",
    "\n",
    "# xtrain data set d'entraînement et ytrain étiquettes de xtrain\n",
    "# xtest dataset de prédiction et ytest étiquettes de xtest\n",
    "\n",
    "scores = []\n",
    "\n",
    "for j in range (3, 10):\n",
    "    xtrain, xtest, ytrain, ytest = model_selection.train_test_split(data, target,train_size=(j/10))\n",
    "    \n",
    "    t1 = round(time.time(),3)\n",
    "    clf = neighbors.KNeighborsClassifier(n_neighbors=3,p = 2, n_jobs=-1)\n",
    "    # On entraîne l'algorithme sur xtrain et ytrain\n",
    "    clf.fit(xtrain, ytrain)\n",
    "    t2 = round(time.time(),3)\n",
    "    # On prédit sur xtest\n",
    "    pred = clf.predict(xtest)\n",
    "    t3 = round(time.time(),3)\n",
    "    \n",
    "    print(\"Temps d'entraînement : \", round(t2-t1,3))\n",
    "    print(\"Temps de prédiction : \", round(t3-t2,3))\n",
    "    print(\"Temps total : \", round(t3-t1,3))\n",
    "#         print(\"Prédiction : \", pred)\n",
    "    # Probabilités des prédictions sur xtest\n",
    "    pred_proba = clf.predict_proba(xtest)\n",
    "#         print(\"Probabilités : \", pred_proba)\n",
    "    # On calcule le score obtenu sur xtest avec les étiquettes ytest\n",
    "    score = clf.score(xtest, ytest)\n",
    "    scores += [score]\n",
    "#         print(\"Classe image 4 : \", target[3])\n",
    "#         print(\"Classe prédite image 4 : \", pred[3])\n",
    "#         print(\"Score échantillon de test : \", score)\n",
    "    scoreApp = clf.score(xtrain, ytrain)\n",
    "#         print(\"Score données apprentissage : \", scoreApp)\n",
    "\n",
    "# nice_scores = np.array_split(scores, 7)\n",
    "# print(scores)\n",
    "n = 3\n",
    "for i in scores:\n",
    "    print (n, \" : \", i)\n",
    "    n += 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cbb5eda6",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}