{ "cells": [ { "cell_type": "code", "execution_count": 2, "id": "530f620c", "metadata": {}, "outputs": [], "source": [ "from sklearn.datasets import fetch_openml\n", "from sklearn import model_selection\n", "from sklearn import neighbors\n", "from sklearn.svm import SVC\n", "import sklearn\n", "import numpy as np\n", "\n", "mnist = fetch_openml('mnist_784',as_frame=False)" ] }, { "cell_type": "code", "execution_count": 3, "id": "eb2c4496", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Dataset : [[0. 0. 0. ... 0. 0. 0.]\n", " [0. 0. 0. ... 0. 0. 0.]\n", " [0. 0. 0. ... 0. 0. 0.]\n", " ...\n", " [0. 0. 0. ... 0. 0. 0.]\n", " [0. 0. 0. ... 0. 0. 0.]\n", " [0. 0. 0. ... 0. 0. 0.]]\n", "Etiquettes : ['1' '3' '4' ... '5' '1' '2']\n", "Prédiction : ['6' '7' '1' '4' '2' '7' '6' '6' '4' '9' '8' '4' '0' '0' '6' '8' '5' '0'\n", " '9' '6' '5' '0' '7' '7' '0' '7' '6' '1' '0' '1' '6' '6' '5' '8' '5' '6'\n", " '6' '5' '0' '7' '7' '5' '2' '7' '3' '2' '2' '6' '0' '0' '5' '8' '2' '4'\n", " '1' '0' '9' '6' '3' '7' '6' '3' '9' '4' '0' '0' '8' '8' '0' '6' '7' '1'\n", " '8' '3' '1' '6' '9' '1' '8' '0' '2' '0' '4' '5' '9' '3' '4' '3' '6' '3'\n", " '2' '3' '8' '0' '8' '6' '1' '7' '3' '8' '4' '2' '0' '7' '9' '4' '0' '2'\n", " '2' '0' '2' '2' '3' '0' '0' '0' '6' '8' '2' '4' '3' '7' '2' '6' '8' '4'\n", " '3' '8' '8' '0' '4' '6' '1' '0' '4' '6' '6' '0' '0' '6' '1' '6' '5' '5'\n", " '1' '5' '8' '2' '6' '4' '7' '5' '3' '2' '5' '8' '5' '2' '2' '3' '0' '3'\n", " '6' '1' '4' '8' '1' '7' '7' '5' '9' '1' '3' '5' '0' '7' '8' '6' '5' '0'\n", " '6' '6' '8' '5' '9' '5' '3' '9' '7' '4' '9' '0' '1' '5' '3' '3' '6' '1'\n", " '1' '1' '8' '7' '7' '1' '7' '4' '1' '1' '3' '8' '4' '4' '3' '9' '8' '4'\n", " '0' '4' '4' '9' '6' '0' '6' '0' '3' '8' '8' '0' '9' '1' '4' '4' '2' '1'\n", " '5' '7' '5' '0' '7' '6' '0' '4' '5' '7' '5' '9' '4' '3' '4' '4' '0' '5'\n", " '0' '0' '1' '9' '1' '7' '3' '4' '6' '0' '5' '9' '6' '1' '1' '5' '6' '5'\n", " '2' '9' '4' '3' '4' '1' '0' '0' '4' '2' '1' '7' '1' '4' '1' '3' '9' '2'\n", " '0' '8' '7' '7' '4' '4' '7' '1' '8' '7' '1' '4' '6' '9' '2' '7' '1' '4'\n", " '5' '1' '1' '4' '2' '7' '3' '8' '5' '8' '3' '3' '4' '7' '2' '1' '4' '9'\n", " '9' '4' '7' '9' '3' '4' '9' '7' '1' '0' '7' '7' '3' '8' '4' '6' '1' '3'\n", " '5' '5' '4' '9' '6' '0' '1' '1' '0' '0' '0' '3' '2' '7' '9' '8' '0' '3'\n", " '6' '1' '9' '4' '0' '1' '0' '0' '1' '6' '9' '6' '3' '8' '2' '5' '9' '5'\n", " '1' '3' '7' '0' '9' '3' '2' '6' '8' '5' '1' '5' '4' '1' '4' '1' '1' '3'\n", " '1' '5' '7' '2' '3' '2' '6' '1' '2' '6' '3' '8' '7' '3' '3' '9' '8' '0'\n", " '4' '3' '7' '7' '9' '3' '9' '8' '7' '8' '0' '4' '8' '8' '0' '4' '1' '5'\n", " '1' '2' '1' '3' '5' '4' '9' '8' '1' '3' '1' '5' '8' '4' '8' '2' '9' '8'\n", " '2' '3' '6' '3' '5' '2' '4' '0' '1' '0' '1' '8' '9' '9' '6' '2' '4' '1'\n", " '5' '6' '7' '7' '1' '5' '0' '2' '6' '5' '0' '3' '2' '8' '8' '9' '7' '9'\n", " '4' '4' '1' '9' '7' '8' '2' '1' '9' '6' '2' '4' '8' '7' '8' '9' '9' '4'\n", " '6' '9' '9' '5' '6' '9' '9' '8' '5' '5' '6' '4' '6' '8' '8' '7' '6' '0'\n", " '0' '9' '2' '3' '7' '7' '1' '5' '9' '1' '9' '9' '1' '4' '1' '9' '6' '9'\n", " '0' '9' '4' '6' '1' '0' '7' '0' '8' '9' '7' '3' '8' '2' '3' '0' '2' '8'\n", " '3' '1' '7' '0' '2' '1' '0' '4' '2' '0' '8' '1' '5' '2' '4' '5' '0' '9'\n", " '8' '1' '3' '9' '8' '7' '2' '4' '6' '2' '3' '9' '1' '8' '2' '1' '9' '0'\n", " '2' '4' '0' '9' '1' '4' '1' '3' '2' '4' '9' '5' '0' '2' '2' '1' '1' '7'\n", " '6' '8' '4' '9' '7' '7' '9' '4' '2' '3' '8' '1' '3' '5' '7' '9' '2' '0'\n", " '4' '8' '1' '6' '1' '7' '9' '6' '3' '6' '0' '0' '4' '7' '1' '1' '1' '4'\n", " '5' '6' '6' '1' '7' '6' '1' '7' '6' '1' '1' '2' '0' '8' '6' '1' '4' '3'\n", " '3' '6' '8' '7' '1' '1' '1' '4' '3' '3' '2' '6' '3' '3' '8' '8' '3' '1'\n", " '8' '6' '6' '8' '8' '9' '6' '7' '6' '7' '8' '9' '1' '8' '3' '9' '5' '0'\n", " '6' '6' '9' '3' '1' '2' '5' '5' '0' '9' '5' '9' '0' '0' '6' '1' '8' '5'\n", " '0' '2' '2' '8' '3' '9' '7' '2' '7' '6' '2' '8' '6' '8' '8' '0' '2' '0'\n", " '6' '2' '7' '7' '3' '7' '2' '7' '1' '7' '9' '3' '4' '7' '7' '9' '9' '2'\n", " '5' '8' '3' '7' '7' '2' '1' '7' '1' '1' '9' '9' '3' '0' '9' '4' '9' '0'\n", " '7' '6' '7' '7' '7' '7' '9' '7' '8' '1' '1' '6' '2' '6' '3' '8' '2' '8'\n", " '1' '5' '7' '0' '8' '3' '2' '7' '5' '1' '5' '3' '5' '2' '1' '7' '6' '0'\n", " '2' '6' '3' '2' '6' '0' '6' '2' '3' '9' '8' '6' '4' '9' '1' '3' '0' '4'\n", " '2' '3' '8' '1' '9' '0' '3' '5' '4' '5' '3' '2' '5' '0' '1' '1' '8' '3'\n", " '5' '6' '2' '1' '9' '3' '0' '4' '5' '9' '7' '2' '2' '1' '2' '1' '1' '5'\n", " '0' '9' '3' '7' '1' '9' '6' '5' '1' '6' '0' '1' '1' '6' '5' '8' '2' '2'\n", " '1' '8' '9' '7' '6' '8' '4' '5' '2' '3' '0' '7' '6' '0' '6' '6' '6' '0'\n", " '8' '8' '3' '4' '0' '9' '7' '5' '1' '1' '1' '4' '6' '7' '9' '6' '3' '9'\n", " '3' '9' '1' '9' '6' '4' '5' '4' '7' '0' '1' '9' '4' '8' '4' '6' '1' '8'\n", " '5' '6' '5' '1' '2' '7' '9' '5' '8' '0' '8' '8' '3' '2' '9' '4' '4' '8'\n", " '3' '0' '6' '5' '9' '7' '0' '0' '9' '7' '0' '3' '2' '1' '0' '5' '6' '4'\n", " '0' '4' '6' '9' '3' '0' '4' '1' '5' '6' '3' '6' '9' '1' '5' '6' '3' '0'\n", " '1' '6' '1' '0' '6' '2' '1' '7' '1' '9']\n", "Probabilités : [[0. 0. 0. ... 0. 0. 0. ]\n", " [0. 0. 0. ... 1. 0. 0. ]\n", " [0. 1. 0. ... 0. 0. 0. ]\n", " ...\n", " [0. 0. 0. ... 1. 0. 0. ]\n", " [0. 0.4 0. ... 0.1 0. 0.3]\n", " [0. 0. 0. ... 0.1 0. 0.9]]\n", "Classe image 4 : 9\n", "Classe prédite image 4 : 4\n", "Score échantillon de test : 0.912\n", "Score données apprentissage : 0.94325\n" ] } ], "source": [ "rand_indexes = np.random.randint(70000, size=5000)\n", "\n", "data = mnist.data[rand_indexes]\n", "print(\"Dataset : \", data)\n", "target = mnist.target[rand_indexes]\n", "print(\"Etiquettes : \", target)\n", "\n", "# xtrain data set d'entraînement et ytrain étiquettes de xtrain\n", "# xtest dataset de prédiction et ytest étiquettes de xtest\n", "xtrain, xtest, ytrain, ytest = model_selection.train_test_split(data, target,train_size=0.7)\n", "\n", "n_neighbors = 10\n", "clf = svm.SVC(kernel=\"linear\")\n", "# On entraîne l'algorithme sur xtrain et ytrain\n", "clf.fit(xtrain, ytrain)\n", "# On prédit sur xtest\n", "pred = clf.predict(xtest)\n", "print(\"Prédiction : \", pred)\n", "# Probabilités des prédictions sur xtest\n", "pred_proba = clf.predict_proba(xtest)\n", "print(\"Probabilités : \", pred_proba)\n", "# On calcule le score obtenu sur xtest avec les étiquettes ytest\n", "score = clf.score(xtest, ytest)\n", "print(\"Classe image 4 : \", target[3])\n", "print(\"Classe prédite image 4 : \", pred[3])\n", "print(\"Score échantillon de test : \", score)\n", "\n", "scoreApp = clf.score(xtrain, ytrain)\n", "print(\"Score données apprentissage : \", scoreApp)" ] }, { "cell_type": "code", "execution_count": 4, "id": "90db6e29", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Dataset : [[0. 0. 0. ... 0. 0. 0.]\n", " [0. 0. 0. ... 0. 0. 0.]\n", " [0. 0. 0. ... 0. 0. 0.]\n", " ...\n", " [0. 0. 0. ... 0. 0. 0.]\n", " [0. 0. 0. ... 0. 0. 0.]\n", " [0. 0. 0. ... 0. 0. 0.]]\n", "Etiquettes : ['9' '9' '8' ... '9' '4' '6']\n", "[0.92, 0.922, 0.93, 0.966, 0.924, 0.922, 0.922, 0.896, 0.92, 0.91, 0.916, 0.94, 0.938, 0.938, 0.926, 0.936, 0.932, 0.932, 0.934, 0.938, 0.922, 0.934, 0.96, 0.926, 0.942, 0.934, 0.908, 0.926, 0.92, 0.936, 0.932, 0.924, 0.922, 0.938, 0.938, 0.916, 0.932, 0.96, 0.942, 0.922, 0.926, 0.938, 0.936, 0.924, 0.938, 0.946, 0.922, 0.928, 0.912, 0.908, 0.916, 0.932, 0.932, 0.93, 0.92, 0.928, 0.908, 0.932, 0.918, 0.938, 0.92, 0.93, 0.938, 0.924, 0.924, 0.932, 0.916, 0.916, 0.934, 0.928, 0.924, 0.94, 0.942, 0.926, 0.924, 0.912, 0.93, 0.906, 0.894, 0.922, 0.924, 0.912, 0.906, 0.942, 0.95, 0.924, 0.926, 0.92, 0.92, 0.9, 0.918, 0.908, 0.93, 0.942, 0.916, 0.934, 0.916, 0.92, 0.91, 0.918, 0.93, 0.918, 0.916, 0.894, 0.934, 0.926, 0.934, 0.91, 0.9, 0.914, 0.928, 0.918, 0.924, 0.916, 0.908, 0.904, 0.922, 0.912, 0.92, 0.914, 0.926, 0.906, 0.902, 0.914, 0.9, 0.936, 0.906, 0.942, 0.922, 0.906]\n" ] } ], "source": [ "for k in range(2,15):\n", " \n", " for train_index, test_index in kf.split(data):\n", "# print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n", " X_train, X_test = data[train_index], data[test_index]\n", " y_train, y_test = target[train_index], target[test_index]\n", " \n", " clf = neighbors.KNeighborsClassifier(k)\n", " # On entraîne l'algorithme sur xtrain et ytrain\n", " clf.fit(X_train, y_train)\n", " # On prédit sur xtest\n", " pred = clf.predict(X_test)\n", "# print(\"Prédiction : \", pred)\n", " # Probabilités des prédictions sur xtest\n", " pred_proba = clf.predict_proba(X_test)\n", "# print(\"Probabilités : \", pred_proba)\n", " # On calcule le score obtenu sur xtest avec les étiquettes ytest\n", " score = clf.score(X_test, y_test)\n", " scores += [score]\n", "# print(\"Classe image 4 : \", target[3])\n", "# print(\"Classe prédite image 4 : \", pred[3])\n", "# print(\"Score échantillon de test : \", score)\n", " scoreApp = clf.score(X_train, y_train)\n", "# print(\"Score données apprentissage : \", scoreApp)\n", "print(scores)" ] }, { "cell_type": "code", "execution_count": 5, "id": "bf91b914", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2 : 0.9232000000000001\n", "3 : 0.933\n", "4 : 0.9308\n", "5 : 0.9326000000000001\n", "6 : 0.9300000000000002\n", "7 : 0.922888888888889\n", "8 : 0.9266666666666666\n", "9 : 0.9273333333333333\n", "10 : 0.9206666666666666\n", "11 : 0.9208888888888889\n", "12 : 0.9197777777777778\n", "13 : 0.9175555555555555\n", "14 : 0.9162222222222223\n", "15 : 0.9148888888888889\n" ] } ], "source": [ "nice_scores = np.array_split(scores, 14)\n", "for i in range (0,14):\n", " print (i+2, \" : \", nice_scores[i].mean())\n" ] }, { "cell_type": "code", "execution_count": 11, "id": "cc24e898", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Dataset : [[0. 0. 0. ... 0. 0. 0.]\n", " [0. 0. 0. ... 0. 0. 0.]\n", " [0. 0. 0. ... 0. 0. 0.]\n", " ...\n", " [0. 0. 0. ... 0. 0. 0.]\n", " [0. 0. 0. ... 0. 0. 0.]\n", " [0. 0. 0. ... 0. 0. 0.]]\n", "Etiquettes : ['0' '0' '5' ... '9' '8' '6']\n", "Temps d'entraînement : 0.002\n", "Temps de prédiction : 0.338\n", "Temps total : 0.34\n", "Temps d'entraînement : 0.003\n", "Temps de prédiction : 0.31\n", "Temps total : 0.313\n", "Temps d'entraînement : 0.002\n", "Temps de prédiction : 0.328\n", "Temps total : 0.33\n", "Temps d'entraînement : 0.003\n", "Temps de prédiction : 0.305\n", "Temps total : 0.308\n", "Temps d'entraînement : 0.003\n", "Temps de prédiction : 0.254\n", "Temps total : 0.257\n", "Temps d'entraînement : 0.003\n", "Temps de prédiction : 0.244\n", "Temps total : 0.247\n", "Temps d'entraînement : 0.004\n", "Temps de prédiction : 0.203\n", "Temps total : 0.207\n", "3 : 0.9045714285714286\n", "4 : 0.91\n", "5 : 0.9168\n", "6 : 0.925\n", "7 : 0.934\n", "8 : 0.922\n", "9 : 0.952\n" ] } ], "source": [ "from sklearn.model_selection import KFold\n", "import time\n", "\n", "rand_indexes = np.random.randint(70000, size=5000)\n", "\n", "data = mnist.data[rand_indexes]\n", "print(\"Dataset : \", data)\n", "target = mnist.target[rand_indexes]\n", "print(\"Etiquettes : \", target)\n", "\n", "# xtrain data set d'entraînement et ytrain étiquettes de xtrain\n", "# xtest dataset de prédiction et ytest étiquettes de xtest\n", "\n", "scores = []\n", "\n", "for j in range (3, 10):\n", " xtrain, xtest, ytrain, ytest = model_selection.train_test_split(data, target,train_size=(j/10))\n", " \n", " t1 = round(time.time(),3)\n", " clf = neighbors.KNeighborsClassifier(n_neighbors=3,p = 2, n_jobs=-1)\n", " # On entraîne l'algorithme sur xtrain et ytrain\n", " clf.fit(xtrain, ytrain)\n", " t2 = round(time.time(),3)\n", " # On prédit sur xtest\n", " pred = clf.predict(xtest)\n", " t3 = round(time.time(),3)\n", " \n", " print(\"Temps d'entraînement : \", round(t2-t1,3))\n", " print(\"Temps de prédiction : \", round(t3-t2,3))\n", " print(\"Temps total : \", round(t3-t1,3))\n", "# print(\"Prédiction : \", pred)\n", " # Probabilités des prédictions sur xtest\n", " pred_proba = clf.predict_proba(xtest)\n", "# print(\"Probabilités : \", pred_proba)\n", " # On calcule le score obtenu sur xtest avec les étiquettes ytest\n", " score = clf.score(xtest, ytest)\n", " scores += [score]\n", "# print(\"Classe image 4 : \", target[3])\n", "# print(\"Classe prédite image 4 : \", pred[3])\n", "# print(\"Score échantillon de test : \", score)\n", " scoreApp = clf.score(xtrain, ytrain)\n", "# print(\"Score données apprentissage : \", scoreApp)\n", "\n", "# nice_scores = np.array_split(scores, 7)\n", "# print(scores)\n", "n = 3\n", "for i in scores:\n", " print (n, \" : \", i)\n", " n += 1" ] }, { "cell_type": "code", "execution_count": null, "id": "cbb5eda6", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.10" } }, "nbformat": 4, "nbformat_minor": 5 }