This commit is contained in:
Titouan Labourdette 2021-11-03 12:20:12 +01:00
parent 6057e61422
commit 5bf4068e41
6 changed files with 1070 additions and 0 deletions

View file

@ -0,0 +1,6 @@
{
"cells": [],
"metadata": {},
"nbformat": 4,
"nbformat_minor": 5
}

View file

@ -0,0 +1,335 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 14,
"id": "530f620c",
"metadata": {},
"outputs": [],
"source": [
"# Imports: scikit-learn (data fetching, model selection, k-NN) and numpy.\n",
"import sklearn\n",
"import numpy as np\n",
"\n",
"from sklearn.datasets import fetch_openml\n",
"from sklearn import model_selection\n",
"from sklearn import neighbors\n",
"\n",
"# Fetch the MNIST digits dataset (70000 flattened 28x28 images) as numpy arrays.\n",
"mnist = fetch_openml('mnist_784',as_frame=False)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "eb2c4496",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset : [[0. 0. 0. ... 0. 0. 0.]\n",
" [0. 0. 0. ... 0. 0. 0.]\n",
" [0. 0. 0. ... 0. 0. 0.]\n",
" ...\n",
" [0. 0. 0. ... 0. 0. 0.]\n",
" [0. 0. 0. ... 0. 0. 0.]\n",
" [0. 0. 0. ... 0. 0. 0.]]\n",
"Etiquettes : ['3' '6' '3' ... '0' '1' '2']\n",
"Prédiction : ['4' '1' '6' '7' '4' '0' '4' '5' '6' '3' '3' '0' '0' '4' '7' '0' '8' '9'\n",
" '6' '0' '8' '8' '1' '7' '7' '9' '7' '5' '9' '7' '0' '8' '5' '8' '7' '0'\n",
" '0' '1' '3' '3' '3' '4' '1' '5' '8' '6' '9' '5' '4' '1' '3' '0' '3' '4'\n",
" '3' '6' '2' '5' '2' '4' '2' '8' '6' '1' '6' '0' '2' '9' '2' '7' '3' '4'\n",
" '2' '6' '7' '9' '0' '0' '0' '2' '7' '6' '4' '8' '4' '1' '9' '2' '3' '9'\n",
" '5' '1' '1' '8' '9' '8' '6' '4' '3' '1' '6' '6' '2' '1' '8' '7' '5' '2'\n",
" '7' '0' '6' '6' '7' '2' '4' '3' '2' '3' '0' '4' '7' '9' '0' '0' '7' '9'\n",
" '7' '7' '2' '6' '4' '6' '0' '6' '8' '3' '1' '4' '6' '7' '3' '1' '9' '2'\n",
" '1' '2' '3' '0' '4' '5' '5' '5' '3' '3' '9' '9' '1' '0' '5' '1' '2' '3'\n",
" '1' '6' '1' '7' '2' '4' '8' '4' '1' '6' '2' '9' '4' '4' '2' '1' '8' '8'\n",
" '6' '2' '5' '8' '6' '7' '6' '9' '3' '3' '9' '4' '5' '0' '5' '4' '0' '0'\n",
" '4' '6' '3' '3' '6' '9' '3' '5' '8' '2' '9' '2' '5' '1' '9' '1' '9' '6'\n",
" '3' '5' '4' '9' '6' '7' '1' '0' '1' '0' '9' '6' '8' '4' '9' '2' '2' '5'\n",
" '0' '7' '3' '1' '6' '1' '3' '1' '9' '7' '4' '6' '8' '0' '3' '8' '5' '1'\n",
" '7' '7' '3' '1' '1' '4' '0' '8' '1' '6' '4' '2' '5' '1' '3' '1' '3' '0'\n",
" '9' '7' '2' '6' '2' '5' '4' '7' '0' '4' '9' '8' '2' '1' '4' '9' '1' '0'\n",
" '7' '3' '7' '1' '4' '8' '2' '0' '6' '7' '6' '6' '3' '5' '2' '4' '1' '0'\n",
" '9' '9' '9' '7' '2' '4' '0' '7' '7' '4' '8' '6' '8' '7' '7' '9' '6' '4'\n",
" '6' '4' '2' '4' '4' '1' '5' '1' '4' '4' '9' '8' '7' '4' '3' '0' '1' '2'\n",
" '7' '9' '4' '7' '7' '1' '7' '5' '7' '6' '4' '1' '9' '6' '2' '2' '1' '3'\n",
" '7' '0' '6' '3' '9' '6' '0' '7' '3' '9' '4' '5' '0' '5' '4' '1' '7' '0'\n",
" '7' '7' '5' '1' '9' '3' '2' '3' '7' '2' '1' '0' '5' '8' '5' '5' '3' '7'\n",
" '7' '4' '5' '2' '9' '9' '2' '2' '3' '5' '1' '8' '6' '2' '3' '7' '9' '8'\n",
" '4' '3' '9' '4' '0' '4' '1' '8' '9' '0' '3' '2' '5' '1' '7' '0' '7' '3'\n",
" '5' '6' '8' '5' '6' '1' '6' '9' '4' '1' '7' '1' '8' '1' '3' '7' '8' '6'\n",
" '1' '1' '7' '0' '5' '2' '7' '4' '4' '8' '4' '2' '7' '2' '9' '2' '8' '7'\n",
" '7' '9' '7' '6' '4' '0' '1' '6' '8' '4' '4' '6' '9' '6' '3' '6' '4' '9'\n",
" '8' '5' '2' '2' '7' '0' '7' '9' '7' '2' '7' '0' '5' '4' '8' '6' '6' '3'\n",
" '1' '5' '1' '5' '9' '7' '3' '4' '6' '5' '1' '9' '6' '8' '4' '5' '5' '2'\n",
" '1' '3' '4' '3' '6' '1' '6' '9' '0' '2' '1' '5' '8' '6' '7' '0' '1' '3'\n",
" '7' '6' '5' '6' '4' '0' '8' '1' '9' '0' '4' '2' '1' '5' '2' '0' '6' '6'\n",
" '4' '8' '0' '2' '7' '5' '9' '3' '3' '6' '3' '3' '0' '2' '8' '6' '6' '5'\n",
" '5' '0' '1' '2' '6' '6' '7' '1' '1' '9' '3' '8' '8' '2' '4' '7' '5' '5'\n",
" '7' '5' '2' '1' '1' '1' '6' '0' '4' '8' '4' '1' '8' '3' '4' '4' '9' '3'\n",
" '7' '6' '3' '5' '7' '5' '4' '7' '1' '7' '5' '4' '7' '9' '4' '8' '6' '9'\n",
" '1' '2' '8' '5' '4' '8' '3' '1' '5' '7' '3' '2' '1' '4' '4' '1' '2' '1'\n",
" '2' '1' '7' '3' '2' '1' '0' '7' '6' '7' '2' '5' '2' '5' '7' '3' '7' '2'\n",
" '9' '1' '4' '3' '3' '7' '6' '8' '5' '1' '2' '8' '2' '0' '3' '1' '8' '4'\n",
" '5' '4' '9' '1' '7' '2' '4' '9' '4' '9' '2' '8' '5' '8' '9' '7' '4' '7'\n",
" '9' '4' '9' '5' '2' '7' '4' '5' '5' '1' '5' '0' '9' '5' '2' '6' '9' '7'\n",
" '3' '8' '1' '9' '6' '6' '5' '3' '1' '2' '8' '0' '5' '9' '3' '3' '5' '3'\n",
" '5' '1' '6' '3' '0' '1' '3' '0' '7' '6' '2' '7' '9' '9' '7' '4' '6' '4'\n",
" '7' '3' '1' '9' '9' '7' '2' '9' '4' '5' '0' '1' '4' '1' '7' '6' '0' '7'\n",
" '5' '2' '6' '4' '8' '5' '3' '7' '9' '4' '3' '1' '9' '2' '2' '8' '5' '7'\n",
" '1' '9' '4' '3' '2' '4' '2' '6' '9' '1' '1' '0' '7' '7' '3' '7' '8' '9'\n",
" '6' '6' '9' '3' '7' '7' '6' '6' '3' '7' '3' '3' '6' '0' '3' '1' '0' '0'\n",
" '8' '1' '3' '5' '7' '7' '9' '3' '9' '3' '1' '7' '2' '3' '6' '7' '0' '4'\n",
" '9' '3' '3' '1' '8' '9' '0' '3' '9' '1' '7' '1' '8' '4' '7' '8' '4' '1'\n",
" '5' '4' '7' '1' '1' '8' '3' '7' '8' '3' '1' '7' '4' '3' '1' '2' '7' '5'\n",
" '7' '9' '5' '9' '5' '4' '7' '4' '0' '4' '2' '4' '2' '1' '7' '9' '3' '0'\n",
" '1' '7' '8' '0' '2' '8' '7' '1' '8' '4' '1' '6' '9' '9' '9' '3' '7' '1'\n",
" '2' '4' '5' '9' '7' '2' '1' '6' '7' '4' '5' '9' '7' '7' '9' '8' '5' '2'\n",
" '5' '4' '0' '1' '9' '8' '2' '2' '9' '7' '3' '5' '2' '1' '4' '6' '7' '3'\n",
" '1' '1' '1' '8' '6' '0' '8' '0' '1' '6' '6' '7' '1' '4' '8' '0' '8' '6'\n",
" '2' '6' '2' '8' '7' '9' '1' '9' '2' '1' '9' '2' '5' '4' '5' '5' '1' '0'\n",
" '6' '3' '8' '5' '0' '2' '6' '8' '7' '2']\n",
"Probabilités : [[0. 0.4 0. ... 0. 0. 0.1]\n",
" [0. 1. 0. ... 0. 0. 0. ]\n",
" [0. 0. 0. ... 0. 0. 0. ]\n",
" ...\n",
" [0. 0. 0. ... 0.1 0.5 0.3]\n",
" [0. 0. 0. ... 1. 0. 0. ]\n",
" [0. 0. 1. ... 0. 0. 0. ]]\n",
"Classe image 4 : 9\n",
"Classe prédite image 4 : 7\n",
"Score échantillon de test : 0.922\n",
"Score données apprentissage : 0.9395\n"
]
}
],
"source": [
"# Draw a random subsample of 5000 of the 70000 MNIST images.\n",
"rand_indexes = np.random.randint(70000, size=5000)\n",
"\n",
"data = mnist.data[rand_indexes]\n",
"print(\"Dataset : \", data)\n",
"target = mnist.target[rand_indexes]\n",
"print(\"Etiquettes : \", target)\n",
"\n",
"# Split the subsample: 80% training (xtrain/ytrain), 20% held-out test (xtest/ytest).\n",
"xtrain, xtest, ytrain, ytest = model_selection.train_test_split(data, target, train_size=0.8)\n",
"\n",
"n_neighbors = 10\n",
"clf = neighbors.KNeighborsClassifier(n_neighbors=n_neighbors)\n",
"# Fit the k-NN classifier on the training split.\n",
"clf.fit(xtrain, ytrain)\n",
"# Predicted labels for the test split.\n",
"pred = clf.predict(xtest)\n",
"print(\"Prédiction : \", pred)\n",
"# Per-class probability estimates for the test split.\n",
"pred_proba = clf.predict_proba(xtest)\n",
"print(\"Probabilités : \", pred_proba)\n",
"# Accuracy on the held-out test split.\n",
"score = clf.score(xtest, ytest)\n",
"print(\"Classe image 4 : \", target[3])\n",
"print(\"Classe prédite image 4 : \", pred[3])\n",
"print(\"Score échantillon de test : \", score)\n",
"\n",
"# Accuracy on the training split, to gauge overfitting.\n",
"scoreApp = clf.score(xtrain, ytrain)\n",
"print(\"Score données apprentissage : \", scoreApp)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "90db6e29",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset : [[0. 0. 0. ... 0. 0. 0.]\n",
" [0. 0. 0. ... 0. 0. 0.]\n",
" [0. 0. 0. ... 0. 0. 0.]\n",
" ...\n",
" [0. 0. 0. ... 0. 0. 0.]\n",
" [0. 0. 0. ... 0. 0. 0.]\n",
" [0. 0. 0. ... 0. 0. 0.]]\n",
"Etiquettes : ['8' '3' '7' ... '5' '9' '1']\n",
"[0.924, 0.91, 0.938, 0.932, 0.932, 0.928, 0.918, 0.93, 0.94, 0.914, 0.938, 0.946, 0.94, 0.952, 0.95, 0.932, 0.934, 0.938, 0.92, 0.916, 0.92, 0.942, 0.944, 0.926, 0.918, 0.928, 0.95, 0.932, 0.936, 0.94, 0.94, 0.954, 0.91, 0.942, 0.928, 0.928, 0.93, 0.91, 0.944, 0.956, 0.928, 0.94, 0.938, 0.942, 0.914, 0.922, 0.916, 0.93, 0.938, 0.934, 0.936, 0.946, 0.922, 0.938, 0.926, 0.922, 0.92, 0.904, 0.938, 0.922, 0.92, 0.924, 0.904, 0.934, 0.924, 0.952, 0.928, 0.936, 0.934, 0.922, 0.926, 0.922, 0.926, 0.922, 0.92, 0.934, 0.922, 0.912, 0.95, 0.918, 0.946, 0.92, 0.928, 0.914, 0.928, 0.924, 0.91, 0.92, 0.934, 0.936, 0.898, 0.914, 0.92, 0.928, 0.92, 0.92, 0.93, 0.944, 0.924, 0.934, 0.922, 0.926, 0.93, 0.924, 0.922, 0.898, 0.924, 0.916, 0.942, 0.898, 0.93, 0.908, 0.928, 0.91, 0.93, 0.95, 0.938, 0.89, 0.932, 0.898, 0.924, 0.902, 0.894, 0.912, 0.922, 0.932, 0.932, 0.924, 0.924, 0.932]\n"
]
}
],
"source": [
"from sklearn.model_selection import KFold\n",
"\n",
"# Fresh random subsample of 5000 MNIST images.\n",
"rand_indexes = np.random.randint(70000, size=5000)\n",
"\n",
"data = mnist.data[rand_indexes]\n",
"print(\"Dataset : \", data)\n",
"target = mnist.target[rand_indexes]\n",
"print(\"Etiquettes : \", target)\n",
"\n",
"# 10-fold cross-validation; folds are reshuffled on every split() call.\n",
"kf = KFold(n_splits=10, random_state=None, shuffle=True)\n",
"scores = []\n",
"\n",
"# Evaluate k-NN for k = 2..14: 10 fold scores are appended per k,\n",
"# so `scores` ends up holding 13 consecutive groups of 10 values.\n",
"for k in range(2,15):\n",
"    for train_index, test_index in kf.split(data):\n",
"        X_train, X_test = data[train_index], data[test_index]\n",
"        y_train, y_test = target[train_index], target[test_index]\n",
"\n",
"        clf = neighbors.KNeighborsClassifier(k)\n",
"        # Fit on the nine training folds.\n",
"        clf.fit(X_train, y_train)\n",
"        # Predictions and probability estimates on the held-out fold.\n",
"        pred = clf.predict(X_test)\n",
"        pred_proba = clf.predict_proba(X_test)\n",
"        # Held-out-fold accuracy for this (k, fold) pair.\n",
"        score = clf.score(X_test, y_test)\n",
"        scores.append(score)\n",
"        # Training-fold accuracy (computed but not used below).\n",
"        scoreApp = clf.score(X_train, y_train)\n",
"print(scores)"
]
},
{
"cell_type": "code",
"execution_count": 41,
"id": "bf91b914",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2 : 0.9266\n",
"3 : 0.9366\n",
"4 : 0.9336\n",
"5 : 0.9341999999999999\n",
"6 : 0.9297777777777778\n",
"7 : 0.9275555555555557\n",
"8 : 0.9273333333333333\n",
"9 : 0.926888888888889\n",
"10 : 0.9264444444444445\n",
"11 : 0.9204444444444445\n",
"12 : 0.9277777777777779\n",
"13 : 0.918\n",
"14 : 0.922222222222222\n",
"15 : 0.9193333333333334\n"
]
}
],
"source": [
"# `scores` holds 10 fold scores for each k in 2..14 -> 13 groups, not 14.\n",
"# Splitting into 14 misaligned the groups with k and printed labels up to 15;\n",
"# split into 13 so each mean corresponds to exactly one k.\n",
"nice_scores = np.array_split(scores, 13)\n",
"for i in range (0,13):\n",
"    print (i+2, \" : \", nice_scores[i].mean())\n"
]
},
{
"cell_type": "code",
"execution_count": 68,
"id": "cc24e898",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset : [[0. 0. 0. ... 0. 0. 0.]\n",
" [0. 0. 0. ... 0. 0. 0.]\n",
" [0. 0. 0. ... 0. 0. 0.]\n",
" ...\n",
" [0. 0. 0. ... 0. 0. 0.]\n",
" [0. 0. 0. ... 0. 0. 0.]\n",
" [0. 0. 0. ... 0. 0. 0.]]\n",
"Etiquettes : ['3' '0' '3' ... '4' '0' '6']\n",
"3 : 0.8908571428571429\n",
"4 : 0.893\n",
"5 : 0.92\n",
"6 : 0.9105\n",
"7 : 0.9326666666666666\n",
"8 : 0.926\n",
"9 : 0.946\n"
]
}
],
"source": [
"from sklearn.model_selection import KFold\n",
"\n",
"# Fresh random subsample of 5000 MNIST images.\n",
"rand_indexes = np.random.randint(70000, size=5000)\n",
"\n",
"data = mnist.data[rand_indexes]\n",
"print(\"Dataset : \", data)\n",
"target = mnist.target[rand_indexes]\n",
"print(\"Etiquettes : \", target)\n",
"\n",
"# Effect of the train/test split ratio: train_size = 0.3 .. 0.9.\n",
"scores = []\n",
"\n",
"for j in range (3, 10):\n",
"    xtrain, xtest, ytrain, ytest = model_selection.train_test_split(data, target, train_size=(j/10))\n",
"\n",
"    # k-NN with k=3, Euclidean distance (p=2), using all CPU cores.\n",
"    clf = neighbors.KNeighborsClassifier(n_neighbors=3, p=2, n_jobs=-1)\n",
"    clf.fit(xtrain, ytrain)\n",
"    # Predictions and probability estimates on the test split.\n",
"    pred = clf.predict(xtest)\n",
"    pred_proba = clf.predict_proba(xtest)\n",
"    # Test accuracy for this split ratio.\n",
"    score = clf.score(xtest, ytest)\n",
"    scores.append(score)\n",
"    # Training accuracy (computed but not used below).\n",
"    scoreApp = clf.score(xtrain, ytrain)\n",
"\n",
"# Print one test score per ratio, labelled 3..9 (tenths of training fraction).\n",
"n = 3\n",
"for i in scores:\n",
"    print (n, \" : \", i)\n",
"    n += 1"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

File diff suppressed because one or more lines are too long

62
TP1_prog1.py.ipynb Normal file

File diff suppressed because one or more lines are too long

335
TP1_prog2.py.ipynb Normal file
View file

@ -0,0 +1,335 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 14,
"id": "530f620c",
"metadata": {},
"outputs": [],
"source": [
"# Imports: scikit-learn (data fetching, model selection, k-NN) and numpy.\n",
"import sklearn\n",
"import numpy as np\n",
"\n",
"from sklearn.datasets import fetch_openml\n",
"from sklearn import model_selection\n",
"from sklearn import neighbors\n",
"\n",
"# Fetch the MNIST digits dataset (70000 flattened 28x28 images) as numpy arrays.\n",
"mnist = fetch_openml('mnist_784',as_frame=False)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "eb2c4496",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset : [[0. 0. 0. ... 0. 0. 0.]\n",
" [0. 0. 0. ... 0. 0. 0.]\n",
" [0. 0. 0. ... 0. 0. 0.]\n",
" ...\n",
" [0. 0. 0. ... 0. 0. 0.]\n",
" [0. 0. 0. ... 0. 0. 0.]\n",
" [0. 0. 0. ... 0. 0. 0.]]\n",
"Etiquettes : ['3' '6' '3' ... '0' '1' '2']\n",
"Prédiction : ['4' '1' '6' '7' '4' '0' '4' '5' '6' '3' '3' '0' '0' '4' '7' '0' '8' '9'\n",
" '6' '0' '8' '8' '1' '7' '7' '9' '7' '5' '9' '7' '0' '8' '5' '8' '7' '0'\n",
" '0' '1' '3' '3' '3' '4' '1' '5' '8' '6' '9' '5' '4' '1' '3' '0' '3' '4'\n",
" '3' '6' '2' '5' '2' '4' '2' '8' '6' '1' '6' '0' '2' '9' '2' '7' '3' '4'\n",
" '2' '6' '7' '9' '0' '0' '0' '2' '7' '6' '4' '8' '4' '1' '9' '2' '3' '9'\n",
" '5' '1' '1' '8' '9' '8' '6' '4' '3' '1' '6' '6' '2' '1' '8' '7' '5' '2'\n",
" '7' '0' '6' '6' '7' '2' '4' '3' '2' '3' '0' '4' '7' '9' '0' '0' '7' '9'\n",
" '7' '7' '2' '6' '4' '6' '0' '6' '8' '3' '1' '4' '6' '7' '3' '1' '9' '2'\n",
" '1' '2' '3' '0' '4' '5' '5' '5' '3' '3' '9' '9' '1' '0' '5' '1' '2' '3'\n",
" '1' '6' '1' '7' '2' '4' '8' '4' '1' '6' '2' '9' '4' '4' '2' '1' '8' '8'\n",
" '6' '2' '5' '8' '6' '7' '6' '9' '3' '3' '9' '4' '5' '0' '5' '4' '0' '0'\n",
" '4' '6' '3' '3' '6' '9' '3' '5' '8' '2' '9' '2' '5' '1' '9' '1' '9' '6'\n",
" '3' '5' '4' '9' '6' '7' '1' '0' '1' '0' '9' '6' '8' '4' '9' '2' '2' '5'\n",
" '0' '7' '3' '1' '6' '1' '3' '1' '9' '7' '4' '6' '8' '0' '3' '8' '5' '1'\n",
" '7' '7' '3' '1' '1' '4' '0' '8' '1' '6' '4' '2' '5' '1' '3' '1' '3' '0'\n",
" '9' '7' '2' '6' '2' '5' '4' '7' '0' '4' '9' '8' '2' '1' '4' '9' '1' '0'\n",
" '7' '3' '7' '1' '4' '8' '2' '0' '6' '7' '6' '6' '3' '5' '2' '4' '1' '0'\n",
" '9' '9' '9' '7' '2' '4' '0' '7' '7' '4' '8' '6' '8' '7' '7' '9' '6' '4'\n",
" '6' '4' '2' '4' '4' '1' '5' '1' '4' '4' '9' '8' '7' '4' '3' '0' '1' '2'\n",
" '7' '9' '4' '7' '7' '1' '7' '5' '7' '6' '4' '1' '9' '6' '2' '2' '1' '3'\n",
" '7' '0' '6' '3' '9' '6' '0' '7' '3' '9' '4' '5' '0' '5' '4' '1' '7' '0'\n",
" '7' '7' '5' '1' '9' '3' '2' '3' '7' '2' '1' '0' '5' '8' '5' '5' '3' '7'\n",
" '7' '4' '5' '2' '9' '9' '2' '2' '3' '5' '1' '8' '6' '2' '3' '7' '9' '8'\n",
" '4' '3' '9' '4' '0' '4' '1' '8' '9' '0' '3' '2' '5' '1' '7' '0' '7' '3'\n",
" '5' '6' '8' '5' '6' '1' '6' '9' '4' '1' '7' '1' '8' '1' '3' '7' '8' '6'\n",
" '1' '1' '7' '0' '5' '2' '7' '4' '4' '8' '4' '2' '7' '2' '9' '2' '8' '7'\n",
" '7' '9' '7' '6' '4' '0' '1' '6' '8' '4' '4' '6' '9' '6' '3' '6' '4' '9'\n",
" '8' '5' '2' '2' '7' '0' '7' '9' '7' '2' '7' '0' '5' '4' '8' '6' '6' '3'\n",
" '1' '5' '1' '5' '9' '7' '3' '4' '6' '5' '1' '9' '6' '8' '4' '5' '5' '2'\n",
" '1' '3' '4' '3' '6' '1' '6' '9' '0' '2' '1' '5' '8' '6' '7' '0' '1' '3'\n",
" '7' '6' '5' '6' '4' '0' '8' '1' '9' '0' '4' '2' '1' '5' '2' '0' '6' '6'\n",
" '4' '8' '0' '2' '7' '5' '9' '3' '3' '6' '3' '3' '0' '2' '8' '6' '6' '5'\n",
" '5' '0' '1' '2' '6' '6' '7' '1' '1' '9' '3' '8' '8' '2' '4' '7' '5' '5'\n",
" '7' '5' '2' '1' '1' '1' '6' '0' '4' '8' '4' '1' '8' '3' '4' '4' '9' '3'\n",
" '7' '6' '3' '5' '7' '5' '4' '7' '1' '7' '5' '4' '7' '9' '4' '8' '6' '9'\n",
" '1' '2' '8' '5' '4' '8' '3' '1' '5' '7' '3' '2' '1' '4' '4' '1' '2' '1'\n",
" '2' '1' '7' '3' '2' '1' '0' '7' '6' '7' '2' '5' '2' '5' '7' '3' '7' '2'\n",
" '9' '1' '4' '3' '3' '7' '6' '8' '5' '1' '2' '8' '2' '0' '3' '1' '8' '4'\n",
" '5' '4' '9' '1' '7' '2' '4' '9' '4' '9' '2' '8' '5' '8' '9' '7' '4' '7'\n",
" '9' '4' '9' '5' '2' '7' '4' '5' '5' '1' '5' '0' '9' '5' '2' '6' '9' '7'\n",
" '3' '8' '1' '9' '6' '6' '5' '3' '1' '2' '8' '0' '5' '9' '3' '3' '5' '3'\n",
" '5' '1' '6' '3' '0' '1' '3' '0' '7' '6' '2' '7' '9' '9' '7' '4' '6' '4'\n",
" '7' '3' '1' '9' '9' '7' '2' '9' '4' '5' '0' '1' '4' '1' '7' '6' '0' '7'\n",
" '5' '2' '6' '4' '8' '5' '3' '7' '9' '4' '3' '1' '9' '2' '2' '8' '5' '7'\n",
" '1' '9' '4' '3' '2' '4' '2' '6' '9' '1' '1' '0' '7' '7' '3' '7' '8' '9'\n",
" '6' '6' '9' '3' '7' '7' '6' '6' '3' '7' '3' '3' '6' '0' '3' '1' '0' '0'\n",
" '8' '1' '3' '5' '7' '7' '9' '3' '9' '3' '1' '7' '2' '3' '6' '7' '0' '4'\n",
" '9' '3' '3' '1' '8' '9' '0' '3' '9' '1' '7' '1' '8' '4' '7' '8' '4' '1'\n",
" '5' '4' '7' '1' '1' '8' '3' '7' '8' '3' '1' '7' '4' '3' '1' '2' '7' '5'\n",
" '7' '9' '5' '9' '5' '4' '7' '4' '0' '4' '2' '4' '2' '1' '7' '9' '3' '0'\n",
" '1' '7' '8' '0' '2' '8' '7' '1' '8' '4' '1' '6' '9' '9' '9' '3' '7' '1'\n",
" '2' '4' '5' '9' '7' '2' '1' '6' '7' '4' '5' '9' '7' '7' '9' '8' '5' '2'\n",
" '5' '4' '0' '1' '9' '8' '2' '2' '9' '7' '3' '5' '2' '1' '4' '6' '7' '3'\n",
" '1' '1' '1' '8' '6' '0' '8' '0' '1' '6' '6' '7' '1' '4' '8' '0' '8' '6'\n",
" '2' '6' '2' '8' '7' '9' '1' '9' '2' '1' '9' '2' '5' '4' '5' '5' '1' '0'\n",
" '6' '3' '8' '5' '0' '2' '6' '8' '7' '2']\n",
"Probabilités : [[0. 0.4 0. ... 0. 0. 0.1]\n",
" [0. 1. 0. ... 0. 0. 0. ]\n",
" [0. 0. 0. ... 0. 0. 0. ]\n",
" ...\n",
" [0. 0. 0. ... 0.1 0.5 0.3]\n",
" [0. 0. 0. ... 1. 0. 0. ]\n",
" [0. 0. 1. ... 0. 0. 0. ]]\n",
"Classe image 4 : 9\n",
"Classe prédite image 4 : 7\n",
"Score échantillon de test : 0.922\n",
"Score données apprentissage : 0.9395\n"
]
}
],
"source": [
"# Draw a random subsample of 5000 of the 70000 MNIST images.\n",
"rand_indexes = np.random.randint(70000, size=5000)\n",
"\n",
"data = mnist.data[rand_indexes]\n",
"print(\"Dataset : \", data)\n",
"target = mnist.target[rand_indexes]\n",
"print(\"Etiquettes : \", target)\n",
"\n",
"# Split the subsample: 80% training (xtrain/ytrain), 20% held-out test (xtest/ytest).\n",
"xtrain, xtest, ytrain, ytest = model_selection.train_test_split(data, target, train_size=0.8)\n",
"\n",
"n_neighbors = 10\n",
"clf = neighbors.KNeighborsClassifier(n_neighbors=n_neighbors)\n",
"# Fit the k-NN classifier on the training split.\n",
"clf.fit(xtrain, ytrain)\n",
"# Predicted labels for the test split.\n",
"pred = clf.predict(xtest)\n",
"print(\"Prédiction : \", pred)\n",
"# Per-class probability estimates for the test split.\n",
"pred_proba = clf.predict_proba(xtest)\n",
"print(\"Probabilités : \", pred_proba)\n",
"# Accuracy on the held-out test split.\n",
"score = clf.score(xtest, ytest)\n",
"print(\"Classe image 4 : \", target[3])\n",
"print(\"Classe prédite image 4 : \", pred[3])\n",
"print(\"Score échantillon de test : \", score)\n",
"\n",
"# Accuracy on the training split, to gauge overfitting.\n",
"scoreApp = clf.score(xtrain, ytrain)\n",
"print(\"Score données apprentissage : \", scoreApp)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "90db6e29",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset : [[0. 0. 0. ... 0. 0. 0.]\n",
" [0. 0. 0. ... 0. 0. 0.]\n",
" [0. 0. 0. ... 0. 0. 0.]\n",
" ...\n",
" [0. 0. 0. ... 0. 0. 0.]\n",
" [0. 0. 0. ... 0. 0. 0.]\n",
" [0. 0. 0. ... 0. 0. 0.]]\n",
"Etiquettes : ['8' '3' '7' ... '5' '9' '1']\n",
"[0.924, 0.91, 0.938, 0.932, 0.932, 0.928, 0.918, 0.93, 0.94, 0.914, 0.938, 0.946, 0.94, 0.952, 0.95, 0.932, 0.934, 0.938, 0.92, 0.916, 0.92, 0.942, 0.944, 0.926, 0.918, 0.928, 0.95, 0.932, 0.936, 0.94, 0.94, 0.954, 0.91, 0.942, 0.928, 0.928, 0.93, 0.91, 0.944, 0.956, 0.928, 0.94, 0.938, 0.942, 0.914, 0.922, 0.916, 0.93, 0.938, 0.934, 0.936, 0.946, 0.922, 0.938, 0.926, 0.922, 0.92, 0.904, 0.938, 0.922, 0.92, 0.924, 0.904, 0.934, 0.924, 0.952, 0.928, 0.936, 0.934, 0.922, 0.926, 0.922, 0.926, 0.922, 0.92, 0.934, 0.922, 0.912, 0.95, 0.918, 0.946, 0.92, 0.928, 0.914, 0.928, 0.924, 0.91, 0.92, 0.934, 0.936, 0.898, 0.914, 0.92, 0.928, 0.92, 0.92, 0.93, 0.944, 0.924, 0.934, 0.922, 0.926, 0.93, 0.924, 0.922, 0.898, 0.924, 0.916, 0.942, 0.898, 0.93, 0.908, 0.928, 0.91, 0.93, 0.95, 0.938, 0.89, 0.932, 0.898, 0.924, 0.902, 0.894, 0.912, 0.922, 0.932, 0.932, 0.924, 0.924, 0.932]\n"
]
}
],
"source": [
"from sklearn.model_selection import KFold\n",
"\n",
"# Fresh random subsample of 5000 MNIST images.\n",
"rand_indexes = np.random.randint(70000, size=5000)\n",
"\n",
"data = mnist.data[rand_indexes]\n",
"print(\"Dataset : \", data)\n",
"target = mnist.target[rand_indexes]\n",
"print(\"Etiquettes : \", target)\n",
"\n",
"# 10-fold cross-validation; folds are reshuffled on every split() call.\n",
"kf = KFold(n_splits=10, random_state=None, shuffle=True)\n",
"scores = []\n",
"\n",
"# Evaluate k-NN for k = 2..14: 10 fold scores are appended per k,\n",
"# so `scores` ends up holding 13 consecutive groups of 10 values.\n",
"for k in range(2,15):\n",
"    for train_index, test_index in kf.split(data):\n",
"        X_train, X_test = data[train_index], data[test_index]\n",
"        y_train, y_test = target[train_index], target[test_index]\n",
"\n",
"        clf = neighbors.KNeighborsClassifier(k)\n",
"        # Fit on the nine training folds.\n",
"        clf.fit(X_train, y_train)\n",
"        # Predictions and probability estimates on the held-out fold.\n",
"        pred = clf.predict(X_test)\n",
"        pred_proba = clf.predict_proba(X_test)\n",
"        # Held-out-fold accuracy for this (k, fold) pair.\n",
"        score = clf.score(X_test, y_test)\n",
"        scores.append(score)\n",
"        # Training-fold accuracy (computed but not used below).\n",
"        scoreApp = clf.score(X_train, y_train)\n",
"print(scores)"
]
},
{
"cell_type": "code",
"execution_count": 41,
"id": "bf91b914",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2 : 0.9266\n",
"3 : 0.9366\n",
"4 : 0.9336\n",
"5 : 0.9341999999999999\n",
"6 : 0.9297777777777778\n",
"7 : 0.9275555555555557\n",
"8 : 0.9273333333333333\n",
"9 : 0.926888888888889\n",
"10 : 0.9264444444444445\n",
"11 : 0.9204444444444445\n",
"12 : 0.9277777777777779\n",
"13 : 0.918\n",
"14 : 0.922222222222222\n",
"15 : 0.9193333333333334\n"
]
}
],
"source": [
"# `scores` holds 10 fold scores for each k in 2..14 -> 13 groups, not 14.\n",
"# Splitting into 14 misaligned the groups with k and printed labels up to 15;\n",
"# split into 13 so each mean corresponds to exactly one k.\n",
"nice_scores = np.array_split(scores, 13)\n",
"for i in range (0,13):\n",
"    print (i+2, \" : \", nice_scores[i].mean())\n"
]
},
{
"cell_type": "code",
"execution_count": 68,
"id": "cc24e898",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset : [[0. 0. 0. ... 0. 0. 0.]\n",
" [0. 0. 0. ... 0. 0. 0.]\n",
" [0. 0. 0. ... 0. 0. 0.]\n",
" ...\n",
" [0. 0. 0. ... 0. 0. 0.]\n",
" [0. 0. 0. ... 0. 0. 0.]\n",
" [0. 0. 0. ... 0. 0. 0.]]\n",
"Etiquettes : ['3' '0' '3' ... '4' '0' '6']\n",
"3 : 0.8908571428571429\n",
"4 : 0.893\n",
"5 : 0.92\n",
"6 : 0.9105\n",
"7 : 0.9326666666666666\n",
"8 : 0.926\n",
"9 : 0.946\n"
]
}
],
"source": [
"from sklearn.model_selection import KFold\n",
"\n",
"# Fresh random subsample of 5000 MNIST images.\n",
"rand_indexes = np.random.randint(70000, size=5000)\n",
"\n",
"data = mnist.data[rand_indexes]\n",
"print(\"Dataset : \", data)\n",
"target = mnist.target[rand_indexes]\n",
"print(\"Etiquettes : \", target)\n",
"\n",
"# Effect of the train/test split ratio: train_size = 0.3 .. 0.9.\n",
"scores = []\n",
"\n",
"for j in range (3, 10):\n",
"    xtrain, xtest, ytrain, ytest = model_selection.train_test_split(data, target, train_size=(j/10))\n",
"\n",
"    # k-NN with k=3, Euclidean distance (p=2), using all CPU cores.\n",
"    clf = neighbors.KNeighborsClassifier(n_neighbors=3, p=2, n_jobs=-1)\n",
"    clf.fit(xtrain, ytrain)\n",
"    # Predictions and probability estimates on the test split.\n",
"    pred = clf.predict(xtest)\n",
"    pred_proba = clf.predict_proba(xtest)\n",
"    # Test accuracy for this split ratio.\n",
"    score = clf.score(xtest, ytest)\n",
"    scores.append(score)\n",
"    # Training accuracy (computed but not used below).\n",
"    scoreApp = clf.score(xtrain, ytrain)\n",
"\n",
"# Print one test score per ratio, labelled 3..9 (tenths of training fraction).\n",
"n = 3\n",
"for i in scores:\n",
"    print (n, \" : \", i)\n",
"    n += 1"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

166
tp1.ipynb Normal file

File diff suppressed because one or more lines are too long