commit 7e21ba4f78d203829265706e0e5c65b1a79c0da9
Author: Chouiya Asma <chouiya@localhost>
Date:   Wed Dec 15 19:33:35 2021 +0100

    KNN algorithm

diff --git a/TP1/KNN.py b/TP1/KNN.py
new file mode 100644
index 0000000..2c859f2
--- /dev/null
+++ b/TP1/KNN.py
@@ -0,0 +1,110 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Dec 15 19:07:59 2021
+
+@author: chouiya
+"""
+
+
+
+from time import time
+
+import numpy as np
+import matplotlib.pyplot as plt
+from sklearn import neighbors
+
+from sklearn.datasets import fetch_openml
+from sklearn.model_selection import train_test_split, KFold
+
+from sklearn.metrics import accuracy_score
+
+
+
+# ********** Data sample "data": 5000 distinct MNIST images **********
+# Indices are drawn WITHOUT replacement: a repeated index would duplicate an image, and the
+# two copies could land on both sides of a train/test split and inflate the scores.
+mnist = fetch_openml('mnist_784', as_frame=False)
+index = np.random.choice(mnist.data.shape[0], size=5000, replace=False)
+data, target = mnist.data[index], mnist.target[index]
+
+# ************* Hold out 20% of the sample; train on the remaining 80% ***********
+
+xtrain, xtest, ytrain, ytest = train_test_split(data, target, train_size=0.8, test_size=0.2)
+
+# ********** k-NN classifier with k=10 ********
+# No second train_test_split here: re-splitting would throw away the split above unused
+# and fit the model on a different random partition.
+clf = neighbors.KNeighborsClassifier(n_neighbors=10)
+
+clf.fit(xtrain, ytrain)
+prediction = clf.predict(xtest)
+# score() is the mean accuracy of the fitted model on the held-out test samples.
+score = clf.score(xtest, ytest)
+
+# ********** Predicted class vs. true class of the test image at index 4 ****************
+print("Prédiction : {}, Valeur : {}, Score : {}".format(prediction[4], ytest[4], score))
+
+
+# ********* Error rate on the training data *******
+# A fresh split and a fresh k=10 model; the model is then scored on the data it was fitted on.
+xtrain, xtest, ytrain, ytest = train_test_split(data, target, train_size=0.8, test_size=0.2)
+clf = neighbors.KNeighborsClassifier(n_neighbors=10)
+clf.fit(xtrain, ytrain)
+score = clf.score(xtrain, ytrain)  # accuracy on the training samples; predicts internally
+print("score: ", score*100)
+print("taux d'erreur: ", (1 - score)*100)  # the error rate this section is about, in percent
+  
+
+# ********** Vary the number of neighbours k from 2 to 15 with a loop *****
+
+xtrain, xtest, ytrain, ytest = train_test_split(data, target, train_size=0.8, test_size=0.2)
+
+# One range drives both the loop and the x-axis, so the curve cannot drift out of step with it.
+range_tab = range(2, 16)
+tab_scores = []
+for i in range_tab:
+    clf = neighbors.KNeighborsClassifier(n_neighbors=i)
+    clf.fit(xtrain, ytrain)
+    score = clf.score(xtest, ytest)  # score() predicts internally; no separate predict() needed
+    tab_scores.append(score)
+    print("K : {}, Score: {}".format(i, score*100))
+
+# plot score = f(k)
+plt.plot(range_tab, tab_scores)
+plt.xlabel("valeurs de K pour KNN")
+plt.ylabel("score")
+plt.show()  # outside an interactive/inline session the figure is not displayed without this
+    
+# ******** Vary k from 2 to 15, scoring every k by K-fold cross-validation ******
+# Every k is scored on all 14 folds and the mean is reported; pairing each k with a single
+# fold would measure the 14 scores on 14 different test sets and make them incomparable.
+kf = KFold(14, shuffle=True)
+folds = list(kf.split(data))  # materialised once so that every k sees the same partitions
+for k in range(2, 16):
+    fold_scores = []
+    for train_index, test_index in folds:
+        xtrain, xtest = data[train_index], data[test_index]
+        ytrain, ytest = target[train_index], target[test_index]
+        clf = neighbors.KNeighborsClassifier(n_neighbors=k)
+        clf.fit(xtrain, ytrain)
+        fold_scores.append(clf.score(xtest, ytest))
+    score = np.mean(fold_scores)
+    print("K : {}, Score : {}".format(k, score*100))
+
+    
+    
+    
+# ********* Vary the share of the sample used for training (20% to 90%) ************
+
+change_percent = range(2, 10)
+for s in change_percent:
+    # s counts tenths: s/10 is the training fraction, and the complement becomes the test set.
+    xtrain, xtest, ytrain, ytest = train_test_split(data, target, train_size=(s/10))
+    clasifier = neighbors.KNeighborsClassifier(n_neighbors=5)
+    clasifier.fit(xtrain, ytrain)
+    print("Training size = {} %, Score = {} ".format(s*10, clasifier.score(xtest, ytest)))
+    
+    
+    
+    
\ No newline at end of file