#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""k-nearest-neighbours (k-NN) experiments on a 5000-image sample of MNIST.

The script runs, in order:

1. a k-NN classifier with k=10 on an 80/20 train/test split, printing the
   prediction for test image 4 and the test and training scores;
2. a sweep of k from 2 to 15 on that same split, plotted as score = f(k);
3. the same sweep of k, scored with K-fold cross-validation;
4. a sweep of the training-set fraction from 20% to 90% with k=5.

Created on Wed Dec 15 19:07:59 2021

@author: chouiya
"""

from time import time

import numpy as np
import matplotlib.pyplot as plt
from sklearn import neighbors

from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split, KFold

from sklearn.metrics import accuracy_score


# ********** Data sample "data": 5000 images drawn from MNIST **********

mnist = fetch_openml('mnist_784', as_frame=False)
# Draw WITHOUT replacement: a duplicated image could otherwise end up in both
# the training and the test set and inflate every score measured below.
index = np.random.choice(len(mnist.data), size=5000, replace=False)
data = mnist.data[index]
target = mnist.target[index]

# ********** Use 80% of the sample for training, the rest for testing **********

xtrain, xtest, ytrain, ytest = train_test_split(data, target, train_size=0.8)

# ********** k-NN classifier with k=10 **********

clf = neighbors.KNeighborsClassifier(10)
clf.fit(xtrain, ytrain)
prediction = clf.predict(xtest)
score = clf.score(xtest, ytest)

# ********** Predicted class and true class of test image 4 **********

print("Prédiction : {}, Valeur : {}, Score : {}".format(prediction[4], ytest[4], score))

# ********** Score on the training data **********
# The printed value is the accuracy in percent (error rate = 100 - score).
# The model fitted above is reused so this figure is directly comparable
# with the test score printed just before.

score = clf.score(xtrain, ytrain)
print("score: ", score*100)

# ********** Vary the number of neighbours k from 2 to 15 with a loop **********

range_tab = range(2, 16)
tab_scores = []
for i in range_tab:
    clf = neighbors.KNeighborsClassifier(i)
    clf.fit(xtrain, ytrain)
    score = clf.score(xtest, ytest)
    tab_scores.append(score)
    print("K : {}, Score: {}".format(i, score*100))

# Plot score = f(k); the figure is displayed at the end of the script.
plt.plot(range_tab, tab_scores)
plt.xlabel("valeurs de K pour KNN")
plt.ylabel("score")

# ********** Vary the number of neighbours k from 2 to 15 with KFold **********
# Every k is scored on the SAME folds and the mean over the folds is reported;
# scoring each k on a different single fold would mix the effect of k with
# the fold-to-fold variation.

kf = KFold(14, shuffle=True)
folds = list(kf.split(data))  # materialised once so all k share the folds
for k in range_tab:
    fold_scores = []
    for train_index, test_index in folds:
        clf = neighbors.KNeighborsClassifier(k)
        clf.fit(data[train_index], target[train_index])
        fold_scores.append(clf.score(data[test_index], target[test_index]))
    score = np.mean(fold_scores)
    print("K : {}, Score : {}".format(k, score*100))

# ********** Vary the share of samples used for training vs. testing **********

change_percent = range(2, 10)
for s in change_percent:
    # s / 10 is the training fraction (0.2 .. 0.9); the rest is the test set.
    xtrain, xtest, ytrain, ytest = train_test_split(data, target, train_size=(s/10))
    clf = neighbors.KNeighborsClassifier(5)
    clf.fit(xtrain, ytrain)
    # The message is labelled in percent, so print s * 10 rather than s / 10.
    print("Training size = {} %, Score = {} ".format(s * 10, clf.score(xtest, ytest)))

# Display the score = f(k) figure last so it does not block the experiments.
plt.show()