TP_Apprentissage/tp1.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
import random
import time


# Load the MNIST dataset from OpenML unless a `mnist` object already exists in
# the namespace (e.g. when the script is re-run in an interactive session, which
# is presumably why the load had been disabled).  Without this guard-and-load,
# the first use of `mnist` further down raises NameError when the file is run
# on its own.
# as_frame=True is requested explicitly because the rest of the script reads
# `mnist.data.values`, i.e. it expects `data` to be a pandas DataFrame.
if "mnist" not in globals():
    from sklearn.datasets import fetch_openml

    mnist = fetch_openml('mnist_784', as_frame=True)

# Exercise 1 (disabled): the code below is wrapped in a string literal so it is
# never executed.  When enabled it reshapes the MNIST pixel table into 28x28
# images, displays the first one with an inverted gray colormap, and prints the
# first entry of `mnist.target`.
"""
---Ex 1---
images = mnist.data.values.reshape((-1, 28, 28)) 
plt.imshow(images[0],cmap=plt.cm.gray_r,interpolation="nearest") 
plt.show()

print(mnist.target[0])
"""

"""
---Ex 2---
"""
# Work on a random subset of the dataset: shuffle every row position, then
# keep only the first 5000 of them.
indices = list(range(len(mnist.data)))
random.shuffle(indices)
del indices[5000:]

# Materialise the selected rows (pixel vectors) and their matching targets as
# two parallel lists, in the shuffled order.
data = [mnist.data.values[row] for row in indices]
target = [mnist.target[row] for row in indices]

# --- Values used for a single run --------------------------------------------
k = 3    # neighbour count handed to KNeighborsClassifier in the sweep code
t = 0.9  # passed as `train_size` to train_test_split

# --- Candidate grids for the (currently disabled) parameter sweeps -----------
kvalues = list(range(2, 16))                           # 2 .. 15
pvalues = list(range(1, 11))                           # 1 .. 10
train_sizes = [step * 0.05 for step in range(1, 20)]   # 0.05 .. 0.95

# --- Best-so-far bookkeeping, updated by the sweep ---------------------------
bestClf, bestScore, bestK = None, 0, 0
scores = []  # one score per evaluated candidate, in evaluation order

# Benchmark: compare the wall-clock time of fit + score for a k-NN classifier
# built with n_jobs=1 against one built with n_jobs=-1.
#
# Both runs share ONE train/test split, created outside the timed region:
#   * with a fresh random split per run the two timings would be measured on
#     different data and would not be comparable;
#   * the printed label says "training + evaluating", so the cost of splitting
#     must not be included in the figure.
xtrain, xtest, ytrain, ytest = train_test_split(data, target, train_size=t)

for n_jobs in (1, -1):
    # perf_counter() is the clock intended for measuring short intervals;
    # time.time() follows the system clock and can jump.
    start = time.perf_counter()
    # Use the configured neighbour count `k` instead of a hard-coded 3.
    clf = KNeighborsClassifier(k, n_jobs=n_jobs)
    clf.fit(xtrain, ytrain)
    score = clf.score(xtest, ytest)
    end = time.perf_counter()
    print(f"n_jobs = {n_jobs}, training + evaluating time : {end - start}")


# Parameter sweep (disabled): the code below is wrapped in a string literal so
# it is never executed.  When enabled it loops over `pvalues`, and for each `p`:
#   - draws a new train/test split with train_size=t,
#   - fits KNeighborsClassifier(k, p=p) and scores it on the test part,
#   - appends the score to `scores` and remembers the best classifier / p / score.
# Afterwards it prints target[4] next to the best classifier's prediction for
# data[4], shows that sample as a 28x28 image, plots scores against pvalues and
# prints the best score.  The commented-out lines are the variants that sweep
# `kvalues` or `train_sizes` instead.
# NOTE(review): every iteration uses a different random split, so the scores
# are not measured on the same test set -- confirm this is intended.
# NOTE(review): the final plt.plot(...) is not followed by plt.show(); whether
# the curve appears presumably depends on the matplotlib backend in use.
"""
#for k in kvalues:    
#for t in train_sizes:
for p in pvalues:
    print(p)
    xtrain,  xtest,  ytrain,  ytest  =  train_test_split(data, target, train_size=t)
    clf = KNeighborsClassifier(k, p = p)
    clf.fit(xtrain, ytrain)
    score = clf.score(xtest, ytest)
    scores += [score]
    if score > bestScore:
        bestClf = clf
        #bestK = k
        #bestSize = t
        bestP = p
        bestScore = score


print(target[4])
print(bestClf.predict(data[4].reshape(1,-1)))
plt.imshow(data[4].reshape(28, 28), cmap=plt.cm.gray_r, interpolation="nearest")
plt.show()

#plt.plot(kvalues, scores)
#plt.plot(train_sizes, scores)
plt.plot(pvalues, scores)

print(bestScore)
#print(bestSize)
"""
première version des TP 2021-12-08 15:03:00 +01:00			`#!/usr/bin/env python3`
			`# -*- coding: utf-8 -*-`

			`import matplotlib.pyplot as plt`
			`from sklearn.neighbors import KNeighborsClassifier`
			`from sklearn.model_selection import train_test_split`
			`from sklearn.model_selection import KFold`
			`import random`
			`import time`


			`"""`
			`from sklearn.datasets import fetch_openml`
			`mnist = fetch_openml('mnist_784')`
			`"""`

			`"""`
			`---Ex 1---`
			`images = mnist.data.values.reshape((-1, 28, 28))`
			`plt.imshow(images[0],cmap=plt.cm.gray_r,interpolation="nearest")`
			`plt.show()`

			`print(mnist.target[0])`
			`"""`

			`"""`
			`---Ex 2---`
			`"""`
			`indices = [i for i in range(len(mnist.data))]`
			`random.shuffle(indices)`
			`indices = indices[:5000]`

			`data = [mnist.data.values[i] for i in indices]`
			`target = [mnist.target[i] for i in indices]`

			`bestClf = None`
			`bestScore = 0`
			`bestK = 0`
			`scores = []`

			`kvalues = [i for i in range(2,16)]`
			`train_sizes = [0.05*i for i in range(1,20)]`
			`pvalues = [i for i in range(1,11)]`

			`k = 3`
			`t = 0.9`

			`start = time.time()`
			`xtrain, xtest, ytrain, ytest = train_test_split(data, target, train_size=t)`
			`clf = KNeighborsClassifier(3, n_jobs = 1)`
			`clf.fit(xtrain, ytrain)`
			`score = clf.score(xtest, ytest)`
			`end = time.time()`
			`print(f"n_jobs = 1, training + evaluating time : {end - start}")`

			`start = time.time()`
			`xtrain, xtest, ytrain, ytest = train_test_split(data, target, train_size=t)`
			`clf = KNeighborsClassifier(3, n_jobs = -1)`
			`clf.fit(xtrain, ytrain)`
			`score = clf.score(xtest, ytest)`
			`end = time.time()`
			`print(f"n_jobs = -1, training + evaluating time : {end - start}")`


			`"""`
			`#for k in kvalues:`
			`#for t in train_sizes:`
			`for p in pvalues:`
			`print(p)`
			`xtrain, xtest, ytrain, ytest = train_test_split(data, target, train_size=t)`
			`clf = KNeighborsClassifier(k, p = p)`
			`clf.fit(xtrain, ytrain)`
			`score = clf.score(xtest, ytest)`
			`scores += [score]`
			`if score > bestScore:`
			`bestClf = clf`
			`#bestK = k`
			`#bestSize = t`
			`bestP = p`
			`bestScore = score`


			`print(target[4])`
			`print(bestClf.predict(data[4].reshape(1,-1)))`
			`plt.imshow(data[4].reshape(28, 28), cmap=plt.cm.gray_r, interpolation="nearest")`
			`plt.show()`

			`#plt.plot(kvalues, scores)`
			`#plt.plot(train_sizes, scores)`
			`plt.plot(pvalues, scores)`

			`print(bestScore)`
			`#print(bestSize)`
			`"""`