#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import random
import time

import matplotlib.pyplot as plt
from sklearn.datasets import fetch_openml
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier


"""
|
|
from sklearn.datasets import fetch_openml
|
|
mnist = fetch_openml('mnist_784')
|
|
"""
|
|
|
|
"""
|
|
---Ex 1---
|
|
images = mnist.data.values.reshape((-1, 28, 28))
|
|
plt.imshow(images[0],cmap=plt.cm.gray_r,interpolation="nearest")
|
|
plt.show()
|
|
|
|
print(mnist.target[0])
|
|
"""
|
|
|
|
"""
|
|
---Ex 2---
|
|
"""
|
|
indices = [i for i in range(len(mnist.data))]
|
|
random.shuffle(indices)
|
|
indices = indices[:5000]
|
|
|
|
data = [mnist.data.values[i] for i in indices]
|
|
target = [mnist.target[i] for i in indices]
|
|
|
|
# Best model found so far by a parameter sweep (nothing until one has run).
bestClf = None
bestScore = 0
bestK = 0
scores = []  # one test score per swept value, in sweep order

# Candidate values for each hyper-parameter that can be swept.
kvalues = list(range(2, 16))                    # number of neighbours: 2..15
train_sizes = [0.05 * i for i in range(1, 20)]  # train fraction: 0.05..0.95
pvalues = list(range(1, 11))                    # `p` of KNeighborsClassifier: 1..10

# Values used for a parameter while it is not the one being swept.
k = 3
t = 0.9

# Compare single-process (n_jobs=1) against all-cores (n_jobs=-1) k-NN.
# Both runs use the SAME train/test split, drawn once outside the timed
# region, so the two timings measure the same work and cover only what the
# message announces: training + evaluating.
xtrain, xtest, ytrain, ytest = train_test_split(data, target, train_size=t)

for n_jobs in (1, -1):
    # perf_counter() is the clock intended for measuring short intervals.
    start = time.perf_counter()
    clf = KNeighborsClassifier(k, n_jobs=n_jobs)
    clf.fit(xtrain, ytrain)
    score = clf.score(xtest, ytest)
    end = time.perf_counter()
    print(f"n_jobs = {n_jobs}, training + evaluating time : {end - start}")

"""
|
|
#for k in kvalues:
|
|
#for t in train_sizes:
|
|
for p in pvalues:
|
|
print(p)
|
|
xtrain, xtest, ytrain, ytest = train_test_split(data, target, train_size=t)
|
|
clf = KNeighborsClassifier(k, p = p)
|
|
clf.fit(xtrain, ytrain)
|
|
score = clf.score(xtest, ytest)
|
|
scores += [score]
|
|
if score > bestScore:
|
|
bestClf = clf
|
|
#bestK = k
|
|
#bestSize = t
|
|
bestP = p
|
|
bestScore = score
|
|
|
|
|
|
print(target[4])
|
|
print(bestClf.predict(data[4].reshape(1,-1)))
|
|
plt.imshow(data[4].reshape(28, 28), cmap=plt.cm.gray_r, interpolation="nearest")
|
|
plt.show()
|
|
|
|
#plt.plot(kvalues, scores)
|
|
#plt.plot(train_sizes, scores)
|
|
plt.plot(pvalues, scores)
|
|
|
|
print(bestScore)
|
|
#print(bestSize)
|
|
""" |