12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394 |
- #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
-
- import matplotlib.pyplot as plt
- from sklearn.neighbors import KNeighborsClassifier
- from sklearn.model_selection import train_test_split
- from sklearn.model_selection import KFold
- import random
- import time
-

from sklearn.datasets import fetch_openml

# Load the MNIST digits dataset from OpenML. `as_frame=True` is requested
# explicitly because the code below reads `mnist.data.values`, which needs
# pandas-backed data.
# The fetch is slow, so it is skipped when `mnist` is already defined, e.g.
# when the script is re-run in an interactive session that kept the variable.
if "mnist" not in globals():
    mnist = fetch_openml("mnist_784", as_frame=True)
-
- """
- ---Ex 1---
- images = mnist.data.values.reshape((-1, 28, 28))
- plt.imshow(images[0],cmap=plt.cm.gray_r,interpolation="nearest")
- plt.show()
-
- print(mnist.target[0])
- """
-
- """
- ---Ex 2---
- """
# Work on a random subset of the dataset so the k-NN runs stay fast.
# NOTE: `mnist` must already be loaded (see the fetch_openml snippet near the
# top of the file).
# `min(...)` keeps the draw valid even if the dataset has fewer than 5000 rows.
indices = random.sample(range(len(mnist.data)), min(5000, len(mnist.data)))

# Positional NumPy indexing selects all sampled rows in one step.
data = mnist.data.to_numpy()[indices]
target = mnist.target.to_numpy()[indices]

# State for the hyper-parameter sweep (see the disabled loop further down).
bestClf = None
bestScore = 0
bestK = 0
scores = []

# Candidate values for the sweep.
kvalues = list(range(2, 16))
train_sizes = [0.05 * i for i in range(1, 20)]
pvalues = list(range(1, 11))

k = 3    # number of neighbours
t = 0.9  # fraction of the sample used for training

# Compare single-process and all-cores k-NN on the *same* split, so that the
# only difference between the two measurements is `n_jobs`. The split itself
# is done once, outside the timed section, because the report is about
# training + evaluating time only.
xtrain, xtest, ytrain, ytest = train_test_split(data, target, train_size=t)

for n_jobs in (1, -1):
    # perf_counter is the clock intended for measuring short intervals.
    start = time.perf_counter()
    clf = KNeighborsClassifier(k, n_jobs=n_jobs)
    clf.fit(xtrain, ytrain)
    score = clf.score(xtest, ytest)
    end = time.perf_counter()
    print(f"n_jobs = {n_jobs}, training + evaluating time : {end - start}")
-

"""
#for k in kvalues:
#for t in train_sizes:
for p in pvalues:
    print(p)
    xtrain, xtest, ytrain, ytest = train_test_split(data, target, train_size=t)
    clf = KNeighborsClassifier(k, p = p)
    clf.fit(xtrain, ytrain)
    score = clf.score(xtest, ytest)
    scores += [score]
    if score > bestScore:
        bestClf = clf
        #bestK = k
        #bestSize = t
        bestP = p
        bestScore = score


print(target[4])
print(bestClf.predict(data[4].reshape(1,-1)))
plt.imshow(data[4].reshape(28, 28), cmap=plt.cm.gray_r, interpolation="nearest")
plt.show()

#plt.plot(kvalues, scores)
#plt.plot(train_sizes, scores)
plt.plot(pvalues, scores)

print(bestScore)
#print(bestSize)
"""
|