No Description
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

tp1.py 2.1KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. import matplotlib.pyplot as plt
  4. from sklearn.neighbors import KNeighborsClassifier
  5. from sklearn.model_selection import train_test_split
  6. from sklearn.model_selection import KFold
  7. import random
  8. import time
  9. """
  10. from sklearn.datasets import fetch_openml
  11. mnist = fetch_openml('mnist_784')
  12. """
  13. """
  14. ---Ex 1---
  15. images = mnist.data.values.reshape((-1, 28, 28))
  16. plt.imshow(images[0],cmap=plt.cm.gray_r,interpolation="nearest")
  17. plt.show()
  18. print(mnist.target[0])
  19. """
  20. """
  21. ---Ex 2---
  22. """
  23. indices = [i for i in range(len(mnist.data))]
  24. random.shuffle(indices)
  25. indices = indices[:5000]
  26. data = [mnist.data.values[i] for i in indices]
  27. target = [mnist.target[i] for i in indices]
  28. bestClf = None
  29. bestScore = 0
  30. bestK = 0
  31. scores = []
  32. kvalues = [i for i in range(2,16)]
  33. train_sizes = [0.05*i for i in range(1,20)]
  34. pvalues = [i for i in range(1,11)]
  35. k = 3
  36. t = 0.9
  37. start = time.time()
  38. xtrain, xtest, ytrain, ytest = train_test_split(data, target, train_size=t)
  39. clf = KNeighborsClassifier(3, n_jobs = 1)
  40. clf.fit(xtrain, ytrain)
  41. score = clf.score(xtest, ytest)
  42. end = time.time()
  43. print(f"n_jobs = 1, training + evaluating time : {end - start}")
  44. start = time.time()
  45. xtrain, xtest, ytrain, ytest = train_test_split(data, target, train_size=t)
  46. clf = KNeighborsClassifier(3, n_jobs = -1)
  47. clf.fit(xtrain, ytrain)
  48. score = clf.score(xtest, ytest)
  49. end = time.time()
  50. print(f"n_jobs = -1, training + evaluating time : {end - start}")
  51. """
  52. #for k in kvalues:
  53. #for t in train_sizes:
  54. for p in pvalues:
  55. print(p)
  56. xtrain, xtest, ytrain, ytest = train_test_split(data, target, train_size=t)
  57. clf = KNeighborsClassifier(k, p = p)
  58. clf.fit(xtrain, ytrain)
  59. score = clf.score(xtest, ytest)
  60. scores += [score]
  61. if score > bestScore:
  62. bestClf = clf
  63. #bestK = k
  64. #bestSize = t
  65. bestP = p
  66. bestScore = score
  67. print(target[4])
  68. print(bestClf.predict(data[4].reshape(1,-1)))
  69. plt.imshow(data[4].reshape(28, 28), cmap=plt.cm.gray_r, interpolation="nearest")
  70. plt.show()
  71. #plt.plot(kvalues, scores)
  72. #plt.plot(train_sizes, scores)
  73. plt.plot(pvalues, scores)
  74. print(bestScore)
  75. #print(bestSize)
  76. """