In [1]:
#
import numpy as np
import scipy as sp
import pandas as pd
import matplotlib as mp
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
import laUtilities as ut
import slideUtilities as sl
import demoUtilities as dm
from matplotlib import animation
from importlib import reload
from datetime import datetime
from IPython.display import Image, display_html, display, Math, HTML;
qr_setting = None

mp.rcParams['animation.html'] = 'jshtml';
In [2]:
#
def centerAxes(ax):
    # draw the x- and y-axes through the origin, hiding the top and right spines
    ax.spines['left'].set_position('zero')
    ax.spines['right'].set_color('none')
    ax.spines['bottom'].set_position('zero')
    ax.spines['top'].set_color('none')
    ax.xaxis.set_ticks_position('bottom')
    ax.yaxis.set_ticks_position('left')
    # plot the corners of the current view (with an empty style) so the limits are retained
    bounds = np.array([ax.axes.get_xlim(), ax.axes.get_ylim()])
    ax.plot(bounds[0][0], bounds[1][0], '')
    ax.plot(bounds[0][1], bounds[1][1], '')
In [3]:
#
%matplotlib inline
%config InlineBackend.figure_format='retina'
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import Image, display_html, display, Math, HTML;

Announcements¶

  • HW 3 is due today (2/17) at 8pm

  • Test 1 is in-class next Friday (2/24)

  • Upcoming office hours:

    • Today: Peer tutor Daniel Cho from 3-6pm in CCDS 16th floor

  • Read Tan, Steinbach, Karpatne, Kumar Chapter 3.3

13. k-Nearest Neighbors in action¶

Recap of last lecture¶

k-nearest neighbors is a supervised learning algorithm for classification.
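
As a quick refresher on the rule itself, here is a minimal from-scratch sketch (an illustration, not the implementation used in this lecture): to classify a query point, find the k closest training points under Euclidean distance and take a majority vote of their labels.

In [ ]:
import numpy as np

def knn_predict(X_train, y_train, query, k=5):
    # Euclidean distance from the query point to every training point
    dists = np.linalg.norm(X_train - query, axis=1)
    # indices of the k closest training points
    nearest = np.argsort(dists)[:k]
    # majority vote among the neighbors' labels
    labels, counts = np.unique(y_train[nearest], return_counts=True)
    return labels[np.argmax(counts)]

# tiny made-up example with two classes in 2-D
X_demo = np.array([[0.0, 0.0], [0.1, 0.2], [1.0, 1.0], [0.9, 1.1]])
y_demo = np.array([0, 0, 1, 1])
print(knn_predict(X_demo, y_demo, np.array([0.95, 1.0]), k=3))   # prints 1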

In [4]:
#
plt.scatter(demo_X[:,0], demo_X[:,1], c=demo_y, cmap=cmap_bold)
plt.plot(test_X[0], test_X[1], 'ok')
plt.annotate('Test Point', test_X, [75, 25], 
             textcoords = 'offset points', fontsize = 14, 
             arrowprops = {'arrowstyle': '->'})
plt.axis('equal')
plt.axis('off')
plt.title('Training Points: 2 Classes');
In [5]:
#
plt.scatter(demo_X[:,0], demo_X[:,1], c=demo_y, cmap=cmap_bold)
plt.plot(test_X[0], test_X[1], 'ok')
ax=plt.gcf().gca()
circle = mp.patches.Circle(test_X, 0.5, facecolor = 'red', alpha = 0.2)
plt.axis('equal')
plt.axis('off')
ax.add_artist(circle)
plt.title('1-Nearest-Neighbor: Classification: Red');
In [6]:
#
plt.scatter(demo_X[:,0], demo_X[:,1], c=demo_y, cmap=cmap_bold)
test_X = [-0.3, 0.7]
plt.plot(test_X[0], test_X[1], 'ok')
ax=plt.gcf().gca()
circle = mp.patches.Circle(test_X, 0.9, facecolor = 'gray', alpha = 0.3)
plt.axis('equal')
plt.axis('off')
ax.add_artist(circle)
plt.title('2-Nearest-Neighbor');
In [7]:
#
plt.figure()
ax = plt.gcf().gca()
test_X = [-0.3, 0.7]
circle = mp.patches.Circle(test_X, 1.4, facecolor = 'blue', alpha = 0.2)
ax.add_artist(circle)
plt.scatter(demo_X[:,0], demo_X[:,1], c=demo_y, cmap=cmap_bold)
plt.plot(test_X[0], test_X[1], 'ok')
plt.axis('equal')
plt.axis('off')
plt.title('3-Nearest-Neighbor: Classification: Blue');

Challenges for k-NN¶

Working with a k-NN classifier can involve some challenges.

  1. First, the computational cost of classification grows with the size of the training data. Training is trivial, but classifying a point requires comparing it against the stored training data, which can be prohibitively expensive.
  2. Second, since Euclidean distance is the most common distance function used, data scaling is important (see the sketch just after this list).
  3. Third, we need to deal with the curse of dimensionality -- problems arise when we use geometric algorithms in high-dimensional space.
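
Since Euclidean distance treats all features alike, features measured on large scales will dominate the distance computation. A common remedy is to standardize the features before fitting k-NN. Here is a minimal sketch using scikit-learn's StandardScaler on made-up data (a general illustration, not part of this lecture's datasets):

In [ ]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# hypothetical data just to illustrate the pipeline
X_demo, y_demo = make_classification(n_samples = 200, n_features = 5, random_state = 0)
X_tr, X_te, y_tr, y_te = train_test_split(X_demo, y_demo, random_state = 0)

# rescale each feature to zero mean and unit variance, then classify
scaled_knn = make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors = 5))
scaled_knn.fit(X_tr, y_tr)
print('accuracy with scaling:', scaled_knn.score(X_te, y_te))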

13.1 Comparing Classification Methods on Synthetic Data¶

Next we'll look at two classification methods in practice:

  • Decision Trees, and
  • k-Nearest Neighbors.

To compare these methods, the question arises:

How do we evaluate a classifier?

Using the confusion matrix we can define some useful measures:

  • Recall - the fraction of actual positives that are correctly classified:
    • TP/(TP + FN)
  • Precision - the fraction of predicted positives that are actually positive:
    • TP/(TP + FP)
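
For example, given hypothetical confusion-matrix counts, these measures are simple ratios:

In [ ]:
# made-up counts: 8 true positives, 2 false positives, 4 false negatives
TP, FP, FN = 8, 2, 4

recall = TP / (TP + FN)      # fraction of actual positives we found
precision = TP / (TP + FP)   # fraction of predicted positives that are correct

print(f'recall = {recall:.2f}, precision = {precision:.2f}')   # recall = 0.67, precision = 0.80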
In [11]:
#
display(Image("images/23-confusion-matrix.png", width=300))

Evaluating k-Nearest Neighbors¶

First we'll generate some synthetic data to work with.

In [17]:
import sklearn.datasets as datasets

X, y = datasets.make_circles(noise=.1, factor=.5, random_state=1)
print('Shape of data: {}'.format(X.shape))
print('Unique labels: {}'.format(np.unique(y)))
Shape of data: (100, 2)
Unique labels: [0 1]

Here is what the data looks like:

In [18]:
#
plt.figure(figsize = (6,6))
plt.prism()  # this sets a nice color map
plt.scatter(X[:, 0], X[:, 1], c=y, s = 80)
plt.axis('off')
plt.axis('equal');

Recall that we always want to test on data separate from our training data.

For now, we will do something very simple: take the first 50 examples for training and hold out the rest for testing. (Later we will do this a better way.)

In [19]:
X_train = X[:50]
y_train = y[:50]
X_test = X[50:]
y_test = y[50:]
In [21]:
#
fig_size = (12, 5)

plt.figure(figsize = fig_size)
plt.subplot(1, 2, 1)
plt.scatter(X_train[:, 0], X_train[:, 1], c = y_train, s = 80)
plt.axis('equal')
plt.axis('off')
plt.title('Training Data')

plt.subplot(1, 2, 2)
plt.scatter(X_test[:, 0], X_test[:, 1], c = y_test, s = 80)
plt.title('Test Data')
plt.axis('equal')
plt.axis('off');

For our first example, we will classify the points (in the two classes) using a k-NN classifier.

We will specify k = 5, i.e., we will classify based on the majority vote of the 5 nearest neighbors.

In [22]:
from sklearn.neighbors import KNeighborsClassifier

k = 5
knn5 = KNeighborsClassifier(n_neighbors = k)

In the context of supervised learning, the scikit-learn fit() function corresponds to training and the predict() function corresponds to testing.

In [23]:
knn5.fit(X_train,y_train)
print(f'Accuracy on test data: {knn5.score(X_test, y_test)}')
Accuracy on test data: 0.72

Accuracy of 72% sounds good -- but let's dig deeper.

We'll call the red points the Positive class and the green points the Negative class.

Here is the confusion matrix:

In [24]:
import sklearn.metrics as metrics

y_pred_test = knn5.predict(X_test)
pd.DataFrame(metrics.confusion_matrix(y_test, y_pred_test), 
             columns = ['Predicted +', 'Predicted -'], 
             index = ['Actual +', 'Actual -'])
Out[24]:
           Predicted +  Predicted -
Actual +            14           14
Actual -             0           22

Looks like the classifier is getting all of the Negative class correct, but only achieving accuracy of 50% on the Positive class.

That is, its precision is 100%, but its recall is only 50%.
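
We can confirm these numbers with scikit-learn's metric functions. (A quick check; it assumes label 0 corresponds to the Positive (red) class, consistent with the row ordering of the confusion matrix above.)

In [ ]:
# precision and recall for the Positive class, taken here to be label 0
print('precision:', metrics.precision_score(y_test, y_pred_test, pos_label = 0))
print('recall:   ', metrics.recall_score(y_test, y_pred_test, pos_label = 0))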

Let's visualize the results.

In [25]:
#
k = 5

plt.figure(figsize = fig_size)
plt.subplot(1, 2, 1)

plt.scatter(X_train[:, 0], X_train[:, 1], c = y_train, s = 80)
plt.axis('equal')
plt.title('Training')
plt.axis('off')

plt.subplot(1, 2, 2)
plt.scatter(X_test[:, 0], X_test[:, 1], c = y_pred_test, s = 80)
plt.title(f'Testing $k$={k}\nAccuracy: {knn5.score(X_test, y_test)}')
plt.axis('off')
plt.axis('equal');

Indeed, the Positive (red) points in the upper half of the test data are all classified incorrectly.

Let's look at one of the points that the classifier got wrong:

In [26]:
#
k=5 
test_point = np.argmax(X_test[:,1])
neighbors = knn5.kneighbors([X_test[test_point]])[1]

plt.figure(figsize = fig_size)
plt.subplot(1, 2, 1)
plt.scatter(X_train[:, 0], X_train[:, 1], c = y_train, s = 80)
plt.scatter(X_train[neighbors,0], X_train[neighbors,1],
            c = y_train[neighbors], marker='o', 
            facecolors='none', edgecolors='b', s = 80)
radius = np.max(metrics.euclidean_distances(X_test[test_point].reshape(1, -1), X_train[neighbors][0]))
ax = plt.gcf().gca()
circle = mp.patches.Circle(X_test[test_point], radius, facecolor = 'blue', alpha = 0.2)
ax.add_artist(circle)
plt.axis('equal')
plt.axis('off')
plt.title(r'Training')

plt.subplot(1, 2, 2)
plt.scatter(X_test[:, 0], X_test[:, 1], c = y_pred_test, s = 80)
plt.scatter(X_test[test_point,0], X_test[test_point,1], marker='o', 
            facecolors='none', edgecolors='b', s = 80)
plt.title('Testing $k$={}\nAccuracy: {}'.format(k,knn5.score(X_test, y_test)))
plt.axis('equal')
plt.axis('off');

For comparison purposes, let's try k = 3.

In [27]:
#
k = 3
knn3 = KNeighborsClassifier(n_neighbors=k)    
knn3.fit(X_train,y_train)
y_pred_test = knn3.predict(X_test)

plt.figure(figsize = fig_size)
plt.subplot(1, 2, 1)
plt.scatter(X_train[:, 0], X_train[:, 1], c=y_train, s = 80)
plt.axis('equal')
plt.axis('off')
plt.title(r'Training')

plt.subplot(1, 2, 2)
plt.scatter(X_test[:, 0], X_test[:, 1], c=y_pred_test, s = 80)
plt.title(f'Testing $k$={k}\nAccuracy: {knn3.score(X_test, y_test)}')
plt.axis('off')
plt.axis('equal');

And let's look at the same individual point as before:

In [28]:
#
k = 3
test_point = np.argmax(X_test[:,1])
X_test[test_point]
neighbors = knn3.kneighbors([X_test[test_point]])[1]

plt.figure(figsize = fig_size)
plt.subplot(1, 2, 1)
plt.scatter(X_train[:, 0], X_train[:, 1], c=y_train, s = 80)
plt.scatter(X_train[neighbors, 0], X_train[neighbors, 1], marker = 'o', 
            facecolors = 'none', edgecolors = 'b', s = 80)
radius = np.max(metrics.euclidean_distances(X_test[test_point].reshape(1, -1), 
                                            X_train[neighbors][0]))
ax = plt.gcf().gca()
circle = mp.patches.Circle(X_test[test_point], radius, facecolor = 'blue', alpha = 0.2)
ax.add_artist(circle)
plt.axis('equal')
plt.axis('off')
plt.title(r'Training')
plt.subplot(1, 2, 2)
plt.scatter(X_test[:, 0], X_test[:, 1], c = y_pred_test, s = 80)
plt.scatter(X_test[test_point,0], X_test[test_point,1], marker = 'o', 
            facecolors = 'none', edgecolors = 'b', s = 80)
plt.title(f'Testing $k$={k}\nAccuracy: {knn3.score(X_test, y_test)}')
plt.axis('off')
plt.axis('equal');

So how confident can we be that the test accuracy is 92% in general?

What we really need to do is consider many different train/test splits.

Thus, the proper way to evaluate generalization ability (accuracy on the test data) is:

  1. Form a random train/test split
  2. Train the classifier on the training split
  3. Test the classifier on the testing split
  4. Accumulate statistics
  5. Repeat from 1. until enough statistics have been collected.
In [29]:
import sklearn.model_selection as model_selection

nreps = 50
kvals = range(1, 10)
acc = []
np.random.seed(4)
for k in kvals:
    test_rep = []
    train_rep = []
    for i in range(nreps):
        X_train, X_test, y_train, y_test = model_selection.train_test_split(X, 
                                                                            y, 
                                                                            test_size = 0.5)
        knn = KNeighborsClassifier(n_neighbors = k)    
        knn.fit(X_train, y_train)
        train_rep.append(knn.score(X_train, y_train))
        test_rep.append(knn.score(X_test, y_test))
    acc.append([np.mean(np.array(test_rep)), np.mean(np.array(train_rep))])
accy = np.array(acc)
In [30]:
plt.plot(kvals, accy[:, 0], '.-', label = 'Accuracy on Test Data')
plt.plot(kvals, accy[:, 1], '.-', label = 'Accuracy on Training Data')
plt.xlabel(r'$k$')
plt.ylabel('Accuracy')
plt.title('Train/Test Comparison of $k$-NN')
plt.legend(loc = 'best');

Based on the generalization error -- i.e., accuracy on test (held-out) data -- it looks like k = 2 is the best choice.

Here is the decision boundary for k-NN with k = 2.

In [31]:
#
x_min, x_max = X[:, 0].min() - .1, X[:, 0].max() + .1
y_min, y_max = X[:, 1].min() - .1, X[:, 1].max() + .1
plot_step = 0.02
xx, yy = np.meshgrid(np.arange(x_min, x_max, plot_step),
                     np.arange(y_min, y_max, plot_step))
In [32]:
#
np.random.seed(1)
X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size = 0.5)

k = 2
knn = KNeighborsClassifier(n_neighbors = k)  
knn.fit(X_train, y_train)
y_pred_train = knn.predict(X_train)
y_pred_test = knn.predict(X_test)

Z = knn.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

plt.figure(figsize = fig_size)
plt.subplot(1, 2, 1)
cs = plt.contourf(xx, yy, Z, cmap=plt.cm.Paired, alpha=0.3)
plt.scatter(X_train[:, 0], X_train[:, 1], c=y_train, s=30)
plt.axis('equal')
plt.axis('off')
plt.xlim((x_min, x_max))
plt.ylim((y_min, y_max))
plt.title(f'{k}-NN - Training Data\nAccuracy: {knn.score(X_train, y_train)}');

plt.subplot(1, 2, 2)
cs = plt.contourf(xx, yy, Z, cmap=plt.cm.Paired, alpha=0.3)
plt.scatter(X_test[:, 0], X_test[:, 1], c=y_test, s=30)
plt.axis('equal')
plt.axis('off')
plt.xlim((x_min, x_max))
plt.ylim((y_min, y_max))
plt.title(f'{k}-NN - Test Data\nAccuracy: {knn.score(X_test, y_test)}');

Evaluating a Decision Tree¶

Next, we'll use a decision tree on the same data set.

In [33]:
import sklearn.tree as tree
dtc = tree.DecisionTreeClassifier(max_leaf_nodes = 5)

dtc.fit(X_train,y_train)
y_pred_test = dtc.predict(X_test)
print('DT accuracy on test data: ', dtc.score(X_test, y_test))
y_pred_train = dtc.predict(X_train)
print('DT accuracy on training data: ', dtc.score(X_train, y_train))
DT accuracy on test data:  0.94
DT accuracy on training data:  0.98
In [34]:
#
plt.figure(figsize = (6, 6))
plt.scatter(X_test[:, 0], X_test[:, 1], c=y_pred_test, marker='^', s=80)
plt.scatter(X_train[:, 0], X_train[:, 1], c=y_train, s=80)
plt.axis('equal')
plt.axis('off')
plt.title(F'Decision Tree\n Triangles: Test Data, Circles: Training Data\nAccuracy: {dtc.score(X_test, y_test)}');

Let's visualize the decision boundary of the Decision Tree.

In [35]:
#
Z = dtc.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

plt.figure(figsize = (12, 6))
plt.subplot(1, 2, 1)
cs = plt.contourf(xx, yy, Z, cmap=plt.cm.Paired, alpha=0.3)
plt.scatter(X_train[:, 0], X_train[:, 1], c = y_train, s = 80)
plt.axis('equal')
plt.axis('off')
plt.xlim((x_min, x_max))
plt.ylim((y_min, y_max))
plt.title(f'Decision Tree - Training Data\nAccuracy: {dtc.score(X_train, y_train)}');

plt.subplot(1, 2, 2)
cs = plt.contourf(xx, yy, Z, cmap=plt.cm.Paired, alpha=0.3)
plt.scatter(X_test[:, 0], X_test[:, 1], c = y_pred_test, marker = '^', s = 80)
plt.axis('equal')
plt.axis('off')
plt.xlim((x_min, x_max))
plt.ylim((y_min, y_max))
plt.title(f'Decision Tree - Test Data\nAccuracy: {dtc.score(X_test, y_test)}');

Comparing k-NN and Decision Trees¶

It appears that k-NN and a Decision Tree have roughly comparable performance on this dataset.

However, there is a difference in interpretability.

Interpretability is a big deal! It means the ability to explain why the classifier made the decision it did.

It can be relatively difficult to explain why k-NN makes a specific prediction: the answer depends entirely on the training data that happens to lie in the neighborhood of the test point.

On the other hand, the Decision Tree can be easily understood.

We sometimes use the terms "black box" for an uninterpretable classifier like k-NN, and "white box" for an interpretable classifier like a Decision Tree.
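
Before drawing the tree as a diagram, a quick way to see this interpretability is scikit-learn's text export, which prints the fitted tree's decision rules as nested thresholds (a small sketch using the dtc classifier fit above):

In [ ]:
# print the learned splits as if/else rules on the two features
print(tree.export_text(dtc, feature_names = ['X', 'Y']))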

Let's see an example of the interpretability of the Decision Tree:

In [36]:
dot_data = tree.export_graphviz(dtc, out_file=None,
                         feature_names=['X','Y'],
                         class_names=['Red','Green'],
                         filled=True, rounded=True,  
                         special_characters=True) 
import pydotplus
graph = pydotplus.graph_from_dot_data(dot_data) 
# graph.write_pdf("dt.pdf") 
Image(graph.create_png())
Out[36]:

13.2 Comparing on Real Data¶

To explore a few more issues, we'll now turn to some famous datasets that have been extensively studied in the past.

The Iris Dataset¶

The Iris dataset is a famous dataset used by Ronald Fisher in a classic 1936 paper on classification.

Quoting from Wikipedia:

The data set consists of 50 samples from each of three species of Iris (Iris setosa, Iris virginica and Iris versicolor). Four features were measured from each sample: the length and the width of the sepals and petals, in centimetres. Based on the combination of these four features, Fisher developed a linear discriminant model to distinguish the species from each other.

In [13]:
#
display(Image("images/01-iris.png", width=600))
In [38]:
iris = datasets.load_iris()

X = iris.data
y = iris.target
ynames = iris.target_names
print(X.shape, y.shape)
print(X[1,:])
print(iris.target_names)
print(y)
(150, 4) (150,)
[4.9 3.  1.4 0.2]
['setosa' 'versicolor' 'virginica']
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2]

First, we'll explore setting hyperparameters.

We start with k-NN.

To set the hyperparameter k, we evaluate error on the test set for many train/test splits:

In [39]:
kvals = range(2, 20)
nreps = 50

acc = []
std = []
np.random.seed(0)
for k in kvals:
    test_rep = []
    train_rep = []
    for i in range(nreps):
        X_train, X_test, y_train, y_test = model_selection.train_test_split(
            X, y, test_size = 0.33)
        knn = KNeighborsClassifier(n_neighbors = k)    
        knn.fit(X_train, y_train)
        train_rep.append(knn.score(X_train, y_train))
        test_rep.append(knn.score(X_test, y_test))
    acc.append([np.mean(np.array(test_rep)), np.mean(np.array(train_rep))])
    std.append([np.std(np.array(test_rep)), np.std(np.array(train_rep))])
In [40]:
#
accy = np.array(acc)
stds = np.array(std)/np.sqrt(nreps)
plt.plot(kvals, accy[:, 0], '.-', label = 'Accuracy on Test Data')
plt.plot(kvals, accy[:, 1], '.-', label = 'Accuracy on Training Data')
plt.xlabel('k')
plt.ylabel('Accuracy')
plt.xticks(kvals)
plt.legend(loc = 'best');
print(f'Max Test Accuracy at k = {kvals[np.argmax(accy[:, 0])]} with accuracy {np.max(accy[:, 0]):.03f}')
Max Test Accuracy at k = 13 with accuracy 0.969

Now, it looks like k = 13 is the best-performing value of the hyperparameter.

Can we be sure?

Be careful! Each point in the above plot is the mean of 50 random train/test splits!

If we are going to be sure that k = 13 is best, then it should be statistically distinguishable from the other values.

To make this call, let's plot ±1σ confidence intervals on the mean values.
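
Each error bar shows the standard error of the mean accuracy over the nreps = 50 splits,

SE = σ / √nreps,

which is exactly what the line stds = np.array(std)/np.sqrt(nreps) computed in the cell above.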

In [41]:
#
plt.errorbar(kvals, accy[:, 0], stds[:, 0], label = 'Accuracy on Test Data')
plt.xlabel('k')
plt.ylabel('Accuracy')
plt.legend(loc = 'lower center')
plt.xticks(kvals)
plt.title(r'Test Accuracy with $\pm 1\sigma$ Errorbars');

It looks like k = 13 is a reasonable value, although a case can be made that k = 9 and k = 11 are not statistically distinguishable from k = 13.

To gain insight into the complexity of the model at k = 13, let's look at the decision boundary.

Note that we will re-run the classifier using only the first two (of the four) features, so we can visualize the boundary in two dimensions.

In [42]:
# Create color maps
from matplotlib.colors import ListedColormap
cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF'])
cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#0000FF'])

# we will use only the first two (of four) features, so we can visualize
X = X_train[:, :2] 
h = .02  # step size in the mesh
k = 13
knn = KNeighborsClassifier(n_neighbors=k)
knn.fit(X, y_train)
# Plot the decision boundary. For that, we will assign a color to each
# point in the mesh [x_min, x_max]x[y_min, y_max].
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                      np.arange(y_min, y_max, h))
Z = knn.predict(np.c_[xx.ravel(), yy.ravel()])

# Put the result into a color plot
Z = Z.reshape(xx.shape)
plt.figure()
plt.pcolormesh(xx, yy, Z, cmap=cmap_light, shading='auto')

# Plot also the training points
plt.scatter(X[:, 0], X[:, 1], c=y_train, cmap=cmap_bold)
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())
plt.title(f"3-Class $k$-NN classification ($k$ = {k})");

There are a few artifacts, but overall this looks like a reasonably smooth set of decision boundaries.

Now we'll compare to a decision tree.

How do we control the complexity of a Decision Tree?

There are a variety of ways (see the sklearn documentation) but the simplest one is to control the number of leaf nodes in the tree.

A small number of leaf nodes is a low-complexity model, and a large number of nodes is a high-complexity model.

In [43]:
X = iris.data
y = iris.target
In [44]:
leaf_vals = range(3, 20)
nreps = 50

acc = []
std = []
np.random.seed(0)
for leaf_count in leaf_vals:
    test_rep = []
    train_rep = []
    for i in range(nreps):
        X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size = 0.10)
        dtc = tree.DecisionTreeClassifier(max_leaf_nodes = leaf_count)   
        dtc.fit(X_train, y_train)
        train_rep.append(dtc.score(X_train, y_train))
        test_rep.append(dtc.score(X_test, y_test))
    acc.append([np.mean(np.array(test_rep)), np.mean(np.array(train_rep))])
    std.append([np.std(np.array(test_rep)), np.std(np.array(train_rep))])
accy = np.array(acc)
stds = np.array(std)/np.sqrt(nreps)
In [45]:
#
plt.plot(leaf_vals, accy[:, 0], '.-', label = 'Accuracy on Test Data')
plt.plot(leaf_vals, accy[:, 1], '.-', label = 'Accuracy on Training Data')
plt.xlabel('Max Leaf Nodes')
plt.ylabel('Accuracy')
plt.legend(loc = 'best')
plt.xticks(leaf_vals)
best_leaf = leaf_vals[np.argmax(accy[:, 0])]
plt.title(f'Test/Train Error for Decision Tree\nMax Test Accuracy at {best_leaf} leaf nodes with accuracy {np.max(accy[:, 0]):.03f}');
In [46]:
#
plt.errorbar(leaf_vals, accy[:, 0], stds[:, 0], label = 'Accuracy on Test Data')
plt.xlabel('Max Leaf Nodes')
plt.ylabel('Accuracy')
plt.legend(loc = 'lower center')
plt.xticks(leaf_vals)
plt.title(r'Test Accuracy with $\pm 1\sigma$ Errorbars');

It looks like 9 leaf nodes is appropriate, but we would be justified in choosing 4 or 13 as well.

And now let's visualize the decision boundary for the DT:

In [47]:
# we will use only the first two (of four) features, so we can visualize
X = X_train[:, :2] 
h = .02  # step size in the mesh
dtc = tree.DecisionTreeClassifier(max_leaf_nodes = best_leaf) 
dtc.fit(X, y_train)
# Plot the decision boundary. For that, we will assign a color to each
# point in the mesh [x_min, x_max]x[y_min, y_max].
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                      np.arange(y_min, y_max, h))
Z = dtc.predict(np.c_[xx.ravel(), yy.ravel()])

# Put the result into a color plot
Z = Z.reshape(xx.shape)
plt.figure()
plt.pcolormesh(xx, yy, Z, cmap=cmap_light, shading='auto')

# Plot also the training points
plt.scatter(X[:, 0], X[:, 1], c=y_train, cmap=cmap_bold)
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())
plt.title(f"3-Class DT Classification\n{best_leaf} leaf nodes");

MNIST dataset¶

NIST, the US National Institute of Standards and Technology, is the agency that brings you the reference meter, the reference kilogram, and so on.

In [3]:
#
display(Image("images/13-reference-pb.jpg", width=600))

NIST constructed datasets for machine learning of handwritten digits. These were collected from Census Bureau employees and also from high-school students.

These data have been used repeatedly for many years to evaluate classifiers.

In [4]:
import sklearn.utils as utils
import sklearn.datasets as datasets

digits = datasets.load_digits()
X, y = utils.shuffle(digits.data, digits.target, random_state = 1)

print ('Data shape: {}'.format(X.shape))
print ('Data labels: {}'.format(y))
print ('Unique labels: {}'.format(digits.target_names))
Data shape: (1797, 64)
Data labels: [1 5 0 ... 9 1 5]
Unique labels: [0 1 2 3 4 5 6 7 8 9]

An individual item is an 8×8 image, encoded as a matrix:

In [49]:
digits.images[3]
Out[49]:
array([[ 0.,  0.,  7., 15., 13.,  1.,  0.,  0.],
       [ 0.,  8., 13.,  6., 15.,  4.,  0.,  0.],
       [ 0.,  2.,  1., 13., 13.,  0.,  0.,  0.],
       [ 0.,  0.,  2., 15., 11.,  1.,  0.,  0.],
       [ 0.,  0.,  0.,  1., 12., 12.,  1.,  0.],
       [ 0.,  0.,  0.,  0.,  1., 10.,  8.,  0.],
       [ 0.,  0.,  8.,  4.,  5., 14.,  9.,  0.],
       [ 0.,  0.,  7., 13., 13.,  9.,  0.,  0.]])

Let's show the matrix as an image:

In [50]:
plt.gray() 
# plt.rc('axes', grid = False);
plt.matshow(digits.images[3], cmap = plt.cm.gray_r);
<Figure size 432x288 with 0 Axes>

It is easier to visualize if we blur the pixels a little bit.

In [51]:
plt.rc('image', cmap = 'binary', interpolation = 'bilinear')
plt.figure(figsize = (4, 4))
plt.axis('off')
plt.imshow(digits.images[3]);

Here are some more samples from the dataset:

In [52]:
for t in range(4):
    plt.figure(figsize = (8, 2))
    for j in range(4):
        plt.subplot(1, 4, 1 + j)
        plt.imshow(X[4*t + j].reshape(8, 8))
        plt.axis('off');

Although this is an 8 × 8 image, we can just treat it as a vector of length 64.
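
As a quick check (a small sketch using the digits object loaded above), the length-64 feature vector stored in digits.data is just the 8 × 8 image flattened row by row:

In [ ]:
# each row of digits.data is the corresponding 8 x 8 image, flattened
print(np.array_equal(digits.images[3].ravel(), digits.data[3]))   # True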

To do model selection, we will again average over many train/test splits.

However, recall one issue of k-NN: it can be slow on a large training set (why?).

We may thus decide to limit the size of our testing set to speed up testing.

How does the train/test split affect results?

Let's consider two cases:

  1. Train: 90% of data, Test: 10% of data
  2. Train: 67% of data, Test: 33% of data
In [5]:
import time
import sklearn.model_selection as model_selection
from sklearn.neighbors import KNeighborsClassifier
def test_knn(kvals, test_fraction, nreps):
    acc = []
    std = []
    np.random.seed(0)
    #
    for k in kvals:
        test_rep = []
        train_rep = []
        for i in range(nreps):
            X_train, X_test, y_train, y_test = model_selection.train_test_split(
                X, y, test_size = test_fraction)
            knn = KNeighborsClassifier(n_neighbors = k)    
            knn.fit(X_train, y_train)
            test_rep.append(knn.score(X_test, y_test))
        acc.append(np.mean(np.array(test_rep)))
        std.append(np.std(np.array(test_rep)))
    return(np.array(acc), np.array(std)/np.sqrt(nreps))

test_fraction1 = 0.33
start = time.time()
accy1, stds1 = test_knn(range(2, 20), test_fraction1, 50)
end = time.time()
print("time to train/test on 66% of the dataset: ", end-start)

test_fraction2 = 0.10
start = time.time()
accy2, stds2 = test_knn(range(2, 20), test_fraction2, 50)
end = time.time()
print("time to train/test on 90% of the dataset: ", end-start)
time to train/test on 67% of the dataset:  8.791742086410522
time to train/test on 90% of the dataset:  4.361397981643677
In [54]:
#
plt.figure(figsize = (7, 5))
plt.errorbar(kvals, accy1, stds1, 
             label = f'{test_fraction1:.0%} Used for Testing; accuracy {np.max(accy1):.03f}')
plt.errorbar(kvals, accy2, stds2, 
             label = f'{test_fraction2:.0%} Used for Testing; accuracy {np.max(accy2):.03f}')
plt.xlabel('k')
plt.ylabel('Accuracy')
plt.legend(loc = 'best')
plt.xticks(kvals)
best_k = kvals[np.argmax(accy1)]
plt.title(r'Test Accuracy with $\pm 1\sigma$ Error Bars');

These plots illustrate important principles:

  • The more data used for training, the better the classifier will tend to perform
  • The less data used for testing, the more variable the test results will be (but the faster testing will go)

Note that the key decision here is what value to choose for k. So it makes sense to use the 33% test split, because the smaller error bars give us better confidence in our decision.

We can get a sense of why k-NN can succeed at this task by looking at the nearest neighbors of some points:

In [55]:
knn = KNeighborsClassifier(n_neighbors = 3)    
knn.fit(X, y)
neighbors = knn.kneighbors(X[:3,:], n_neighbors=3, return_distance=False)
In [56]:
#
plt.rc("image", cmap="binary")  # this sets a black on white colormap
# plot X_digits_valid[0]
for t in range(3):
    plt.figure(figsize=(8,2))
    plt.subplot(1, 4, 1)
    plt.imshow(X[t].reshape(8, 8))
    plt.axis('off')
    plt.title("Query")
    # plot three nearest neighbors from the training set
    for i in [0, 1, 2]:
        plt.subplot(1, 4, 2 + i)
        plt.title("neighbor {}".format(i))
        plt.imshow(X[neighbors[t, i]].reshape(8, 8))
        plt.axis('off')