#! /usr/bin/env python3
#
#  NOTE(review): this file had been whitespace-mangled (all newlines stripped),
#  which made it unparseable.  Reconstructed below with logic, output strings,
#  and saved filenames preserved exactly; unused numpy/pandas imports dropped.
#
def forge_classify_knn ( ):
    """
    Classify the forge dataset using the k-nearest-neighbor method.

    Generates the forge dataset, plots it, demonstrates k-nearest-neighbor
    classification for k = 1 and k = 3, evaluates the accuracy of a
    3-neighbor model on a random train/test split, and plots the decision
    boundaries for 1, 3 and 9 neighbors.  Each plot is saved to a PNG file.

    Licensing:
        This code is distributed under the MIT license.

    Modified:
        15 June 2023

    Author:
        Andreas Mueller, Sarah Guido.
        Modifications by John Burkardt.

    Reference:
        Andreas Mueller, Sarah Guido,
        Introduction to Machine Learning with Python,
        OReilly, 2017,
        ISBN: 978-1-449-36941-5
    """
#
#  Imports are local to the function (original file's convention), so the
#  module can be imported without matplotlib/mglearn/sklearn installed.
#
    import matplotlib.pyplot as plt
    import mglearn
    import platform
    import sklearn
    from sklearn.model_selection import train_test_split
    from sklearn.neighbors import KNeighborsClassifier

    print ( '' )
    print ( 'forge_classify_knn():' )
    print ( ' Python version: ' + platform.python_version ( ) )
    print ( ' scikit-learn version: ' + sklearn.__version__ )
    print ( ' Classify the data.' )
    print ( ' Use the k-nearest neighbor method.' )
    print ( '' )
#
#  Generate the dataset.
#
    print ( ' Generate the forge dataset, (X, y).' )
    X, y = mglearn.datasets.make_forge ( )
    print ( " X.shape:", X.shape )
#
#  Plot the dataset.
#
    print ( ' Plot the dataset (X0,X1).' )
    plt.clf ( )
    mglearn.discrete_scatter ( X[:,0], X[:,1], y )
    plt.legend ( [ "Class 0", "Class 1" ], loc = 4 )
    plt.xlabel ( "First feature" )
    plt.ylabel ( "Second feature" )
    filename = "forge_classify_data.png"
    plt.savefig ( filename )
    print ( " Graphics saved as '" + filename + "'" )
#
#  Create a plot demonstrating k-nearest neighbors with k = 1 and 3.
#
    print ( ' Demonstrate k-nearest-neighbors with k = 1.' )
    plt.clf ( )
    mglearn.plots.plot_knn_classification ( n_neighbors = 1 )
    filename = "forge_classify_k1.png"
    plt.savefig ( filename )
    print ( " Graphics saved as '" + filename + "'" )

    print ( ' Demonstrate k-nearest-neighbors with k = 3.' )
    plt.clf ( )
    mglearn.plots.plot_knn_classification ( n_neighbors = 3 )
    filename = "forge_classify_k3.png"
    plt.savefig ( filename )
    print ( " Graphics saved as '" + filename + "'" )
#
#  Randomly split the data into training and testing sets.
#  For reproducibility, specify the initial random state.
#
    print ( '' )
    print ( 'Randomly split the data into training and testing sets.' )
    X_train, X_test, y_train, y_test = train_test_split ( \
        X, y, random_state = 0 )
#
#  Get the k-nearest-neighbors classifier and evaluate the accuracy
#  of the 3-neighbor model.
#
    clf = KNeighborsClassifier ( n_neighbors = 3 )
    clf.fit ( X_train, y_train )
    print ( " Test set predictions:", clf.predict ( X_test ) )
    print ( " Test set accuracy", clf.score ( X_test, y_test ) )
#
#  Show the decision boundary for 1, 3 and 9 neighbors.
#  Note that these models are deliberately fit on the full dataset (X, y).
#
    print ( '' )
    print ( 'Plot the decision boundary for 1, 3, and 9 neighbors.' )
    fig, axes = plt.subplots ( 1, 3, figsize = ( 10, 3 ) )
    for n_neighbors, ax in zip ( [ 1, 3, 9 ], axes ):
        clf = KNeighborsClassifier ( n_neighbors = n_neighbors ).fit ( X, y )
        mglearn.plots.plot_2d_separator ( clf, X, fill = True, \
            eps = 0.5, ax = ax, alpha = 0.4 )
        mglearn.discrete_scatter ( X[:,0], X[:,1], y, ax = ax )
        ax.set_title ( "{} neighbor(s)".format ( n_neighbors ) )
        ax.set_xlabel ( "Feature 0" )
        ax.set_ylabel ( "Feature 1" )
    axes[0].legend ( loc = 3 )
    filename = "forge_classify_boundary.png"
    plt.savefig ( filename )
    print ( " Graphics saved as '" + filename + "'" )
#
#  Terminate.
#
    print ( '' )
    print ( 'forge_classify_knn():' )
    print ( ' Normal end of execution.' )
    return

def timestamp ( ):
    """
    Print the current date and time as a timestamp.

    Licensing:
        This code is distributed under the MIT license.

    Modified:
        21 August 2019

    Author:
        John Burkardt
    """
    import time

    t = time.time ( )
    print ( time.ctime ( t ) )
    return

if ( __name__ == '__main__' ):
    timestamp ( )
    forge_classify_knn ( )
    timestamp ( )