#! /usr/bin/env python3
#
def iris_classify_knn(n_neighbors=1):

#*****************************************************************************80
#
## iris_classify_knn() uses k-nearest neighbor classification on iris data.
#
#  Discussion:
#
#    There are three species of iris.
#
#    Each species has characteristic values for four quantities:
#      petal length,
#      petal width,
#      sepal length,
#      sepal width.
#
#    We are given data containing 150 samples, specifying the species and
#    the four measurements.
#
#    Our task is to construct a model which can accept a set of measurements
#    corresponding to a new iris, and estimate the corresponding species
#    to which it belongs.
#
#    We will do this using the k-nearest-neighbors algorithm, which classifies
#    a new data item by looking at the k nearest sets of measurements, and
#    choosing the species that most of those neighbors share.  By default,
#    k is simply 1.
#
#    The function prints a description of the data set, splits it into
#    training and testing sets, saves a scatter matrix of the training data
#    to 'iris_classify_knn.png', fits the classifier, predicts the species
#    of one new sample, and reports the accuracy on the testing set.
#
#  Licensing:
#
#    This code is distributed under the MIT license.
#
#  Modified:
#
#    15 June 2023
#
#  Author:
#
#    Andreas Mueller, Sarah Guido.
#    Modifications by John Burkardt.
#
#  Reference:
#
#    Andreas Mueller, Sarah Guido,
#    Introduction to Machine Learning with Python,
#    O'Reilly, 2017,
#    ISBN: 978-1-449-36941-5
#
#  Input:
#
#    integer n_neighbors: the number of neighbors k consulted for each
#    prediction.  The default value is 1.
#
#  Output:
#
#    None.  Results are printed, and a plot file is written.
#
    # The imports are kept at function scope, so that merely importing this
    # file does not require the plotting and machine learning packages.
    import matplotlib.pyplot as plt
    import mglearn
    import numpy as np
    import pandas as pd
    import platform
    import sklearn
    from sklearn.datasets import load_iris
    from sklearn.model_selection import train_test_split
    from sklearn.neighbors import KNeighborsClassifier

    print('')
    print('iris_classify_knn():')
    print(' Python version: ' + platform.python_version())
    print(' scikit-learn version: ' + sklearn.__version__)
    print(' Retrieve the iris data set.')
    print(' Classify the data.')
    print(' Use the k-nearest neighbor method.')
    print('')
#
#  Load the iris dataset.
#
    iris_dataset = load_iris()
#
#  Print the keys.
#
    print(" iris_dataset.keys():")
    print(iris_dataset.keys())
#
#  Print the dataset description.
#
    print('')
    print(" iris_dataset['DESCR']:")
    print(iris_dataset['DESCR'])
#
#  Print the names of the three species.
#
    print('')
    print(" iris_dataset['target_names']:")
    print(iris_dataset['target_names'])
#
#  Print the feature names.
#
    print('')
    print(" iris_dataset['feature_names']:")
    print(iris_dataset['feature_names'])
#
#  Print the type, shape, and sample of data.
#
    print('')
    print(" type ( iris_dataset['data'] ):")
    print(type(iris_dataset['data']))
    print(" iris_dataset['data'].shape:")
    print(iris_dataset['data'].shape)
    print('')
    print(" Initial rows of data:")
    print(iris_dataset['data'][:5])
#
#  Print the type, shape, and all the target values.
#
    print('')
    print(" type ( iris_dataset['target'] ):")
    print(type(iris_dataset['target']))
    print(" iris_dataset['target'].shape:")
    print(iris_dataset['target'].shape)
    print('')
    print(" target values:")
    print(iris_dataset['target'])
#
#  Randomly split the data into training and testing sets.
#  For reproducibility, specify the initial random state.
#
    print('')
    print('Randomly split the data into training and testing sets.')
    X_train, X_test, y_train, y_test = train_test_split(
        iris_dataset['data'], iris_dataset['target'], random_state=0)
#
#  To verify, print shapes and samples of training and testing sets.
#
    print(' X_train.shape = ', X_train.shape)
    print(' y_train.shape = ', y_train.shape)
    print(' X_test.shape = ', X_test.shape)
    print(' y_test.shape = ', y_test.shape)
#
#  Create a dataframe so we can use pandas to plot.
#  Points are colored by their training label.
#
    iris_dataframe = pd.DataFrame(
        X_train, columns=iris_dataset['feature_names'])
    pd.plotting.scatter_matrix(
        iris_dataframe, c=y_train, figsize=(15, 15),
        marker='o', hist_kwds={'bins': 20}, s=60, alpha=0.8,
        cmap=mglearn.cm3)
    plt.savefig('iris_classify_knn.png')
#
#  Release the figure once it is on disk, so that it does not linger
#  in pyplot's list of open figures.
#
    plt.close()
#
#  Get the k-nearest-neighbors classifier.
#
    knn = KNeighborsClassifier(n_neighbors=n_neighbors)
#
#  Build the model using the training set.
#
    knn.fit(X_train, y_train)
#
#  Use the model to predict the class of a new piece of data.
#  The new sample is a 1 x 4 array: one row of four measurements.
#
    print("")
    print("Use the model to predict the class of a new piece of data.")
    X_new = np.array([[5.0, 2.9, 1.0, 0.2]])
    print(" X_new.shape:", X_new.shape)

    y_new = knn.predict(X_new)
    print(" Prediction:", y_new)
    print(" Predicted target name:",
          iris_dataset['target_names'][y_new])
#
#  Use the test data to verify the model.
#  The score is computed twice, by hand and by the classifier itself;
#  the two printed values are expected to agree.
#
    print("")
    print("Use the test data to verify the model.")
    y_pred = knn.predict(X_test)
    print(" Test set predictions\n", y_pred)
    print(" Test set score:", np.mean(y_pred == y_test))
    print(" Test set score:", knn.score(X_test, y_test))
#
#  Terminate.
#
    print('')
    print('iris_classify_knn():')
    print(' Normal end of execution.')

    return

def timestamp():

#*****************************************************************************80
#
## timestamp() prints the date as a timestamp.
#
#  Licensing:
#
#    This code is distributed under the MIT license.
#
#  Modified:
#
#    21 August 2019
#
#  Author:
#
#    John Burkardt
#
#  Output:
#
#    None.  The current local time is printed in time.ctime() format.
#
    import time

    # With no argument, ctime() formats the current time.
    print(time.ctime())

    return

if __name__ == '__main__':
    timestamp()
    iris_classify_knn()
    timestamp()