#! /usr/bin/env python3
#
def _min_max_scale(values, lo, hi):
    """Rescale values linearly so that lo maps to 0 and hi maps to 1.

    Works on NumPy arrays and on scalars.  When hi == lo the range is
    degenerate, so the values are only shifted by lo instead of being
    divided by zero.
    """
    span = hi - lo
    if span == 0:
        return values - lo
    return (values - lo) / span


def _plot_ratings(plt, drafted, rejected, png_name, candidate=None):
    """Plot both classes (and optionally the candidate), then save and show.

    plt is the matplotlib.pyplot module; drafted and rejected are 2D arrays
    whose columns 1 and 2 hold the x and y coordinates; png_name is the
    output image path; candidate, if given, is a 1D array indexed likewise.
    """
    plt.plot(drafted[:, 1], drafted[:, 2], 'b^', markersize=10)
    plt.plot(rejected[:, 1], rejected[:, 2], 'kv', markersize=10)
    legend_labels = ["Drafted", "Rejected"]
    if candidate is not None:
        plt.plot(candidate[1], candidate[2], 'r*', markersize=10)
        legend_labels.append("New Candidate")
    plt.grid(True)
    plt.xlabel("Speed")
    plt.ylabel("Agility")
    plt.title("Athletic ratings for drafting")
    plt.legend(tuple(legend_labels), loc="upper left")
    plt.savefig(png_name)
    plt.show()
    plt.close()


def draft_nn_normal(filename="draft.csv", candidate=(21, 6.75, 3.00)):
    """Demonstrate nearest neighbor classification with normalized data.

    Reads a CSV file with a 'Draft' column holding 'yes' / 'no' labels,
    min-max normalizes numeric columns 1 and 2 (plotted as speed and
    agility), plots the data with and without a new candidate, and prints
    the candidates's neighbors sorted by Euclidean distance together with
    the 1-nearest and 3-nearest neighbor predictions.

    Parameters:
        filename: path of the CSV file to read.
        candidate: sequence of three numbers laid out like one numeric row
            of the file; entries 1 and 2 are the values compared against
            the data.  NOTE(review): the numeric columns are presumably
            (id, speed, agility) in that order -- confirm against the CSV.

    Side effects:
        Writes "draft_results_normal.png" and "draft_candidate_normal.png"
        to the current directory and shows both figures.
    """
    from collections import Counter

    import matplotlib.pyplot as plt
    import numpy as np
    import pandas as pd

    print("draft_nn_normal():")
    print(" Demonstrate nearest neighbor classification with normalized data.")

    df = pd.read_csv(filename, header=0)
#
#  Print column headers.
#
    print(df.columns)

    def numeric_array(frame):
        # Request an explicit float copy: the arrays are overwritten in
        # place below, and to_numpy() may otherwise hand back a read-only
        # view or an integer array that would truncate the scaled values.
        numeric = frame.select_dtypes(include="number")
        return numeric.to_numpy(dtype=float, copy=True)
#
#  Split the numeric data by Draft label and convert to numpy arrays.
#
    npall = numeric_array(df)
    npyes = numeric_array(df[df['Draft'] == 'yes'])
    npno = numeric_array(df[df['Draft'] == 'no'])
#
#  Labels as a plain array so they can be indexed by position.
#
    labels = df['Draft'].to_numpy(dtype=object)
#
#  Get min/max of speed and agility.
#
    xmin = np.min(npall[:, 1])
    xmax = np.max(npall[:, 1])
    ymin = np.min(npall[:, 2])
    ymax = np.max(npall[:, 2])
    print(xmin, "<= speed <=", xmax)
    print(ymin, "<= agility <= ", ymax)
#
#  Introduce a new data item.
#
    npmaybe = np.array(candidate, dtype=float)
#
#  Normalize every array with the same bounds to equalize distances.
#
    for points in (npall, npyes, npno, npmaybe):
        points[..., 1] = _min_max_scale(points[..., 1], xmin, xmax)
        points[..., 2] = _min_max_scale(points[..., 2], ymin, ymax)
#
#  Display the results, then the results with the new item.
#
    _plot_ratings(plt, npyes, npno, "draft_results_normal.png")
    _plot_ratings(plt, npyes, npno, "draft_candidate_normal.png",
                  candidate=npmaybe)
#
#  Compute distances from the candidate to every row of the data.
#
    dist = np.sqrt((npall[:, 1] - npmaybe[1]) ** 2
                   + (npall[:, 2] - npmaybe[2]) ** 2)

    print('')
    print('index distance draft/nodraft:')
    print('')

    order = np.argsort(dist)
    print(np.transpose((order, dist[order], labels[order])))

    print("")
    print(" Draft/no draft prediction:", labels[order[0]])
#
#  Majority vote among the (up to) three closest rows.
#
    votes = Counter(labels[order[:3]])
    winner = votes.most_common(1)[0][0]
    print(" A 3-nearest neighbor prediction would be", str(winner).upper())


if __name__ == "__main__":
    draft_nn_normal()