#! /usr/bin/env python3
#
#  NOTE(review): this file had been whitespace-mangled (all newlines stripped),
#  which made it unparseable.  Reconstructed below with logic, output strings,
#  and saved filenames preserved exactly; unused numpy/pandas imports dropped.
#
def forge_classify_knn ( ):
    """
    Classify the forge dataset using the k-nearest-neighbor method.

    Generates the forge dataset, plots it, demonstrates k-nearest-neighbor
    classification for k = 1 and k = 3, evaluates the accuracy of a
    3-neighbor model on a random train/test split, and plots the decision
    boundaries for 1, 3 and 9 neighbors.  Each plot is saved to a PNG file.

    Licensing:
        This code is distributed under the MIT license.

    Modified:
        15 June 2023

    Author:
        Andreas Mueller, Sarah Guido.
        Modifications by John Burkardt.

    Reference:
        Andreas Mueller, Sarah Guido,
        Introduction to Machine Learning with Python,
        OReilly, 2017,
        ISBN: 978-1-449-36941-5
    """
#
#  Imports are local to the function (original file's convention), so the
#  module can be imported without matplotlib/mglearn/sklearn installed.
#
    import matplotlib.pyplot as plt
    import mglearn
    import platform
    import sklearn
    from sklearn.model_selection import train_test_split
    from sklearn.neighbors import KNeighborsClassifier

    print ( '' )
    print ( 'forge_classify_knn():' )
    print ( ' Python version: ' + platform.python_version ( ) )
    print ( ' scikit-learn version: ' + sklearn.__version__ )
    print ( ' Classify the data.' )
    print ( ' Use the k-nearest neighbor method.' )
    print ( '' )
#
#  Generate the dataset.
#
    print ( ' Generate the forge dataset, (X, y).' )
    X, y = mglearn.datasets.make_forge ( )
    print ( " X.shape:", X.shape )
#
#  Plot the dataset.
#
    print ( ' Plot the dataset (X0,X1).' )
    plt.clf ( )
    mglearn.discrete_scatter ( X[:,0], X[:,1], y )
    plt.legend ( [ "Class 0", "Class 1" ], loc = 4 )
    plt.xlabel ( "First feature" )
    plt.ylabel ( "Second feature" )
    filename = "forge_classify_data.png"
    plt.savefig ( filename )
    print ( " Graphics saved as '" + filename + "'" )
#
#  Create a plot demonstrating k-nearest neighbors with k = 1 and 3.
#
    print ( ' Demonstrate k-nearest-neighbors with k = 1.' )
    plt.clf ( )
    mglearn.plots.plot_knn_classification ( n_neighbors = 1 )
    filename = "forge_classify_k1.png"
    plt.savefig ( filename )
    print ( " Graphics saved as '" + filename + "'" )

    print ( ' Demonstrate k-nearest-neighbors with k = 3.' )
    plt.clf ( )
    mglearn.plots.plot_knn_classification ( n_neighbors = 3 )
    filename = "forge_classify_k3.png"
    plt.savefig ( filename )
    print ( " Graphics saved as '" + filename + "'" )
#
#  Randomly split the data into training and testing sets.
#  For reproducibility, specify the initial random state.
#
    print ( '' )
    print ( 'Randomly split the data into training and testing sets.' )
    X_train, X_test, y_train, y_test = train_test_split ( \
        X, y, random_state = 0 )
#
#  Get the k-nearest-neighbors classifier and evaluate the accuracy
#  of the 3-neighbor model.
#
    clf = KNeighborsClassifier ( n_neighbors = 3 )
    clf.fit ( X_train, y_train )
    print ( " Test set predictions:", clf.predict ( X_test ) )
    print ( " Test set accuracy", clf.score ( X_test, y_test ) )
#
#  Show the decision boundary for 1, 3 and 9 neighbors.
#  Note that these models are deliberately fit on the full dataset (X, y).
#
    print ( '' )
    print ( 'Plot the decision boundary for 1, 3, and 9 neighbors.' )
    fig, axes = plt.subplots ( 1, 3, figsize = ( 10, 3 ) )
    for n_neighbors, ax in zip ( [ 1, 3, 9 ], axes ):
        clf = KNeighborsClassifier ( n_neighbors = n_neighbors ).fit ( X, y )
        mglearn.plots.plot_2d_separator ( clf, X, fill = True, \
            eps = 0.5, ax = ax, alpha = 0.4 )
        mglearn.discrete_scatter ( X[:,0], X[:,1], y, ax = ax )
        ax.set_title ( "{} neighbor(s)".format ( n_neighbors ) )
        ax.set_xlabel ( "Feature 0" )
        ax.set_ylabel ( "Feature 1" )
    axes[0].legend ( loc = 3 )
    filename = "forge_classify_boundary.png"
    plt.savefig ( filename )
    print ( " Graphics saved as '" + filename + "'" )
#
#  Terminate.
#
    print ( '' )
    print ( 'forge_classify_knn():' )
    print ( ' Normal end of execution.' )
    return

def timestamp ( ):
    """
    Print the current date and time as a timestamp.

    Licensing:
        This code is distributed under the MIT license.

    Modified:
        21 August 2019

    Author:
        John Burkardt
    """
    import time

    t = time.time ( )
    print ( time.ctime ( t ) )
    return

if ( __name__ == '__main__' ):
    timestamp ( )
    forge_classify_knn ( )
    timestamp ( )