#! /usr/bin/env python3
#
def circle_classify_gradboost ( ):

#*****************************************************************************80
#
## circle_classify_gradboost() uses gradient boosting to classify circle data.
#
#  Discussion:
#
#    This example estimates the uncertainty in the predictions.
#
#    A GradientBoostingClassifier is fitted to noisy two-circle data whose
#    classes have been renamed "blue" and "red".  The decision function
#    and the predicted class probabilities on the test split are printed,
#    and two figures are written to the current directory:
#
#      circle_classify_gradboost_1.png  decision boundary + decision function
#      circle_classify_gradboost_2.png  decision boundary + class probabilities
#
#    The function takes no input and returns nothing.
#
#  Licensing:
#
#    This code is distributed under the MIT license.
#
#  Modified:
#
#    08 August 2023
#
#  Author:
#
#    Andreas Mueller, Sarah Guido.
#    This version by John Burkardt.
#
#  Reference:
#
#    Andreas Mueller, Sarah Guido,
#    Introduction to Machine Learning with Python,
#    O'Reilly, 2017,
#    ISBN: 978-1-449-36941-5
#
  from sklearn.datasets import make_circles
  from sklearn.ensemble import GradientBoostingClassifier
  from sklearn.model_selection import train_test_split
  import matplotlib.pyplot as plt
  import mglearn
  import numpy as np
  import platform
  import sklearn

  print ( '' )
  print ( 'circle_classify_gradboost():' )
  print ( '  Python version: ' + platform.python_version ( ) )
  print ( '  scikit-learn version: '+ sklearn.__version__ )
  print ( '  Estimate uncertainty in classification of circle data.' )
#
#  Generate the dataset.
#
  print ( '' )
  print ( '  Generate circle data.' )

  X, y = make_circles ( noise = 0.25, factor = 0.5, random_state = 1 )
#
#  Rename the classes: label 0 becomes "blue", label 1 becomes "red".
#
  y_named = np.array ( [ "blue", "red" ] )[y]
#
#  Split the dataset.  The named and the numeric labels are split together
#  so that they stay aligned with the same rows of X.
#
  X_train, X_test, y_train_named, y_test_named, y_train, y_test = \
    train_test_split ( X, y_named, y, random_state = 0 )
#
#  Get the classifier.
#
  gbrt = GradientBoostingClassifier ( random_state = 0 )
#
#  Fit the training data.
#
  gbrt.fit ( X_train, y_train_named )
#
#  Evaluate the decision function and the predictions once; the fitted
#  model is not modified afterwards, so the values can be reused below.
#
  decision_function = gbrt.decision_function ( X_test )
  predictions = gbrt.predict ( X_test )
#
#  Look at the decision function.
#
  print ( '' )
  print ( '  X_test.shape:', X_test.shape )
  print ( '  Decision function shape:', decision_function.shape )
  print ( '  First few entries of decision function:' )
  print ( decision_function[:6] )
  print ( '  Thresholded decision function:' )
  print ( decision_function > 0 )
  print ( '  Predictions:' )
  print ( predictions )
#
#  Make the boolean True/False into 1/0, and use that as an index
#  into the classifier's class names.
#
  greater_zero = ( decision_function > 0 ).astype ( int )
  pred = gbrt.classes_[greater_zero]
  print ( '  pred is equal to predictions:' )
  print ( np.all ( pred == predictions ) )

  print ( '  Decision function minimum = ', np.min ( decision_function ) )
  print ( '  Decision function maximum = ', np.max ( decision_function ) )

  def plot_and_save ( filename, scores_alpha, scores_function = None ):
#
#  plot_and_save() draws the decision boundary (left panel) and a score
#  image (right panel), overlays the test and training points on both,
#  and saves the figure as "filename".
#
#  scores_alpha is the transparency of the score image.
#  scores_function, if given, is passed to plot_2d_scores() as "function";
#  otherwise that argument is omitted and its default applies.
#
#  No plt.clf() is issued first: subplots() always creates a fresh figure,
#  and a preceding clf() would only leave a stray blank figure open.
#
    fig, axes = plt.subplots ( 1, 2, figsize = ( 13, 5 ) )

    mglearn.tools.plot_2d_separator (
      gbrt,
      X,
      ax = axes[0],
      alpha = 0.4,
      fill = True,
      cm = mglearn.cm2 )

    score_options = { }
    if ( scores_function is not None ):
      score_options['function'] = scores_function

    scores_image = mglearn.tools.plot_2d_scores (
      gbrt,
      X,
      ax = axes[1],
      alpha = scores_alpha,
      cm = mglearn.ReBl,
      **score_options )

    for ax in axes:
#
#  Test points are drawn first, then training points; the legend labels
#  below rely on this order.
#
      mglearn.discrete_scatter (
        X_test[:,0],
        X_test[:,1],
        y_test,
        markers = '^',
        ax = ax )

      mglearn.discrete_scatter (
        X_train[:,0],
        X_train[:,1],
        y_train,
        markers = 'o',
        ax = ax )

      ax.set_xlabel ( "Feature 0" )
      ax.set_ylabel ( "Feature 1" )

    plt.colorbar ( scores_image, ax = axes.tolist() )
    axes[0].legend (
      [ "Test class 0", "Test class 1", "Train class 0", "Train class 1" ],
      ncol = 4, loc = ( 0.1, 1.1 ) )

    plt.savefig ( filename )
    print ( '  Graphics saved as "' + filename + '"' )
#
#  Close exactly the figure created here, so nothing is left open.
#
    plt.close ( fig )

    return
#
#  Plot the decision function.
#
  plot_and_save ( 'circle_classify_gradboost_1.png', 0.4 )
#
#  Predicting probabilities.
#
  probabilities = gbrt.predict_proba ( X_test )

  print ( '' )
  print ( '  Shape of probabilities:', probabilities.shape )
  print ( '' )
  print ( '  First few probabilities:' )
  print ( probabilities[:6] )
#
#  Plot the decision boundary and class probabilities
#
  plot_and_save ( 'circle_classify_gradboost_2.png', 0.5, 'predict_proba' )
#
#  Terminate.
#
  print ( '' )
  print ( 'circle_classify_gradboost():' )
  print ( '  Normal end of execution.' )

  return

def timestamp ( ):

#*****************************************************************************80
#
## timestamp() prints the current date and time as a timestamp.
#
#  Discussion:
#
#    The current time is read with time.time() and formatted with
#    time.ctime(), then written to standard output on a single line.
#    The function takes no input and returns nothing.
#
#  Licensing:
#
#    This code is distributed under the MIT license. 
#
#  Modified:
#
#    21 August 2019
#
#  Author:
#
#    John Burkardt
#
  from time import ctime
  from time import time

  seconds_now = time ( )
  stamp = ctime ( seconds_now )
  print ( stamp )

  return None

#
#  When run as a script (not imported), execute the demonstration,
#  bracketed by a timestamp before and after.
#
if __name__ == '__main__':
  timestamp ( )
  circle_classify_gradboost ( )
  timestamp ( )