#! /usr/bin/env python3
#
def iris_classify_gradboost ( ):

#*****************************************************************************80
#
## iris_classify_gradboost() uses gradient boosting to classify iris data.
#
#  Licensing:
#
#    This code is distributed under the MIT license.
#
#  Modified:
#
#    09 August 2023
#
#  Author:
#
#    Andreas Mueller, Sarah Guido.
#    This version by John Burkardt.
#
#  Reference:
#
#    Andreas Mueller, Sarah Guido,
#    Introduction to Machine Learning with Python,
#    O'Reilly, 2017,
#    ISBN: 978-1-449-36941-5.
#
  from sklearn.datasets import load_iris
  from sklearn.ensemble import GradientBoostingClassifier
  from sklearn.linear_model import LogisticRegression
  from sklearn.model_selection import train_test_split
  import numpy as np
  import platform
  import sklearn

  print ( '' )
  print ( 'iris_classify_gradboost():' )
  print ( '  Python version:       ' + platform.python_version ( ) )
  print ( '  scikit-learn version: ' + sklearn.__version__ )
  print ( '  Use gradient boosting to classify the iris dataset.' )
  print ( '  Then examine the decision function and the predicted probabilities.' )
#
#  Retrieve the dataset.
#
  print ( '' )
  print ( '  Retrieve the iris dataset, (X, y).' )

  iris = load_iris ( )
#
#  Split the dataset.
#
  X_train, X_test, y_train, y_test = train_test_split ( \
    iris.data, iris.target, random_state = 42 )
#
#  Get the classifier.
#
  gbrt = GradientBoostingClassifier ( learning_rate = 0.01, random_state = 0 )
#
#  Fit the training data.
#
  gbrt.fit ( X_train, y_train )
#
#  Examine the decision function on the test data.
#  For this 3-class problem, it has one column per class, and the argmax
#  across each row reproduces the predicted class.
#
  print ( '' )
  print ( '  Decision function shape:', gbrt.decision_function ( X_test ).shape )
  print ( '  First few rows of the decision function:' )
  print ( gbrt.decision_function ( X_test )[:6,:] )

  print ( '' )
  print ( '  argmax of decision function:' )
  print ( np.argmax ( gbrt.decision_function ( X_test ), axis = 1 ) )
  print ( '  Predictions:' )
  print ( gbrt.predict ( X_test ) )
#
#  Examine the predicted probabilities, which sum to 1 across each row.
#
  print ( '' )
  print ( '  First few rows of predicted probabilities:' )
  print ( gbrt.predict_proba ( X_test )[:6] )
  print ( '  Row sums should be 1:' )
  print ( gbrt.predict_proba ( X_test )[:6].sum ( axis = 1 ) )
#
#  The argmax of the predicted probabilities also recovers the predictions.
#
  print ( '' )
  print ( '  Recover the predictions:' )
  print ( '  argmax of predicted probabilities:' )
  print ( np.argmax ( gbrt.predict_proba ( X_test ), axis = 1 ) )
  print ( '  Predictions:' )
  print ( gbrt.predict ( X_test ) )
#
#  Repeat with logistic regression, using the string class names as targets.
#  An earlier version of this code skipped this step because it failed:
#  LogisticRegression was never instantiated.  Here it is instantiated,
#  and max_iter is raised so the solver converges on this data.
#
  print ( '' )
  print ( '  Repeat with logistic regression and named targets.' )

  logreg = LogisticRegression ( max_iter = 1000 )
  named_target = iris.target_names[y_train]
  print ( named_target )
  logreg.fit ( X_train, named_target )
  print ( '  Unique classes in training data:', logreg.classes_ )
  print ( '  Predictions:', logreg.predict ( X_test )[:10] )
  argmax_dec_func = np.argmax ( logreg.decision_function ( X_test ), axis = 1 )
  print ( '  argmax of decision function:', argmax_dec_func[:10] )
  print ( '  argmax combined with classes:', logreg.classes_[argmax_dec_func][:10] )
#
#  Terminate.
#
  print ( '' )
  print ( 'iris_classify_gradboost():' )
  print ( '  Normal end of execution.' )
  return

def timestamp ( ):

#*****************************************************************************80
#
## timestamp() prints the date as a timestamp.
#
#  Licensing:
#
#    This code is distributed under the MIT license.
#
#  Modified:
#
#    21 August 2019
#
#  Author:
#
#    John Burkardt
#
  import time

  t = time.time ( )
  print ( time.ctime ( t ) )

  return

if ( __name__ == '__main__' ):
  timestamp ( )
  iris_classify_gradboost ( )
  timestamp ( )