#! /usr/bin/env python3
#
def circle_classify_gradboost():

#*****************************************************************************80
#
## circle_classify_gradboost() uses gradient boosting to classify circle data.
#
#  Discussion:
#
#    This example estimates the uncertainty in the predictions.
#
#    A GradientBoostingClassifier is fit to noisy two-class circle data.
#    Its decision function and predicted probabilities on the test split
#    are printed, and each is plotted next to the decision boundary and
#    saved as a PNG file in the current directory.
#
#  Licensing:
#
#    This code is distributed under the MIT license.
#
#  Modified:
#
#    08 August 2023
#
#  Author:
#
#    Andreas Mueller, Sarah Guido.
#    This version by John Burkardt.
#
#  Reference:
#
#    Andreas Mueller, Sarah Guido,
#    Introduction to Machine Learning with Python,
#    OReilly, 2017,
#    ISBN: 978-1-449-36941-5
#
    # Imports are kept local to the function so that importing this module
    # does not require the scientific stack to be installed.
    from sklearn.datasets import make_circles
    from sklearn.ensemble import GradientBoostingClassifier
    from sklearn.model_selection import train_test_split
    import numpy as np
    import platform
    import sklearn

    print('')
    print('circle_classify_gradboost():')
    print(' Python version: ' + platform.python_version())
    print(' scikit-learn version: ' + sklearn.__version__)
    print(' Estimate uncertainty in classification of circle data.')
    #
    # Generate the dataset.
    #
    print('')
    print(' Generate circle data.')

    X, y = make_circles(noise=0.25, factor=0.5, random_state=1)
    #
    # Rename the classes "red" and "blue".
    #
    y_named = np.array(["blue", "red"])[y]
    #
    # Split the dataset.  The named labels are used for fitting, the
    # integer labels for plotting; the named test labels are not needed.
    #
    X_train, X_test, y_train_named, _, y_train, y_test = train_test_split(
        X, y_named, y, random_state=0)
    #
    # Get the classifier and fit the training data.
    #
    gbrt = GradientBoostingClassifier(random_state=0)
    gbrt.fit(X_train, y_train_named)
    #
    # Look at the decision function.
    # Evaluate it (and the predictions) once and reuse the results.
    #
    decision_function = gbrt.decision_function(X_test)
    predictions = gbrt.predict(X_test)

    print('')
    print(' X_test.shape:', X_test.shape)
    print(' Decision function shape:', decision_function.shape)
    print(' First few entries of decision function:')
    print(decision_function[:6])
    print(' Thresholded decision function:')
    print(decision_function > 0)
    print(' Predictions:')
    print(predictions)
    #
    # Make the boolean True/False into 1/0, and use it to index the classes.
    #
    greater_zero = (decision_function > 0).astype(int)
    pred = gbrt.classes_[greater_zero]
    print(' pred is equal to predictions:')
    print(np.all(pred == predictions))

    print(' Decision function minimum = ', np.min(decision_function))
    print(' Decision function maximum = ', np.max(decision_function))
    #
    # Plot the decision function.
    #
    _plot_uncertainty(
        gbrt, X, X_train, X_test, y_train, y_test,
        'circle_classify_gradboost_1.png',
        alpha=0.4)
    #
    # Predicting probabilities.
    #
    probabilities = gbrt.predict_proba(X_test)

    print('')
    print(' Shape of probabilities:', probabilities.shape)
    print('')
    print(' First few probabilities:')
    print(probabilities[:6])
    #
    # Plot the decision boundary and class probabilities.
    #
    _plot_uncertainty(
        gbrt, X, X_train, X_test, y_train, y_test,
        'circle_classify_gradboost_2.png',
        alpha=0.5, function='predict_proba')
    #
    # Terminate.
    #
    print('')
    print('circle_classify_gradboost():')
    print(' Normal end of execution.')

    return


def _plot_uncertainty(gbrt, X, X_train, X_test, y_train, y_test, filename,
                      **scores_kwargs):

#*****************************************************************************80
#
## _plot_uncertainty() plots a decision boundary beside a score image.
#
#  Discussion:
#
#    The left panel shows the filled decision boundary of the classifier,
#    the right panel shows the image returned by mglearn's plot_2d_scores().
#    Test points (triangles) and training points (circles) are drawn on
#    both panels.  The figure is written to a file and then closed.
#
#  Input:
#
#    gbrt: the fitted classifier.
#
#    X: the full data array, passed to the mglearn plotting helpers.
#
#    X_train, X_test: the training and test points to scatter.
#
#    y_train, y_test: the labels used to mark the scattered points.
#
#    filename: the name of the image file to be written.
#
#    **scores_kwargs: extra keyword arguments forwarded unchanged to
#    mglearn.tools.plot_2d_scores(), such as alpha or function.
#
    import matplotlib.pyplot as plt
    import mglearn

    # plt.subplots() always creates its own figure, so no plt.clf() is needed
    # first; calling it would only leave an extra blank figure open.
    fig, axes = plt.subplots(1, 2, figsize=(13, 5))

    try:
        mglearn.tools.plot_2d_separator(
            gbrt, X, ax=axes[0], alpha=0.4, fill=True, cm=mglearn.cm2)
        scores_image = mglearn.tools.plot_2d_scores(
            gbrt, X, ax=axes[1], cm=mglearn.ReBl, **scores_kwargs)

        for ax in axes:
            mglearn.discrete_scatter(
                X_test[:, 0], X_test[:, 1], y_test, markers='^', ax=ax)
            mglearn.discrete_scatter(
                X_train[:, 0], X_train[:, 1], y_train, markers='o', ax=ax)
            ax.set_xlabel("Feature 0")
            ax.set_ylabel("Feature 1")

        # One colorbar shared by both panels.
        fig.colorbar(scores_image, ax=axes.tolist())
        axes[0].legend(
            ["Test class 0", "Test class 1", "Train class 0", "Train class 1"],
            ncol=4, loc=(0.1, 1.1))

        fig.savefig(filename)
        print(' Graphics saved as "' + filename + '"')
    finally:
        # Close this specific figure even if plotting or saving fails.
        plt.close(fig)

    return


def timestamp():

#*****************************************************************************80
#
## timestamp() prints the date as a timestamp.
#
#  Licensing:
#
#    This code is distributed under the MIT license.
#
#  Modified:
#
#    21 August 2019
#
#  Author:
#
#    John Burkardt
#
    import time

    t = time.time()
    print(time.ctime(t))

    return


if __name__ == '__main__':
    timestamp()
    circle_classify_gradboost()
    timestamp()