#! /usr/bin/env python3
#
def cancer_classify_svm_rbf ( ):

#*****************************************************************************80
#
## cancer_classify_svm_rbf() classifies cancer data using SVM with RBF.
#
#  Discussion:
#
#    It is discovered that the data is badly scaled, so that overfitting
#    results.
#
#    The data is rescaled, and the fitting of the test data
#    improves.
#
#    None of my accuracy values correspond to those in the text for
#    this example.
#
#    The function takes no arguments and returns nothing.  It prints
#    the training and testing accuracy for the raw data, then for the
#    rescaled data with C = 1 and with C = 1000, and it saves a boxplot
#    of the raw training features to a PNG file in the current directory.
#
#  Licensing:
#
#    This code is distributed under the MIT license.
#
#  Modified:
#
#    04 August 2023
#
#  Author:
#
#    Andreas Mueller, Sarah Guido.
#    Modifications by John Burkardt.
#
#  Reference:
#
#    Andreas Mueller, Sarah Guido,
#    Introduction to Machine Learning with Python,
#    OReilly, 2017,
#    ISBN: 978-1-449-36941-5
#
  from sklearn.datasets import load_breast_cancer
  from sklearn.model_selection import train_test_split
  from sklearn.svm import SVC
  import matplotlib.pyplot as plt
  import platform
  import sklearn

  print ( '' )
  print ( 'cancer_classify_svm_rbf():' )
  print ( ' Python version: ' + platform.python_version ( ) )
  print ( ' scikit-learn version: '+ sklearn.__version__ )
  print ( ' Classify data from the cancer dataset.' )
  print ( ' Use the support vector classifier with the RBF kernel.' )
  print ( ' Rescale the data to improve the fitting of the test data.' )
#
#  Generate the dataset.
#
  print ( '' )
  print ( ' Retrieve the cancer dataset, (X, y).' )

  cancer = load_breast_cancer ( )
#
#  Split the data.
#  A fixed random_state keeps the split the same from run to run.
#
  X_train, X_test, y_train, y_test = train_test_split (
    cancer.data, cancer.target, random_state = 0 )
#
#  Fit the support vector classifier to the unscaled data.
#
  svc = SVC ( kernel = 'rbf', C = 1.0 )
  svc.fit ( X_train, y_train )
#
#  Compute the training and testing accuracy.
#
  print ( ' Training accuracy = ', svc.score ( X_train, y_train ) )
  print ( ' Testing accuracy = ', svc.score ( X_test, y_test ) )
#
#  Plot the data ranges.
#  The symlog axis lets features of very different magnitude share one plot.
#
  plt.clf ( )
  plt.boxplot ( X_train, manage_ticks = False )
  plt.grid ( True )
  plt.yscale ( "symlog" )
  plt.xlabel ( "Feature index" )
  plt.ylabel ( "Feature magnitude" )
  plt.title ( "Cancer feature ranges vary too widely" )
#
#  Name the file with an explicit extension, so that the message below
#  reports the file that is actually written.
#
  filename = 'cancer_classify_svm_rbf_feature_range.png'
  plt.savefig ( filename )
  print ( ' Graphics saved as "' + filename + '"' )
  plt.close ( )
#
#  Rescale the data.
#  Each training feature is shifted and divided so it lies in [0,1].
#
  min_on_training = X_train.min ( axis = 0 )
  range_on_training = ( X_train - min_on_training ).max ( axis = 0 )
  X_train_scaled = ( X_train - min_on_training ) / range_on_training

  print ( '' )
  print ( 'Rescale the training data:' )
  print ( '' )
  print ( ' Minimum for each feature = ', X_train_scaled.min ( axis = 0 ) )
  print ( ' Maximum for each feature = ', X_train_scaled.max ( axis = 0 ) )
#
#  Rescale the test data.
#  The training minimum and range are reused, so test values may fall
#  slightly outside [0,1].
#
  X_test_scaled = ( X_test - min_on_training ) / range_on_training
#
#  Work with the scaled data, first with C = 1, then with C = 1000
#  to fit a more complex model.
#
  for C in ( 1.0, 1000.0 ):

    if ( C == 1000.0 ):
      print ( '' )
      print ( 'Now try SVC with C = 1000 to fit a more complex model.' )

    svc = SVC ( kernel = 'rbf', C = C )
    svc.fit ( X_train_scaled, y_train )

    print ( ' Scaled training accuracy = ', \
      svc.score ( X_train_scaled, y_train ) )
    print ( ' Scaled testing accuracy = ', \
      svc.score ( X_test_scaled, y_test ) )
#
#  Terminate.
#
  print ( '' )
  print ( 'cancer_classify_svm_rbf():' )
  print ( ' Normal end of execution.' )

  return

def timestamp ( ):

#*****************************************************************************80
#
## timestamp() prints the date as a timestamp.
#
#  Discussion:
#
#    The current time is read with time.time() and printed in the
#    format produced by time.ctime().  Nothing is returned.
#
#  Licensing:
#
#    This code is distributed under the MIT license.
#
#  Modified:
#
#    21 August 2019
#
#  Author:
#
#    John Burkardt
#
  import time

  t = time.time ( )
  print ( time.ctime ( t ) )

  return

if ( __name__ == '__main__' ):
  timestamp ( )
  cancer_classify_svm_rbf ( )
  timestamp ( )