#! /usr/bin/env python3
#
def cancer_scale_minmax ( ):

#*****************************************************************************80
#
## cancer_scale_minmax() applies min-max scaling to cancer data.
#
#  Discussion:
#
#    The breast cancer dataset is split into training and test sets
#    (random_state = 0, so the split is reproducible).
#
#    A MinMaxScaler is fitted to the training data ONLY, and that same
#    transformation is then applied to both the training and the test
#    data.  The printed minima and maxima show the effect on each set.
#
#    Boxplots of the unscaled and scaled training data are saved to
#    "cancer_data_range_unscaled.png" and "cancer_data_range_scaled.png".
#
#    Finally, an RBF-kernel SVC is trained on the scaled training data,
#    first with C = 1 and then with C = 1000, and the training and testing
#    accuracies are printed.
#
#  Licensing:
#
#    This code is distributed under the MIT license.
#
#  Modified:
#
#    04 August 2023
#
#  Author:
#
#    Andreas Mueller, Sarah Guido.
#    This version by John Burkardt.
#
#  Reference:
#
#    Andreas Mueller, Sarah Guido,
#    Introduction to Machine Learning with Python,
#    OReilly, 2017,
#    ISBN: 978-1-449-36941-5
#
#  Imports are local to the function, and only modules that are actually
#  used are loaded.
#
  from sklearn.datasets import load_breast_cancer
  from sklearn.model_selection import train_test_split
  from sklearn.preprocessing import MinMaxScaler
  import platform
  import sklearn

  print ( '' )
  print ( 'cancer_scale_minmax():' )
  print ( ' Python version: ' + platform.python_version ( ) )
  print ( ' scikit-learn version: '+ sklearn.__version__ )
  print ( ' Classify data from the cancer dataset.' )
  print ( ' Rescale the data using the minmax scaler.' )
#
#  Generate the dataset.
#
  print ( '' )
  print ( ' Retrieve the cancer dataset, (X, y).' )
  cancer = load_breast_cancer ( )
#
#  Split the data.
#
  X_train, X_test, y_train, y_test = train_test_split (
    cancer.data, cancer.target, random_state = 0 )

  print ( "" )
  print ( " X_train.shape:" )
  print ( X_train.shape )
  print ( " X_test.shape:" )
  print ( X_test.shape )
#
#  Set up the scaler for the X training data.
#  The scaler never sees the test data while being fitted.
#
  scaler = MinMaxScaler ( )
  scaler.fit ( X_train )
#
#  Apply the scaler to the training data.
#
  X_train_scaled = scaler.transform ( X_train )
  _print_range_summary ( "X_train", X_train, X_train_scaled )
#
#  Apply the scaler to the test data.
#
  X_test_scaled = scaler.transform ( X_test )
  _print_range_summary ( "X_test", X_test, X_test_scaled )
#
#  Plot the data ranges.
#
  _save_range_boxplot ( X_train, "Range of unscaled training data", \
    'cancer_data_range_unscaled.png' )
  _save_range_boxplot ( X_train_scaled, "Range of scaled training data", \
    'cancer_data_range_scaled.png' )
#
#  Work with the scaled data.
#
  _fit_and_score_svc ( 1.0, X_train_scaled, y_train, X_test_scaled, y_test )
#
#  Now try increasing C to fit a more complex model.
#
  print ( '' )
  print ( 'Now try SVC with C = 1000 to fit a more complex model.' )
  _fit_and_score_svc ( 1000.0, X_train_scaled, y_train, X_test_scaled, y_test )
#
#  Terminate.
#
  print ( '' )
  print ( 'cancer_scale_minmax():' )
  print ( ' Normal end of execution.' )

  return

def _print_range_summary ( name, X, X_scaled ):

#*****************************************************************************80
#
## _print_range_summary() prints per-feature minima and maxima of a data set.
#
#  Input:
#
#    string name: the variable name used in the shape label, so that the
#    label reads " <name>_scaled.shape:".
#
#    array X: the data before scaling.
#
#    array X_scaled: the data after scaling.
#
  print ( "" )
  print ( " " + name + "_scaled.shape:", X_scaled.shape )
#
#  axis=0 reduces over the rows, giving one value per column.
#
  print ( " Minima before scaling:", X.min(axis=0) )
  print ( " Maxima before scaling:", X.max(axis=0) )
  print ( " Minima after scaling:", X_scaled.min(axis=0) )
  print ( " Maxima after scaling:", X_scaled.max(axis=0) )

  return

def _save_range_boxplot ( X, title, filename ):

#*****************************************************************************80
#
## _save_range_boxplot() saves a boxplot of the data ranges to a file.
#
#  Input:
#
#    array X: the data to be plotted.
#
#    string title: the title of the plot.
#
#    string filename: the name of the graphics file to be written.
#
  import matplotlib.pyplot as plt

  plt.clf ( )
  plt.boxplot ( X, manage_ticks = False )
  plt.grid ( True )
#
#  The same "symlog" vertical scale is used for both plots, so the
#  unscaled and scaled pictures can be compared directly.
#
  plt.yscale ( "symlog" )
  plt.xlabel ( "Feature index" )
  plt.ylabel ( "Feature magnitude" )
  plt.title ( title )
  plt.savefig ( filename )
  print ( ' Graphics saved as "' + filename + '"' )
  plt.close ( )

  return

def _fit_and_score_svc ( C, X_train, y_train, X_test, y_test ):

#*****************************************************************************80
#
## _fit_and_score_svc() trains an RBF SVC and prints its accuracies.
#
#  Input:
#
#    real C: the value passed to SVC as its C parameter.
#
#    array X_train, y_train: the training data and targets, used both
#    to fit the classifier and to compute the training accuracy.
#
#    array X_test, y_test: the test data and targets, used only to
#    compute the testing accuracy.
#
  from sklearn.svm import SVC

  svc = SVC ( kernel = 'rbf', C = C )
  svc.fit ( X_train, y_train )

  print ( ' Scaled training accuracy = ', svc.score ( X_train, y_train ) )
  print ( ' Scaled testing accuracy = ', svc.score ( X_test, y_test ) )

  return

def timestamp ( ):

#*****************************************************************************80
#
## timestamp() prints the date as a timestamp.
#
#  Discussion:
#
#    The current time, as returned by time.time(), is formatted by
#    time.ctime() and printed to standard output.
#
#  Licensing:
#
#    This code is distributed under the MIT license.
#
#  Modified:
#
#    21 August 2019
#
#  Author:
#
#    John Burkardt
#
  import time

  t = time.time ( )
  print ( time.ctime ( t ) )

  return

if ( __name__ == '__main__' ):
  timestamp ( )
  cancer_scale_minmax ( )
  timestamp ( )