#! /usr/bin/env python3
#
def cancer_classify_svm_rbf ( ):

#*****************************************************************************80
#
## cancer_classify_svm_rbf() classifies cancer data using SVM with RBF.
#
#  Discussion:
#
#    The breast cancer dataset is split into training and test sets, and
#    a support vector classifier with the RBF kernel is fitted three times:
#
#    1) on the raw data, with C = 1;
#    2) on data rescaled feature-by-feature to [0,1], with C = 1;
#    3) on the rescaled data, with C = 1000.
#
#    Training and test accuracies are printed after each fit.
#
#    A boxplot of the raw training features is saved to a PNG file; it
#    shows that the features differ in magnitude by several orders,
#    which is the motivation for rescaling.
#
#    The rescaling parameters (minimum and range of each feature) are
#    computed from the training data only, and then applied unchanged to
#    the test data, so the scaled test data need not lie within [0,1].
#
#    None of my accuracy values correspond to those in the text for
#    this example.  A likely reason: the text was written for an older
#    scikit-learn in which the SVC default was gamma = 'auto'; since
#    version 0.22 the default is gamma = 'scale'.
#
#  Licensing:
#
#    This code is distributed under the MIT license.
#
#  Modified:
#
#    04 August 2023
#
#  Author:
#
#    Andreas Mueller, Sarah Guido.
#    Modifications by John Burkardt.
#
#  Reference:
#
#    Andreas Mueller, Sarah Guido,
#    Introduction to Machine Learning with Python,
#    OReilly, 2017,
#    ISBN: 978-1-449-36941-5
#
#  Output:
#
#    The function returns nothing.  It prints to standard output and
#    writes the file "cancer_classify_svm_rbf_feature_range.png".
#
  from sklearn.datasets import load_breast_cancer
  from sklearn.model_selection import train_test_split
  from sklearn.svm import SVC
  import matplotlib.pyplot as plt
  import platform
  import sklearn

  print ( '' )
  print ( 'cancer_classify_svm_rbf():' )
  print ( '  Python version: ' + platform.python_version ( ) )
  print ( '  scikit-learn version: '+ sklearn.__version__ )
  print ( '  Classify data from the cancer dataset.' )
  print ( '  Use the support vector classifier with the RBF kernel.' )
  print ( '  Rescale the data to improve the fitting of the test data.' )
#
#  Load the dataset.
#
  print ( '' )
  print ( '  Retrieve the cancer dataset, (X, y).' )
  cancer = load_breast_cancer ( )
#
#  Split the data.
#  A fixed random_state makes the split reproducible.
#
  X_train, X_test, y_train, y_test = train_test_split ( \
    cancer.data, cancer.target, random_state = 0 )
#
#  Fit the support vector classifier to the unscaled data.
#
  svc = SVC ( kernel = 'rbf', C = 1.0 )
  svc.fit ( X_train, y_train )
#
#  Compute the training and testing accuracy.
#
  print ( '  Training accuracy = ', svc.score ( X_train, y_train ) )
  print ( '  Testing accuracy  = ', svc.score ( X_test, y_test ) )
#
#  Plot the data ranges.
#  A symmetric log scale is used because the features span several
#  orders of magnitude.
#
  plt.clf ( )
  plt.boxplot ( X_train, manage_ticks = False )
  plt.grid ( True )
  plt.yscale ( "symlog" )
  plt.xlabel ( "Feature index" )
  plt.ylabel ( "Feature magnitude" )
  plt.title ( "Cancer feature ranges vary too widely" )
#
#  Give the extension explicitly.  Without one, savefig() appends the
#  default format's extension itself, and the message below would then
#  name a file that does not exist.
#
  filename = 'cancer_classify_svm_rbf_feature_range.png'
  plt.savefig ( filename )
  print ( '  Graphics saved as "' + filename + '"' )
  plt.close ( )
#
#  Rescale the training data so each feature lies in [0,1].
#
  min_on_training = X_train.min ( axis = 0 )
  range_on_training = ( X_train - min_on_training ).max ( axis = 0 )
  X_train_scaled = ( X_train - min_on_training ) / range_on_training
  print ( '' )
  print ( 'Rescale the training data:' )
  print ( '' )
  print ( '  Minimum for each feature = ', X_train_scaled.min ( axis = 0 ) )
  print ( '  Maximum for each feature = ', X_train_scaled.max ( axis = 0 ) )
#
#  Rescale the test data, using the minimum and range of the TRAINING data,
#  so that both sets undergo exactly the same transformation.
#
  X_test_scaled = ( X_test - min_on_training ) / range_on_training
#
#  Work with the scaled data.
#
  svc = SVC ( kernel = 'rbf', C = 1.0 )
  svc.fit ( X_train_scaled, y_train )
  print ( '  Scaled training accuracy = ', svc.score ( X_train_scaled, y_train ) )
  print ( '  Scaled testing accuracy  = ', svc.score ( X_test_scaled, y_test ) )
#
#  Now try increasing C to fit a more complex model.
#
  print ( '' )
  print ( 'Now try SVC with C = 1000 to fit a more complex model.' )

  svc = SVC ( kernel = 'rbf', C = 1000.0 )
  svc.fit ( X_train_scaled, y_train )
  print ( '  Scaled training accuracy = ', svc.score ( X_train_scaled, y_train ) )
  print ( '  Scaled testing accuracy  = ', svc.score ( X_test_scaled, y_test ) )
#
#  Terminate.
#
  print ( '' )
  print ( 'cancer_classify_svm_rbf():' )
  print ( '  Normal end of execution.' )

  return

def timestamp ( ):

#*****************************************************************************80
#
## timestamp() prints the current date and time.
#
#  Discussion:
#
#    The current time is read from the system clock and printed on a
#    single line, in the text format produced by time.ctime().
#
#  Licensing:
#
#    This code is distributed under the MIT license. 
#
#  Modified:
#
#    21 August 2019
#
#  Author:
#
#    John Burkardt
#
#  Output:
#
#    The function returns nothing; the timestamp goes to standard output.
#
  from time import ctime
  from time import time
#
#  Seconds since the epoch, converted to a readable local-time string.
#
  now = time ( )
  stamp = ctime ( now )
  print ( stamp )

#
#  When this file is run as a script (rather than imported), execute the
#  demonstration, printing a timestamp before and after it.
#
if __name__ == "__main__":
  timestamp ( )
  cancer_classify_svm_rbf ( )
  timestamp ( )