#! /usr/bin/env python3
#
def cancer_classify_logistic():
    """Classify the breast cancer dataset with logistic regression.

    The dataset is split into stratified training and test sets, and
    LogisticRegression models are fitted for C = 1 (default), 100 and 0.001.
    Training and test scores are printed for each model, and the learned
    coefficients are plotted per feature.  The experiment is run once with
    the default L2 penalty and once with the L1 penalty.

    Output files:
        cancer_classify_logistic_l2.png
        cancer_classify_logistic_l1.png

    Licensing:
        This code is distributed under the MIT license.

    Modified:
        05 July 2023

    Author:
        Andreas Mueller, Sarah Guido.
        Modifications by John Burkardt.

    Reference:
        Andreas Mueller, Sarah Guido,
        Introduction to Machine Learning with Python,
        OReilly, 2017,
        ISBN: 978-1-449-36941-5
    """
    import platform

    import matplotlib.pyplot as plt
    import sklearn
    from sklearn.datasets import load_breast_cancer
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import train_test_split

    print('')
    print('cancer_classify_logistic():')
    print(' Python version: ' + platform.python_version())
    print(' scikit-learn version: ' + sklearn.__version__)
    print(' Use logistic regression on the breast cancer dataset.')
    print(' Explore the influence of the C parameter.')
#
#  Generate the dataset.
#
    print('')
    print(' Retrieve the cancer dataset, (X, y).')

    cancer = load_breast_cancer()

    X_train, X_test, y_train, y_test = train_test_split(
        cancer.data, cancer.target, stratify=cancer.target, random_state=42)
#
#  Compute training and testing accuracy for varying values of C,
#  using the default L2 penalty.
#
    logreg = LogisticRegression().fit(X_train, y_train)
    print('')
    print('With default C = 1:')
    _print_scores(logreg, X_train, y_train, X_test, y_test)

    logreg100 = LogisticRegression(C=100).fit(X_train, y_train)
    print('')
    print('With C = 100:')
    _print_scores(logreg100, X_train, y_train, X_test, y_test)

    logreg001 = LogisticRegression(C=0.001).fit(X_train, y_train)
    print('')
    # The heading must name the C value that was actually fitted (0.001).
    print('With C = 0.001:')
    _print_scores(logreg001, X_train, y_train, X_test, y_test)

    plt.clf()
    plt.plot(logreg.coef_.T, 'o', label='C=1')
    plt.plot(logreg100.coef_.T, '^', label='C=100')
    plt.plot(logreg001.coef_.T, 'v', label='C=0.001')
    _finish_coefficient_plot(
        cancer,
        'Coefficients learned by logistic regression, L2 norm',
        'cancer_classify_logistic_l2.png')
#
#  Repeat using L1 norm.
#
#  The default "lbfgs" solver rejects the request with
#    "Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty"
#  so a solver that supports the L1 penalty is requested explicitly.
#
    print('')
    print('Repeat using l1 norm.')
    print(' Use the "liblinear" solver, which supports the "l1" penalty.')

    plt.clf()

    for C, marker in zip([0.001, 1, 100], ['o', '^', 'v']):
        lr_l1 = LogisticRegression(
            C=C, penalty="l1", solver="liblinear").fit(X_train, y_train)
        print('')
        print('With C = ' + str(C) + ':')
        _print_scores(lr_l1, X_train, y_train, X_test, y_test, tag=', l1')
        plt.plot(lr_l1.coef_.T, marker, label="C = " + str(C))

    _finish_coefficient_plot(
        cancer,
        'Coefficients learned by logistic regression, L1 norm',
        'cancer_classify_logistic_l1.png')
#
#  Terminate.
#
    print('')
    print('cancer_classify_logistic():')
    print(' Normal end of execution.')

    return


def _print_scores(model, X_train, y_train, X_test, y_test, tag=''):
    """Print the training and test set scores of a fitted model.

    Parameters:
        model: fitted estimator providing a score(X, y) method.
        X_train, y_train: training data and targets.
        X_test, y_test: test data and targets.
        tag: optional text appended to each label (for example ', l1').
    """
    print(' Training set score' + tag + ' ', model.score(X_train, y_train))
    print(' Test set score' + tag + ' ', model.score(X_test, y_test))

    return


def _finish_coefficient_plot(cancer, title, filename):
    """Decorate the current coefficient plot, save it to a file, and close it.

    Parameters:
        cancer: the loaded dataset; its data.shape[1] gives the number of
            features and its feature_names label the x axis ticks.
        title: the plot title.
        filename: name of the image file to write.
    """
    import matplotlib.pyplot as plt

    feature_count = cancer.data.shape[1]

    plt.xticks(range(feature_count), cancer.feature_names, rotation=90)
    # Horizontal reference line at zero, to make the sign of each
    # coefficient easy to read.
    plt.hlines(0, 0, feature_count)
    plt.ylim(-5.0, +5.0)
    plt.xlabel('Feature')
    plt.ylabel('Coefficient magnitude')
    plt.title(title)
    plt.legend()
    plt.savefig(filename)
    print(' Graphics saved as "' + filename + '"')
    plt.close()

    return


def timestamp():
    """Print the current date and time as a timestamp.

    Licensing:
        This code is distributed under the MIT license.

    Modified:
        21 August 2019

    Author:
        John Burkardt
    """
    import time

    t = time.time()
    print(time.ctime(t))

    return


if (__name__ == '__main__'):
    timestamp()
    cancer_classify_logistic()
    timestamp()