#! /usr/bin/env python3
#
def blob_classify_kernelized_svm ( ):

#*****************************************************************************80
#
## blob_classify_kernelized_svm() uses kernelized SVM on a blob dataset.
#
#  Discussion:
#
#    The data involves four groups of "blobs" that represent samples
#    of two classes.
#
#    A linear SVM cannot properly separate the two classes.
#    By adding the square of the second feature as a third feature,
#    the data can be separated by a plane in the augmented 3D space.
#
#    Five PNG files are written to the current directory:
#      blob_data.png, blob_boundary_linear.png, blob_data_kernelized.png,
#      blob_boundary_kernelized.png, blob_boundary_projected.png.
#
#  Licensing:
#
#    This code is distributed under the MIT license.
#
#  Modified:
#
#    01 August 2023
#
#  Author:
#
#    Original code by Andreas Mueller, Sarah Guido.
#    Modifications by John Burkardt.
#
#  Reference:
#
#    Andreas Mueller, Sarah Guido,
#    Introduction to Machine Learning with Python,
#    OReilly, 2017,
#    ISBN: 978-1-449-36941-5
#
  from sklearn.datasets import make_blobs
  from sklearn.svm import LinearSVC
  import matplotlib.pyplot as plt
  import mglearn
  import numpy as np
  import platform
  import sklearn

  print ( '' )
  print ( 'blob_classify_kernelized_svm()' )
  print ( ' python version: %s' % ( platform.python_version ( ) ) )
  print ( ' scikit-learn version %s' % ( sklearn.__version__ ) )
  print ( ' Use a kernelized support vector machine to classify blob' )
  print ( ' data, involving 2 classes that cannot be linearly separated.' )
#
#  Define the data.
#
  X, y = make_blobs ( centers = 4, random_state = 8 )
#
#  Redefine y so there are just two classes.
#
  y = y % 2
#
#  Plot the blobs.
#
  plt.clf ( )
  mglearn.discrete_scatter ( X[:,0], X[:,1], y )
  plt.xlabel ( 'Feature 0' )
  plt.ylabel ( 'Feature 1' )
  plt.title ( 'The raw data' )
  plt.legend ( [ "Class 0", "Class 1" ] )
  plt.grid ( True )
  _save_and_close ( 'blob_data.png' )
#
#  Define a linear SVM model.
#
  linear_svm = LinearSVC ( dual = 'auto' ).fit ( X, y )
#
#  Plot the decision boundary.
#
  plt.clf ( )
  mglearn.plots.plot_2d_separator ( linear_svm, X )
  mglearn.discrete_scatter ( X[:,0], X[:,1], y )
  plt.grid ( True )
  plt.xlabel ( 'Feature 0' )
  plt.ylabel ( 'Feature 1' )
  plt.title ( 'The SVM decision boundary' )
  _save_and_close ( 'blob_boundary_linear.png' )
#
#  Now add the squared second feature as a third column.
#  The slice X[:,1:] keeps the column 2D so that hstack() can append it.
#
  X_new = np.hstack ( [ X, X[:,1:]**2 ] )
#
#  Visualize the augmented data in 3D.
#  The code given in the text did not work properly!
#
  _plot_classes_3d ( X_new, y, 'The kernelized data.',
    'blob_data_kernelized.png' )
#
#  Apply svm to kernelized data.
#
  linear_svm_3d = LinearSVC ( dual = 'auto' ).fit ( X_new, y )
  coef, intercept = linear_svm_3d.coef_.ravel(), linear_svm_3d.intercept_
#
#  Plot the decision boundary in 3D.
#  The boundary is the plane
#    coef[0] * x + coef[1] * y + coef[2] * z + intercept = 0,
#  which is solved for z over a grid extending 2 units beyond the data.
#
  xx = np.linspace ( X_new[:,0].min() - 2, X_new[:,0].max() + 2, 50 )
  yy = np.linspace ( X_new[:,1].min() - 2, X_new[:,1].max() + 2, 50 )
  XX, YY = np.meshgrid ( xx, yy )
  ZZ_plane = ( coef[0] * XX + coef[1] * YY + intercept ) / - coef[2]

  _plot_classes_3d ( X_new, y, 'The kernelized SVM decision boundary.',
    'blob_boundary_kernelized.png', surface = ( XX, YY, ZZ_plane ) )
#
#  Project the 3D decision boundary to 2D.
#  Each grid point (x,y) is lifted to (x,y,y^2), the decision function is
#  evaluated there, and its zero level is drawn in the original plane.
#
  plt.clf ( )
  ZZ_lifted = YY ** 2
  dec = linear_svm_3d.decision_function (
    np.c_[XX.ravel(), YY.ravel(), ZZ_lifted.ravel() ] )
  plt.contour ( XX, YY, dec.reshape ( XX.shape ),
    levels = [ dec.min(), 0, dec.max() ], cmap = mglearn.cm2, alpha = 0.5 )
  mglearn.discrete_scatter ( X[:,0], X[:,1], y )
  plt.grid ( True )
  plt.xlabel ( 'Feature 0' )
  plt.ylabel ( 'Feature 1' )
  plt.title ( 'Projected kernelized SVM decision boundary' )
  _save_and_close ( 'blob_boundary_projected.png' )
#
#  Terminate.
#
  print ( '' )
  print ( 'blob_classify_kernelized_svm()' )
  print ( ' Normal end of execution.' )

  return

def _save_and_close ( filename ):

#*****************************************************************************80
#
## _save_and_close() saves the current figure, reports it, and closes it.
#
#  Input:
#
#    string filename: the name of the graphics file to be written.
#
  import matplotlib.pyplot as plt

  plt.savefig ( filename )
  print ( ' Graphics saved as "' + filename + '"' )
  plt.close ( )

  return

def _plot_classes_3d ( X_new, y, title, filename, surface = None ):

#*****************************************************************************80
#
## _plot_classes_3d() draws the two classes in 3D and saves the figure.
#
#  Discussion:
#
#    Class 0 points are drawn as blue circles and class 1 points as red
#    triangles.  If a surface is supplied, it is drawn first, so the
#    drawing order matches the original script.
#
#  Input:
#
#    array X_new: the data; columns 0, 1 and 2 are used as coordinates.
#
#    array y: the class label (0 or 1) of each row of X_new.
#
#    string title: the title of the plot.
#
#    string filename: the name of the graphics file to be written.
#
#    tuple surface: optional (XX, YY, ZZ) grids for a surface to overlay.
#
#
#  Axes3D is not referenced by name.  Matplotlib versions before 3.2 need
#  this import in order to make the '3d' projection available.
#
  from mpl_toolkits.mplot3d import Axes3D
  import matplotlib.pyplot as plt

  figure = plt.figure ( )
  ax = figure.add_subplot ( 111, projection = '3d', elev = -152, azim = -26 )

  if ( surface is not None ):
    XX, YY, ZZ = surface
    ax.plot_surface ( XX, YY, ZZ, rstride = 8, cstride = 8, alpha = 0.3 )

  for label, color, marker in ( ( 0, 'blue', 'o' ), ( 1, 'red', '^' ) ):
    mask = ( y == label )
    ax.scatter ( X_new[mask,0], X_new[mask,1], X_new[mask,2],
      c = color, marker = marker, s = 60, edgecolor = 'k' )

  ax.grid ( True )
  ax.set_xlabel ( 'Feature 0' )
  ax.set_ylabel ( 'Feature 1' )
  ax.set_zlabel ( 'Feature 1**2' )
  ax.set_title ( title )

  _save_and_close ( filename )

  return

def timestamp ( ):

#*****************************************************************************80
#
## timestamp() prints the date as a timestamp.
#
#  Licensing:
#
#    This code is distributed under the MIT license.
#
#  Modified:
#
#    21 August 2019
#
#  Author:
#
#    John Burkardt
#
  import time

  t = time.time ( )
  print ( time.ctime ( t ) )

  return

if ( __name__ == '__main__' ):
  timestamp ( )
  blob_classify_kernelized_svm ( )
  timestamp ( )