#! /usr/bin/env python3
#
def faces_classify_pca ( ):

#*****************************************************************************80
#
## faces_classify_pca() uses principal component analysis (PCA) on face data.
#
#  Discussion:
#
#    Download the LFW faces dataset, keep at most 50 images of any one
#    person, scale the gray levels to [0,1], and split the data into
#    training and test sets.  Extract the first 100 principal components,
#    then score a 1-nearest-neighbor classifier on the PCA-transformed
#    data.  Several diagnostic plots are saved as PNG files.
#
#  Licensing:
#
#    This code is distributed under the GNU LGPL license.
#
#  Modified:
#
#    21 September 2023
#
#  Author:
#
#    Andreas Mueller, Sarah Guido.
#    This version by John Burkardt.
#
#  Reference:
#
#    Andreas Mueller, Sarah Guido,
#    Introduction to Machine Learning with Python,
#    O'Reilly, 2017,
#    ISBN: 978-1-449-36941-5
#
  from sklearn.datasets import fetch_lfw_people
  from sklearn.decomposition import PCA
  from sklearn.model_selection import train_test_split
  from sklearn.neighbors import KNeighborsClassifier
  import matplotlib.pyplot as plt
  import mglearn
  import numpy as np
  import platform
  import sklearn

  print ( '' )
  print ( 'faces_classify_pca():' )
  print ( ' Python version: ' + platform.python_version ( ) )
  print ( ' scikit-learn version: '+ sklearn.__version__ )
  print ( ' Match a face to an item in the faces dataset.' )
  print ( ' Use principal component analysis (PCA).' )
  print ( '' )
#
#  Generate the dataset.  This downloads the data on first use.
#
  print ( ' Generate the dataset' )
  people = fetch_lfw_people ( min_faces_per_person = 20, resize = 0.7 )
  image_shape = people.images[0].shape
#
#  Trim the dataset so no more than 50 images of any person.
#  The mask is boolean, so mark kept entries with True.
#
  mask = np.zeros ( people.target.shape, dtype = bool )
  for target in np.unique ( people.target ):
    mask[np.where ( people.target == target) [0][:50]] = True

  X_people = people.data[mask]
  y_people = people.target[mask]
#
#  Scale the grayscale values to be between 0 and 1.
#
  X_people = X_people / 255.0
#
#  Split the data.  Stratify so each person appears in both sets.
#
  X_train, X_test, y_train, y_test = train_test_split ( X_people, y_people,
    stratify = y_people, random_state = 0 )
#
#  Not sure how this helps!
#
  mglearn.plots.plot_pca_whitening ( )
#
#  Extract first 100 PCA components.
#
  pca = PCA ( n_components = 100, whiten = True,
    random_state = 0 ).fit ( X_train )
  X_train_pca = pca.transform ( X_train )
  X_test_pca = pca.transform ( X_test )
  print ( " " )
  print ( " X_train_pca.shape = ", X_train_pca.shape )
#
#  Now use KNN on the PCA data:
#
  knn = KNeighborsClassifier ( n_neighbors = 1 )
  knn.fit ( X_train_pca, y_train )
  print ( "" )
  print ( " Test set using 1 neighbor KNN on PCA data",
    knn.score(X_test_pca,y_test) )
#
#  Plot PCA components.
#
  print ( "" )
  print ( " pca.components_.shape:", pca.components_.shape )
  plt.clf ( )
  fig, axes = plt.subplots ( 3, 5, figsize = ( 15, 12 ),
    subplot_kw = { 'xticks':(), 'yticks':() } )
  for i, ( component, ax ) in enumerate (
    zip ( pca.components_, axes.ravel() ) ):
    ax.imshow ( component.reshape ( image_shape ), cmap = 'viridis' )
    ax.set_title ( "Component {}".format ( ( i+1) ) )
  filename = 'pca_components.png'
  plt.savefig ( filename )
  print ( " Graphics saved as '" + filename + "'" )
#
#  Visualize reconstruction of some faces using 10, 50, 100 and 500 components:
#  (THIS PRODUCES AN ARRAY OF BLACK PLOTS)
#  (SET THE FLAG TO False IF IT ANNOYS YOU!)
#
  show_reconstruction = True

  if ( show_reconstruction ):
    plt.clf ( )
    mglearn.plots.plot_pca_faces ( X_train, X_test, image_shape )
    filename = 'pca_reconstruction.png'
    plt.savefig ( filename )
    print ( " Graphics saved as '" + filename + "'" )
#
#  Scatterplot of first two components.
#
  plt.clf ( )
  mglearn.discrete_scatter ( X_train_pca[:,0], X_train_pca[:,1], y_train )
  plt.xlabel ( "First principal component" )
  plt.ylabel ( "Second principal component" )
  filename = 'pca_scatter.png'
  plt.savefig ( filename )
  print ( " Graphics saved as '" + filename + "'" )
#
#  Terminate.
#
  print ( '' )
  print ( 'faces_classify_pca():' )
  print ( ' Normal end of execution.' )
  return

def timestamp ( ):

#*****************************************************************************80
#
## timestamp() prints the date as a timestamp.
#
#  Licensing:
#
#    This code is distributed under the GNU LGPL license.
#
#  Modified:
#
#    21 August 2019
#
#  Author:
#
#    John Burkardt
#
  import time

  t = time.time ( )
  print ( time.ctime ( t ) )

  return

if ( __name__ == '__main__' ):
  timestamp ( )
  faces_classify_pca ( )
  timestamp ( )