#! /usr/bin/env python3
#
def faces_classify_pca ( ):

#*****************************************************************************80
#
## faces_classify_pca() uses principal component analysis (PCA) on face data.
#
#  Discussion:
#
#    Download the LFW faces dataset, keep at most 50 images of any one
#    person, scale the gray levels to [0,1], and split the data into
#    training and test sets.  Extract the first 100 principal components,
#    then score a 1-nearest-neighbor classifier on the PCA-transformed
#    data.  Several diagnostic plots are saved as PNG files.
#
#  Licensing:
#
#    This code is distributed under the GNU LGPL license.
#
#  Modified:
#
#    21 September 2023
#
#  Author:
#
#    Andreas Mueller, Sarah Guido.
#    This version by John Burkardt.
#
#  Reference:
#
#    Andreas Mueller, Sarah Guido,
#    Introduction to Machine Learning with Python,
#    O'Reilly, 2017,
#    ISBN: 978-1-449-36941-5
#
  from sklearn.datasets import fetch_lfw_people
  from sklearn.decomposition import PCA
  from sklearn.model_selection import train_test_split
  from sklearn.neighbors import KNeighborsClassifier
  import matplotlib.pyplot as plt
  import mglearn
  import numpy as np
  import platform
  import sklearn

  print ( '' )
  print ( 'faces_classify_pca():' )
  print ( ' Python version: ' + platform.python_version ( ) )
  print ( ' scikit-learn version: '+ sklearn.__version__ )
  print ( ' Match a face to an item in the faces dataset.' )
  print ( ' Use principal component analysis (PCA).' )
  print ( '' )
#
#  Generate the dataset.  This downloads the data on first use.
#
  print ( ' Generate the dataset' )
  people = fetch_lfw_people ( min_faces_per_person = 20, resize = 0.7 )
  image_shape = people.images[0].shape
#
#  Trim the dataset so no more than 50 images of any person.
#  The mask is boolean, so mark kept entries with True.
#
  mask = np.zeros ( people.target.shape, dtype = bool )
  for target in np.unique ( people.target ):
    mask[np.where ( people.target == target) [0][:50]] = True

  X_people = people.data[mask]
  y_people = people.target[mask]
#
#  Scale the grayscale values to be between 0 and 1.
#
  X_people = X_people / 255.0
#
#  Split the data.  Stratify so each person appears in both sets.
#
  X_train, X_test, y_train, y_test = train_test_split ( X_people, y_people,
    stratify = y_people, random_state = 0 )
#
#  Not sure how this helps!
#
  mglearn.plots.plot_pca_whitening ( )
#
#  Extract first 100 PCA components.
#
  pca = PCA ( n_components = 100, whiten = True,
    random_state = 0 ).fit ( X_train )
  X_train_pca = pca.transform ( X_train )
  X_test_pca = pca.transform ( X_test )
  print ( " " )
  print ( " X_train_pca.shape = ", X_train_pca.shape )
#
#  Now use KNN on the PCA data:
#
  knn = KNeighborsClassifier ( n_neighbors = 1 )
  knn.fit ( X_train_pca, y_train )
  print ( "" )
  print ( " Test set using 1 neighbor KNN on PCA data",
    knn.score(X_test_pca,y_test) )
#
#  Plot PCA components.
#
  print ( "" )
  print ( " pca.components_.shape:", pca.components_.shape )
  plt.clf ( )
  fig, axes = plt.subplots ( 3, 5, figsize = ( 15, 12 ),
    subplot_kw = { 'xticks':(), 'yticks':() } )
  for i, ( component, ax ) in enumerate (
    zip ( pca.components_, axes.ravel() ) ):
    ax.imshow ( component.reshape ( image_shape ), cmap = 'viridis' )
    ax.set_title ( "Component {}".format ( ( i+1) ) )
  filename = 'pca_components.png'
  plt.savefig ( filename )
  print ( " Graphics saved as '" + filename + "'" )
#
#  Visualize reconstruction of some faces using 10, 50, 100 and 500 components:
#  (THIS PRODUCES AN ARRAY OF BLACK PLOTS)
#  (SET THE FLAG TO False IF IT ANNOYS YOU!)
#
  show_reconstruction = True

  if ( show_reconstruction ):
    plt.clf ( )
    mglearn.plots.plot_pca_faces ( X_train, X_test, image_shape )
    filename = 'pca_reconstruction.png'
    plt.savefig ( filename )
    print ( " Graphics saved as '" + filename + "'" )
#
#  Scatterplot of first two components.
#
  plt.clf ( )
  mglearn.discrete_scatter ( X_train_pca[:,0], X_train_pca[:,1], y_train )
  plt.xlabel ( "First principal component" )
  plt.ylabel ( "Second principal component" )
  filename = 'pca_scatter.png'
  plt.savefig ( filename )
  print ( " Graphics saved as '" + filename + "'" )
#
#  Terminate.
#
  print ( '' )
  print ( 'faces_classify_pca():' )
  print ( ' Normal end of execution.' )
  return

def timestamp ( ):

#*****************************************************************************80
#
## timestamp() prints the date as a timestamp.
#
#  Licensing:
#
#    This code is distributed under the GNU LGPL license.
#
#  Modified:
#
#    21 August 2019
#
#  Author:
#
#    John Burkardt
#
  import time

  t = time.time ( )
  print ( time.ctime ( t ) )

  return

if ( __name__ == '__main__' ):
  timestamp ( )
  faces_classify_pca ( )
  timestamp ( )