#! /usr/bin/env python3
#
def faithful_guess():
    """Guess a two-cluster partition of the Old Faithful data and report on it.

    The routine reads the observations from 'faithful_normalized.txt',
    using column 0 as the "Erupt" coordinate and column 1 as the "Wait"
    coordinate.  Two hand-picked centers are plotted over the data, every
    observation is assigned to the closer center (a tie goes to the first
    center), and the resulting clusters are plotted.  Finally the cluster
    sizes and the "energies" (sums of squared distances) are printed:
    the one-cluster energy is measured from the data mean, while the
    two-cluster energies are measured from the guessed centers.

    Side effects: writes 'faithful_guess_centers.png' and
    'faithful_guess_clusters.png', opens two plot windows, and prints
    to standard output.  Nothing is returned.

    Licensing:

      This code is distributed under the GNU LGPL license.

    Modified:

      29 September 2023

    Author:

      John Burkardt
    """
    import matplotlib.pyplot as plt
    import numpy as np
    import platform

    def squared_distance(points, center):
        # Squared Euclidean distance from each row of points to center,
        # using only the first two columns.
        return (points[:, 0] - center[0]) ** 2 + (points[:, 1] - center[1]) ** 2

    def label_save_show(title, filename):
        # Shared tail of both figures: axis labels, title, grid, then
        # save to disk, report the file name, and display.
        plt.xlabel('<-- Erupt -->', fontsize=16)
        plt.ylabel('<-- Wait -->', fontsize=16)
        plt.title(title, fontsize=16)
        plt.grid(True)
        plt.savefig(filename)
        print(' Graphics saved as "%s"' % (filename))
        plt.show()

    # NOTE(review): the banner below mentions Lloyd iteration, but no
    # iteration is carried out anywhere in this routine.
    print('')
    print('faithful_guess():')
    print(' Python version: %s' % (platform.python_version()))
    print(' Guess a good clustering of the Old Faithful (Erupt,Wait) observations.')
    print(' Then do a few steps of Lloyd iteration for improvement.')

    # Load the observations; one row per observation.
    data = np.loadtxt('faithful_normalized.txt')
    n = data.shape[0]

    # The two guessed cluster centers, one per row.
    centers = np.array([
        [0.05, 0.2],
        [0.8, 0.75],
    ])

    # Figure 1: all observations together with the guessed centers.
    plt.clf()
    plt.plot(data[:, 0], data[:, 1], 'c.', markersize=10)
    plt.plot(centers[:, 0], centers[:, 1], 'k*', markersize=20)
    label_save_show('Estimated centers for Old Faithful data',
                    'faithful_guess_centers.png')

    # Squared distance from every observation to each center.
    dist_first = squared_distance(data, centers[0])
    dist_second = squared_distance(data, centers[1])

    # Label is 1 only where the second center is strictly closer;
    # everything else (including ties) keeps label 0.
    in_first = dist_first <= dist_second
    in_second = dist_second < dist_first
    n0 = in_first.sum()
    n1 = in_second.sum()
    label = in_second.astype(int)

    # Figure 2: observations colored by cluster, plus the centers.
    first_mask = label == 0
    second_mask = label == 1
    plt.clf()
    plt.plot(data[first_mask, 0], data[first_mask, 1], 'c.', markersize=10)
    plt.plot(data[second_mask, 0], data[second_mask, 1], 'r.', markersize=10)
    plt.plot(centers[:, 0], centers[:, 1], 'k*', markersize=20)
    label_save_show('Estimated clusters for Old Faithful data',
                    'faithful_guess_clusters.png')

    # One-cluster energy: squared distances to the overall mean point.
    erupt_mean = np.mean(data[:, 0])
    wait_mean = np.mean(data[:, 1])
    energy_single = np.sum(
        (erupt_mean - data[:, 0]) ** 2 + (wait_mean - data[:, 1]) ** 2
    )

    # Two-cluster energies: squared distances of each cluster's members
    # to that cluster's guessed center.
    e0 = np.sum(squared_distance(data[first_mask], centers[0]))
    e1 = np.sum(squared_distance(data[second_mask], centers[1]))

    print("")
    print(" Single cluster size = ", n)
    print(" Two cluster size = ", n0 + n1, ' = ', n0, ' + ', n1)
    print("")
    print(" Single cluster energy = ", energy_single)
    print(" Two cluster energy = ", e0 + e1, ' = ', e0, ' + ', e1)

    # Terminate.
    print('')
    print('faithful_guess():')
    print(' Normal end of execution.')


if __name__ == '__main__':
    faithful_guess()