#! /usr/bin/env python3
#
def faithful_guess ( ):

#*****************************************************************************80
#
## faithful_guess() guesses a good clustering of the Old Faithful data.
#
#  Discussion:
#
#    The observations are read from 'faithful_data.txt' in the current
#    directory.  The first two columns (Erupt, Quiet) are each rescaled
#    to the interval [0,1].
#
#    Two cluster centers are fixed by hand, not computed.  Each observation
#    is assigned to the center with the smaller squared Euclidean distance;
#    ties go to center 0.
#
#    Two plots are saved and displayed:
#
#      faithful_guess_centers.png   normalized data and the two centers;
#      faithful_guess_clusters.png  data colored by assigned cluster.
#
#    The "1 Cluster variance" is the sum of the two diagonal entries of
#    np.cov() applied to the normalized data, using that function's default
#    normalization.  Each "2 Cluster" variance is the mean squared distance
#    of a cluster's points from its guessed center (not from the cluster
#    mean), divided by the cluster size.
#
#  Licensing:
#
#    This code is distributed under the GNU LGPL license.
#
#  Modified:
#
#    21 January 2022
#
#  Author:
#
#    John Burkardt
#
#  Input:
#
#    None.  The file 'faithful_data.txt' must be readable by np.loadtxt().
#
#  Output:
#
#    None.  Results are printed, and two PNG files are written.
#
    import matplotlib.pyplot as plt
    import numpy as np
    import platform

    def finish_plot ( centers, title, filename ):
#
#  Overlay the centers, decorate, save, report, show and clear the
#  current figure.  Shared by both plots so they stay consistent.
#
        plt.plot ( centers[:,0], centers[:,1], 'k*', markersize = 20 )
        plt.xlabel ( '<-- Erupt -->', fontsize = 16 )
        plt.ylabel ( '<-- Quiet -->', fontsize = 16 )
        plt.title ( title, fontsize = 16 )
        plt.grid ( True )
        plt.savefig ( filename )
        print ( ' Graphics saved as "%s"' % ( filename ) )
        plt.show ( )
        plt.clf ( )

    print ( '' )
    print ( 'faithful_guess:' )
    print ( ' Python version: %s' % ( platform.python_version ( ) ) )
    print ( ' Guess a good clustering of the Old Faithful (Erupt,Quiet) observations.' )
#
#  Read the data.
#
    data = np.loadtxt ( 'faithful_data.txt' )
    n, dim = np.shape ( data )

    print ( '' )
    print ( ' Data involves ', n, ' values of ', dim, ' dimension.' )
#
#  Normalize the first two columns to [0,1].
#  The minimum and maximum of each column are computed once.
#
    for j in range ( 2 ):
        lo = np.min ( data[:,j] )
        hi = np.max ( data[:,j] )
        data[:,j] = ( data[:,j] - lo ) / ( hi - lo )
#
#  Guess two good centers (in normalized coordinates).
#
    C = np.array ( [ \
        [ 0.05, 0.2 ], \
        [ 0.8, 0.75 ] ] )
#
#  Display normalized data and centers.
#
    plt.plot ( data[:,0], data[:,1], 'c.', markersize = 10 )
    finish_plot ( C, 'Estimated centers for Old Faithful data', \
        'faithful_guess_centers.png' )
#
#  Compute the SQUARED distance of every point to each center.
#  The square root is not needed to decide which center is nearer.
#
    dist2 = np.zeros ( [ n, 2 ] )
    dist2[:,0] = ( C[0,0] - data[:,0] )**2 + ( C[0,1] - data[:,1] )**2
    dist2[:,1] = ( C[1,0] - data[:,0] )**2 + ( C[1,1] - data[:,1] )**2
#
#  Assign data to the nearest center; ties go to center 0.
#  Boolean masks are used rather than the tuple returned by np.where(),
#  so that indexing yields 1D arrays and each cluster is drawn as a
#  single line object instead of one line object per point.
#
    in0 = ( dist2[:,0] <= dist2[:,1] )
    in1 = ( dist2[:,1] < dist2[:,0] )
#
#  Display data in clusters.
#
    plt.plot ( data[in0,0], data[in0,1], 'c.', markersize = 10 )
    plt.plot ( data[in1,0], data[in1,1], 'r.', markersize = 10 )
    finish_plot ( C, 'Estimated clusters for Old Faithful data', \
        'faithful_guess_clusters.png' )
#
#  Compute variances.
#  Careful!
#  np.cov() requires the rowvar = False argument, because the
#  observations are stored as rows and the variables as columns.
#
    covar = np.cov ( data, rowvar = False )
    var = covar[0,0] + covar[1,1]
#
#  The squared distances to the assigned center are already in dist2.
#
    n0 = int ( np.count_nonzero ( in0 ) )
    var0 = np.sum ( dist2[in0,0] ) / n0

    n1 = int ( np.count_nonzero ( in1 ) )
    var1 = np.sum ( dist2[in1,1] ) / n1

    print ( "" )
    print ( " 1 Cluster size = ", n )
    print ( " 2 Cluster size = ", n0 + n1, ' = ', n0, ' + ', n1 )
    print ( "" )
    print ( " 1 Cluster variance = ", var )
    print ( " 2 Cluster variance = ", var0 + var1, ' = ', var0, ' + ', var1 )
#
#  Terminate.
#
    print ( '' )
    print ( 'faithful_guess:' )
    print ( ' Normal end of execution.' )

    return

if ( __name__ == '__main__' ):
    faithful_guess ( )