#! /usr/bin/env python3
#
import platform

import numpy as np


def _normalize(values):
    """Rescale values linearly so that the minimum maps to 0 and the maximum to 1.

    A constant input (maximum equal to minimum) is mapped to all zeros
    rather than dividing by zero.
    """
    values = np.asarray(values, dtype=float)
    vmin = np.min(values)
    span = np.max(values) - vmin
    if span == 0.0:
        return np.zeros_like(values)
    return (values - vmin) / span


def _assign_clusters(x, y, c):
    """Assign every point (x[i], y[i]) to the nearer of the two centers in c.

    Returns (bd, rd, bc, rc): the squared distances to center 0 and to
    center 1, and boolean masks selecting the members of cluster 0 and
    cluster 1.  The masks are complementary, so each point belongs to
    exactly one cluster; a point equidistant from both centers goes to
    cluster 0.
    """
    # Squared distances are enough: they order points the same way as
    # true distances, and the cost reported by the caller is the sum of
    # squared distances.
    bd = (x - c[0, 0]) ** 2 + (y - c[0, 1]) ** 2
    rd = (x - c[1, 0]) ** 2 + (y - c[1, 1]) ** 2
    bc = bd <= rd
    rc = ~bc
    return bd, rd, bc, rc


def _update_centers(x, y, c, bc, rc):
    """Move each center in c (in place) to the mean of its cluster members.

    A cluster with no members keeps its previous center, because the
    mean of an empty selection is NaN and would corrupt later iterations.
    Returns c for convenience.
    """
    for k, members in enumerate((bc, rc)):
        if np.any(members):
            c[k, 0] = np.mean(x[members])
            c[k, 1] = np.mean(y[members])
    return c


def _save_and_show(plt, filename):
    """Write the current figure to filename, display it, then clear it."""
    plt.savefig(filename)
    plt.show()
    plt.clf()


def faithful_kmeans_user():
    """Apply a hand-written two-cluster K-Means to the Old Faithful data.

    Reads two columns (plotted as duration and wait) from
    'faithful_data.txt', rescales each column to [0, 1], saves a scatter
    plot of the normalized data, and then runs four K-Means iterations
    from fixed starting centers.  Each iteration prints the cluster
    variance and saves a plot of the current clusters and centers.

    Licensing:

      This code is distributed under the MIT license.

    Modified:

      22 January 2022

    Author:

      John Burkardt
    """
    # Imported here so the numerical helpers above can be used without
    # a plotting library being installed.
    import matplotlib.pyplot as plt

    print('')
    print('faithful_kmeans_user:')
    print(' Python version: %s' % (platform.python_version()))
    print(' Implement a user version of K-Means to the Old Faithful data.')
#
#  Read the data.
#
    data = np.loadtxt('faithful_data.txt')
    n = len(data[:, 0])

    print('')
    print(' Number of data values is %d' % (n))
#
#  Normalize the data, then create x and y.
#
    x = _normalize(data[:, 0])
    y = _normalize(data[:, 1])

    plt.plot(x, y, 'k.', markersize=10)
    plt.xlabel('<-- Duration (normalized) -->', fontsize=16)
    plt.ylabel('<-- Wait (normalized) -->', fontsize=16)
    plt.title('Old Faithful eruption durations and waits', fontsize=16)
    plt.grid(True)
    plt.axis('equal')
    filename = 'faithful_kmeans_user_plot.png'
    _save_and_show(plt, filename)

    print('')
    print(' Graphics saved as "%s"' % (filename))
#
#  Determine clusters using arbitrary centers.
#
    c = np.array([
        [0.4, 0.6],
        [0.6, 0.3]])

    for iteration in range(0, 4):

        bd, rd, bc, rc = _assign_clusters(x, y, c)
#
#  Compute and print cost.
#
        cost0 = np.sum(bd[bc])
        cost1 = np.sum(rd[rc])
        cost = cost0 + cost1
        print(' Step ', iteration, ': Cluster variance = ', cost,
              '=', cost0, '+', cost1)

        plt.plot(x[bc], y[bc], 'c.', markersize=10)
        plt.plot(x[rc], y[rc], 'm.', markersize=10)
        plt.plot(c[0, 0], c[0, 1], 'bo', markersize=15)
        plt.plot(c[1, 0], c[1, 1], 'ro', markersize=15)
        plt.xlabel('<-- Duration -->')
        plt.ylabel('<-- Wait -->')
        plt.title('Clusters on iteration %d' % (iteration))
        plt.grid(True)
        _save_and_show(plt, 'faithful_kmeans_user_iteration%d.png' % (iteration))
#
#  Prepare for next step by updating cluster centers.
#
        _update_centers(x, y, c, bc, rc)
#
#  Terminate.
#
    print('')
    print('faithful_kmeans_user():')
    print(' Normal end of execution.')

    return


if (__name__ == '__main__'):
    faithful_kmeans_user()