#! /usr/bin/env python3
#
def faithful_kmeans2():

    #*****************************************************************************80
    #
    ## faithful_kmeans2() does a simple clustering exercise using scipy kmeans2().
    #
    #  Discussion:
    #
    #    The normalized data is read from the file 'faithful_normalized.txt'.
    #    Column 0 is plotted as the eruption duration and column 1 as the
    #    wait.  The steps are:
    #
    #    * plot the data and save it as 'faithful_normalized_data.png';
    #    * call scipy.cluster.vq.kmeans2() to split the data into k = 2
    #      clusters;
    #    * print the cluster centers, the sum of squared distances of each
    #      cluster's points to their center (reported as "Cluster variance"),
    #      and the cluster sizes;
    #    * plot the two clusters and their centers and save the plot as
    #      'faithful_kmeans2.png'.
    #
    #    NOTE: kmeans2() is called with its default initialization and no
    #    seed, so the cluster numbering, and possibly the clustering itself,
    #    may differ from one run to the next.
    #
    #  Licensing:
    #
    #    This code is distributed under the MIT license.
    #
    #  Modified:
    #
    #    22 March 2025
    #
    #  Author:
    #
    #    John Burkardt
    #
    #  Input:
    #
    #    None.  The data file 'faithful_normalized.txt' is read from the
    #    current working directory.
    #
    #  Output:
    #
    #    None.  Results are printed, and two PNG files are written.
    #
    import matplotlib.pyplot as plt
    import numpy as np
    import platform
    from scipy.cluster.vq import kmeans2

    print('')
    print('faithful_kmeans2():')
    print(' Python version: ' + platform.python_version())
    print(' Cluster Old Faithful data using scipy kmeans2().')
    #
    #  Read the normalized data;
    #  For convenience, refer to the two columns as xn and yn.
    #
    datan = np.loadtxt('faithful_normalized.txt')
    xn = datan[:, 0]
    yn = datan[:, 1]
    n = len(xn)
    print('')
    print(' Number of data values is ', n)
    #
    #  Display the data.
    #
    plt.plot(xn, yn, 'k.', markersize=10)
    plt.xlabel('<-- Duration (normalized) -->')
    plt.ylabel('<-- Wait (normalized) -->')
    plt.title('Old Faithful eruption durations and waits')
    plt.grid(True)
    plt.axis('equal')
    filename = 'faithful_normalized_data.png'
    plt.savefig(filename)
    print(' Graphics saved as "' + filename + '"')
    plt.show()
    plt.clf()
    #
    #  Choose the number of clusters, and call kmeans2().
    #  c holds one center per row; label[i] is the cluster index of point i.
    #
    k = 2
    c, label = kmeans2(datan, k)
    #
    #  Report the cluster centers.
    #
    print(' Kmeans2 cluster centers C:')
    print(c)
    #
    #  Build the membership masks once; they are reused for the energies,
    #  the counts, and the plot.
    #
    in0 = label == 0
    in1 = label == 1
    #
    #  Squared distance of every point to center 0 (bd) and center 1 (rd).
    #
    bd = (xn - c[0, 0]) ** 2 + (yn - c[0, 1]) ** 2
    rd = (xn - c[1, 0]) ** 2 + (yn - c[1, 1]) ** 2
    #
    #  Sum the energies over the points assigned to each cluster.
    #  np.sum() is used rather than the built-in sum() so that the
    #  reduction is done by NumPy instead of a Python-level loop.
    #
    cost0 = np.sum(bd[in0])
    cost1 = np.sum(rd[in1])
    cost = cost0 + cost1
    print(' Cluster variance = ', cost, '=', cost0, '+', cost1)
    #
    #  Count the elements of each cluster.
    #
    bn = np.count_nonzero(in0)
    cn = np.count_nonzero(in1)
    print(' Cluster size = ', bn + cn, '=', bn, '+', cn)
    #
    #  Plot the two clusters (cyan and magenta dots) and their
    #  centers (blue and red circles).
    #
    plt.plot(xn[in0], yn[in0], 'c.', markersize=10)
    plt.plot(xn[in1], yn[in1], 'm.', markersize=10)
    plt.plot(c[0, 0], c[0, 1], 'bo', markersize=15)
    plt.plot(c[1, 0], c[1, 1], 'ro', markersize=15)
    plt.xlabel('<-- Duration -->')
    plt.ylabel('<-- Wait -->')
    plt.title('Clusters using kmeans2()')
    plt.grid(True)
    filename = 'faithful_kmeans2.png'
    plt.savefig(filename)
    print(' Graphics saved as "' + filename + '"')
    plt.show()
    plt.clf()
    #
    #  Terminate.
    #
    print('')
    print('faithful_kmeans2():')
    print(' Normal end of execution.')


if __name__ == '__main__':
    faithful_kmeans2()