#! /usr/bin/env python3
#
def exercise2 ( ):

#*****************************************************************************80
#
## exercise2() processes the faithful_data dataset.
#
#  Discussion:
#
#    The data is stored as a text file, with spaces used as the delimiter.
#
#    The function reads "faithful_data.txt" from the current directory,
#    prints the first five rows and some column statistics, standardizes
#    each column (subtract the column mean, divide by the column standard
#    deviation), and then calls kmeans2() for 1, 2 and 3 clusters.
#
#    For each clustering, the energy and the cluster centers are printed,
#    and a scatter plot is saved as a PNG file and displayed.
#
#    kmeans2() is called with its default settings; per the SciPy
#    documentation the default initialization is random, so the printed
#    energies and centers may vary from run to run.
#
#  Licensing:
#
#    This code is distributed under the MIT license.
#
#  Modified:
#
#    25 March 2025
#
#  Author:
#
#    John Burkardt
#
#  Output:
#
#    None.  Results are printed, and three PNG files are written:
#    exercise2_one_cluster.png, exercise2_two_clusters.png,
#    exercise2_three_clusters.png.
#
  from scipy.cluster.vq import kmeans2
  import numpy as np

  print ( "exercise2():" )
  print ( " Read the data from the datafile." )
  print ( " Print the first five lines." )
  print ( " Compute some statistical measurements." )
  print ( " Standardize the data." )
  print ( " Call kmeans2() for 1, 2, or 3 clusters." )
#
#  Read the data.
#  Unpacking the shape into (rows, cols) requires a 2D table.
#
  filename = 'faithful_data.txt'
  data = np.loadtxt ( filename )
  rows, cols = np.shape ( data )
  print ( '' )
  print ( ' "' + filename + '" contains', rows, 'rows and', cols, 'columns.' )
#
#  Print the first five lines.
#
  print ( '' )
  print ( ' First five lines of data:' )
  print ( '' )
  print ( data[0:5,:] )
#
#  Statistics, computed column by column (axis = 0).
#
  print ( '' )
  print ( ' Statistics:' )
  print ( '' )
  print ( ' data.shape: ', data.shape )
  print ( ' np.min(data,axis=0): ', np.min ( data, axis = 0 ) )
  print ( ' np.mean(data,axis=0): ', np.mean ( data, axis = 0 ) )
  print ( ' np.max(data,axis=0): ', np.max ( data, axis = 0 ) )
  print ( ' np.std(data,axis=0): ', np.std ( data, axis = 0 ) )
  print ( ' np.var(data,axis=0): ', np.var ( data, axis = 0 ) )
#
#  Standardize: each column gets mean 0 and standard deviation 1.
#
  data = ( data - np.mean ( data, axis = 0 ) ) / np.std ( data, axis = 0 )
#
#  Kmeans, one cluster.
#
  k = 1
  Z, C = kmeans2 ( data, k )
  E = _cluster_energy ( data, Z, C )
  print ( '' )
  print ( ' One-cluster energy E = ', E )
  print ( ' One-cluster center Z:' )
  print ( Z )
#
#  Scatter plot.
#  The one-cluster plot uses the default point color, draws no center
#  marker, and places its reference rings at the origin, which is the
#  mean of the standardized data.
#
  _plot_clusters ( data, C, [ None ], [ 'r-' ], np.zeros ( [ 1, 2 ] ), None, \
    '(Erupt,Quiet) standardized', 'exercise2_one_cluster.png' )
#
#  Kmeans, two clusters.
#
  k = 2
  Z, C = kmeans2 ( data, k )
  E = _cluster_energy ( data, Z, C )
  print ( '' )
  print ( ' Two-cluster energy E = ', E )
  print ( ' Two-cluster centers Z:' )
  print ( Z )
#
#  Plot the two clusters using different colors.
#
  _plot_clusters ( data, C, [ 'red', 'cyan' ], [ 'r-', 'c-' ], Z, Z, \
    '(Erupt,Quiet) standardized, 2 clusters', 'exercise2_two_clusters.png' )
#
#  Kmeans, 3 clusters.
#
  k = 3
  Z, C = kmeans2 ( data, k )
  E = _cluster_energy ( data, Z, C )
  print ( '' )
  print ( ' 3-cluster energy E = ', E )
  print ( ' 3-cluster centers Z:' )
  print ( Z )
#
#  Scatter plot
#
  _plot_clusters ( data, C, [ 'red', 'cyan', 'green' ], [ 'r-', 'c-', 'g-' ], \
    Z, Z, '(Erupt,Quiet) standardized, 3 clusters', \
    'exercise2_three_clusters.png' )
#
#  Terminate.
#
  print ( "" )
  print ( "exercise2():" )
  print ( " Normal end of execution." )

  return

def _cluster_energy ( data, Z, C ):

#*****************************************************************************80
#
## _cluster_energy() sums squared distances from points to their centers.
#
#  Discussion:
#
#    Z[C] selects, for every data row, the center of the cluster that row
#    was assigned to, so a single vectorized expression covers any number
#    of clusters.  A cluster with no members contributes nothing.
#
#    All columns of the data take part in the sum; for two-column data
#    this is the usual (x-zx)^2 + (y-zy)^2 formula.
#
#  Input:
#
#    data: the (rows, cols) array of points.
#
#    Z: the (k, cols) array of cluster centers.
#
#    C: the length-rows integer array of cluster labels, in 0..k-1.
#
#  Output:
#
#    the total energy, a scalar.
#
  import numpy as np

  return np.sum ( ( data - Z[C] ) ** 2 )

def _plot_clusters ( data, C, point_colors, ring_styles, ring_centers, \
  star_centers, title, plotfile ):

#*****************************************************************************80
#
## _plot_clusters() draws, saves and shows one clustered scatter plot.
#
#  Discussion:
#
#    Only the first two columns of the data are plotted.
#
#  Input:
#
#    data: the (rows, cols) array of points.
#
#    C: the length-rows integer array of cluster labels.
#
#    point_colors: one scatter color per cluster; None selects the
#    matplotlib default color.
#
#    ring_styles: one line format string per cluster, such as 'r-'.
#
#    ring_centers: a (k, 2) array; rings of radius 1, 2 and 3 are drawn
#    around each row.
#
#    star_centers: a (k, 2) array of centers to mark with black stars,
#    or None to draw no markers.
#
#    title: the plot title.
#
#    plotfile: the name of the PNG file to write.
#
  import matplotlib.pyplot as plt
  import numpy as np

  plt.clf ( )
#
#  Points, one color per cluster.
#
  for j, color in enumerate ( point_colors ):
    member = ( C == j )
    plt.scatter ( data[member,0], data[member,1], c = color )
#
#  Cluster centers.
#
  if ( star_centers is not None ):
    plt.scatter ( star_centers[:,0], star_centers[:,1], c = 'black', \
      s = 250, marker = '*' )
#
#  Reference rings.  The ring for cluster j is centered at
#  ( ring_centers[j,0], ring_centers[j,1] ): the x coordinate offsets
#  the cosine, and the y coordinate offsets the sine.
#
  tc = np.linspace ( 0, 2.0 * np.pi, 51 )
  xc = np.cos ( tc )
  yc = np.sin ( tc )
  for center, style in zip ( ring_centers, ring_styles ):
    for radius in ( 1.0, 2.0, 3.0 ):
      plt.plot ( radius * xc + center[0], radius * yc + center[1], style, \
        linewidth = 2 )

  plt.xlabel ( 'Erupt' )
  plt.ylabel ( 'Quiet' )
  plt.title ( title )
  plt.grid ( True )
#
#  Equal axis scaling keeps the rings circular.
#
  plt.axis ( 'equal' )
  plt.savefig ( plotfile )
  print ( ' Graphics saved as "' + plotfile + '"' )
  plt.show ( )
  plt.close ( )

  return

if ( __name__ == "__main__" ):
  exercise2 ( )