#! /usr/bin/env python3 # def faithful_simulate ( ): #*****************************************************************************80 # ## faithful_simulate() simulates Old Faithful using a two cluster model. # # Discussion: # # Clustering data. # # Licensing: # # This code is distributed under the GNU LGPL license. # # Modified: # # 19 September 2019 # # Author: # # John Burkardt # import matplotlib.pyplot as plt import numpy as np import platform from sklearn.cluster import KMeans print ( '' ) print ( 'faithful_simulate():' ) print ( ' Python version: %s' % ( platform.python_version ( ) ) ) print ( ' Use KMeans model to simulate fresh Old Faithful data.' ) # # Set the observed cluster parameters. # nparam = np.array ( [ 98, 174 ] ) Zparam = np.array ( [ \ [ 0.12818076, 0.21967655 ], \ [ 0.77095402, 0.69908913 ] ] ) eparam = np.array ( [ 1.88340, 4.45703 ] ) print ( '' ) print ( ' Data cluster sizes = ', np.sum ( nparam ), '=', nparam[0], '+', nparam[1] ) print ( ' Data cluster energies = ', np.sum ( eparam ), '=', eparam[0], '+', eparam[1] ) print ( ' Data cluster centers:' ) print ( Zparam ) # # Simulate new data. # n = 272 data = cluster_sample ( n, nparam, Zparam, eparam ) # # Create x and y. # x = data[:,0] y = data[:,1] print ( '' ) print ( ' Number of simulated values is', n ) # # Call kmeans2 for the simulated data. # k = 2 kmeans = KMeans ( n_clusters = k, n_init = 'auto' ) kmeans.fit ( data ) Z = kmeans.cluster_centers_ C = kmeans.labels_ E = kmeans.inertia_ n0 = np.sum ( C == 0 ) n1 = np.sum ( C == 1 ) print ( ' Kmeans cluster centers Z:' ) print ( Z ) print ( ' Cluster energy = ', E ) print ( ' Cluster size = ', n0 + n1, '=', n0, '+', n1 ) plt.clf ( ) plt.plot ( x[C==0], y[C==0], 'c.', markersize = 10 ) plt.plot ( x[C==1], y[C==1], 'm.', markersize = 10 ) plt.plot ( Z[0,0], Z[0,1], 'bo', markersize = 15 ) plt.plot ( Z[1,0], Z[1,1], 'ro', markersize = 15 ) plt.xlabel ( '<-- Duration -->', fontsize = 16 ) plt.ylabel ( '<-- Wait -->', fontsize = 16 ) plt.title ( 'Simulated clusters', fontsize = 16 ) plt.grid ( True ) filename = 'faithful_simulate.png' plt.savefig ( filename ) plt.show ( ) print ( ' Graphics saved as "%s"' % ( filename ) ) # # Terminate. # print ( '' ) print ( 'faithful_simulate():' ) print ( ' Normal end of execution.' ) return def cluster_sample ( n, nparam, Z, e ): #*****************************************************************************80 # ## cluster_sample() samples points from a KMeans cluster. # # Discussion: # # Clustering data. # # Licensing: # # This code is distributed under the GNU LGPL license. # # Modified: # # 19 September 2019 # # Author: # # John Burkardt # from numpy.random import default_rng import numpy as np rng = default_rng() npsum = np.sum ( nparam ) p = np.array ( [ nparam[0] / npsum, nparam[1] / npsum ] ) print ( ' P array:', p ) C = rng.choice ( [ 0, 1 ], size = n, replace = True, p = p, shuffle = True ) t = rng.uniform ( low = 0.0, high = 2.0 * np.pi, size = n ) r = rng.standard_normal ( n ) data = np.zeros ( [ n, 2 ] ) data[:,0] = Z[C,0] + np.sqrt ( e[C] / nparam[C] ) * r * np.cos ( t ) data[:,1] = Z[C,1] + np.sqrt ( e[C] / nparam[C] ) * r * np.sin ( t ) return data if ( __name__ == '__main__' ): faithful_simulate ( )