#! /usr/bin/env python3 # def faithful_simulate ( ): #*****************************************************************************80 # ## faithful_simulate() simulates Old Faithful using a two cluster model. # # Discussion: # # Clustering data. # # Licensing: # # This code is distributed under the MIT license. # # Modified: # # 19 September 2019 # # Author: # # John Burkardt # import matplotlib.pyplot as plt import numpy as np import platform from scipy.cluster.vq import kmeans2 print ( '' ) print ( 'faithful_simulate():' ) print ( ' Python version: %s' % ( platform.python_version ( ) ) ) print ( ' Use KMeans model to simulate fresh Old Faithful data.' ) # # Set the simulation parameters. # nparam = np.array ( [ 98, 174 ] ) Zparam = np.array ( [ \ [ 0.12818076, 0.21967655 ], \ [ 0.77095402, 0.69908913 ] ] ) vparam = np.array ( [ 1.88340, 4.45703 ] ) print ( '' ) print ( ' Data cluster sizes = ', np.sum ( nparam ), '=', nparam[0], '+', nparam[1] ) print ( ' Data cluster variances = ', np.sum ( vparam ), '=', vparam[0], '+', vparam[1] ) print ( ' Data cluster centers:' ) print ( Zparam ) # # Simulate new data. # n = 272 data = cluster_sample ( n, nparam, Zparam, vparam ) # # Create x and y. # x = data[:,0] y = data[:,1] print ( '' ) print ( ' Number of simulated values is', n ) # # Normalize the data. # if ( False ): xmin = np.min ( x ) xmax = np.max ( x ) ymin = np.min ( y ) ymax = np.max ( y ) data[:,0] = ( data[:,0] - xmin ) / ( xmax - xmin ) data[:,1] = ( data[:,1] - ymin ) / ( ymax - ymin ) x = data[:,0] y = data[:,1] # # Call kmeans2 for the simulated data. # k = 2 Z, label = kmeans2 ( data, k ) print ( ' Simulated cluster centers Z:' ) print ( Z ) bd = ( x - Z[0,0] )**2 + ( y - Z[0,1] )**2 rd = ( x - Z[1,0] )**2 + ( y - Z[1,1] )**2 bc = np.where ( bd < rd ) rc = np.where ( rd < bd ) cost0 = sum ( bd[bc] ) cost1 = sum ( rd[rc] ) bn = len ( bd[bc] ) cn = len ( rd[rc] ) cost = cost0 + cost1 print ( ' Simulated cluster variances = ', cost, '=', cost0, '+', cost1 ) print ( ' Simulated cluster sizes = ', bn + cn, '=', bn, '+', cn ) plt.plot ( x[label==0], y[label==0], 'c.', markersize = 10 ) plt.plot ( x[label==1], y[label==1], 'm.', markersize = 10 ) plt.plot ( Z[0,0], Z[0,1], 'bo', markersize = 15 ) plt.plot ( Z[1,0], Z[1,1], 'ro', markersize = 15 ) plt.xlabel ( '<-- Duration -->', fontsize = 16 ) plt.ylabel ( '<-- Wait -->', fontsize = 16 ) plt.title ( 'Clusters using kmeans2()', fontsize = 16 ) plt.grid ( True ) filename = 'faithful_simulate.png' plt.savefig ( filename ) plt.show ( ) plt.clf ( ) print ( '' ) print ( ' Graphics saved as "%s"' % ( filename ) ) # # Terminate. # print ( '' ) print ( 'faithful_simulate():' ) print ( ' Normal end of execution.' ) return def cluster_sample ( n, nparam, Z, V ): #*****************************************************************************80 # ## cluster_sample() samples points from a KMeans cluster. # # Discussion: # # Clustering data. # # Licensing: # # This code is distributed under the MIT license. # # Modified: # # 19 September 2019 # # Author: # # John Burkardt # from numpy.random import default_rng import numpy as np rng = default_rng() npsum = np.sum ( nparam ) p = np.array ( [ nparam[0] / npsum, nparam[1] / npsum ] ) print ( ' P array:', p ) C = rng.choice ( [ 0, 1 ], size = n, replace = True, p = p, shuffle = True ) t = rng.uniform ( low = 0.0, high = 2.0 * np.pi, size = n ) r = rng.standard_normal ( n ) data = np.zeros ( [ n, 2 ] ) data[:,0] = Z[C,0] + np.sqrt ( V[C] / nparam[C] ) * r * np.cos ( t ) data[:,1] = Z[C,1] + np.sqrt ( V[C] / nparam[C] ) * r * np.sin ( t ) return data if ( __name__ == '__main__' ): faithful_simulate ( )