#! /usr/bin/env python3
#
def cluster_demo ( run_data_all = False ):

#*****************************************************************************80
#
## cluster_demo() runs examples for the cluster lecture.
#
#  Licensing:
#
#    This code is distributed under the GNU LGPL license.
#
#  Modified:
#
#    24 September 2023
#
#  Author:
#
#    John Burkardt
#
#  Input:
#
#    bool run_data_all: if True, also call data_all() with n = 400 to
#    generate and plot every sample dataset.  The default, False, skips
#    that step.
#
  print ( '' )
  print ( 'cluster_demo():' )
  print ( ' Run all examples for cluster lecture.' )
#
#  Generate examples of n data points.
#
  if ( run_data_all ):
    n = 400
    data_all ( n )
#
#  One cluster analysis of blobs data.
#
  n = 30
  blobs_cluster_one ( n )
#
#  Terminate.
#
  print ( '' )
  print ( 'cluster_demo():' )
  print ( ' Normal end of execution.' )

  return

def _std_ellipse ( Z, m = 25 ):

#*****************************************************************************80
#
## _std_ellipse() returns the mean of Z and a one-standard-deviation ellipse.
#
#  Discussion:
#
#    The ellipse is centered at the column means of Z, and its semi-axes
#    are the column standard deviations of Z.
#
#  Input:
#
#    real Z(n,2): the data points, one per row.
#
#    integer m: the number of points used to draw the closed curve.
#
#  Output:
#
#    real Z_mean(2): the column means of Z.
#
#    real x(m), y(m): the coordinates of points on the ellipse.
#
  import numpy as np

  Z_mean = np.mean ( Z, axis = 0 )
  Z_std = np.std ( Z, axis = 0 )

  t = np.linspace ( 0.0, 2.0 * np.pi, m )
  x = Z_mean[0] + Z_std[0] * np.cos ( t )
  y = Z_mean[1] + Z_std[1] * np.sin ( t )

  return Z_mean, x, y

def _print_stats ( Z ):

#*****************************************************************************80
#
## _print_stats() prints the shape and column-wise statistics of Z.
#
#  Input:
#
#    real Z(n,2): the data points, one per row.
#
  import numpy as np

  print ( ' Z.shape = ', Z.shape )
  print ( ' Z min = ', np.min ( Z, axis = 0 ) )
  print ( ' Z mean = ', np.mean ( Z, axis = 0 ) )
  print ( ' Z max = ', np.max ( Z, axis = 0 ) )
  print ( ' Z var = ', np.var ( Z, axis = 0 ) )
  print ( ' Z std = ', np.std ( Z, axis = 0 ) )

  return

def _plot_points ( Z, Z_mean, x, y, filename ):

#*****************************************************************************80
#
## _plot_points() plots data, its mean and an ellipse, and saves the figure.
#
#  Input:
#
#    real Z(n,2): the data points, drawn as circles.
#
#    real Z_mean(2): the center, drawn as a large red dot.
#
#    real x(m), y(m): the curve, drawn as a thick red line.
#
#    string filename: the name of the image file to be written.
#
  import matplotlib.pyplot as plt

  plt.clf ( )
  plt.plot ( Z[:,0], Z[:,1], 'o' )
  plt.plot ( Z_mean[0], Z_mean[1], 'r.', markersize = 20 )
  plt.plot ( x, y, 'r-', linewidth = 3 )
  plt.grid ( True )
  plt.axis ( 'square' )
  plt.savefig ( filename )
  print ( ' Graphics saved as "' + filename + '"' )
  plt.show ( )

  return

def data_all ( n ):

#*****************************************************************************80
#
## data_all() generates, summarizes and plots each of the sample datasets.
#
#  Discussion:
#
#    For each of data_uniform(), data_normal(), data_grid() and
#    data_blobs(), the statistics of the data are printed, and a plot of
#    the points, their mean, and a one-standard-deviation ellipse is
#    saved to "<generator name>.png" and displayed.
#
#  Input:
#
#    integer n: the number of data points requested from each generator.
#
  print ( '' )
  print ( 'data_all():' )
  print ( ' Generate examples of "random" data points in unit square..' )

  for f in ( data_uniform, data_normal, data_grid, data_blobs ):

    Z = f ( n )
    Z_mean, x, y = _std_ellipse ( Z )

    print ( '' )
    print ( f.__name__ )
    _print_stats ( Z )

    _plot_points ( Z, Z_mean, x, y, f.__name__ + '.png' )

  return

def data_uniform ( n, seed = None ):

#*****************************************************************************80
#
## data_uniform() returns n points sampled uniformly from the unit square.
#
#  Input:
#
#    integer n: the number of points.
#
#    seed: optional seed passed to numpy.random.default_rng().  The
#    default, None, requests fresh unpredictable entropy on every call.
#
#  Output:
#
#    real z(n,2): the points, one per row.
#
  from numpy.random import default_rng

  rng = default_rng ( seed )
  z = rng.uniform ( size = ( n, 2 ) )

  return z

def data_normal ( n, seed = None ):

#*****************************************************************************80
#
## data_normal() returns n points with normally distributed coordinates.
#
#  Discussion:
#
#    Each coordinate has mean 0.5 and variance 1/12, which are the mean
#    and variance of the uniform distribution on [0,1].  The points are
#    not confined to the unit square.
#
#  Input:
#
#    integer n: the number of points.
#
#    seed: optional seed passed to numpy.random.default_rng().  The
#    default, None, requests fresh unpredictable entropy on every call.
#
#  Output:
#
#    real z(n,2): the points, one per row.
#
  from numpy.random import default_rng
  import numpy as np

  rng = default_rng ( seed )
  z = rng.normal ( loc = 0.5, scale = np.sqrt ( 1.0 / 12.0 ), size = ( n, 2 ) )

  return z

def data_grid ( n ):

#*****************************************************************************80
#
## data_grid() returns the points of a regular m by m grid on the unit square.
#
#  Discussion:
#
#    The grid size is m = int ( sqrt ( n ) ), so the number of points
#    returned is m * m, which equals n only when n is a perfect square.
#
#  Input:
#
#    integer n: the requested number of points.
#
#  Output:
#
#    real Z(m*m,2): the grid points, one per row.
#
  import numpy as np

  m = int ( np.sqrt ( n ) )
  z = np.linspace ( 0.0, 1.0, m )
  X, Y = np.meshgrid ( z, z )
#
#  Stack the flattened coordinates as rows, then transpose to (m*m,2).
#
  Z = np.vstack ( ( X.flatten ( ), Y.flatten ( ) ) )

  return Z.T

def data_blobs ( n, random_state = 42 ):

#*****************************************************************************80
#
## data_blobs() returns n points in two blobs, generated by scikit-learn.
#
#  Discussion:
#
#    The first blob receives int ( 2 * n / 3 ) of the points, and the
#    second blob receives the remainder.  The blob labels returned by
#    make_blobs() are discarded.
#
#  Input:
#
#    integer n: the total number of points.
#
#    random_state: the value passed to make_blobs() as random_state.
#    The default, 42, is the value that was previously hard-coded.
#
#  Output:
#
#    real Z(n,2): the points, one per row.
#
  from sklearn.datasets import make_blobs

  n1 = int ( 2 * n / 3 )
  n2 = n - n1

  Z, _ = make_blobs ( n_samples = [ n1, n2 ], n_features = 2, \
    random_state = random_state )

  return Z

def blobs_cluster_one ( n ):

#*****************************************************************************80
#
## blobs_cluster_one() analyzes the blobs data as a single cluster.
#
#  Discussion:
#
#    The cluster center is taken to be the mean of the data.  The
#    statistics and the cluster energy are printed, and a plot of the
#    points, their mean, and a one-standard-deviation ellipse is saved
#    to "blobs_cluster_one.png" and displayed.
#
#  Input:
#
#    integer n: the number of data points to generate.
#
  print ( '' )
  print ( 'blobs_cluster_one():' )
  print ( ' Analyze blobs data as one cluster.' )

  Z = data_blobs ( n )
  Z_mean, x, y = _std_ellipse ( Z )
#
#  With a single cluster, the center W is the mean of the data.
#
  W = Z_mean
  E = cluster_energy_one ( Z, W )

  print ( '' )
  _print_stats ( Z )
  print ( ' Energy = ', E )

  _plot_points ( Z, Z_mean, x, y, 'blobs_cluster_one.png' )

  return

def cluster_energy_one ( Z, W ):

#*****************************************************************************80
#
## cluster_energy_one() computes the energy of data Z about a center W.
#
#  Discussion:
#
#    Compute the sum of the squares of the distances of each data item
#    from the center.
#
#    The squared differences are summed directly.  Taking the norm of
#    each row and then squaring it would introduce a needless square
#    root, with its roundoff, and would fail for a single 1-D point.
#
#  Input:
#
#    real Z(n,d): the data items, one per row, or a single item Z(d).
#
#    real W(d): the center.
#
#  Output:
#
#    real E: the sum of the squared distances from the items to W.
#
  import numpy as np

  ZmW = np.asarray ( Z ) - np.asarray ( W )
  E = np.sum ( ZmW**2 )

  return E

def timestamp ( ):

#*****************************************************************************80
#
## timestamp() prints the date as a timestamp.
#
#  Licensing:
#
#    This code is distributed under the GNU LGPL license.
#
#  Modified:
#
#    21 August 2019
#
#  Author:
#
#    John Burkardt
#
  import time

  t = time.time ( )
  print ( time.ctime ( t ) )

  return

if ( __name__ == "__main__" ):
  timestamp ( )
  cluster_demo ( )
  timestamp ( )