#! /usr/bin/env python3 # def subset_distance_hamming ( t1, t2 ): #*****************************************************************************80 # ## subset_distance_hamming() computes the Hamming distance between two sets. # # Discussion: # # The sets T1 and T2 are assumed to be subsets of a set of N elements. # # Licensing: # # This code is distributed under the MIT license. # # Modified: # # 30 March 2022 # # Author: # # John Burkardt # # Reference: # # Donald Kreher, Douglas Simpson, # Combinatorial Algorithms, # CRC Press, 1998, # ISBN: 0-8493-3988-X, # LC: QA164.K73. # # Input: # # integer T1(M), T2(M), two subsets of the master set. # T1(I) = 0 if the I-th element is in the subset T1, and is # 1 otherwise T2 is defined similarly. # # Output: # # integer DIST, the Hamming distance between T1 and T2, # defined as the number of elements of the master set which are # in either T1 or T2 but not both. # import numpy as np dist = np.sum ( t1 != t2 ) return dist def subset_distance_histogram ( m, n ): #*****************************************************************************80 # ## subset_distance_histogram() histograms subset distance statistics. # # Licensing: # # This code is distributed under the MIT license. # # Modified: # # 30 March 2022 # # Author: # # John Burkardt # # Input: # # integer M, the size of the master set. # # integer N, the number of samples to use. # import matplotlib.pyplot as plt import numpy as np t = np.zeros ( n ) for i in range ( 0, n ): s1 = subset_sample ( m ) s2 = subset_sample ( m ) t[i] = subset_distance_hamming ( s1, s2 ) bins = m + 1 plt.hist ( t, bins = bins, rwidth = 0.95, \ range = np.array ( [ 0, m ] ), density = True ) plt.grid ( True ) plt.xlabel ( '<-- Distance -->' ) plt.ylabel ( '<-- Frequency -->' ) title_string = \ 'Hamming distance between random subsets of a ' + str ( m ) + '-set' plt.title ( title_string ) return def subset_distance_pdf ( m ): #*****************************************************************************80 # ## subset_distance_pdf() plots the PDF for the subset distance problem. # # Licensing: # # This code is distributed under the MIT license. # # Modified: # # 30 March 2022 # # Author: # # John Burkardt # from scipy.special import comb import matplotlib.pyplot as plt import numpy as np d = np.linspace ( 0.0, m, m + 1 ) pdf = np.zeros ( m + 1, dtype = float ) for k in range ( 0, m + 1 ): pdf[k] = comb ( m, k ) / 2.0**m plt.plot ( d, pdf, 'r-', linewidth = 2 ) plt.grid ( True ) plt.xlabel ( '<-- Distance -->' ) plt.ylabel ( '<-- Probability -->' ) title_string = \ 'PDF for distance of random subsets of a ' + str ( m ) + ' set' plt.title ( title_string ) return def subset_distance_stats ( m, n ): #*****************************************************************************80 # ## subset_distance_stats() estimates subset distance statistics. # # Licensing: # # This code is distributed under the MIT license. # # Modified: # # 30 March 2022 # # Author: # # John Burkardt # # Input: # # integer M, the size of the master set. # # integer N, the number of sample points to use. # # Output: # # real MU, VAR, the estimated mean and variance of the # distance between two random subsets of an M set. # import numpy as np t = np.zeros ( n ) for i in range ( 0, n ): s1 = subset_sample ( m ) s2 = subset_sample ( m ) t[i] = subset_distance_hamming ( s1, s2 ) mu = np.mean ( t ) var = np.var ( t ) return mu, var def subset_distance_test ( ): #*****************************************************************************80 # ## subset_distance_test() tests subset_distance(). # # Licensing: # # This code is distributed under the MIT license. # # Modified: # # 30 March 2022 # # Author: # # John Burkardt # import matplotlib.pyplot as plt import platform print ( '' ) print ( 'subset_distance_test():' ) print ( ' Python version: ' + platform.python_version ( ) ) print ( ' Test subset_distance().' ) m = 16 n = 10000 mu, var = subset_distance_stats ( m, n ) mu_exact = m / 2.0 var_exact = m / 4.0 print ( '' ) print ( ' Master set size = ', m ) print ( ' Using N = ' + str ( n ) + ' sample points,' ) print ( ' Estimated mean distance = ', mu ) print ( ' Exact mean distance = ', mu_exact ) print ( ' Estimated variance = ', var ) print ( ' Exact variance = ', var_exact ) m = 16 n = 10000 subset_distance_histogram ( m, n ) filename = 'subset_distance_histogram.png' plt.savefig ( filename ) print ( '' ) print ( ' Graphics saved as "' + filename + '"' ) plt.show ( block = False ) plt.close ( ) subset_distance_pdf ( m ) filename = 'subset_distance_pdf.png' plt.savefig ( filename ) print ( '' ) print ( ' Graphics saved as "' + filename + '"' ) plt.show ( block = False ) plt.close ( ) plt.clf ( ) n = 10000 subset_distance_histogram ( m, n ) subset_distance_pdf ( m ) filename = 'subset_distance_compare.png' plt.savefig ( filename ) print ( '' ) print ( ' Graphics saved as "' + filename + '"' ) plt.show ( block = False ) plt.close ( ) # # Terminate. # print ( '' ) print ( 'subset_distance_test()::' ) print ( ' Normal end of execution.' ) return def subset_sample ( m ): #*****************************************************************************80 # ## subset_sample() returns a random subset of an M set. # # Licensing: # # This code is distributed under the MIT license. # # Modified: # # 30 March 2022 # # Author: # # John Burkardt # # Input: # # Input, integer M, the size of the set. # # Output: # # integer S(M), defines the subset using 0 and 1 values. # import numpy as np s = np.random.random_integers ( 0, 1, m ) return s def timestamp ( ): #*****************************************************************************80 # ## timestamp() prints the date as a timestamp. # # Licensing: # # This code is distributed under the MIT license. # # Modified: # # 21 August 2019 # # Author: # # John Burkardt # import time t = time.time ( ) print ( time.ctime ( t ) ) return if ( __name__ == '__main__' ): timestamp ( ) subset_distance_test ( ) timestamp ( )