#! /usr/bin/env python3 # def normalize ( n, x ): #****************************************************************************80 # ## normalize() scales a vector X so its entries sum to 1. # # Licensing: # # This code is distributed under the MIT license. # # Modified: # # 19 February 2016 # # Author: # # Original C version by Warren Smith. # Python version by John Burkardt. # # Input: # # integer N, indicates the size of X. # # real X[N+2], the vector to be normalized. # # Output: # # real X[N+2], the normalized vector. # # Sum X. # sum = 0.0 for i in range ( 1, n + 1 ): sum = sum + abs ( x[i] ) # # Normalize so that the new sum of X will be 1. # for i in range ( 1, n + 1 ): x[i] = x[i] / sum return x def normalize_test ( ): #*****************************************************************************80 # ## normalize_test() tests normalize(). # # Licensing: # # This code is distributed under the MIT license. # # Modified: # # 19 February 2016 # # Author: # # John Burkardt # import numpy as np import platform print ( '' ) print ( 'NORMALIZE_TEST' ) print ( ' Python version: %s' % ( platform.python_version ( ) ) ) print ( ' NORMALIZE normalizes entries 1 through N of a vector' ) print ( ' of length N+2.' ) n = 5 x = np.random.rand ( n + 2 ) r8vec_print ( n + 2, x, ' Initial X:' ) x_norm = 0.0 for i in range ( 1, n + 1 ): x_norm = x_norm + abs ( x[i] ) print ( '' ) print ( ' Initial L1 norm of X(1:%d) = %10.6g' % ( n, x_norm ) ) x = normalize ( n, x ) r8vec_print ( n + 2, x, ' Normalized X:' ) x_norm = 0.0 for i in range ( 1, n + 1 ): x_norm = x_norm + abs ( x[i] ) print ( '' ) print ( ' Final L1 norm of X(1:%d) = %10.6g' % ( n, x_norm ) ) # # Terminate. # print ( '' ) print ( 'NORMALIZE_TEST' ) print ( ' Normal end of execution.' ) return def r8vec_indicator0 ( n ): #*****************************************************************************80 # ## r8vec_indicator0() sets an R8VEC to the indicator vector (0,1,2,...). # # Licensing: # # This code is distributed under the MIT license. # # Modified: # # 27 September 2014 # # Author: # # John Burkardt # # Input: # # integer N, the number of elements of the vector. # # Output: # # real A(N), the indicator array. # import numpy a = numpy.zeros ( n ); for i in range ( 0, n ): a[i] = i return a def r8vec_indicator0_test ( ): #*****************************************************************************80 # ## r8vec_indicator0_test() tests r8vec_indicator0(). # # Licensing: # # This code is distributed under the MIT license. # # Modified: # # 27 September 2014 # # Author: # # John Burkardt # import numpy as np import platform print ( '' ) print ( 'R8VEC_INDICATOR0_TEST' ) print ( ' Python version: %s' % ( platform.python_version ( ) ) ) print ( ' R8VEC_INDICATOR0 returns an indicator matrix.' ) n = 10 a = r8vec_indicator0 ( n ) r8vec_print ( n, a, ' The indicator0 vector:' ) # # Terminate. # print ( '' ) print ( 'R8VEC_INDICATOR0_TEST' ) print ( ' Normal end of execution.' ) return def r8vec_print ( n, a, title ): #*****************************************************************************80 # ## r8vec_print() prints an R8VEC. # # Licensing: # # This code is distributed under the MIT license. # # Modified: # # 31 August 2014 # # Author: # # John Burkardt # # Input: # # integer N, the dimension of the vector. # # real A(N), the vector to be printed. # # string TITLE, a title. # print ( '' ) print ( title ) print ( '' ) for i in range ( 0, n ): print ( '%6d: %12g' % ( i, a[i] ) ) def random_permutation ( n, x ): #****************************************************************************80 # ## random_permutation() applies a random permutation to an array. # # Licensing: # # This code is distributed under the MIT license. # # Modified: # # 19 February 2016 # # Author: # # Original C version by Warren Smith. # Python version by John Burkardt. # # Input: # # integer N, indicates the size of X. # # real X[N+2], the vector to be permuted. # # Output: # # real X[N+2], the permuted vector. # import numpy as np for i in range ( 1, n + 1 ): j = np.random.random_integers ( i, n ) t = x[i] x[i] = x[j] x[j] = t return x def random_permutation_test ( ): #*****************************************************************************80 # ## random_permutation_test() tests random_permutation(). # # Licensing: # # This code is distributed under the MIT license. # # Modified: # # 19 February 2016 # # Author: # # John Burkardt # import platform print ( '' ) print ( 'RANDOM_PERMUTATION_TEST' ) print ( ' Python version: %s' % ( platform.python_version ( ) ) ) print ( ' RANDOM_PERMUTATION randomly permutes entries 1 through N of' ) print ( ' a vector X[0:N+1].' ) n = 5 x = r8vec_indicator0 ( n + 2 ) r8vec_print ( n + 2, x, ' Initial X:' ) x = random_permutation ( n, x ) r8vec_print ( n + 2, x, ' Permuted X:' ) # # Terminate. # print ( '' ) print ( 'RANDOM_PERMUTATION_TEST' ) print ( ' Normal end of execution.' ) return def timestamp ( ): #*****************************************************************************80 # ## timestamp() prints the date as a timestamp. # # Licensing: # # This code is distributed under the MIT license. # # Modified: # # 06 April 2013 # # Author: # # John Burkardt # import time t = time.time ( ) print ( time.ctime ( t ) ) return None def walker_build ( n, x ): #****************************************************************************80 # ## walker_build() sets up the data for a Walker sampler. # # Discussion: # # From the probability vector X, this function creates the Y and A # vectors used by the Walker sampler. # # Licensing: # # This code is distributed under the MIT license. # # Modified: # # 20 February 2016 # # Author: # # Original C version by Warren Smith. # Python version by John Burkardt. # # Reference: # # Warren Smith, # How to sample from a probability distribution, # April 18, 2002. # # Input: # # integer N, indicates the size of X. # # real X[N+2], contains in X[1] through X[N] the # probabilities of outcomes 1 through N. # # Output: # # real [N+2], the Walker threshold vector. # # integer int A[N+2], the Walker index vector. # import numpy as np # # Initialize A. # a = np.zeros ( n + 2, dtype = np.int32 ) for i in range ( 0, n + 2 ): a[i] = i # # Initialize B to the "stay here" value, and set sentinel values at the ends. # b = np.zeros ( n + 2, dtype = np.int32 ) for i in range ( 0, n + 2 ): b[i] = a[i] # # Copy X into Y. # Scale the probability vector and set sentinel values at the ends. # y = np.zeros ( n + 2, dtype = np.float64 ) y[0] = 0.0 for i in range ( 1, n + 1 ): y[i] = x[i] * float ( n ) y[n+1] = 2.0 i = 0 j = n + 1 while ( True ): # # Find i so Y[B[i]] needs more. # while ( True ): i = i + 1 if ( 1.0 <= y[b[i]] ): break # # Find j so Y[B[j]] wants less. # while ( True ): j = j - 1 if ( y[b[j]] < 1.0 ): break if ( j <= i ): break # # Swap B[i] and B[j]. # k = b[i] b[i] = b[j] b[j] = k i = j j = j + 1 while ( 0 < i ): # # Find J such that Y[B[j]] needs more. # while ( y[b[j]] <= 1.0 ): j = j + 1 # # Meanwhile, Y[B[i]] wants less. # if ( n < j ): break # # B[i] will donate to B[j] to fix up. # y[b[j]] = y[b[j]] - ( 1.0 - y[b[i]] ) a[b[i]] = b[j] # # Y[B[j]] now wants less so readjust ordering. # if ( y[b[j]] < 1.0 ): k = b[i] b[i] = b[j] b[j] = k j = j + 1 else: i = i - 1 return y, a def walker_build_test ( ): #*****************************************************************************80 # ## walker_build_test() tests walker_build(). # # Licensing: # # This code is distributed under the MIT license. # # Modified: # # 25 March 2021 # # Author: # # John Burkardt # from scipy.special import comb import numpy as np import platform print ( '' ) print ( 'WALKER_BUILD_TEST' ) print ( ' Python version: %s' % ( platform.python_version ( ) ) ) print ( ' WALKER_BUILD builds the Walker sampler data vectors Y and A,' ) print ( ' given a probability vector X.' ) n = 5 x = np.zeros ( n + 2, dtype = np.float64 ) for i in range ( 1, n + 1 ): x[i] = float ( comb ( n - 1, i - 1 ) ) / float ( 2 ** ( n - 1 ) ) r8vec_print ( n + 2, x, ' Binomial PDF (ignore first and last entries):' ) y, a = walker_build ( n, x ) print ( '' ) print ( ' I A[I] Y[i] (ignore first and last entries)' ) print ( '' ) for i in range ( 0, n + 2 ): print ( ' %2d %2d %10.4g' % ( i, a[i], y[i] ) ) # # Terminate. # print ( '' ) print ( 'WALKER_BUILD_TEST' ) print ( ' Normal end of execution.' ) return def walker_sampler ( n, y, a ): #****************************************************************************80 # ## walker_sampler() returns a random sample i=1..N with prob X[i]. # # Discussion: # # Implementation of algorithm for sampling from a discrete # probability N-vector X[1], X[2], ..., X[N]. (N>=1.) # Runs on O(1) worst case time per sample, # and uses one integer and one real N-element array for storage. # Preprocessing consumes O(N) time and temporarily uses one # additional integer array (B[0..N+1]) for bookkeeping. # X[0] and X[N+1] are also used as sentinels in the Build() algorithm. # # Licensing: # # This code is distributed under the MIT license. # # Modified: # # 20 February 2016 # # Author: # # Original C version by Warren Smith. # Python version by John Burkardt. # # Reference: # # Warren Smith, # How to sample from a probability distribution, # April 18, 2002. # # Input: # # integer N, indicates the size of the probability vector X. # # real Y[N+2], the Walker threshold vector. # # integer A[N+2], the Walker index vector. # # Output: # # integer I, a sample value between 1 and N, # selected according to the probability vector X. # import numpy as np import random as rn # # Let i = random uniform integer from {1,2,...N} # i = 1 + int ( np.fix ( float ( n ) * rn.random ( ) ) ) r = rn.random ( ) if ( y[i] < r ): i = a[i] return i def walker_sampler_test ( ): #****************************************************************************80 # ## walker_sampler_test() tests walker_sampler(). # # Discussion: # # A Zipf-type probability vector is used. # # Licensing: # # This code is distributed under the MIT license. # # Modified: # # 20 February 2016 # # Author: # # Original C version by Warren Smith. # Python version by John Burkardt. # # Input: # # integer N, indicates the size of the probability vector X. # # real P, the value of the Zipf parameter # 1 <= P. # import numpy as np import platform import random as rn n = 10 p = 2.0 print ( '' ) print ( 'WALKER_SAMPLER_TEST:' ) print ( ' Python version: %s' % ( platform.python_version ( ) ) ) print ( ' WALKER_SAMPLER creates Walker sample vectors Y and A' ) print ( ' for efficient sampling of a discrete probability vector.' ) print ( ' Test the Walker sampler with a Zipf-type probability.' ) # # Generate a standard Zipf probability vector for cases 1 - N, # with parameter P. # x = zipf_probability ( n, p ) print ( '' ) print ( ' Zipf probabilities' ) print ( ' for N = %d' % ( n ) ) print ( ' and parameter P = %g' % ( p ) ) print ( '' ) print ( ' I X[I]' ) print ( '' ) for i in range ( 1, n + 1 ): print ( ' %4d %16g' % ( i, x[i] ) ) # # For better testing, randomly scramble the probabilities. # x = random_permutation ( n, x ) print ( '' ) print ( ' Randomly permuted X:' ) print ( '' ) print ( ' I X[I]' ) print ( '' ) for i in range ( 1, n + 1 ): print ( ' %4d %16g' % ( i, x[i] ) ) # # Build the Walker sampler. # y, a = walker_build ( n, x ) print ( '' ) print ( ' Built the sampler' ) print ( ' i Y[i] A[i]:' ) print ( '' ) for i in range ( 0, n + 2 ): print ( ' %3d %16g %4d' % ( i, y[i], a[i] ) ) # # Prepare to count the frequency of each outcome. # count = np.zeros ( n + 2, dtype = np.int32 ) # # Call the sampler many times. # for i in range ( 0, 100000 ): j = walker_sampler ( n, y, a ) count[j] = count[j] + 1 # # Compare normalized sample frequencies to the original probabilities in X. # v = 0.0 print ( '' ) print ( ' 100000 samples:' ) print ( ' prob #samples:' ) print ( '' ) for i in range ( 1, n + 1 ): print ( ' %16g %6d' % ( x[i], count[i] ) ) expval = x[i] * 100000 t = expval - count[i] v = v + t * t / expval v = v / ( float ) ( n ) print ( '' ) print ( ' sumvar = %g, (should be about 1)' % ( v ) ) # # Terminate. # print ( '' ) print ( 'WALKER_SAMPLER_TEST' ) print ( ' Normal end of execution.' ) return def walker_sample_test ( ): #*****************************************************************************80 # ## walker_sample_test() tests walker_sample(). # # Licensing: # # This code is distributed under the MIT license. # # Modified: # # 20 February 2016 # # Author: # # John Burkardt # import platform print ( '' ) print ( 'WALKER_SAMPLE_TEST:' ) print ( ' Python version: %s' % ( platform.python_version ( ) ) ) print ( ' Test WALKER_SAMPLE()' ) normalize_test ( ) r8vec_indicator0_test ( ) random_permutation_test ( ) walker_build_test ( ) walker_sampler_test ( ) walker_verify_test ( ) zipf_probability_test ( ) # # Terminate. # print ( '' ) print ( 'WALKER_SAMPLE_TEST:' ) print ( ' Normal end of execution.' ) return def walker_verify ( n, x, y, a ): #****************************************************************************80 # ## walker_verify() verifies a Walker Sampler structure. # # Discussion: # # This test applies the sampling algorithms to a Zipfian distribution. # # Licensing: # # This code is distributed under the MIT license. # # Modified: # # 20 February 2016 # # Author: # # Original C version by Warren Smith. # Python version by John Burkardt. # # Input: # # integer N, indicates the size of X. # # real X[N+2], contains in X[1] through X[N] the # probabilities of outcomes 1 through N. # # real Y[N+2], the Walker threshold vector. # # integer A[N+2], the Walker index vector. # # Output: # # real V, the sum of the absolute value of discrepancies. # import numpy as np z = np.zeros ( n + 2, dtype = np.float64 ) # # Reverse the scaling. # for i in range ( 0, n + 2 ): z[i] = y[i] / float ( n ) # # Add back the adjustments. # for i in range ( 1, n + 1 ): z[a[i]] = z[a[i]] + ( 1.0 - y[i] ) / float ( n ) # # Check for discrepancies between Z and X. # v = 0.0 for i in range ( 1, n + 1 ): v = v + abs ( z[i] - x[i] ) return v def walker_verify_test ( ): #*****************************************************************************80 # ## walker_verify_test() tests walker_verify(). # # Licensing: # # This code is distributed under the MIT license. # # Modified: # # 20 February 2016 # # Author: # # John Burkardt # import numpy as np import platform print ( '' ) print ( 'WALKER_VERIFY_TEST' ) print ( ' Python version: %s' % ( platform.python_version ( ) ) ) print ( ' WALKER_VERIFY verifies the Walker sampler data vectors Y and A,' ) print ( ' for a given probability vector X.' ) n = 9 x = np.zeros ( n + 2, dtype = np.float64 ) for i in range ( 1, n + 1 ): x[i] = np.log ( 1.0 + 1.0 / float ( i ) ) / np.log ( float ( n + 1 ) ) r8vec_print ( n + 2, x, ' Benford PDF (ignore first and last entries):' ) y, a = walker_build ( n, x ) print ( '' ) print ( ' I A[I] Y[i] (ignore first and last entries)' ) print ( '' ) for i in range ( 0, n + 2 ): print ( ' %2d %2d %10.4g' % ( i, a[i], y[i] ) ) v = walker_verify ( n, x, y, a ) print ( '' ) print ( ' The verification sum = %g' % ( v ) ) print ( ' It should be very close to zero.' ) # # Terminate. # print ( '' ) print ( 'WALKER_VERIFY_TEST' ) print ( ' Normal end of execution.' ) return def zipf_probability ( n, p ): #****************************************************************************80 # ## zipf_probability() sets up a Zipf probability vector. # # Licensing: # # This code is distributed under the MIT license. # # Modified: # # 19 February 2016 # # Author: # # Original C version by Warren Smith. # Python version by John Burkardt. # # Reference: # # George Zipf, # The Psychobiology of Language, # 1935. # # Input: # # integer N, indicates the size of X. # # real P, the Zipf parameter. # 1.0 < P. # # Output: # # real X[N+2], contains in X[1] through X[N] the # probabilities of outcomes 1 through N. # import numpy as np x = np.zeros ( n + 2 ) for i in range ( 1, n + 1 ): x[i] = i ** ( - p ) x = normalize ( n, x ) return x def zipf_probability_test ( ): #*****************************************************************************80 # ## zipf_probability_test() tests zipf_probability(). # # Licensing: # # This code is distributed under the MIT license. # # Modified: # # 19 February 2016 # # Author: # # John Burkardt # import platform print ( '' ) print ( 'ZIPF_PROBABILITY_TEST' ) print ( ' Python version: %s' % ( platform.python_version ( ) ) ) print ( ' ZIPF_PROBABILITY sets up a probablity vector X of N+2 elements' ) print ( ' containing in X[1:N] the probabilities of outcomes 1 through N' ) print ( ' in a Zipf distribution with parameter P.' ) n = 5 p = 1.0 x = zipf_probability ( n, p ) r8vec_print ( n + 2, x, ' X for N = 5, P = 1.0' ) n = 5 p = 2.0 x = zipf_probability ( n, p ) r8vec_print ( n + 2, x, ' X for N = 5, P = 2.0' ) n = 10 p = 2.0 x = zipf_probability ( n, p ) r8vec_print ( n + 2, x, ' X for N = 10, P = 2.0' ) # # Terminate. # print ( '' ) print ( 'ZIPF_PROBABILITY_TEST' ) print ( ' Normal end of execution.' ) return if ( __name__ == '__main__' ): timestamp ( ) walker_sample_test ( ) timestamp ( )