# include <math.h>
# include <stdio.h>
# include <stdlib.h>

# include "walker_sample.h"

/******************************************************************************/

int i4_uniform_ab ( int a, int b )

/******************************************************************************/
/*
  Purpose:

    i4_uniform_ab() returns a scaled pseudorandom I4.

  Discussion:

    The pseudorandom number should be uniformly distributed
    between A and B, inclusive.  If B < A, the two limits are exchanged.

    One call to rand() is consumed.  The random fraction is formed in
    double precision as rand() / ( RAND_MAX + 1 ), so it is always
    strictly less than 1.  A single precision ratio rand() / RAND_MAX
    can be exactly 1, which would produce the out-of-range value B + 1.

  Licensing:

    This code is distributed under the MIT license.

  Modified:

    25 June 2024

  Author:

    John Burkardt

  Input:

    int A, B, the limits of the interval.

  Output:
 
    int I4_UNIFORM_AB, a number between A and B.
*/
{
  int hi;
  int lo;
  long long offset;
  double r;
  long long span;
  int value;
  double width;
/*
  Order the limits so that LO <= HI.
*/
  if ( a <= b )
  {
    lo = a;
    hi = b;
  }
  else
  {
    lo = b;
    hi = a;
  }
/*
  Work in wider types so that HI - LO + 1 cannot overflow an int.
*/
  span = ( long long ) hi - ( long long ) lo;
  width = ( double ) span + 1.0;
/*
  R lies in [0,1).
*/
  r = ( double ) rand ( ) / ( ( double ) RAND_MAX + 1.0 );
/*
  Truncate to an offset in 0..SPAN; the clamp only guards against roundoff.
*/
  offset = ( long long ) ( width * r );
  if ( span < offset )
  {
    offset = span;
  }

  value = ( int ) ( ( long long ) lo + offset );

  return value;
}
/******************************************************************************/

void normalize ( unsigned int n, double x[] )

/******************************************************************************/
/*
  Purpose:

    normalize() scales a vector X so its entries sum to 1.

  Discussion:

    The scale factor is the reciprocal of the sum of the absolute values
    of X[1] through X[N], so the scaled entries sum to 1 when they are
    all nonnegative.

    If that sum is zero (N is 0, or every entry is zero), no scaling is
    possible and X is returned unchanged, rather than being filled with
    NaN values by a division by zero.

  Licensing:

    This code is distributed under the MIT license. 

  Modified:

    19 February 2016

  Author:

    Original C version by Warren Smith.
    This version by John Burkardt.

  Parameters:

    Input, unsigned int N, indicates the size of X.

    Input/output, double X[N+2], the vector to be normalized.
    Entries X[1] through X[N] will sum to 1 on output.
    Entries X[0] and X[N+1] are not read or written.
*/
{
  unsigned int i;
  double scale;
  double total;
/*
  Sum the absolute values of X.
*/
  total = 0.0;
  for ( i = 1; i <= n; i++ )
  {
    total = total + fabs ( x[i] );
  }
/*
  A zero total cannot be rescaled to 1.
*/
  if ( total == 0.0 )
  {
    return;
  }
/*
  Normalize so that the new sum of X will be 1.
*/
  scale = 1.0 / total;
  for ( i = 1; i <= n; i++ )
  {
    x[i] = x[i] * scale;
  }

  return;
}
/******************************************************************************/

void random_permutation ( unsigned int n, double x[] )

/******************************************************************************/
/*
  Purpose:

    random_permutation() shuffles the entries of an array in place.

  Discussion:

    Positions 1 through N-1 are visited in increasing order.  At each
    position POS, an index PICK is obtained from i4_uniform_ab ( POS, N ),
    and the entries at POS and PICK are exchanged.

  Licensing:

    This code is distributed under the MIT license. 

  Modified:

    14 February 2016

  Author:

    Original C version by Warren Smith.
    This version by John Burkardt.

  Parameters:

    Input, unsigned int N, indicates the size of X.

    Input/output, double X[N+2].  On output, entries X[1] through
    X[N] have been randomly permuted.
*/
{
  unsigned int pos = 1;

  while ( pos < n )
  {
    unsigned int pick = i4_uniform_ab ( pos, n );
    double held = x[pick];
/*
  Exchange the entries at POS and PICK.
*/
    x[pick] = x[pos];
    x[pos] = held;

    pos++;
  }
}
/******************************************************************************/

void walker_build ( unsigned int n, double x[], double y[], unsigned int a[] )

/******************************************************************************/
/*
  Purpose:

    walker_build() sets up the data for a Walker sampler.

  Discussion:

    Y is initialized to N * X, and A to the identity map.  A temporary
    index array B is partitioned so that indices whose Y value is less
    than 1 come first, followed by indices whose Y value is at least 1.
    Each entry below 1 is then paired with an entry above 1: the larger
    entry gives up the amount ( 1 - Y ) that the smaller one lacks, and
    A records which index made the donation.

    The sentinel values Y[0] = 0 and Y[N+1] = 2 stop the scanning loops
    at the ends of the arrays.

    X is presumably expected to sum to 1; this routine does not check.

    If the temporary array B cannot be allocated, a message is written
    to stderr and the program is terminated with exit ( 1 ).

  Licensing:

    This code is distributed under the MIT license. 

  Modified:

    20 February 2016

  Author:

    Original C version by Warren Smith.
    This version by John Burkardt.

  Reference:

    Warren Smith,
    How to sample from a probability distribution,
    April 18, 2002.

  Parameters:

    Input, unsigned int N, indicates the size of X.

    Input, double X[N+2], contains in X[1] through X[N] the
    probabilities of outcomes 1 through N.  
    Entries X[0] and X[N+1] are not referenced.

    Output, double Y[N+2], the Walker threshold vector.

    Output, unsigned int A[N+2], the Walker index vector.
*/
{
  unsigned int *b;
  unsigned int i;
  unsigned int j;
  unsigned int k;
/*
  Get the workspace first, so that a failure leaves Y and A untouched.
*/
  b = ( unsigned int * ) malloc ( ( ( size_t ) n + 2 ) * sizeof ( unsigned int ) );

  if ( b == NULL )
  {
    fprintf ( stderr, "\n" );
    fprintf ( stderr, "walker_build(): Fatal error!\n" );
    fprintf ( stderr, "  Unable to allocate workspace.\n" );
    exit ( 1 );
  }
/*
  Initialize A.
*/
  a[0] = 0;
  for ( i = 1; i <= n; i++ )
  { 
    a[i] = i;
  }
  a[n+1] = n + 1;
/*
  Initialize B to the "stay here" value, and set sentinel values at the ends.
*/
  b[0] = 0;
  for ( i = 1; i <= n; i++ )
  {
    b[i] = i;
  }
  b[n+1] = n + 1;
/*
  Copy Y from X.
  Scale the probability vector and set sentinel values at the ends.
*/
  y[0] = 0.0;
  for ( i = 1; i <= n; i++ )
  {
    y[i] = x[i] * ( double ) ( n );
  }
  y[n+1] = 2.0;
/*
  Partition B: indices with Y < 1 move to the front,
  indices with 1 <= Y move to the back.
*/
  i = 0;
  j = n + 1;
  for ( ; ; )
  {
/*
  Advance I past entries with Y[B[i]] < 1.
  The sentinel Y[N+1] = 2 stops the scan.
*/
    do 
    { 
      i++; 
    } while ( y[b[i]] < 1.0 );
/*
  Retreat J past entries with 1 <= Y[B[j]].
  The sentinel Y[0] = 0 stops the scan.
*/
    do
    { 
      j--; 
    } while ( 1.0 <= y[b[j]] );

    if ( j <= i )
    { 
      break;
    }
/*
  Swap B[i] and B[j].
*/
    k = b[i];
    b[i] = b[j];
    b[j] = k;
  }
/*
  Now B[1..J] index the entries below 1.  I walks down through them,
  while J walks up through the entries that can donate.
*/
  i = j;
  j++;

  while ( 0 < i )
  {
/*
  Find J such that Y[B[j]] exceeds 1 and so can donate.
*/
    while ( y[b[j]] <= 1.0 )
    {
      j++; 
    }
/*
  If J has run onto the sentinel, there are no donors left.
*/
    if ( n < j )
    {
      break;
    }
/*
  B[j] donates to B[i] exactly the amount that B[i] lacks,
  and A remembers the donor.
*/
    y[b[j]] = y[b[j]] - ( 1.0 - y[b[i]] );     
    a[b[i]] = b[j];             
/* 
  If the donor has dropped below 1, it takes over slot I and must be
  topped up in turn by the next donor; otherwise move on to the next
  entry below 1.
*/
    if ( y[b[j]] < 1.0 )
    {
      k = b[i];
      b[i] = b[j];
      b[j] = k;
      j++;
    }
    else
    {
      i--;
    }
  }
/*
  Free memory.
*/
  free ( b );

  return;
}
/******************************************************************************/

unsigned int walker_sampler ( unsigned int n, double y[], unsigned int a[] )

/******************************************************************************/
/*
  Purpose:

    walker_sampler() draws one sample from a prepared Walker table.

  Discussion:

    Two values are taken from drand48().  The first selects a slot
    uniformly from 1 through N.  The second is compared with the
    threshold Y[slot]: if the threshold is smaller than the second value,
    the alias A[slot] is returned, otherwise the slot itself is returned.

    Each sample therefore costs a fixed amount of work, independent of N.
    The vectors Y and A are expected to come from walker_build().
 
  Licensing:

    This code is distributed under the MIT license. 

  Modified:

    20 February 2016

  Author:

    Original C version by Warren Smith.
    This version by John Burkardt.

  Reference:

    Warren Smith,
    How to sample from a probability distribution,
    April 18, 2002.

  Parameters:

    Input, unsigned int N, indicates the size of the probability vector X.

    Input, double Y[N+2], the Walker threshold vector.

    Input, unsigned int A[N+2], the Walker index vector.

    Output, unsigned int WALKER_SAMPLER, a sample value between 1 and N,
    selected according to the probability vector X.
*/
{
/*
  First draw: a uniform slot in 1..N.
*/
  unsigned int slot = 1 + ( unsigned int ) ( n * drand48 ( ) );
/*
  Second draw: decides between the slot and its alias.
*/
  double toss = drand48 ( );

  return ( y[slot] < toss ) ? a[slot] : slot;
}
/******************************************************************************/
    
void walker_sampler_test ( )

/******************************************************************************/
/*
  Purpose:

    walker_sampler_test() tests walker_sampler().

  Discussion:

    A Zipf probability vector of size N = 10 with parameter P = 2 is
    created and randomly permuted.  The Walker tables Y and A are built
    from it, 100000 samples are drawn, and the observed counts are
    compared with the expected counts through a chi-square style sum
    divided by N.  Finally walker_verify() checks that Y and A reproduce
    the probability vector.

    All results are printed to standard output.  If memory cannot be
    allocated, a message is written to stderr and the program is
    terminated with exit ( 1 ).

  Licensing:

    This code is distributed under the MIT license. 

  Modified:

    20 February 2016

  Author:

    Original C version by Warren Smith.
    This version by John Burkardt.
*/
{
  unsigned int *a;
  unsigned int *count;
  double expval;
  unsigned int i;
  unsigned int j;
  unsigned int n;
  double p;
  const unsigned int sample_num = 100000;
  double sum;
  double t;
  double v;
  double *x;
  double *y;

  n = 10;
  p = 2.0;
  
  printf ( "\n" );
  printf ( "walker_sampler_test():\n" );
  printf ( "  walker_sampler() creates Walker sample vectors Y and A\n" );
  printf ( "  for efficient sampling of a discrete probability vector.\n" );
  printf ( "  Test the Walker sampler with a Zipf-type probability\n" );
/*
  "Warm up" the random number generator.
*/
  for ( i = 0; i < 100; i++ )
  {
    drand48 ( );
  }
  printf ( "\n" );
  printf ( "  After 100 warmup calls, next 3 values of drand48():\n" );

  for ( i = 100; i < 103; i++ )
  {
    printf ( "  %g\n", drand48 ( ) );
  }
/*
  Generate a standard Zipf probability vector for cases 1 - N,
  with parameter P.
*/
  x = zipf_probability ( n, p );
/*
  Allocate the Walker tables and the counters.
  calloc() starts every counter at zero.
*/
  y = ( double * ) malloc ( ( n + 2 ) * sizeof ( double ) );
  a = ( unsigned int * ) malloc ( ( n + 2 ) * sizeof ( unsigned int ) );
  count = ( unsigned int * ) calloc ( n + 2, sizeof ( unsigned int ) );

  if ( x == NULL || y == NULL || a == NULL || count == NULL )
  {
    fprintf ( stderr, "\n" );
    fprintf ( stderr, "walker_sampler_test(): Fatal error!\n" );
    fprintf ( stderr, "  Unable to allocate memory.\n" );
    free ( a );
    free ( count );
    free ( x );
    free ( y );
    exit ( 1 );
  }

  printf ( "\n" );
  printf ( "  Zipf probabilities\n" );
  printf ( "  for N = %u\n", n );
  printf ( "  and parameter P = %g\n", p );
  printf ( "\n" );
  printf ( "     I     X[I]\n" );
  printf ( "\n" );
  for ( i = 1; i <= n; i++ )
  {
    printf ( "  %4u %g\n", i, x[i] );
  }
/*
  For better testing, randomly scramble the probabilities.
*/
  random_permutation ( n, x );

  printf ( "\n" );
  printf ( "  Randomly permuted X:\n");
  printf ( "\n" );
  printf ( "     I     X[I]\n" );
  printf ( "\n" );
  for ( i = 1; i <= n; i++ )
  {
    printf ( "  %4u %g\n", i, x[i] );
  }
/*
  Build the Walker sampler.
*/
  walker_build ( n, x, y, a );

  printf ( "\n" );
  printf ( "  Built the sampler\n" );
  printf ( "  i Y[i] A[i]:\n" );
  printf ( "\n" );

  for ( i = 1; i <= n; i++ )
  { 
    printf ( "  %3u %f %u\n", i, y[i], a[i] ); 
  }
/*
  Call the sampler many times, counting the frequency of each outcome.
*/
  for ( i = 0; i < sample_num; i++ )
  {
    j = walker_sampler ( n, y, a );
    count[j] = count[j] + 1;
  }
/*
  Compare sample frequencies to the counts expected from the
  probabilities in X.
*/
  sum = 0.0;
  printf ( "\n" );
  printf ( "  %u samples:\n", sample_num );
  printf ( "  prob   #samples:\n" );
  printf ( "\n" );

  for ( i = 1; i <= n; i++ )
  { 
    printf ( "  %f %6u\n", x[i], count[i] ); 
    expval = x[i] * sample_num;
    t = expval - count[i];
    sum = sum + t * t / expval;
  }
  sum = sum / ( double ) ( n );

  printf ( "\n" );
  printf ( "  sumvar = %g (should be about 1)\n", sum );
/*
  Verify the data structure.
*/
  v = walker_verify ( n, x, y, a );
  printf ( "\n" );
  printf ( "  Verification sum = %g\n", v );
  printf ( "  (Should be close to 0)\n" );
/*
  Free memory.
*/
  free ( a );
  free ( count );
  free ( x );
  free ( y );

  return;
}
/******************************************************************************/

double walker_verify ( unsigned int n, double x[], double y[], unsigned int a[] )

/******************************************************************************/
/*
  Purpose:

    walker_verify() verifies a Walker Sampler structure.

  Discussion:

    The probability vector implied by Y and A is reconstructed in a
    temporary vector Z:  each index I starts with Y[I] / N, and then the
    remainder ( 1 - Y[I] ) / N of every slot I is credited to its
    alias A[I].  The returned value is the sum of the absolute
    differences between Z and X over entries 1 through N.

    If the temporary vector cannot be allocated, a message is written
    to stderr and the program is terminated with exit ( 1 ).

  Licensing:

    This code is distributed under the MIT license. 

  Modified:

    20 February 2016

  Author:

    Original C version by Warren Smith.
    This version by John Burkardt.

  Parameters:

    Input, unsigned int N, indicates the size of X.

    Input, double X[N+2], contains in X[1] through X[N] the
    probabilities of outcomes 1 through N.

    Input, double Y[N+2], the Walker threshold vector.

    Input, unsigned int A[N+2], the Walker index vector.

    Output, double WALKER_VERIFY, the sum of | Z[I] - X[I] | for
    I = 1 through N.
*/
{
  unsigned int i;
  double v;
  double *z;

  z = ( double * ) malloc ( ( ( size_t ) n + 2 ) * sizeof ( double ) );

  if ( z == NULL )
  {
    fprintf ( stderr, "\n" );
    fprintf ( stderr, "walker_verify(): Fatal error!\n" );
    fprintf ( stderr, "  Unable to allocate workspace.\n" );
    exit ( 1 );
  }
/*
  Reverse the scaling.
*/
  for ( i = 1; i <= n; i++ )
  {
    z[i] = y[i] / ( double ) ( n );
  }
/*
  Add back the adjustments: what slot I lacks belongs to its alias.
*/
  for ( i = 1; i <= n; i++ )
  {
    z[a[i]] = z[a[i]] + ( 1.0 - y[i] ) / ( double ) ( n );
  }
/*
  Check for discrepancies between Z and X.
*/
  v = 0.0;
  for ( i = 1; i <= n; i++ )
  {
    v = v + fabs ( z[i] - x[i] );
  }
/*
  Free memory.
*/
  free ( z );

  return v;
}
/******************************************************************************/

double *zipf_probability ( unsigned int n, double p )

/******************************************************************************/
/*
  Purpose:

    zipf_probability() sets up a Zipf probability vector.

  Discussion:

    Entry I, for I from 1 to N, is set to I^(-P), and the vector is then
    rescaled by normalize().  Entries 0 and N+1 are set to zero.

    The vector is allocated with malloc(); the caller is responsible
    for releasing it with free().

  Licensing:

    This code is distributed under the MIT license. 

  Modified:

    19 February 2016

  Author:

    Original C version by Warren Smith.
    This version by John Burkardt.

  Reference:

    George Zipf,
    The Psychobiology of Language,
    1935.

  Parameters:

    Input, unsigned int N, indicates the size of X.

    Input, double P, the Zipf parameter.
    1.0 < P.

    Output, double ZIPF_PROBABILITY[N+2], contains in X[1] through X[N] the
    probabilities of outcomes 1 through N.  NULL is returned if the
    vector could not be allocated.

*/
{
  unsigned int i;
  double *x;

  x = ( double * ) malloc ( ( ( size_t ) n + 2 ) * sizeof ( double ) );
/*
  Report an allocation failure to the caller instead of writing
  through a null pointer.
*/
  if ( x == NULL )
  {
    return NULL;
  }

  x[0] = 0.0;
  for ( i = 1; i <= n; i++ )
  {
    x[i] = pow ( ( double ) i, - p );
  }
  x[n+1] = 0.0;

  normalize ( n, x );

  return x;
}