#! /usr/bin/env python3
#
def logistic_regression ( X, y, alpha, kmax ):

#*****************************************************************************80
#
## logistic_regression() seeks best coefficients for logistic regression.
#
#  Discussion:
#
#    The desired model is
#
#      y(x) = 1 / ( 1 + exp ( - sum ( w[i] x[i] ) ) )
#
#    Usually, x[0] has the value 1, and w[0] is known as the bias.
#
#    A gradient descent iteration is used to estimate the coefficients.
#
#  Input:
#
#    real X[n,d]: n sets of d-dimensional input variables.
#
#    real y[n]: the n output variables, each between 0 and 1.
#
#    real alpha: a learning rate.  Set alpha to 1 initially, but if the
#    iteration is not converging, try repeatedly halving alpha.
#
#    integer kmax: the number of gradient descent iterations to take.
#
#  Output:
#
#    real w[d]: the estimated logistic regression coefficients.
#
  import numpy as np

  n, d = X.shape

  w = np.zeros ( d )
#
#  J[k] records the cross-entropy cost at iteration k.  It is not returned,
#  but can be inspected or printed to monitor convergence.
#
  J = np.zeros ( kmax )

  for k in range ( 0, kmax ):
#
#  Evaluate the model at every data point, using the current coefficients.
#
    y2 = 1.0 / ( 1.0 + np.exp ( - np.matmul ( X, w ) ) )
#
#  Take one gradient descent step for each coefficient.
#
    for j in range ( 0, d ):
      w[j] = w[j] - ( alpha / n ) * np.dot ( ( y2 - y ), X[:,j] )
#
#  Compute the average cross-entropy cost for this iteration.
#
    J[k] = ( 1.0 / n ) * (
      - np.dot ( y, np.log ( y2 ) )
      - np.dot ( ( 1.0 - y ), np.log ( 1.0 - y2 ) ) )

  return w
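
#
#  What follows is a small usage sketch, not part of the original interface:
#  it builds a synthetic two-feature dataset, prepends a column of ones so
#  that w[0] acts as the bias (as described in the Discussion above), calls
#  logistic_regression(), and reports the fit.  The dataset, the name
#  logistic_regression_usage(), the seed, and the choices alpha = 1.0 and
#  kmax = 1000 are illustrative assumptions, not values prescribed by the
#  routine.
#
def logistic_regression_usage ( ):

  import numpy as np

  rng = np.random.default_rng ( 12345 )
#
#  Generate n points in the unit square, labeled by which side of the
#  line x1 + x2 = 1 they fall on.
#
  n = 100
  x = rng.uniform ( size = [ n, 2 ] )
  y = ( 1.0 < x[:,0] + x[:,1] ).astype ( float )
#
#  Prepend a column of ones so that w[0] is the bias.
#
  X = np.hstack ( [ np.ones ( [ n, 1 ] ), x ] )
#
#  Estimate the coefficients.
#
  w = logistic_regression ( X, y, 1.0, 1000 )

  print ( 'Estimated coefficients w:', w )
#
#  Classify by thresholding the model output at 0.5, and report the
#  training accuracy.
#
  y2 = 1.0 / ( 1.0 + np.exp ( - np.matmul ( X, w ) ) )
  accuracy = np.mean ( ( 0.5 < y2 ) == ( 0.5 < y ) )
  print ( 'Training accuracy:', accuracy )

if ( __name__ == '__main__' ):
  logistic_regression_usage ( )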