#! /usr/bin/env python3
#
def neural_network_test ( ):

#*****************************************************************************80
#
## neural_network_test() tests neural_network().
#
#  Licensing:
#
#    This code is distributed under the MIT license.
#
#  Modified:
#
#    23 July 2022
#
#  Author:
#
#    Catherine Higham, Desmond Higham.
#    Modifications by John Burkardt.
#
#  Reference:
#
#    Catherine Higham, Desmond Higham,
#    Deep Learning: An introduction for applied mathematicians,
#    SIAM Review,
#    Volume 61, Number 4, pages 860-891,
#    November 2019.
#
  print ( '' )
  print ( 'neural_network_test():' )
  print ( '  Demonstrate back propagation.' )
#
#  eta is the learning rate.
#
  eta = 0.05
  print ( '  Using learning rate eta =', eta )
#
#  it_num is the number of stochastic gradient iterations.
#
  it_num = 1000000
  print ( '  Number of iterations = ', it_num )
#
#  Run the neural network.
#
  cost = neural_network ( eta, it_num )
#
#  Plot the cost over the iterations.
#
  cost_plot ( cost )
#
#  Report change in cost.
#
  print ( '  Initial cost = ', cost[0] )
  print ( '  Final cost   = ', cost[-1] )
#
#  Terminate.
#
  print ( '' )
  print ( 'neural_network_test():' )
  print ( '  Normal end of execution.' )
  return

def neural_network ( eta, it_num ):

#*****************************************************************************80
#
## neural_network() uses back propagation to train a network.
#
#  Licensing:
#
#    This code is distributed under the MIT license.
#
#  Modified:
#
#    23 July 2022
#
#  Author:
#
#    Catherine Higham, Desmond Higham.
#    Modifications by John Burkardt.
#
#  Reference:
#
#    Catherine Higham, Desmond Higham,
#    Deep Learning: An introduction for applied mathematicians,
#    SIAM Review,
#    Volume 61, Number 4, pages 860-891,
#    November 2019.
#
#  Input:
#
#    real eta: the learning rate.
#
#    integer it_num: the number of stochastic gradient iterations.
#
#  Output:
#
#    real cost_vec(it_num+1): the value of the cost function at each iteration.
#
  from numpy.random import default_rng
  import numpy as np

  rng = default_rng ( )
#
#  Data: the coordinates (x1,x2) of ten points, and the category y of each point.
#
  x1 = np.array ( [ 0.1, 0.3, 0.1, 0.6, 0.4, 0.6, 0.5, 0.9, 0.4, 0.7 ] )
  x2 = np.array ( [ 0.1, 0.4, 0.5, 0.9, 0.2, 0.3, 0.6, 0.2, 0.4, 0.6 ] )

  y = np.array ( [ \
    [ 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0 ], \
    [ 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0 ] ] )
#
#  Initialize weights and biases.
#
  W2 = 0.5 * np.random.normal ( loc = 0.0, scale = 1.0, size = [ 2, 2 ] )
  W3 = 0.5 * np.random.normal ( loc = 0.0, scale = 1.0, size = [ 3, 2 ] )
  W4 = 0.5 * np.random.normal ( loc = 0.0, scale = 1.0, size = [ 2, 3 ] )

  b2 = 0.5 * np.random.normal ( loc = 0.0, scale = 1.0, size = [ 2, 1 ] )
  b3 = 0.5 * np.random.normal ( loc = 0.0, scale = 1.0, size = [ 3, 1 ] )
  b4 = 0.5 * np.random.normal ( loc = 0.0, scale = 1.0, size = [ 2, 1 ] )
#
#  Forward and back propagate.
#
  cost_vec = np.zeros ( it_num + 1 )

  for it in range ( 0, it_num + 1 ):

    if ( 0 < it ):
#
#  Choose a training point at random.
#
      k = rng.integers ( low = 0, high = 10, endpoint = False )
#
#  We have to force xvec and yvec to be column vectors.
#
      xvec = np.array ( [ [ x1[k] ], [ x2[k] ] ] )
      yvec = np.array ( [ [ y[0,k] ], [ y[1,k] ] ] )
#
#  Forward pass.
#
      a2 = activate ( xvec, W2, b2 )
      a3 = activate ( a2, W3, b3 )
      a4 = activate ( a3, W4, b4 )
#
#  Backward pass.
#
      delta4 = a4 * ( 1.0 - a4 ) * ( a4 - yvec )
      delta3 = a3 * ( 1.0 - a3 ) * np.dot ( W4.T, delta4 )
      delta2 = a2 * ( 1.0 - a2 ) * np.dot ( W3.T, delta3 )
#
#  Gradient step.
#  Adjust weights and biases with a rank one update.
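#  Each weight correction is the outer product of a layer's error vector
#  with that layer's input (delta2 with xvec, delta3 with a2, delta4 with a3),
#  which is a rank one matrix; each bias correction is the error vector itself.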
#
      W2 = W2 - eta * np.dot ( delta2, xvec.T )
      W3 = W3 - eta * np.dot ( delta3, a2.T )
      W4 = W4 - eta * np.dot ( delta4, a3.T )

      b2 = b2 - eta * delta2
      b3 = b3 - eta * delta3
      b4 = b4 - eta * delta4
#
#  Monitor the progress.
#
    cost_vec[it] = cost_function ( W2, W3, W4, b2, b3, b4, x1, x2, y )

  return cost_vec

def activate ( x, W, b ):

#*****************************************************************************80
#
## activate() evaluates the sigmoid activation function for one layer.
#
#  Licensing:
#
#    This code is distributed under the MIT license.
#
#  Modified:
#
#    05 May 2022
#
#  Author:
#
#    Catherine Higham, Desmond Higham.
#    Modifications by John Burkardt.
#
#  Reference:
#
#    Catherine Higham, Desmond Higham,
#    Deep Learning: An introduction for applied mathematicians,
#    SIAM Review,
#    Volume 61, Number 4, pages 860-891,
#    November 2019.
#
#  Input:
#
#    real x(n,1): the input vector.
#
#    real W(m,n): the weights.
#
#    real b(m,1): the shifts.
#
#  Output:
#
#    real y(m,1): the output vector.
#
  import numpy as np

  y = 1.0 / ( 1.0 + np.exp ( - ( np.dot ( W, x ) + b ) ) )

  return y

def cost_function ( W2, W3, W4, b2, b3, b4, x1, x2, y ):

#*****************************************************************************80
#
## cost_function() evaluates the cost function.
#
#  Licensing:
#
#    This code is distributed under the MIT license.
#
#  Modified:
#
#    17 May 2022
#
#  Author:
#
#    Catherine Higham, Desmond Higham.
#    Modifications by John Burkardt.
#
#  Input:
#
#    real W2(), W3(), W4(): the weights.
#
#    real b2(), b3(), b4(): the shifts.
#
#    real x1(10), x2(10): the coordinates of the training points.
#
#    real y(2,10): the correct category of each training point.
#
#  Output:
#
#    real costval: the current value of the cost function.
#
  import numpy as np

  costvec = np.zeros ( 10 )

  for i in range ( 0, 10 ):

    x = np.zeros ( [ 2, 1 ] )
    x[0,0] = x1[i]
    x[1,0] = x2[i]

    a2 = activate ( x,  W2, b2 )
    a3 = activate ( a2, W3, b3 )
    a4 = activate ( a3, W4, b4 )

    costvec[i] = np.linalg.norm ( np.transpose ( a4 ) - y[:,i] )

  costval = np.sum ( costvec**2 )

  return costval

def cost_plot ( cost_vec ):

#*****************************************************************************80
#
## cost_plot() plots the cost function over the iterations.
#
#  Licensing:
#
#    This code is distributed under the MIT license.
#
#  Modified:
#
#    23 July 2022
#
#  Author:
#
#    Catherine Higham, Desmond Higham.
#    Modifications by John Burkardt.
#
#  Input:
#
#    real cost_vec(it_num+1): the cost at each iteration.
#
  import matplotlib.pyplot as plt
  import numpy as np

  it_num = cost_vec.shape[0] - 1
#
#  Plot the decay of the cost function.
#
  plt.clf ( )
  i = np.arange ( 0, it_num + 1, 10000 )
  plt.semilogy ( i, cost_vec[i], linewidth = 3 )
  plt.grid ( True )
  plt.xlabel ( '<-- Iteration -->' )
  plt.ylabel ( '<-- Cost -->' )
  plt.title ( 'Cost reduction during stochastic gradient descent' )
  filename = 'neural_network.png'
  plt.savefig ( filename )
  print ( '  Graphics saved as "' + filename + '"' )
  plt.show ( block = False )
  plt.close ( )

  return

def intspace ( a, b, n ):

#*****************************************************************************80
#
## intspace() returns roughly equally spaced integers in an interval.
#
#  Discussion:
#
#    intspace() is a sort of "linspace()" for integers.
#
#    An interval is defined by integers A <= B.
#
#    If possible, a vector C of N integers is to be determined, with
#    C(1) = A and C(N) = B.
#
#    So far as possible, the entries of C should be equally spaced in [A,B].
#
#    However, if B-A+1 < N, then we simply return C = A:B.
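#
#    For example, intspace ( 0, 100, 5 ) would return the values
#    0, 25, 50, 75, 100.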
#
#    A use for this code can be imagined in which a large array of data
#    has been computed, and it is desired to make a plot.  However, displaying
#    all the data might result in an ugly plot, or one that takes too long
#    to display.  One can then specify an index set like
#      c = intspace ( 1, max, 100 )
#    and restrict the plot to the data indexed by C.
#
#    Similarly, it might be desired to print out sample values of a large
#    array of data; this function would make it easy to do this in a
#    fairly regular fashion.
#
#  Licensing:
#
#    This code is distributed under the MIT license.
#
#  Modified:
#
#    23 July 2022
#
#  Author:
#
#    John Burkardt.
#
#  Input:
#
#    integer A, B: the left and right endpoints.
#
#    integer N: the number of samples desired.
#
#  Output:
#
#    integer C[*]: the sample values.  The first and last entries of C
#    will be A and B respectively.  The number of entries in C will be
#    min ( N, B+1-A ).  The entries of C will be roughly equally spaced.
#
  import numpy as np

  if ( b + 1 - a <= n ):
    c = np.arange ( a, b + 1 )
  else:
    i = np.arange ( n - 1, -1, -1 )
    j = np.arange ( 0, n )
    c = np.round ( ( i * a + j * b ) / ( n - 1 ) )
    c = c.astype ( int )

  return c

def timestamp ( ):

#*****************************************************************************80
#
## timestamp() prints the date as a timestamp.
#
#  Licensing:
#
#    This code is distributed under the MIT license.
#
#  Modified:
#
#    21 August 2019
#
#  Author:
#
#    John Burkardt
#
  import time

  t = time.time ( )
  print ( time.ctime ( t ) )

  return

if ( __name__ == '__main__' ):
  timestamp ( )
  neural_network_test ( )
  timestamp ( )
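#
#  The following helper is not part of the original program.  It is a minimal
#  sketch of how trained weights and biases might be used to classify a new
#  point (p1,p2), reusing the same forward pass that cost_function() applies.
#  It assumes the caller has obtained W2, W3, W4, b2, b3, b4, for instance by
#  modifying neural_network() to return them along with cost_vec.
#
def predict ( p1, p2, W2, W3, W4, b2, b3, b4 ):

#*****************************************************************************80
#
## predict() sketches a forward pass through the trained network.
#
#  Input:
#
#    real p1, p2: the coordinates of a point to classify.
#
#    real W2(2,2), W3(3,2), W4(2,3): the trained weights.
#
#    real b2(2,1), b3(3,1), b4(2,1): the trained shifts.
#
#  Output:
#
#    real a4(2,1): the network outputs; the larger component indicates
#    the predicted category.
#
  import numpy as np
#
#  Force the point into a column vector, as in neural_network().
#
  x = np.array ( [ [ p1 ], [ p2 ] ] )
#
#  Apply the three layers, exactly as in cost_function().
#
  a2 = activate ( x,  W2, b2 )
  a3 = activate ( a2, W3, b3 )
  a4 = activate ( a3, W4, b4 )

  return a4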