subroutine backward_euler_residual ( dydt, n, to, yo, tm, ym, fm )

c*********************************************************************72
c
cc backward_euler_residual() evaluates the backward Euler residual.
c
c  Discussion:
c
c    Let to and tm be two times, with yo and ym the associated ODE
c    solution values there.  If ym satisfies the backward Euler condition,
c    then
c
c      dydt(tm,ym) = ( ym - yo ) / ( tm - to )
c
c    This can be rewritten as
c
c      residual = ym - yo - ( tm - to ) * dydt(tm,ym)
c
c    Given the other information, a nonlinear equation solver can be used
c    to estimate the value ym that makes the residual zero.
c
c  Licensing:
c
c    This code is distributed under the MIT license.
c
c  Modified:
c
c    10 November 2023
c
c  Author:
c
c    John Burkardt
c
c  Input:
c
c    external dydt(): the name of the user-supplied code which
c    evaluates the right hand side of the ODE, of the form:
c      subroutine dydt ( t, y, dy )
c      double precision dy(*)
c      double precision t
c      double precision y(*)
c
c    integer n: the vector size.
c
c    double precision to, yo(n): the old time and solution.
c
c    double precision tm, ym(n): the new time and tentative solution.
c
c   Output:
c
c    double precision fm(n): the backward Euler residual.
c
      implicit none

      integer n

      external dydt
      double precision fm(n)
      integer i
      double precision tm
      double precision to
      double precision ym(n)
      double precision yo(n)

      call dydt ( tm, ym, fm )

      do i = 1, n
        fm(i) = ym(i) - yo(i) - ( tm - to ) * fm(i)
      end do

      return
      end
      subroutine dogleg ( n, r, lr, diag, qtb, delta, x, wa1, wa2 )

c*********************************************************************72
c
cc dogleg() finds the minimizing combination of Gauss-Newton and gradient steps.
c
c  Discussion:
c
c    given an m by n matrix a, an n by n nonsingular diagonal
c    matrix d, an m-vector b, and a positive number delta, the
c    problem is to determine the convex combination x of the
c    gauss-newton and scaled gradient directions that minimizes
c    (a*x - b) in the least squares sense, subject to the
c    restriction that the euclidean norm of d*x be at most delta.
c
c    this subroutine completes the solution of the problem
c    if it is provided with the necessary information from the
c    qr factorization of a. that is, if a = q*r, where q has
c    orthogonal columns and r is an upper triangular matrix,
c    then dogleg expects the full upper triangle of r and
c    the first n components of (q transpose)*b.
c
c  Licensing:
c
c    This code is distributed under the MIT license.
c
c  Modified:
c
c    26 October 2023
c
c  Author:
c
c    Original Fortran77 version by Jorge More, Danny Sorenson, 
c    Burton Garbow, Kenneth Hillstrom.
c    This version by John Burkardt.
c
c  Parameters:
c
c    integer n: the order of r.
c
c       r is an input array of length lr which must contain the upper
c         triangular matrix r stored by rows.
c
c       lr is a positive integer input variable not less than
c         (n*(n+1))/2.
c
c       diag is an input array of length n which must contain the
c         diagonal elements of the matrix d.
c
c       qtb is an input array of length n which must contain the first
c         n elements of the vector (q transpose)*b.
c
c       delta is a positive input variable which specifies an upper
c         bound on the euclidean norm of d*x.
c
c       x is an output array of length n which contains the desired
c         convex combination of the gauss-newton direction and the
c         scaled gradient direction.
c
c       wa1 and wa2 are work arrays of length n.
c
      implicit none

      integer lr
      integer n

      double precision delta
      double precision diag(n)
      double precision enorm
      double precision qtb(n)
      double precision r(lr)
      double precision wa1(n)
      double precision wa2(n)
      double precision x(n)

      integer i,j,jj,k,l
      double precision alpha,bnorm,epsmch,gnorm,qnorm,sgnorm,sum,
     *                 temp
c
c  epsmch is the machine precision.
c
      epsmch = epsilon ( epsmch )
c
c  Calculate the gauss-newton direction.
c
      jj = (n*(n + 1))/2 + 1

      do k = 1, n
         j = n - k + 1
         jj = jj - k
         l = jj + 1
         sum = 0.0D+00
         do i = j + 1, n
            sum = sum + r(l)*x(i)
            l = l + 1
         end do
         temp = r(jj)
         if (temp /= 0.0D+00) go to 40
         l = j
         do i = 1, j
            temp = dmax1(temp,dabs(r(l)))
            l = l + n - i
         end do
         temp = epsmch*temp
         if (temp == 0.0D+00) temp = epsmch
   40    continue
         x(j) = (qtb(j) - sum)/temp
      end do
c
c  Test whether the gauss-newton direction is acceptable.
c
      do j = 1, n
         wa1(j) = 0.0D+00
         wa2(j) = diag(j)*x(j)
      end do
      qnorm = enorm(n,wa2)
      if (qnorm <= delta) go to 140
c
c  the gauss-newton direction is not acceptable.
c  next, calculate the scaled gradient direction.
c
      l = 1
      do j = 1, n
         temp = qtb(j)
         do i = j, n
            wa1(i) = wa1(i) + r(l)*temp
            l = l + 1
         end do
         wa1(j) = wa1(j)/diag(j)
      end do
c
c  calculate the norm of the scaled gradient and test for
c  the special case in which the scaled gradient is zero.
c
      gnorm = enorm(n,wa1)
      sgnorm = 0.0D+00
      alpha = delta/qnorm
      if (gnorm == 0.0D+00) go to 120
c
c  calculate the point along the scaled gradient
c  at which the quadratic is minimized.
c
      do j = 1, n
         wa1(j) = (wa1(j)/gnorm)/diag(j)
      end do

      l = 1
      do j = 1, n
         sum = 0.0D+00
         do i = j, n
            sum = sum + r(l)*wa1(i)
            l = l + 1
         end do
         wa2(j) = sum
      end do
      temp = enorm(n,wa2)
      sgnorm = (gnorm/temp)/temp
c
c  test whether the scaled gradient direction is acceptable.
c
      alpha = 0.0D+00
      if (sgnorm >= delta) go to 120
c
c  the scaled gradient direction is not acceptable.
c  finally, calculate the point along the dogleg
c  at which the quadratic is minimized.
c
      bnorm = enorm(n,qtb)
      temp = (bnorm/gnorm)*(bnorm/qnorm)*(sgnorm/delta)
      temp = temp - (delta/qnorm)*(sgnorm/delta)**2
     *  + dsqrt((temp-(delta/qnorm))**2
     *  +(1.0D+00-(delta/qnorm)**2)*(1.0D+00-(sgnorm/delta)**2))
      alpha = ((delta/qnorm)*(1.0D+00 - (sgnorm/delta)**2))/temp
  120 continue
c
c  form appropriate convex combination of the gauss-newton
c  direction and the scaled gradient direction.
c
      temp = (1.0D+00 - alpha)*dmin1(sgnorm,delta)
      do j = 1, n
         x(j) = temp*wa1(j) + alpha*x(j)
      end do

  140 continue

      return
      end
      double precision function enorm ( n, x )

c*********************************************************************72
c
cc enorm() computes the Euclidean norm of a vector.
c
c  Discussion:
c
c     given an n-vector x, this function calculates the
c     euclidean norm of x.
c
c     the euclidean norm is computed by accumulating the sum of
c     squares in three different sums. the sums of squares for the
c     small and large components are scaled so that no overflows
c     occur. non-destructive underflows are permitted. underflows
c     and overflows do not occur in the computation of the unscaled
c     sum of squares for the intermediate components.
c     the definitions of small, intermediate and large components
c     depend on two constants, rdwarf and rgiant. the main
c     restrictions on these constants are that rdwarf**2 not
c     underflow and rgiant**2 not overflow. the constants
c     given here are suitable for every known computer.
c
c  Licensing:
c
c    This code is distributed under the MIT license.
c
c  Modified:
c
c    26 October 2023
c
c  Author:
c
c    Original Fortran77 version by Jorge More, Danny Sorenson, 
c    Burton Garbow, Kenneth Hillstrom.
c    This version by John Burkardt.
c
c  Parameters:
c
c       n is a positive integer input variable.
c
c       x is an input array of length n.
c
      implicit none

      integer n
      double precision x(n)

      integer i
      double precision agiant,floatn,rdwarf,rgiant,s1,s2,s3,xabs,
     &                 x1max,x3max

      rdwarf = 3.834d-20
      rgiant = 1.304d19
      s1 = 0.0D+00
      s2 = 0.0D+00
      s3 = 0.0D+00
      x1max = 0.0D+00
      x3max = 0.0D+00
      floatn = n
      agiant = rgiant/floatn
      do 90 i = 1, n
         xabs = dabs(x(i))
         if (xabs > rdwarf .and. xabs < agiant) go to 70
            if (xabs <= rdwarf) go to 30
c
c  sum for large components.
c
               if (xabs <= x1max) go to 10
                  s1 = 1.0D+00 + s1*(x1max/xabs)**2
                  x1max = xabs
                  go to 20
   10          continue
                  s1 = s1 + (xabs/x1max)**2
   20          continue
               go to 60
   30       continue
c
c  sum for small components.
c
               if (xabs <= x3max) go to 40
                  s3 = 1.0D+00 + s3*(x3max/xabs)**2
                  x3max = xabs
                  go to 50
   40          continue
                  if (xabs /= 0.0D+00) s3 = s3 + (xabs/x3max)**2
   50          continue
   60       continue
            go to 80
   70    continue
c
c  sum for intermediate components.
c
            s2 = s2 + xabs**2
   80    continue
   90    continue
c
c  calculation of norm.
c
      if (s1 == 0.0D+00) go to 100
         enorm = x1max*dsqrt(s1+(s2/x1max)/x1max)
         go to 130
  100 continue
         if (s2 == 0.0D+00) go to 110
            if (s2 >= x3max)
     *         enorm = dsqrt(s2*(1.0D+00+(x3max/s2)*(x3max*s3)))
            if (s2 < x3max)
     *         enorm = dsqrt(x3max*((s2/x3max)+(x3max*s3)))
            go to 120
  110    continue
            enorm = x3max*dsqrt(s3)
  120    continue
  130 continue

      return
      end
      subroutine fdjac_be ( dydt, n, to, xo, t, x, fvec, fjac, ldfjac, 
     &  ml, mu, epsfcn, wa1, wa2 )

c*********************************************************************72
c
cc fdjac_be() estimates an N by N jacobian matrix using forward differences.
c
c  Discussion:
c
c    This subroutine computes a forward-difference approximation
c    to the n by n jacobian matrix associated with a specified
c    problem of n functions in n variables. if the jacobian has
c    a banded form, then function evaluations are saved by only
c    approximating the nonzero terms.
c
c    The original code fdjac1() was modified to deal with problems
c    involving a backward Euler residual.
c
c  Licensing:
c
c    This code is distributed under the MIT license.
c
c  Modified:
c
c    10 November 2023
c
c  Author:
c
c    Original Fortran77 version by Jorge More, Danny Sorenson, 
c    Burton Garbow, Kenneth Hillstrom.
c    This version by John Burkardt.
c
c  Input:
c
c    external dydt(): the name of the user-supplied code which
c    evaluates the right hand side of the ODE, of the form:
c      subroutine dydt ( t, y, dy )
c      double precision dy(*)
c      double precision t
c      double precision y(*)
c
c    integer n: the number of functions and variables.
c
c    double precision to, xo(n): the old time and solution.
c   
c    double precision t, x(n): the new time and current solution estimate. 
c
c    double precision fvec(n): the functions evaluated at X.
c
c    integer ldfjac: the leading dimension of FJAC, which
c    must not be less than N.
c
c    integer ml, mu: specify the number of subdiagonals and
c    superdiagonals within the band of the jacobian matrix.  If the
c    jacobian is not banded, set ML and MU to N-1.
c
c    double precision epsfcn: is used in determining a suitable step
c    length for the forward-difference approximation.  This approximation
c    assumes that the relative errors in the functions are of the order of
c    EPSFCN.  If EPSFCN is less than the machine precision, it is assumed that
c    the relative errors in the functions are of the order of the machine
c    precision.
c
c  Output:
c
c    double precision fjac(ldfjac,n): the N by N approximate
c    jacobian matrix.
c
c  Workspace:
c
c    double precision wa1(n), wa2(n).
c
      implicit none

      integer ldfjac
      integer n

      external dydt
      double precision eps
      double precision epsfcn
      double precision epsmch
      double precision fjac(ldfjac,n)
      double precision fvec(n)
      double precision h
      integer i
      integer j
      integer k
      integer ml
      integer msum
      integer mu
      double precision t
      double precision temp
      double precision to
      double precision wa1(n)
      double precision wa2(n)
      double precision x(n)
      double precision xo(n)

      epsmch = epsilon ( epsmch )

      eps = dsqrt ( dmax1 ( epsfcn, epsmch ) )
      msum = ml + mu + 1
c
c  computation of dense approximate jacobian.
c
      if ( n <= msum ) then

        do j = 1, n
          temp = x(j)
          h = eps * dabs ( temp )
          if ( h == 0.0D+00 ) then
            h = eps
          end if
          x(j) = temp + h
          call backward_euler_residual ( dydt, n, to, xo, t, x, wa1 )
          x(j) = temp
          do i = 1, n
            fjac(i,j) = ( wa1(i) - fvec(i) ) / h
          end do
       end do   
c
c  computation of banded approximate jacobian.
c
      else

        do k = 1, msum
          do j = k, n, msum
            wa2(j) = x(j)
            h = eps * dabs ( wa2(j) )
            if ( h == 0.0D+00 ) then
              h = eps
            end if
            x(j) = wa2(j) + h
          end do

          call backward_euler_residual ( dydt, n, to, xo, t, x, wa1 )

          do j = k, n, msum
            x(j) = wa2(j)
            h = eps * dabs ( wa2(j) )
            if ( h == 0.0D+00 ) then
              h = eps
            end if
            do i = 1, n
              fjac(i,j) = 0.0D+00
              if ( j - mu <= i .and. i <= j + ml ) then
                fjac(i,j) = ( wa1(i) - fvec(i) ) / h
              end if
            end do
         end do

        end do

      end if

      return
      end
      subroutine fsolve_be ( dydt, n, to, xo, t, x, fvec, tol, info )

c*********************************************************************72
c
cc fsolve_be() seeks a zero of N nonlinear equations in N variables.
c
c  Discussion:
c
c    The code finds a zero of a system of N nonlinear functions in N variables
c    by a modification of the Powell hybrid method.  This is done by using the
c    more general nonlinear equation solver HYBRD.  The user provides a
c    subroutine which calculates the functions.  
c
c    The jacobian is calculated by a forward-difference approximation.
c
c    The original code fsolve() was modified to deal with problems
c    involving a backward Euler residual.
c
c  Licensing:
c
c    This code is distributed under the MIT license.
c
c  Modified:
c
c    10 November 2023
c
c  Author:
c
c    Original FORTRAN77 version by Jorge More, Burton Garbow, Kenneth Hillstrom.
c    This version by John Burkardt.
c
c  Reference:
c
c    Jorge More, Burton Garbow, Kenneth Hillstrom,
c    User Guide for MINPACK-1,
c    Technical Report ANL-80-74,
c    Argonne National Laboratory, 1980.
c
c  Input:
c
c    external dydt(), the name of the user-supplied code which
c    evaluates the right hand side of the ODE, of the form:
c      subroutine dydt ( t, y, dy )
c      double precision dy(*)
c      double precision t
c      double precision y(*)
c
c    integer n: the number of functions and variables.
c
c    double precision to, xo(n): the old time and solution.
c   
c    double precision t, x(n): the new time and current solution estimate.  
c
c    double precision tol: Satisfactory termination occurs when the algorithm
c    estimates that the relative error between X and the solution is at
c    most TOL.  TOL should be nonnegative.
c
c  Output:
c
c    double precision x(n), the estimate of the solution vector.
c
c    double precision fvec(n: the functions evaluated at the output X.
c
c    integer info: error flag.
c    0, improper input parameters.
c    1, algorithm estimates that the relative error between X and the
c       solution is at most TOL.
c    2, number of calls to FCN has reached or exceeded 200*(N+1).
c    3, TOL is too small.  No further improvement in the approximate
c       solution X is possible.
c    4, the iteration is not making good progress.
c
      implicit none

      integer n

      double precision diag(n)
      external dydt
      double precision epsfcn
      double precision factor
      double precision fjac(n,n)
      double precision fvec(n)
      integer info
      integer ldfjac
      integer lr
      integer maxfev
      integer ml
      integer mode
      integer mu
      integer nfev
      double precision qtf(n)
      double precision r((n*(n+1))/2)
      double precision t
      double precision to
      double precision tol
      double precision wa1(n)
      double precision wa2(n)
      double precision wa3(n)
      double precision wa4(n)
      double precision x(n)
      double precision xo(n)
      double precision xtol

      if ( n <= 0 ) then
        info = 0
        return
      end if

      if ( tol < 0.0D+00 ) then
        info = 0
        return
      end if

      xtol = tol
      maxfev = 200 * ( n + 1 )
      ml = n - 1
      mu = n - 1
      epsfcn = 0.0D+00
      diag(1:n) = 1.0D+00
      mode = 2
      factor = 100.0D+00
      info = 0
      nfev = 0
      fjac(1:n,1:n) = 0.0D+00
      ldfjac = n
      r(1:(n*(n+1))/2) = 0.0D+00
      lr = ( n * ( n + 1 ) ) / 2
      qtf(1:n) = 0.0D+00

      call hybrd_be ( dydt, n, to, xo, t, x, fvec, xtol, maxfev, 
     &  ml, mu, epsfcn, diag, mode, factor, info, nfev, fjac, 
     &  ldfjac, r, lr, qtf, wa1, wa2, wa3, wa4 )

      if ( info == 5 ) then
        info = 4
      end if

      return
      end
      subroutine hybrd_be ( dydt, n, to, xo, t, x, fvec, xtol, maxfev, 
     &  ml, mu, epsfcn, diag, mode, factor, info, nfev, fjac, ldfjac, 
     &  r, lr, qtf, wa1, wa2, wa3, wa4 )

c*********************************************************************72
c
cc hybrd_be() seeks a zero of N nonlinear equations in N variables.
c
c  Discussion:
c
c    The code finds a zero of a system of n nonlinear functions in n 
c    variables by a modification of the powell hybrid method. 
c
c    the user must provide a subroutine which calculates the functions. 
c
c    the jacobian is calculated by a forward-difference approximation.
c
c    The original code hybrd() was modified to deal with problems
c    involving a backward Euler residual.
c
c  Licensing:
c
c    This code is distributed under the MIT license.
c
c  Modified:
c
c    10 November 2023
c
c  Author:
c
c    Original Fortran77 version by Jorge More, Danny Sorenson, 
c    Burton Garbow, Kenneth Hillstrom.
c    This version by John Burkardt.
c
c  Input:
c
c    external dydt(), the name of the user-supplied code which
c    evaluates the right hand side of the ODE, of the form:
c      subroutine dydt ( t, y, dy )
c      double precision dy(n)
c      double precision t
c      double precision y(n)
c
c    integer n: the number of functions and variables.
c
c    double precision to, xo(n): the old time and solution.
c   
c    double precision t, x(n): the new time and current solution estimate. 
c
c    double precision xtol: Termination occurs when the relative error
c    between two consecutive iterates is at most XTOL.  XTOL should be
c    nonnegative.
c
c    integer maxfev: Termination occurs when the number of
c    calls to the derivative code is at least MAXFEV by the end of an iteration.
c
c    integer ml, mu: specify the number of subdiagonals and
c    superdiagonals within the band of the jacobian matrix.  If the jacobian
c    is not banded, set ML and MU to at least n - 1.
c
c    double precision epsfcn: is used in determining a suitable step
c    length for the forward-difference approximation.  This approximation
c    assumes that the relative errors in the functions are of the order of
c    EPSFCN.  If EPSFCN is less than the machine precision, it is assumed that
c    the relative errors in the functions are of the order of the machine
c    precision.
c
c    double precision diag(n): multiplicative scale factors for the 
c    variables.  Only needed as input if MODE = 2.  
c
c    integer mode: scaling option.
c    1, variables will be scaled internally.
c    2, scaling is specified by the input DIAG vector.
c
c    double precision factor: determines the initial step bound.  This
c    bound is set to the product of FACTOR and the euclidean norm of DIAG*X if
c    nonzero, or else to FACTOR itself.  In most cases, FACTOR should lie
c    in the interval (0.1, 100) with 100 the recommended value.
c
c    integer ldfjac: the leading dimension of FJAC.
c    LDFJAC must be at least N.
c
c    integer lr: the size of the R array, which must be no
c    less than (N*(N+1))/2.
c
c  Output:
c
c    double precision x(n): the final estimate of the solution vector.
c
c    double precision fvec(n): the functions evaluated at the output x.
c
c    double precision diag(n): multiplicative scale factors for the 
c    variables.  Set internally if MODE = 1.
c    
c    integer info: status flag.  A value of 1 indicates success. 
c    0, improper input parameters.
c    1, relative error between two consecutive iterates is at most XTOL.
c    2, number of calls to derivative has reached or exceeded MAXFEV.
c    3, XTOL is too small.  No further improvement in the approximate
c       solution X is possible.
c    4, iteration is not making good progress, as measured by the improvement
c       from the last five jacobian evaluations.
c    5, iteration is not making good progress, as measured by the improvement
c       from the last ten iterations.
c
c    integer nfev: the number of calls to the derivative function.
c
c    double precision fjac(ldfjac,n): an N by N array which contains
c    the orthogonal matrix Q produced by the QR factorization of the final
c    approximate jacobian.
c
c    double precision r(lr): the upper triangular matrix produced by
c    the QR factorization of the final approximate jacobian, stored rowwise.
c
c    double precision qtf(n): contains the vector Q'*FVEC.
c
c  Workspace:
c
c    double precision wa1(n), wa2(n), wa3(n), wa4(n).
c
      implicit none

      integer ldfjac
      integer lr
      integer n

      double precision actred
      double precision delta
      double precision diag(n)
      external dydt
      double precision enorm
      double precision epsfcn
      double precision epsmch
      double precision factor
      double precision fjac(ldfjac,n)
      double precision fnorm
      double precision fnorm1
      double precision fvec(n)
      integer i
      integer info
      integer iter
      integer iwa(1)
      integer j
      logical jeval
      integer jm1
      integer l
      integer maxfev
      integer ml
      integer mode
      integer msum
      integer mu
      integer ncfail
      integer ncsuc
      integer nfev
      integer nslow1
      integer nslow2
      logical pivot
      double precision pnorm
      double precision prered
      double precision qtf(n)
      double precision r(lr)
      double precision ratio
      logical sing
      double precision sum
      double precision t
      double precision temp
      double precision to
      double precision wa1(n)
      double precision wa2(n)
      double precision wa3(n)
      double precision wa4(n)
      double precision x(n)
      double precision xnorm
      double precision xo(n)
      double precision xtol
c
c  epsmch is the machine precision.
c
      epsmch = epsilon ( epsmch )

      info = 0
      nfev = 0
c
c  check the input.
c
      if ( n <= 0 ) then
        return
      end if

      if ( xtol < 0.0D+00 ) then
        return
      end if

      if ( maxfev <= 0 ) then
        return
      end if

      if ( ml < 0 ) then
        return
      end if

      if ( mu < 0 ) then
        return
      end if

      if ( factor <= 0.0D+00 ) then
        return
      end if
     
      if ( ldfjac < n ) then
        return
      end if

      if ( lr < (n*(n + 1))/2 ) then
        return
      end if

      if ( mode == 2 ) then
        do j = 1, n
          if ( diag(j) <= 0.0D+00 ) then
            return
          end if
        end do
      end if
c
c  evaluate the function at the starting point
c  and calculate its norm.
c
      call backward_euler_residual ( dydt, n, to, xo, t, x, fvec )
      nfev = 1
      fnorm = enorm ( n, fvec )
c
c  determine the number of calls to fcn needed to compute
c  the jacobian matrix.
c
      msum = min ( ml + mu + 1, n )
c
c  initialize iteration counter and monitors.
c
      iter = 1
      ncsuc = 0
      ncfail = 0
      nslow1 = 0
      nslow2 = 0
c
c  beginning of the outer loop.
c
      do

        jeval = .true.
c
c  calculate the jacobian matrix.
c
        call fdjac_be ( dydt, n, to, xo, t, x, fvec, fjac, ldfjac, 
     &    ml, mu, epsfcn, wa1, wa2 )

        nfev = nfev + msum
c
c  compute the qr factorization of the jacobian.
c
        pivot = .false.
        call qrfac(n,n,fjac,ldfjac,pivot,iwa,1,wa1,wa2,wa3)
c
c  on the first iteration and if mode is 1, scale according
c  to the norms of the columns of the initial jacobian.
c
        if ( iter == 1 ) then

          if ( mode /= 2 ) then
            do j = 1, n
              diag(j) = wa2(j)
              if (wa2(j) == 0.0D+00 ) then
                diag(j) = 1.0D+00
              end if
            end do

          end if
c
c  on the first iteration, calculate the norm of the scaled x
c  and initialize the step bound delta.
c
          do j = 1, n
            wa3(j) = diag(j)*x(j)
          end do

          xnorm = enorm(n,wa3)
          delta = factor*xnorm
          if (delta == 0.0D+00 ) then
            delta = factor
          end if

        end if
c
c  form (q transpose)*fvec and store in qtf.
c
        do i = 1, n
          qtf(i) = fvec(i)
        end do

        do j = 1, n
          if (fjac(j,j) /= 0.0D+00 ) then
            sum = 0.0D+00
            do i = j, n
              sum = sum + fjac(i,j)*qtf(i)
            end do
            temp = -sum/fjac(j,j)
            do i = j, n
              qtf(i) = qtf(i) + fjac(i,j)*temp
            end do
          end if
        end do
c
c  copy the triangular factor of the qr factorization into r.
c
        sing = .false.
        do j = 1, n
          l = j
          jm1 = j - 1
          do i = 1, j - 1
            r(l) = fjac(i,j)
            l = l + n - i
          end do
          r(l) = wa1(j)
          if (wa1(j) == 0.0D+00 ) then
            sing = .true.
          end if
        end do
c
c  accumulate the orthogonal factor in fjac.
c
        call qform ( n, n, fjac, ldfjac, wa1 )
c
c  rescale if necessary.
c
        if ( mode /= 2 ) then
          do j = 1, n
            diag(j) = dmax1(diag(j),wa2(j))
          end do
        end if
c
c  beginning of the inner loop.
c
        do
c
c  determine the direction p.
c
          call dogleg(n,r,lr,diag,qtf,delta,wa1,wa2,wa3)
c
c  store the direction p and x + p. calculate the norm of p.
c
          do j = 1, n
            wa1(j) = -wa1(j)
            wa2(j) = x(j) + wa1(j)
            wa3(j) = diag(j)*wa1(j)
          end do
          pnorm = enorm ( n, wa3 )
c
c  on the first iteration, adjust the initial step bound.
c
          if (iter == 1) then
            delta = dmin1(delta,pnorm)
          end if
c
c  evaluate the function at x + p and calculate its norm.
c
          call backward_euler_residual ( dydt, n, to, xo, t, 
     &      wa2, wa4 )

          nfev = nfev + 1
          fnorm1 = enorm(n,wa4)
c
c  compute the scaled actual reduction.
c
          actred = - 1.0D+00
          if ( fnorm1 < fnorm ) then
            actred = 1.0D+00 - (fnorm1/fnorm)**2
          end if
c
c  compute the scaled predicted reduction.
c
          l = 1
          do i = 1, n
            sum = 0.0D+00
            do j = i, n
              sum = sum + r(l)*wa1(j)
              l = l + 1
            end do
            wa3(i) = qtf(i) + sum
          end do
          temp = enorm(n,wa3)
          prered = 0.0D+00
          if ( temp < fnorm ) then
            prered = 1.0D+00 - ( temp / fnorm )**2
          end if
c
c  compute the ratio of the actual to the predicted reduction.
c
          ratio = 0.0D+00
          if ( 0.0D+00 < prered ) then
            ratio = actred/prered
          end if
c
c  update the step bound.
c
          if (ratio < 0.1D+00 ) then
            ncsuc = 0
            ncfail = ncfail + 1
            delta = 0.5D+00 * delta
          else
            ncfail = 0
            ncsuc = ncsuc + 1
            if (ratio >= 0.5D+00 .or. ncsuc > 1) then
              delta = dmax1(delta,pnorm/0.5D+00)
            end if
            if (dabs(ratio-1.0D+00) <= 0.1D+00 ) then
              delta = pnorm/0.5D+00
            end if
          end if
c
c  Successful iteration.
c
          if ( 0.0001D+00 <= ratio ) then
            do j = 1, n
              x(j) = wa2(j)
              wa2(j) = diag(j)*x(j)
              fvec(j) = wa4(j)
            end do
            xnorm = enorm(n,wa2)
            fnorm = fnorm1
            iter = iter + 1
          end if
c
c  determine the progress of the iteration.
c
          nslow1 = nslow1 + 1
          if (actred >= 0.001D+00 ) nslow1 = 0
          if (jeval) nslow2 = nslow2 + 1
          if (actred >= 0.1D+00 ) nslow2 = 0
c
c  test for convergence.
c
          if (delta <= xtol*xnorm .or. fnorm == 0.0D+00) info = 1
          if (info /= 0) then
            return
          end if
c
c  tests for termination and stringent tolerances.
c
          if (nfev >= maxfev) info = 2
          if (0.1D+00*dmax1(0.1D+00*delta,pnorm) 
     &      <= epsmch*xnorm) info = 3
          if (nslow2 == 5) info = 4
          if (nslow1 == 10) info = 5
          if (info /= 0) then
            return
          end if
c
c  criterion for recalculating jacobian approximation
c  by forward differences.
c
          if ( ncfail == 2 ) then
            exit
          end if
c
c  calculate the rank one modification to the jacobian
c  and update qtf if necessary.
c
          do j = 1, n
            sum = 0.0D+00
            do i = 1, n
              sum = sum + fjac(i,j)*wa4(i)
            end do
            wa2(j) = (sum - wa3(j))/pnorm
            wa1(j) = diag(j)*((diag(j)*wa1(j))/pnorm)
            if (ratio >= 0.0001D+00 ) qtf(j) = sum
          end do
c
c  compute the qr factorization of the updated jacobian.
c
          call r1updt(n,n,r,lr,wa1,wa2,wa3,sing)
          call r1mpyq(n,n,fjac,ldfjac,wa2,wa3)
          call r1mpyq(1,n,qtf,1,wa2,wa3)
c
c  end of the inner loop.
c
          jeval = .false.

        end do
c
c  end of the outer loop.
c
      end do

      return
      end
      subroutine qform ( m, n, q, ldq, wa )

c*********************************************************************72
c
cc qform() produces the explicit QR factorization of a matrix.
c
c  Discussion:
c
c     this subroutine proceeds from the computed qr factorization of
c     an m by n matrix a to accumulate the m by m orthogonal matrix
c     q from its factored form.
c
c  Licensing:
c
c    This code is distributed under the MIT license.
c
c  Modified:
c
c    26 October 2023
c
c  Author:
c
c    Original Fortran77 version by Jorge More, Danny Sorenson, 
c    Burton Garbow, Kenneth Hillstrom.
c    This version by John Burkardt.
c
c  Parameters:
c
c       m is a positive integer input variable set to the number
c         of rows of a and the order of q.
c
c       n is a positive integer input variable set to the number
c         of columns of a.
c
c       q is an m by m array. on input the full lower trapezoid in
c         the first min(m,n) columns of q contains the factored form.
c         on output q has been accumulated into a square matrix.
c
c       ldq is a positive integer input variable not less than m
c         which specifies the leading dimension of the array q.
c
c       wa is a work array of length m.
c
      implicit none

      integer m,n,ldq
      double precision q(ldq,m),wa(m)

      integer i,j,jm1,k,l,minmn
      double precision sum,temp
c
c  zero out upper triangle of q in the first min(m,n) columns.
c
      minmn = min0(m,n)

      do j = 2, minmn
         jm1 = j - 1
         do i = 1, jm1
            q(i,j) = 0.0D+00
         end do
      end do
c
c initialize remaining columns to those of the identity matrix.
c
      do j = n + 1, m
         do i = 1, m
            q(i,j) = 0.0D+00
         end do
         q(j,j) = 1.0D+00
      end do
c
c  accumulate q from its factored form.
c
      do 120 l = 1, minmn
         k = minmn - l + 1
         do i = k, m
            wa(i) = q(i,k)
            q(i,k) = 0.0D+00
         end do
         q(k,k) = 1.0D+00
         if (wa(k) == 0.0D+00) go to 110
         do 100 j = k, m
            sum = 0.0D+00
            do i = k, m
               sum = sum + q(i,j)*wa(i)
            end do
            temp = sum/wa(k)
            do i = k, m
               q(i,j) = q(i,j) - temp*wa(i)
            end do
  100       continue
  110    continue
  120    continue

      return
      end
      subroutine qrfac(m,n,a,lda,pivot,ipvt,lipvt,rdiag,acnorm,wa)

c*********************************************************************72
c
cc qrfac() computes a QR factorization using Householder transformations.
c
c  Discussion:
c
c     this subroutine uses householder transformations with column
c     pivoting (optional) to compute a qr factorization of the
c     m by n matrix a. that is, qrfac determines an orthogonal
c     matrix q, a permutation matrix p, and an upper trapezoidal
c     matrix r with diagonal elements of nonincreasing magnitude,
c     such that a*p = q*r. the householder transformation for
c     column k, k = 1,2,...,min(m,n), is of the form
c
c                           t
c           i - (1/u(k))*u*u
c
c     where u has zeros in the first k-1 positions. the form of
c     this transformation and the method of pivoting first
c     appeared in the corresponding linpack subroutine.
c
c  Licensing:
c
c    This code is distributed under the MIT license.
c
c  Modified:
c
c    26 October 2023
c
c  Author:
c
c    Original Fortran77 version by Jorge More, Danny Sorenson, 
c    Burton Garbow, Kenneth Hillstrom.
c    This version by John Burkardt.
c
c  Parameters:
c
c       m is a positive integer input variable set to the number
c         of rows of a.
c
c       n is a positive integer input variable set to the number
c         of columns of a.
c
c       a is an m by n array. on input a contains the matrix for
c         which the qr factorization is to be computed. on output
c         the strict upper trapezoidal part of a contains the strict
c         upper trapezoidal part of r, and the lower trapezoidal
c         part of a contains a factored form of q (the non-trivial
c         elements of the u vectors described above).
c
c       lda is a positive integer input variable not less than m
c         which specifies the leading dimension of the array a.
c
c       pivot is a logical input variable. if pivot is set true,
c         then column pivoting is enforced. if pivot is set false,
c         then no column pivoting is done.
c
c       ipvt is an integer output array of length lipvt. ipvt
c         defines the permutation matrix p such that a*p = q*r.
c         column j of p is column ipvt(j) of the identity matrix.
c         if pivot is false, ipvt is not referenced.
c
c       lipvt is a positive integer input variable. if pivot is false,
c         then lipvt may be as small as 1. if pivot is true, then
c         lipvt must be at least n.
c
c       rdiag is an output array of length n which contains the
c         diagonal elements of r.
c
c       acnorm is an output array of length n which contains the
c         norms of the corresponding columns of the input matrix a.
c         if this information is not needed, then acnorm can coincide
c         with rdiag.
c
c       wa is a work array of length n. if pivot is false, then wa
c         can coincide with rdiag.
c
      implicit none

      integer lda
      integer lipvt
      integer n

      integer m
      integer ipvt(lipvt)
      logical pivot
      double precision a(lda,n),rdiag(n),acnorm(n),wa(n)

      integer i,j,k,kmax,minmn
      double precision ajnorm,epsmch,sum,temp
      double precision enorm
c
c  epsmch is the machine precision.
c
      epsmch = epsilon ( epsmch )
c
c  compute the initial column norms and initialize several arrays.
c
      do j = 1, n
         acnorm(j) = enorm(m,a(1,j))
         rdiag(j) = acnorm(j)
         wa(j) = rdiag(j)
         if (pivot) ipvt(j) = j
      end do
c
c  reduce a to r with householder transformations.
c
      minmn = min0(m,n)

      do j = 1, minmn

         if ( pivot ) then
c
c  bring the column of largest norm into the pivot position.
c
           kmax = j
           do k = j, n
             if (rdiag(k) > rdiag(kmax)) kmax = k
           end do

           if ( kmax /= j ) then

             do i = 1, m
               temp = a(i,j)
               a(i,j) = a(i,kmax)
               a(i,kmax) = temp
             end do
             rdiag(kmax) = rdiag(j)
             wa(kmax) = wa(j)
             k = ipvt(j)
             ipvt(j) = ipvt(kmax)
             ipvt(kmax) = k

           end if

         end if
c
c  compute the householder transformation to reduce the
c  j-th column of a to a multiple of the j-th unit vector.
c
         ajnorm = enorm(m-j+1,a(j,j))
         if (ajnorm == 0.0D+00) go to 100
         if (a(j,j) < 0.0D+00) ajnorm = -ajnorm
         do i = j, m
            a(i,j) = a(i,j)/ajnorm
         end do
         a(j,j) = a(j,j) + 1.0D+00
c
c  apply the transformation to the remaining columns
c  and update the norms.
c
         do k = j + 1, n
            sum = 0.0D+00
            do i = j, m
               sum = sum + a(i,j)*a(i,k)
            end do
            temp = sum/a(j,j)
            do i = j, m
               a(i,k) = a(i,k) - temp*a(i,j)
            end do
            if (.not.pivot .or. rdiag(k) == 0.0D+00) go to 80
            temp = a(j,k)/rdiag(k)
            rdiag(k) = rdiag(k)*dsqrt(dmax1(0.0D+00,1.0D+00-temp**2))
            if ( 0.05D+00 *(rdiag(k)/wa(k))**2 > epsmch) go to 80
            rdiag(k) = enorm(m-j,a(j+1,k))
            wa(k) = rdiag(k)
   80       continue

         end do

100      continue

         rdiag(j) = -ajnorm

      end do

      return
      end
      subroutine r1mpyq ( m, n, a, lda, v, w )

c*********************************************************************72
c
cc r1mpyq() computes A*Q, where Q is the product of Householder transformations.
c
c  Discussion:
c
c     given an m by n matrix a, this subroutine computes a*q where
c     q is the product of 2*(n - 1) transformations
c
c           gv(n-1)*...*gv(1)*gw(1)*...*gw(n-1)
c
c     and gv(i), gw(i) are givens rotations in the (i,n) plane which
c     eliminate elements in the i-th and n-th planes, respectively.
c     q itself is not given, rather the information to recover the
c     gv, gw rotations is supplied.
c
c  Licensing:
c
c    This code is distributed under the MIT license.
c
c  Modified:
c
c    26 October 2023
c
c  Author:
c
c    Original Fortran77 version by Jorge More, Danny Sorenson, 
c    Burton Garbow, Kenneth Hillstrom.
c    This version by John Burkardt.
c
c  Parameters:
c
c       m is a positive integer input variable set to the number
c         of rows of a.
c
c       n is a positive integer input variable set to the number
c         of columns of a.
c
c       a is an m by n array. on input a must contain the matrix
c         to be postmultiplied by the orthogonal matrix q
c         described above. on output a*q has replaced a.
c
c       lda is a positive integer input variable not less than m
c         which specifies the leading dimension of the array a.
c
c       v is an input array of length n. v(i) must contain the
c         information necessary to recover the givens rotation gv(i)
c         described above.
c
c       w is an input array of length n. w(i) must contain the
c         information necessary to recover the givens rotation gw(i)
c         described above.
c
      implicit none

      integer m,n,lda
      double precision a(lda,n),v(n),w(n)

      integer i,j,nmj,nm1
      double precision cos,sin,temp
c
c  apply the first set of givens rotations to a.
c
      nm1 = n - 1
      if (nm1 < 1) go to 50

      do nmj = 1, nm1
         j = n - nmj
         if (dabs(v(j)) > 1.0D+00) cos = 1.0D+00/v(j)
         if (dabs(v(j)) > 1.0D+00) sin = dsqrt(1.0D+00-cos**2)
         if (dabs(v(j)) <= 1.0D+00) sin = v(j)
         if (dabs(v(j)) <= 1.0D+00) cos = dsqrt(1.0D+00-sin**2)
         do i = 1, m
            temp = cos*a(i,j) - sin*a(i,n)
            a(i,n) = sin*a(i,j) + cos*a(i,n)
            a(i,j) = temp
         end do
      end do
c
c  apply the second set of givens rotations to a.
c
      do j = 1, nm1
         if (dabs(w(j)) > 1.0D+00) cos = 1.0D+00/w(j)
         if (dabs(w(j)) > 1.0D+00) sin = dsqrt(1.0D+00-cos**2)
         if (dabs(w(j)) <= 1.0D+00) sin = w(j)
         if (dabs(w(j)) <= 1.0D+00) cos = dsqrt(1.0D+00-sin**2)
         do i = 1, m
            temp = cos*a(i,j) + sin*a(i,n)
            a(i,n) = -sin*a(i,j) + cos*a(i,n)
            a(i,j) = temp
         end do
      end do

   50 continue

      return
      end
      subroutine r1updt(m,n,s,ls,u,v,w,sing)

c*********************************************************************72
c
cc r1updt() re-triangularizes a matrix after a rank one update.
c
c  Discussion:
c
c     given an m by n lower trapezoidal matrix s, an m-vector u,
c     and an n-vector v, the problem is to determine an
c     orthogonal matrix q such that
c
c                   t
c           (s + u*v )*q
c
c     is again lower trapezoidal.
c
c     this subroutine determines q as the product of 2*(n - 1)
c     transformations
c
c           gv(n-1)*...*gv(1)*gw(1)*...*gw(n-1)
c
c     where gv(i), gw(i) are givens rotations in the (i,n) plane
c     which eliminate elements in the i-th and n-th planes,
c     respectively. q itself is not accumulated, rather the
c     information to recover the gv, gw rotations is returned.
c
c  Licensing:
c
c    This code is distributed under the MIT license.
c
c  Modified:
c
c    26 October 2023
c
c  Author:
c
c    Original Fortran77 version by Jorge More, Danny Sorenson, 
c    Burton Garbow, Kenneth Hillstrom.
c    This version by John Burkardt.
c
c  Parameters:
c
c       m is a positive integer input variable set to the number
c         of rows of s.
c
c       n is a positive integer input variable set to the number
c         of columns of s. n must not exceed m.
c
c       s is an array of length ls. on input s must contain the lower
c         trapezoidal matrix s stored by columns. on output s contains
c         the lower trapezoidal matrix produced as described above.
c
c       ls is a positive integer input variable not less than
c         (n*(2*m-n+1))/2.
c
c       u is an input array of length m which must contain the
c         vector u.
c
c       v is an array of length n. on input v must contain the vector
c         v. on output v(i) contains the information necessary to
c         recover the givens rotation gv(i) described above.
c
c       w is an output array of length m. w(i) contains information
c         necessary to recover the givens rotation gw(i) described
c         above.
c
c       sing is a logical output variable. sing is set true if any
c         of the diagonal elements of the output s are zero. otherwise
c         sing is set false.
c
      implicit none

      integer ls
      integer m
      integer n

      logical sing
      double precision s(ls),u(m),v(n),w(m)

      integer i,j,jj,l,nmj,nm1
      double precision cos,cotan,giant,sin,tan,tau,temp
c
c  giant is the largest magnitude.
c
      giant = huge ( giant )
c
c  initialize the diagonal element pointer.
c
      jj = (n*(2*m - n + 1))/2 - (m - n)
c
c  move the nontrivial part of the last column of s into w.
c
      l = jj
      do i = n, m
         w(i) = s(l)
         l = l + 1
      end do
c
c  rotate the vector v into a multiple of the n-th unit vector
c  in such a way that a spike is introduced into w.
c
      nm1 = n - 1
      if (nm1 < 1) go to 70

      do 60 nmj = 1, nm1

         j = n - nmj
         jj = jj - (m - j + 1)
         w(j) = 0.0D+00
         if (v(j) == 0.0D+00) go to 50
c
c  determine a givens rotation which eliminates the
c  j-th element of v.
c
         if (dabs(v(n)) >= dabs(v(j))) go to 20
            cotan = v(n)/v(j)
            sin = 0.5D+00/dsqrt ( 0.25D+00 + 0.25D+00 * cotan**2)
            cos = sin*cotan
            tau = 1.0D+00
            if (dabs(cos)*giant > 1.0D+00) tau = 1.0D+00/cos
            go to 30
   20    continue
            tan = v(j)/v(n)
            cos = 0.5D+00/dsqrt( 0.25D+00 + 0.25D+00 * tan**2 )
            sin = cos*tan
            tau = sin
   30    continue
c
c  apply the transformation to v and store the information
c  necessary to recover the givens rotation.
c
         v(n) = sin*v(j) + cos*v(n)
         v(j) = tau
c
c  apply the transformation to s and extend the spike in w.
c
         l = jj
         do i = j, m
            temp = cos*s(l) - sin*w(i)
            w(i) = sin*s(l) + cos*w(i)
            s(l) = temp
            l = l + 1
         end do

   50    continue
   60    continue
   70 continue
c
c  add the spike from the rank 1 update to w.
c
      do i = 1, m
         w(i) = w(i) + v(n)*u(i)
      end do
c
c  eliminate the spike.
c
      sing = .false.
 
      do 130 j = 1, nm1

         if (w(j) == 0.0D+00) go to 120
c
c  determine a givens rotation which eliminates the
c  j-th element of the spike.
c
         if ( dabs(s(jj)) < dabs(w(j)) ) then
            cotan = s(jj)/w(j)
            sin = 0.5D+00/dsqrt(0.25D+00+0.25D+00*cotan**2)
            cos = sin*cotan
            tau = 1.0D+00
            if (dabs(cos)*giant > 1.0D+00) tau = 1.0D+00/cos
         else
            tan = w(j)/s(jj)
            cos = 0.5D+00/dsqrt(0.25D+00+0.25D+00*tan**2)
            sin = cos*tan
            tau = sin
         end if
c
c  apply the transformation to s and reduce the spike in w.
c
         l = jj
         do i = j, m
            temp = cos*s(l) + sin*w(i)
            w(i) = -sin*s(l) + cos*w(i)
            s(l) = temp
            l = l + 1
         end do
c
c  store the information necessary to recover the
c  givens rotation.
c
         w(j) = tau
  120    continue
c
c  test for zero diagonal elements in the output s.
c
         if (s(jj) == 0.0D+00) sing = .true.
         jj = jj + (m - j + 1)
  130    continue
c
c  move w back into the last column of the output s.
c
      l = jj
      do i = n, m
         s(l) = w(i)
         l = l + 1
      end do

      if (s(jj) == 0.0D+00) then
        sing = .true.
      end if

      return
      end