program main

!*****************************************************************************80
!
!! fft_openmp_test() tests fft_openmp().
!
!  Discussion:
!
!    The complex data in an N vector is stored as pairs of values in a
!    real vector of length 2*N.
!
!  Licensing:
!
!    This code is distributed under the MIT license.
!
!  Modified:
!
!    24 August 2020
!
!  Author:
!
!    Original C version by Wesley Petersen.
!    This version by John Burkardt.
!
!  Reference:
!
!    Wesley Petersen, Peter Arbenz, 
!    Introduction to Parallel Computing - A practical guide with examples in C,
!    Oxford University Press,
!    ISBN: 0-19-851576-6,
!    LC: QA76.58.P47.
!
  use omp_lib

  implicit none

  integer nits
  integer proc_num
  integer thread_num

  nits = 10000

  call timestamp ( )
  write ( *, '(a)' ) ' '
  write ( *, '(a)' ) 'fft_openmp_test():'
  write ( *, '(a)' ) '  Fortran90 version'
  write ( *, '(a)' ) '  fft_openmp() computes the Fast Fourier Transform of'
  write ( *, '(a)' ) '  a complex data vector,'
  write ( *, '(a)' ) '  using OpenMP for parallel execution.'
!
!  How many processors are available?
!
  proc_num = omp_get_num_procs ( )
  thread_num = omp_get_max_threads ( )

  write ( *, '(a)' ) ' '
  write ( *, '(a,i8)' ) '  The number of processors available = ', proc_num
  write ( *, '(a,i8)' ) '  The number of threads available    = ', thread_num

  call random_compare ( )
  call cfft_1d_accuracy_test ( )
  call cfft_1d_speed_test ( nits )
!
!  Terminate.
!
  write ( *, '(a)' ) ' '
  write ( *, '(a)' ) 'fft_openmp_test:'
  write ( *, '(a)' ) '  Normal end of execution.'
  write ( *, '(a)' ) ' '
  call timestamp ( )

  stop 0
end
subroutine random_compare ( )

!*****************************************************************************80
!
!! random_compare compares r82ggl and c8ggl.
!
!  Licensing:
!
!    This code is distributed under the MIT license.
!
!  Modified:
!
!    24 August 2020
!
!  Author:
!
!    John Burkardt.
!
  implicit none

  integer, parameter :: ck = kind ( ( 1.0D+00, 1.0D+00 ) )
  integer, parameter :: rk = kind ( 1.0D+00 )

  integer, parameter :: n1 = 10
  integer, parameter :: n2 = 5

  complex ( kind = ck ) c8(n2)
  complex ( kind = ck ) c8ggl
  integer i
  real ( kind = rk ) r8(n1)
  real ( kind = rk ) r82ggl
  real ( kind = rk ) seed

  write ( *, '(a)' ) ''
  write ( *, '(a)' ) 'random_compare:'
  write ( *, '(a)' ) '  FORTRAN90 version'
  write ( *, '(a)' ) '  r82ggl computes a random complex value as a'
  write ( *, '(a)' ) '  pair of real numbers.'
  write ( *, '(a)' ) '  c8ggl computes a random complex value directly.'

  seed = 331.0D+00
  do i = 1, n1
    r8(i) = r82ggl ( seed )
  end do

  seed = 331.0D+00
  do i = 1, n2
    c8(i) = c8ggl ( seed )
  end do

  write ( *, '(a)' ) ''
  write ( *, '(a)' ) '  -- real pairs ---  complex value --'
  write ( *, '(a)' ) ''
  do i = 1, n2
    write ( *, '(2x,f8.6,2x,f8.6,5x,f8.6,2x,f8.6)' ) r8(2*i-1), r8(2*i), c8(i)
  end do

  return
end
subroutine cfft_1d_accuracy_test ( )

!*****************************************************************************80
!
!! cfft_1d_accuracy_check compares the recovered data to the original.
!
!  Discussion:
!
!    Because the transform is not normalized, if we have perfect accuracy,
!    then a transformation followed by a back transformation should simply
!    multiply the data by N.
!
!  Licensing:
!
!    This code is distributed under the MIT license.
!
!  Modified:
!
!    21 August 2020
!
!  Author:
!
!    John Burkardt.
!
!  Reference:
!
!    Wesley Petersen, Peter Arbenz, 
!    Introduction to Parallel Computing - A practical guide with examples in C,
!    Oxford University Press,
!    ISBN: 0-19-851576-6,
!    LC: QA76.58.P47.
!
  implicit none

  integer, parameter :: ck = kind ( ( 1.0D+00, 1.0D+00 ) )
  integer, parameter :: rk = kind ( 1.0D+00 )

  integer i
  integer ln2
  integer, parameter :: ln2_max = 24
  integer n
  real ( kind = rk ) r82ggl
  real ( kind = rk ) rms
  real ( kind = rk ) seed
  real ( kind = rk ) sgn
  real ( kind = rk ), allocatable, dimension ( : ) :: w
  real ( kind = rk ), allocatable, dimension ( : ) :: x
  real ( kind = rk ), allocatable, dimension ( : ) :: y
  real ( kind = rk ), allocatable, dimension ( : ) :: z
!
!  Prepare for tests.
!
  write ( *, '(a)' ) ' '
  write ( *, '(a)' ) 'cfft_1d_accuracy_test:'
  write ( *, '(a)' ) '    FFTinverse ( FFT ( X(1:N) ) ) == N * X(1:N)?'
  write ( *, '(a)' ) '             N      RMS error'
  write ( *, '(a)' ) ' '
  write ( *, '(a)' ) ' '

  n = 1
!
!  LN2 is the log base 2 of N.  Each increase of LN2 doubles N.
!
  do ln2 = 1, ln2_max

    n = 2 * n
!
!  Allocate storage for the complex arrays W, X, Y, Z.  
!
!  We handle the complex arithmetic,
!  and store a complex number as a pair of floats, a complex vector as a doubly
!  dimensioned array whose second dimension is 2. 
!
    allocate ( w(1:n) )
    allocate ( x(1:2*n) )
    allocate ( y(1:2*n) )
    allocate ( z(1:2*n) )
!
!  Save the initial data in z.
!
    seed  = 331.0D+00
    do i = 1, 2 * n
      z(i) = r82ggl ( seed )
    end do
!
!  Copy the initial data into X.
!
    x(1:2*n) = z(1:2*n)
!
!  Initialize the sine and cosine tables.
!
    call cffti ( n, w )
!
!  Transform forward, back 
!
    sgn = + 1.0D+00
    call cfft_1d ( n, x, y, w, sgn )
    sgn = - 1.0D+00
    call cfft_1d ( n, y, x, w, sgn )
! 
!  Transform is not normalized, so presumably X = N * Z.
!
    x(1:2*n) = x(1:2*n) / n

    rms = 0.0D+00
    do i = 1, 2 * n - 1, 2
      rms = rms &
      + ( z(i)   - x(i) )**2 &
      + ( z(i+1) - x(i+1) )**2
    end do
    rms = sqrt ( rms / n )

    write ( *, '(2x,i12,2x,g14.6)' ) n, rms

    deallocate ( w )
    deallocate ( x )
    deallocate ( y )
    deallocate ( z )

  end do

  return
end
subroutine cfft_1d_speed_test ( nits )

!*****************************************************************************80
!
!! cfft_1d_speed_test estimates the speed of cfft_1d.
!
!  Discussion:
!
!    The complex data in an N vector is stored as pairs of values in a
!    real vector of length 2*N.
!
!  Licensing:
!
!    This code is distributed under the MIT license.
!
!  Modified:
!
!    24 August 2020
!
!  Author:
!
!    Original C version by Wesley Petersen.
!    This version by John Burkardt.
!
!  Reference:
!
!    Wesley Petersen, Peter Arbenz, 
!    Introduction to Parallel Computing - A practical guide with examples in C,
!    Oxford University Press,
!    ISBN: 0-19-851576-6,
!    LC: QA76.58.P47.
!
  use omp_lib

  implicit none

  integer, parameter :: ck = kind ( ( 1.0D+00, 1.0D+00 ) )
  integer, parameter :: rk = kind ( 1.0D+00 )

  real ( kind = rk ) flops
  integer i
  integer it
  integer ln2
  integer, parameter :: ln2_max = 24
  real ( kind = rk ) mflops
  integer n
  integer nits
  real ( kind = rk ) r82ggl
  real ( kind = rk ) seed
  real ( kind = rk ) sgn
  real ( kind = rk ), allocatable, dimension ( : ) :: w
  real ( kind = rk ) wtime
  real ( kind = rk ), allocatable, dimension ( : ) :: x
  real ( kind = rk ), allocatable, dimension ( : ) :: y
  real ( kind = rk ), allocatable, dimension ( : ) :: z
!
!  Prepare for tests.
!
  write ( *, '(a)' ) ' '
  write ( *, '(a)' ) '  cfft_1d_speed_test:'
  write ( *, '(a)' ) '    FFT/inverse FFF of N-vector, NITS times.'
  write ( *, '(a)' ) '             N      NITS    Time' // &
    '          Time/Call     MFLOPS'
  write ( *, '(a)' ) ' '

  n = 1
!
!  LN2 is the log base 2 of N.  Each increase of LN2 doubles N.
!
  do ln2 = 1, ln2_max

    n = 2 * n
!
!  Allocate storage for the complex arrays W, X, Y, Z.  
!
!  We handle the complex arithmetic,
!  and store a complex number as a pair of floats, a complex vector as a doubly
!  dimensioned array whose second dimension is 2. 
!
    allocate ( w(1:n) )
    allocate ( x(1:2*n) )
    allocate ( y(1:2*n) )
    allocate ( z(1:2*n) )
!
!  Save the initial data in z.
!
    seed  = 331.0D+00
    do i = 1, 2 * n
      z(i) = r82ggl ( seed )
    end do
!
!  Copy the initial data into X.
!
    x(1:2*n) = z(1:2*n)
!
!  Initialize the sine and cosine tables.
!
    call cffti ( n, w )
!
!  Transform forward, back 
!
    wtime = omp_get_wtime ( )

    do it = 1, nits

      sgn = + 1.0D+00
      call cfft_1d ( n, x, y, w, sgn )
      sgn = - 1.0D+00
      call cfft_1d ( n, y, x, w, sgn )

    end do

    wtime = omp_get_wtime ( ) - wtime
!
!  Report statistics.
!
    flops = 2.0D+00 * real ( nits, kind = rk ) &
      * ( 5.0D+00 * real ( n, kind = rk ) * real ( ln2, kind = rk ) )

    mflops = flops / 1.0D+06 / wtime

    write ( *, '(2x,i12,2x,i8,2x,g12.4,2x,g12.4,2x,g12.4)' ) &
      n, nits, wtime, wtime / real ( 2 * nits, kind = rk ), mflops

    if ( mod ( ln2, 4 ) == 0 ) then
      nits = nits / 10
    end if

    if ( nits < 1 ) then
      nits = 1
    end if

    deallocate ( w )
    deallocate ( x )
    deallocate ( y )
    deallocate ( z )

  end do

  return
end