program main

c*********************************************************************72
c
c  Purpose:
c
c    MAIN is the main program for FFT.
c
c  Discussion:
c
c    The complex data in an N vector is stored as pairs of values in a
c    real vector of length 2*N.
c
c  Modified:
c
c    16 July 2008
c
c  Author:
c
c    C original version by Wesley Petersen
c    FORTRAN77 version by John Burkardt
c
c  Reference:
c
c    Wesley Petersen, Peter Arbenz, 
c    Introduction to Parallel Computing - A practical guide with examples in C,
c    Oxford University Press,
c    ISBN: 0-19-851576-6,
c    LC: QA76.58.P47.
c
      include 'omp_lib.h'

      integer n_max
      parameter ( n_max = 131072 )

      double precision ctime
      double precision ctime1
      double precision ctime2
      real error
      logical first
      integer flops
      real fnm1
      real ggl
      integer i
      integer icase
      integer id
      integer it
      integer ln2
      double precision mflops
      integer n
      integer nits
      real seed
      real sgn
      real w(n_max)
      real x(2*n_max)
      real y(2*n_max)
      real z(2*n_max)
      real z0
      real z1

      nits = 10000

      write ( *, '(a)' ) ' '
      write ( *, '(a)' ) 'FFT'
      write ( *, '(a)' ) '  FORTRAN77 version'
      write ( *, '(a)' ) ' '
      write ( *, '(a)' ) 
     &  '  Demonstrate an implementation of the Fast Fourier Transform'
      write ( *, '(a)' ) '  of a complex data vector,'
c
c  Prepare for tests.
c
      write ( *, '(a)' ) ' '
      write ( *, '(a)' ) '  Accuracy check:'
      write ( *, '(a)' ) ' '
      write ( *, '(a)' ) '    FFT ( FFT ( X(1:N) ) ) == N * X(1:N)'
      write ( *, '(a)' ) ' '
      write ( *, '(a)' ) 
     &  '             N    Error         Time              MFLOPS'
      write ( *, '(a)' ) ' '

      seed  = 331.0
      n = 1
c
c  LN2 is the log base 2 of N.  Each increase of LN2 doubles N.
c
      do ln2 = 1, 17

        n = 2 * n
c
c  Allocate storage for the complex arrays W, X, Y, Z.  
c
c  We handle the complex arithmetic,
c  and store a complex number as a pair of floats, a complex vector as a doubly
c  dimensioned array whose second dimension is 2. 
c
        first = .true.

        do icase = 0, 1

          if ( first ) then

            do i = 1, 2 * n - 1, 2

              z0 = ggl ( seed )
              z1 = ggl ( seed )
              x(i) = z0
              z(i) = z0
              x(i+1) = z1
              z(i+1) = z1
            end do

          else

            do i = 1, 2 * n - 1, 2
              z0 = 0.0
              z1 = 0.0
              x(i) = z0
              z(i) = z0
              x(i+1) = z1
              z(i+1) = z1
            end do

          end if
c
c  Initialize the sine and cosine tables.
c
          call cffti ( n, w )
c
c  Transform forward, back 
c
          if ( first ) then

            sgn = + 1.0
            call cfft2 ( n, x, y, w, sgn )
            sgn = - 1.0
            call cfft2 ( n, y, x, w, sgn )
c 
c  Results should be same as initial multiplied by n 
c
            fnm1 = 1.0 / real ( n )

            error = 0.0
            do i = 1, 2 * n - 1, 2
              error = error 
     &        + ( z(i)   - fnm1 * x(i) )**2 
     &        + ( z(i+1) - fnm1 * x(i+1) )**2
            end do
            error = sqrt ( fnm1 * error )

            first = .false.

          else

            ctime1 = omp_get_wtime ( )

            do it = 1, nits

              sgn = + 1.0
              call cfft2 ( n, x, y, w, sgn )
              sgn = - 1.0
              call cfft2 ( n, y, x, w, sgn )

            end do

            ctime2 = omp_get_wtime ( )
            ctime = ctime2 - ctime1

            flops = 2 * nits * ( 5 * n * ln2 )

            mflops = dble ( flops ) / 1.0D+06 / ctime

            write ( *, '(2x,i12,2x,g12.4,2x,g12.4,2x,g12.4)' ) 
     &        n, error, ctime / dble ( 2 * nits ), mflops

          end if

        end do

        if ( mod ( ln2, 4 ) .eq. 0 ) then
          nits = nits / 10
        end if

        if ( nits .lt. 1 ) then
          nits = 1
        end if

      end do

      write ( *, '(a)' ) ' '
      write ( *, '(a)' ) 'FFT:'
      write ( *, '(a)' ) '  Normal end of execution.'

      stop
      end
      subroutine cfft2 ( n, x, y, w, sgn )

c*********************************************************************72
c
cc CFFT2 performs a complex Fast Fourier Transform.
c
c  Modified:
c
c    28 April 2008
c
c  Author:
c
c    C original version by Wesley Petersen
c    FORTRAN77 version by John Burkardt
c
c  Reference:
c
c    Wesley Petersen, Peter Arbenz, 
c    Introduction to Parallel Computing - A practical guide with examples in C,
c    Oxford University Press,
c    ISBN: 0-19-851576-6,
c    LC: QA76.58.P47.
c
c  Parameters:
c
c    Input, integer N, the size of the array to be transformed.
c
c    Input/output, real X(2*N), the data to be transformed.  
c    On output, the contents of X have been overwritten by work information.
c
c    Output, real Y(2*N), the forward or backward FFT of X.
c
c    Input, real W(N), a table of sines and cosines.
c
c    Input, real SGN, is +1 for a "forward" FFT and -1 for a "backward" FFT.
c
      implicit none

      integer n

      integer i
      integer j
      integer m
      integer mj
      real sgn
      logical tgle
      real w(n)
      real x(2*n)
      real y(2*n)

       m = int ( ( log ( real ( n ) ) / log ( 1.99 ) ) )
       mj = 1
c
c  Toggling switch for work array.
c
      tgle = .true.
      call step ( n, mj, x(1), x((n/2)*2+1), y(1), y(mj*2+1), w, sgn )

      if ( n == 2 ) then
        return
      end if

      do j = 1, m - 2  

        mj = mj * 2

        if ( tgle ) then
          call step ( n, mj, y(1), y((n/2)*2+1), x(1), x(mj*2+1), 
     &      w, sgn )
          tgle = .false.
        else
          call step ( n, mj, x(1), x((n/2)*2+1), y(1), y(mj*2+1), 
     &      w, sgn )
          tgle = .true.
        end if

      end do
c
c  Last pass through data: move Y to X if needed. 
c
      if ( tgle ) then
        do i = 1, 2 * n
          x(i) = y(i)
        end do
      end if

      mj = n / 2
      call step ( n, mj, x(1), x((n/2)*2+1), y(1), y(mj*2+1), w, sgn )

      return
      end
      subroutine cffti ( n, w )

c*********************************************************************72
c
cc CFFTI sets up sine and cosine tables needed for the FFT calculation.
c
c  Modified:
c
c    28 April 2008
c
c  Author:
c
c    C original version by Wesley Petersen
c    FORTRAN77 version by John Burkardt
c
c  Reference:
c
c    Wesley Petersen, Peter Arbenz, 
c    Introduction to Parallel Computing - A practical guide with examples in C,
c    Oxford University Press,
c    ISBN: 0-19-851576-6,
c    LC: QA76.58.P47.
c
c  Parameters:
c
c    Input, integer N, the size of the array to be transformed.
c
c    Output, real W(N), a table of sines and cosines.
c
      implicit none

      integer n

      real arg
      real aw
      integer i
      integer n2
      real pi
      parameter ( pi = 3.141592653589793E+00 )
      real w(n)

      n2 = n / 2
      aw = 2.0 * pi / real ( n )

      do i = 1, n2
        arg = aw * real ( i - 1 )
        w(2*i-1) = cos ( arg )
        w(2*i)   = sin ( arg )
      end do

      return
      end
      function ggl ( seed )

c*********************************************************************72
c
cc GGL generates uniformly distributed pseudorandom numbers. 
c
c  Modified:
c
c    16 July 2008
c
c  Author:
c
c    C original version by Wesley Petersen, M Troyer, I Vattulainen
c    FORTRAN77 version by John Burkardt
c
c  Reference:
c
c    Wesley Petersen, Peter Arbenz, 
c    Introduction to Parallel Computing - A practical guide with examples in C,
c    Oxford University Press,
c    ISBN: 0-19-851576-6,
c    LC: QA76.58.P47.
c
c  Parameters:
c
c    Input/output, real SEED, used as a seed for the sequence.
c
c    Output, real GGL, the next pseudorandom value.
c
      implicit none

      double precision d2
      parameter ( d2 = 0.2147483647D+10 )
      real ggl
      real seed
      double precision t

      t = mod ( 16807.0D+00 * dble ( seed ), d2 )
      seed = real ( t )
      ggl = real ( ( t - 1.0D+00 ) / ( d2 - 1.0D+00 ) )

      return
      end
      subroutine step ( n, mj, a, b, c, d, w, sgn )

c*********************************************************************72
c
cc STEP carries out one step of the workspace version of CFFT2.
c
c  Modified:
c
c    27 April 2008
c
c  Author:
c
c    C original version by Wesley Petersen
c    FORTRAN77 version by John Burkardt
c
c  Reference:
c
c    Wesley Petersen, Peter Arbenz, 
c    Introduction to Parallel Computing - A practical guide with examples in C,
c    Oxford University Press,
c    ISBN: 0-19-851576-6,
c    LC: QA76.58.P47.
c
      implicit none

      integer n

      real a(n)
      real ambr
      real ambu
      real b(n)
      real c(n)
      real d(n)
      integer j
      integer ja
      integer jb
      integer jc
      integer jd
      integer jw
      integer k
      integer lj
      integer mj
      integer mj2
      real sgn
      real w(n)
      real wjw(2)

      mj2 = 2 * mj
      lj = n / mj2

c$omp parallel do private ( ambr, ambu, j, ja, jb, jc, jd, jw, k, wjw ) 
c$omp&  shared ( a, b, c, d, lj, mj, mj2, sgn, w )

      do j = 0, lj - 1

        jw = j * mj
        ja  = jw
        jb  = ja
        jc  = j * mj2
        jd  = jc

        wjw(1) = w(jw*2+1) 
        wjw(2) = w(jw*2+2)

        if ( sgn .lt. 0.0 ) then
          wjw(2) = - wjw(2)
        end if

        do k = 0, mj - 1

          c((jc+k)*2+1) = a((ja+k)*2+1) + b((jb+k)*2+1)
          c((jc+k)*2+2) = a((ja+k)*2+2) + b((jb+k)*2+2)

          ambr = a((ja+k)*2+1) - b((jb+k)*2+1)
          ambu = a((ja+k)*2+2) - b((jb+k)*2+2)

          d((jd+k)*2+1) = wjw(1) * ambr - wjw(2) * ambu
          d((jd+k)*2+2) = wjw(2) * ambr + wjw(1) * ambu

        end do
      end do
c$omp end parallel do

      return
      end