program main

!*****************************************************************************80
!
!! cuda_loop_test() tests cuda_loop().
!
!  Discussion:
!
!    A CUDA kernel "kernel()" is invoked by a command of the form
!
!      kernel <<< blocks, threads >>> ( args )
!
!    where blocks and threads are each vectors of up to 3 values,
!    listing the number of blocks and number of threads to be used.
!
!    If a problem involves N tasks, then tasks are allotted to
!    specific CUDA processes in an organized fashion.  Some processes
!    may get no tasks, one task, or multiple tasks.
!
!    Each process is given variables that can be used to determine
!    the tasks to be performed:
!
!      gridDim.x, gridDim.y, gridDim.z: the block dimensions as
!      given by the user in "blocks"
!
!      blockDim.x, blockDim.y, blockDim.z: the thread dimensions as
!      given by the user in "threads"
!
!      blockIdx.x, blockIdx.y, blockIdx.z: the block indices for this process.
!
!      threadIdx.x, threadIdx.y, threadIdx.z: the thread indices for this process.
!
!    Essentially, a process can determine its linear index K by:
!
!      K = threadIdx.x
!        +  blockDim.x  * threadIdx.y
!        +  blockDim.x  *  blockDim.y  * threadIdx.z
!        +  blockDim.x  *  blockDim.y  *  blockDim.z  * blockIdx.x
!        +  blockDim.x  *  blockDim.y  *  blockDim.z  *  gridDim.x  * blockIdx.y
!        +  blockDim.x  *  blockDim.y  *  blockDim.z  *  gridDim.x  *  gridDim.y  * blockIdx.z
!
!    Set task T = K.
!
!    while ( T < N )
!      carry out task T
!      T = T + blockDim.x * blockDim.y * blockDim.z * gridDim.x * gridDim.y * gridDim.z
!
!    This program suggests how a specific set of block and thread parameters
!    would determine the assignment of individual tasks to CUDA processes.
!
!    Each test below sets BLOCKS, THREADS and N and then calls cuda_loop(),
!    which is defined outside this program unit.
!
!  Licensing:
!
!    This code is distributed under the MIT license.
!
!  Modified:
!
!    22 March 2017
!
!  Author:
!
!    John Burkardt
!
!  Local:
!
!    integer BLOCKS(3), the CUDA block values.  These
!    should be positive.  Typically, the third entry is 1.  On many GPUs,
!    the grid dimensions cannot be greater than 65,535 (newer devices
!    allow a much larger first dimension).
!
!    integer THREADS(3), the CUDA thread values.  These should be
!    positive.  Typically, there is a maximum value imposed on these
!    quantities, which depends on the GPU model.
!
!    integer N, the number of tasks to be carried out.
!
  implicit none

  integer blocks(3)
  integer n
  integer threads(3)

  call timestamp ( )
  write ( *, '(a)' ) ''
  write ( *, '(a)' ) 'cuda_loop_test():'
  write ( *, '(a)' ) '  Fortran90 version'
  write ( *, '(a)' ) '  Test cuda_loop(), which simulates the way CUDA breaks'
  write ( *, '(a)' ) '  up an iterative task, using blocks and threads.'
!
!  Linear array of blocks and threads.
!  Essentially, blocks = your hands and threads = your fingers.
!  Now count up to 23:  2 * 5 = 10 processes share 23 tasks.
!
  blocks = (/ 2, 1, 1 /)
  threads = (/ 5, 1, 1 /)
  n = 23

  call cuda_loop ( blocks, threads, n )
!
!  Unit arrays of blocks and threads.
!  Waste your GPU by having a single block and thread do everything.
!
  blocks = (/ 1, 1, 1 /)
  threads = (/ 1, 1, 1 /)
  n = 23

  call cuda_loop ( blocks, threads, n )
!
!  2D block array, 3D thread array.
!  More processes than tasks:  ( 2 * 3 ) * ( 2 * 1 * 4 ) = 48 processes, 40 tasks.
!
  blocks = (/ 2, 3, 1 /)
  threads = (/ 2, 1, 4 /)
  n = 40

  call cuda_loop ( blocks, threads, n )
!
!  One block, 2 * 2 * 2 = 8 threads.
!
  blocks = (/ 1, 1, 1 /)
  threads = (/ 2, 2, 2 /)
  n = 23

  call cuda_loop ( blocks, threads, n )
!
!  Terminate.
!
  write ( *, '(a)' ) ''
  write ( *, '(a)' ) 'cuda_loop_test():'
  write ( *, '(a)' ) '  Normal end of execution.'
  write ( *, '(a)' ) ''
  call timestamp ( )

  stop 0
end program main
subroutine timestamp ( )

!*****************************************************************************80
!
!! timestamp() prints the current YMDHMS date as a time stamp.
!
!  Discussion:
!
!    The date and time are obtained from the intrinsic date_and_time()
!    and written to standard output using a 12-hour clock.
!
!    The hour reported by date_and_time() lies in the range 0 to 23.
!    Hour 0 is printed as 12, and hours 13 through 23 as 1 through 11.
!
!    The suffix is "AM" or "PM", except that a time whose minutes and
!    seconds are both zero is labeled "Noon" at hour 12 and "Midnight"
!    at hour 0.  Milliseconds are not considered in that test.
!
!    If the processor cannot supply the date, a short message is
!    printed instead of a time stamp.
!
!  Example:
!
!    31 May 2001   9:45:54.872 AM
!
!  Licensing:
!
!    This code is distributed under the MIT license.
!
!  Modified:
!
!    18 May 2013
!
!  Author:
!
!    John Burkardt
!
!  Local:
!
!    integer Y, M, D, the year, month and day.
!
!    integer H, N, S, MM, the hour, minute, second and millisecond.
!
!    character ( len = 8 ) AMPM, the suffix printed after the time.
!
  implicit none

  character ( len = 8 ) ampm
  integer d
  integer h
  integer m
  integer mm
  character ( len = 9 ), parameter, dimension(12) :: month = (/ &
    'January  ', 'February ', 'March    ', 'April    ', &
    'May      ', 'June     ', 'July     ', 'August   ', &
    'September', 'October  ', 'November ', 'December ' /)
  integer n
  integer s
  integer values(8)
  integer y

  call date_and_time ( values = values )

  y = values(1)
  m = values(2)
  d = values(3)
  h = values(5)
  n = values(6)
  s = values(7)
  mm = values(8)
!
!  When no clock is available, date_and_time() returns -huge(0) in VALUES.
!  Do not use such a month to index MONTH.
!
  if ( m < 1 .or. 12 < m ) then
    write ( *, '(a)' ) 'timestamp(): date and time are not available.'
    return
  end if
!
!  Convert the 24-hour value H to a 12-hour value and choose the suffix.
!
  if ( h == 0 ) then
!
!  The first hour of the day is 12 on a 12-hour clock.
!
    h = 12
    if ( n == 0 .and. s == 0 ) then
      ampm = 'Midnight'
    else
      ampm = 'AM'
    end if
  else if ( h < 12 ) then
    ampm = 'AM'
  else if ( h == 12 ) then
    if ( n == 0 .and. s == 0 ) then
      ampm = 'Noon'
    else
      ampm = 'PM'
    end if
  else
    h = h - 12
    ampm = 'PM'
  end if

  write ( *, '(i2.2,1x,a,1x,i4,2x,i2,a1,i2.2,a1,i2.2,a1,i3.3,1x,a)' ) &
    d, trim ( month(m) ), y, h, ':', n, ':', s, '.', mm, trim ( ampm )

  return
end subroutine timestamp