subroutine kmns ( a, m, n, c, k, ic1, ic2, nc, an1, an2, ncp, d, itran, live, & iter, wss, ifault ) !*****************************************************************************80 ! !! kmns() carries out the K-means algorithm. ! ! Discussion: ! ! This routine attempts to divide M points in N-dimensional space into ! K clusters so that the within cluster sum of squares is minimized. ! ! Licensing: ! ! This code is distributed under the MIT license. ! ! Modified: ! ! 28 August 2021 ! ! Author: ! ! Original FORTRAN77 version by John Hartigan, Manchek Wong. ! FORTRAN90 version by John Burkardt. ! ! Reference: ! ! John Hartigan, Manchek Wong, ! Algorithm AS 136: ! A K-Means Clustering Algorithm, ! Applied Statistics, ! Volume 28, Number 1, 1979, pages 100-108. ! ! Input: ! ! real ( kind = rk ) A(M,N), the points. ! ! integer M, the number of points. ! ! integer N, the number of spatial dimensions. ! ! real ( kind = rk ) C(K,N), the initial cluster centers. ! ! integer K, the number of clusters. ! ! integer ITER, the maximum number of iterations allowed. ! ! Output: ! ! real ( kind = rk ) C(K,N), the updated cluster centers. ! ! integer IC1(M), the cluster to which each point ! is assigned. ! ! integer NC(K), the number of points in each cluster. ! ! real ( kind = rk ) WSS(K), the within-cluster sum of squares ! of each cluster. ! ! integer IFAULT, error indicator. ! 0, no error was detected. ! 1, at least one cluster is empty after the initial assignment. A better ! set of initial cluster centers is needed. ! 2, the allowed maximum number off iterations was exceeded. ! 3, K is less than or equal to 1, or greater than or equal to M. ! ! Workspace: ! ! integer IC2(M), used to store the cluster which ! each point is most likely to be transferred to at each step. ! ! real ( kind = rk ) AN1(K). ! ! real ( kind = rk ) AN2(K). ! ! integer NCP(K). ! ! real ( kind = rk ) D(M). ! ! integer ITRAN(K). ! ! integer LIVE(K). ! implicit none integer, parameter :: rk = kind ( 1.0D+00 ) integer k integer m integer n real ( kind = rk ) a(m,n) real ( kind = rk ) aa real ( kind = rk ) an1(k) real ( kind = rk ) an2(k) real ( kind = rk ) c(k,n) real ( kind = rk ) d(m) real ( kind = rk ) da real ( kind = rk ) db real ( kind = rk ) dc real ( kind = rk ) dt(2) integer i integer ic1(m) integer ic2(m) integer ifault integer ii integer ij integer il integer indx integer iter integer itran(k) integer j integer l integer live(k) integer nc(k) integer ncp(k) real ( kind = rk ) temp real ( kind = rk ) wss(k) ifault = 0 if ( k <= 1 .or. m <= k ) then ifault = 3 return end if ! ! For each point I, find its two closest centers, IC1(I) and ! IC2(I). Assign the point to IC1(I). ! do i = 1, m ic1(i) = 1 ic2(i) = 2 do il = 1, 2 dt(il) = 0.0D+00 do j = 1, n da = a(i,j) - c(il,j) dt(il) = dt(il) + da * da end do end do if ( dt(2) < dt(1) ) then ic1(i) = 2 ic2(i) = 1 temp = dt(1) dt(1) = dt(2) dt(2) = temp end if do l = 3, k db = 0.0D+00 do j = 1, n dc = a(i,j) - c(l,j) db = db + dc * dc end do if ( db < dt(2) ) then if ( dt(1) <= db ) then dt(2) = db ic2(i) = l else dt(2) = dt(1) ic2(i) = ic1(i) dt(1) = db ic1(i) = l end if end if end do end do ! ! Update cluster centers to be the average of points contained within them. ! do l = 1, k nc(l) = 0 do j = 1, n c(l,j) = 0.0D+00 end do end do do i = 1, m l = ic1(i) nc(l) = nc(l) + 1 c(l,1:n) = c(l,1:n) + a(i,1:n) end do ! ! Check to see if there is any empty cluster at this stage. ! ifault = 1 do l = 1, k if ( nc(l) == 0 ) then ifault = 1 return end if end do ifault = 0 do l = 1, k aa = real ( nc(l), kind = rk ) c(l,1:n) = c(l,1:n) / aa ! ! Initialize AN1, AN2, ITRAN and NCP. ! ! AN1(L) = NC(L) / (NC(L) - 1) ! AN2(L) = NC(L) / (NC(L) + 1) ! ITRAN(L) = 1 if cluster L is updated in the quick-transfer stage, ! = 0 otherwise ! ! In the optimal-transfer stage, NCP(L) stores the step at which ! cluster L is last updated. ! ! In the quick-transfer stage, NCP(L) stores the step at which ! cluster L is last updated plus M. ! an2(l) = aa / ( aa + 1.0D+00 ) if ( 1.0D+00 < aa ) then an1(l) = aa / ( aa - 1.0D+00 ) else an1(l) = huge ( an1(l) ) end if itran(l) = 1 ncp(l) = -1 end do indx = 0 ifault = 2 do ij = 1, iter ! ! In this stage, there is only one pass through the data. Each ! point is re-allocated, if necessary, to the cluster that will ! induce the maximum reduction in within-cluster sum of squares. ! call optra ( a, m, n, c, k, ic1, ic2, nc, an1, an2, ncp, d, itran, & live, indx ) ! ! Stop if no transfer took place in the last M optimal transfer steps. ! if ( indx == m ) then ifault = 0 exit end if ! ! Each point is tested in turn to see if it should be re-allocated ! to the cluster to which it is most likely to be transferred, ! IC2(I), from its present cluster, IC1(I). Loop through the ! data until no further change is to take place. ! call qtran ( a, m, n, c, k, ic1, ic2, nc, an1, an2, ncp, d, & itran, indx ) ! ! If there are only two clusters, there is no need to re-enter the ! optimal transfer stage. ! if ( k == 2 ) then ifault = 0 exit end if ! ! NCP has to be set to 0 before entering OPTRA. ! do l = 1, k ncp(l) = 0 end do end do ! ! If the maximum number of iterations was taken without convergence, ! IFAULT is 2 now. This may indicate unforeseen looping. ! if ( ifault == 2 ) then write ( *, '(a)' ) ' ' write ( *, '(a)' ) 'KMNS - Warning!' write ( *, '(a)' ) ' Maximum number of iterations reached' write ( *, '(a)' ) ' without convergence.' end if ! ! Compute the within-cluster sum of squares for each cluster. ! wss(1:k) = 0.0D+00 c(1:k,1:n) = 0.0D+00 do i = 1, m ii = ic1(i) c(ii,1:n) = c(ii,1:n) + a(i,1:n) end do do j = 1, n do l = 1, k c(l,j) = c(l,j) / real ( nc(l), kind = rk ) end do do i = 1, m ii = ic1(i) da = a(i,j) - c(ii,j) wss(ii) = wss(ii) + da * da end do end do return end subroutine optra ( a, m, n, c, k, ic1, ic2, nc, an1, an2, ncp, d, itran, & live, indx ) !*****************************************************************************80 ! !! optra() carries out the optimal transfer stage. ! ! Discussion: ! ! This is the optimal transfer stage. ! ! Each point is re-allocated, if necessary, to the cluster that ! will induce a maximum reduction in the within-cluster sum of ! squares. ! ! Licensing: ! ! This code is distributed under the MIT license. ! ! Modified: ! ! 28 August 2021 ! ! Author: ! ! Original FORTRAN77 version by John Hartigan, Manchek Wong. ! FORTRAN90 version by John Burkardt. ! ! Reference: ! ! John Hartigan, Manchek Wong, ! Algorithm AS 136: ! A K-Means Clustering Algorithm, ! Applied Statistics, ! Volume 28, Number 1, 1979, pages 100-108. ! ! Input: ! ! real ( kind = rk ) A(M,N), the points. ! ! integer M, the number of points. ! ! integer N, the number of spatial dimensions. ! ! real ( kind = rk ) C(K,N), the cluster centers. ! ! integer K, the number of clusters. ! ! integer IC1(M), the cluster to which each point is assigned. ! ! integer IC2(M), used to store the cluster ! which each point is most likely to be transferred to at each step. ! ! integer NC(K), the number of points in each cluster. ! ! real ( kind = rk ) AN1(K). ! ! real ( kind = rk ) AN2(K). ! ! integer NCP(K). ! ! real ( kind = rk ) D(M). ! ! integer ITRAN(K). ! ! integer LIVE(K). ! ! integer INDX, the number of steps since a transfer took place. ! ! Output: ! ! real ( kind = rk ) C(K,N), the cluster centers. ! ! integer IC1(M), the cluster to which each point is assigned. ! ! integer IC2(M), used to store the cluster ! which each point is most likely to be transferred to at each step. ! ! integer NC(K), the number of points in each cluster. ! ! real ( kind = rk ) AN1(K). ! ! real ( kind = rk ) AN2(K). ! ! integer NCP(K). ! ! real ( kind = rk ) D(M). ! ! integer ITRAN(K). ! ! integer LIVE(K). ! ! integer INDX, the number of steps since a transfer took place. ! implicit none integer, parameter :: rk = kind ( 1.0D+00 ) integer k integer m integer n real ( kind = rk ) a(m,n) real ( kind = rk ) al1 real ( kind = rk ) al2 real ( kind = rk ) alt real ( kind = rk ) alw real ( kind = rk ) an1(k) real ( kind = rk ) an2(k) real ( kind = rk ) c(k,n) real ( kind = rk ) d(m) real ( kind = rk ) da real ( kind = rk ) db real ( kind = rk ) dc real ( kind = rk ) dd real ( kind = rk ) de real ( kind = rk ) df integer i integer ic1(m) integer ic2(m) integer indx integer itran(k) integer j integer l integer l1 integer l2 integer live(k) integer ll integer nc(k) integer ncp(k) real ( kind = rk ) r2 real ( kind = rk ) rr ! ! If cluster L is updated in the last quick-transfer stage, it ! belongs to the live set throughout this stage. Otherwise, at ! each step, it is not in the live set if it has not been updated ! in the last M optimal transfer steps. ! do l = 1, k if ( itran(l) == 1) then live(l) = m + 1 end if end do do i = 1, m indx = indx + 1 l1 = ic1(i) l2 = ic2(i) ll = l2 ! ! If point I is the only member of cluster L1, no transfer. ! if ( 1 < nc(l1) ) then ! ! If L1 has not yet been updated in this stage, no need to ! re-compute D(I). ! if ( ncp(l1) /= 0 ) then de = 0.0D+00 do j = 1, n df = a(i,j) - c(l1,j) de = de + df * df end do d(i) = de * an1(l1) end if ! ! Find the cluster with minimum R2. ! da = 0.0D+00 do j = 1, n db = a(i,j) - c(l2,j) da = da + db * db end do r2 = da * an2(l2) do l = 1, k ! ! If LIVE(L1) <= I, then L1 is not in the live set. If this is ! true, we only need to consider clusters that are in the live set ! for possible transfer of point I. Otherwise, we need to consider ! all possible clusters. ! if ( ( i < live(l1) .or. i < live(l2) ) .and. & l /= l1 .and. l /= ll ) then rr = r2 / an2(l) dc = 0.0D+00 do j = 1, n dd = a(i,j) - c(l,j) dc = dc + dd * dd end do if ( dc < rr ) then r2 = dc * an2(l) l2 = l end if end if end do ! ! If no transfer is necessary, L2 is the new IC2(I). ! if ( d(i) <= r2 ) then ic2(i) = l2 ! ! Update cluster centers, LIVE, NCP, AN1 and AN2 for clusters L1 and ! L2, and update IC1(I) and IC2(I). ! else indx = 0 live(l1) = m + i live(l2) = m + i ncp(l1) = i ncp(l2) = i al1 = real ( nc(l1), kind = rk ) alw = al1 - 1.0D+00 al2 = real ( nc(l2), kind = rk ) alt = al2 + 1.0D+00 do j = 1, n c(l1,j) = ( c(l1,j) * al1 - a(i,j) ) / alw c(l2,j) = ( c(l2,j) * al2 + a(i,j) ) / alt end do nc(l1) = nc(l1) - 1 nc(l2) = nc(l2) + 1 an2(l1) = alw / al1 if ( 1.0D+00 < alw ) then an1(l1) = alw / ( alw - 1.0D+00 ) else an1(l1) = huge ( an1(l1) ) end if an1(l2) = alt / al2 an2(l2) = alt / ( alt + 1.0D+00 ) ic1(i) = l2 ic2(i) = l1 end if end if if ( indx == m ) then return end if end do ! ! ITRAN(L) = 0 before entering QTRAN. Also, LIVE(L) has to be ! decreased by M before re-entering OPTRA. ! do l = 1, k itran(l) = 0 live(l) = live(l) - m end do return end subroutine qtran ( a, m, n, c, k, ic1, ic2, nc, an1, an2, ncp, d, itran, & indx ) !*****************************************************************************80 ! !! qtran() carries out the quick transfer stage. ! ! Discussion: ! ! This is the quick transfer stage. ! ! IC1(I) is the cluster which point I belongs to. ! IC2(I) is the cluster which point I is most likely to be ! transferred to. ! ! For each point I, IC1(I) and IC2(I) are switched, if necessary, to ! reduce within-cluster sum of squares. The cluster centers are ! updated after each step. ! ! Licensing: ! ! This code is distributed under the MIT license. ! ! Modified: ! ! 28 August 2021 ! ! Author: ! ! Original FORTRAN77 version by John Hartigan, Manchek Wong. ! FORTRAN90 version by John Burkardt. ! ! Reference: ! ! John Hartigan, Manchek Wong, ! Algorithm AS 136: ! A K-Means Clustering Algorithm, ! Applied Statistics, ! Volume 28, Number 1, 1979, pages 100-108. ! ! Input: ! ! real ( kind = rk ) A(M,N), the points. ! ! integer M, the number of points. ! ! integer N, the number of spatial dimensions. ! ! real ( kind = rk ) C(K,N), the cluster centers. ! ! integer K, the number of clusters. ! ! integer IC1(M), the cluster to which each ! point is assigned. ! ! integer IC2(M), used to store the cluster ! which each point is most likely to be transferred to at each step. ! ! integer NC(K), the number of points in each cluster. ! ! real ( kind = rk ) AN1(K). ! ! real ( kind = rk ) AN2(K). ! ! integer NCP(K). ! ! real ( kind = rk ) D(M). ! ! integer ITRAN(K). ! ! integer INDX, counts the number of steps since the last transfer. ! ! Output: ! ! real ( kind = rk ) C(K,N), the cluster centers. ! ! integer IC1(M), the cluster to which each ! point is assigned. ! ! integer IC2(M), used to store the cluster ! which each point is most likely to be transferred to at each step. ! ! integer NC(K), the number of points in each cluster. ! ! real ( kind = rk ) AN1(K). ! ! real ( kind = rk ) AN2(K). ! ! integer NCP(K). ! ! real ( kind = rk ) D(M). ! ! integer ITRAN(K). ! ! integer INDX, counts the number of steps since the last transfer. ! implicit none integer, parameter :: rk = kind ( 1.0D+00 ) integer k integer m integer n real ( kind = rk ) a(m,n) real ( kind = rk ) al1 real ( kind = rk ) al2 real ( kind = rk ) alt real ( kind = rk ) alw real ( kind = rk ) an1(k) real ( kind = rk ) an2(k) real ( kind = rk ) c(k,n) real ( kind = rk ) d(m) real ( kind = rk ) da real ( kind = rk ) db real ( kind = rk ) dd real ( kind = rk ) de integer i integer ic1(m) integer ic2(m) integer icoun integer indx integer istep integer itran(k) integer j integer l1 integer l2 integer nc(k) integer ncp(k) real ( kind = rk ) r2 ! ! In the optimal transfer stage, NCP(L) indicates the step at which ! cluster L is last updated. In the quick transfer stage, NCP(L) ! is equal to the step at which cluster L is last updated plus M. ! icoun = 0 istep = 0 do do i = 1, m icoun = icoun + 1 istep = istep + 1 l1 = ic1(i) l2 = ic2(i) ! ! If point I is the only member of cluster L1, no transfer. ! if ( 1 < nc(l1) ) then ! ! If NCP(L1) < ISTEP, no need to re-compute distance from point I to ! cluster L1. Note that if cluster L1 is last updated exactly M ! steps ago, we still need to compute the distance from point I to ! cluster L1. ! if ( istep <= ncp(l1) ) then da = 0.0D+00 do j = 1, n db = a(i,j) - c(l1,j) da = da + db * db end do d(i) = da * an1(l1) end if ! ! If NCP(L1) <= ISTEP and NCP(L2) <= ISTEP, there will be no transfer of ! point I at this step. ! if ( istep < ncp(l1) .or. istep < ncp(l2) ) then r2 = d(i) / an2(l2) dd = 0.0D+00 do j = 1, n de = a(i,j) - c(l2,j) dd = dd + de * de end do ! ! Update cluster centers, NCP, NC, ITRAN, AN1 and AN2 for clusters ! L1 and L2. Also update IC1(I) and IC2(I). Note that if any ! updating occurs in this stage, INDX is set back to 0. ! if ( dd < r2 ) then icoun = 0 indx = 0 itran(l1) = 1 itran(l2) = 1 ncp(l1) = istep + m ncp(l2) = istep + m al1 = real ( nc(l1), kind = rk ) alw = al1 - 1.0D+00 al2 = real ( nc(l2), kind = rk ) alt = al2 + 1.0D+00 do j = 1, n c(l1,j) = ( c(l1,j) * al1 - a(i,j) ) / alw c(l2,j) = ( c(l2,j) * al2 + a(i,j) ) / alt end do nc(l1) = nc(l1) - 1 nc(l2) = nc(l2) + 1 an2(l1) = alw / al1 if ( 1.0D+00 < alw ) then an1(l1) = alw / ( alw - 1.0D+00 ) else an1(l1) = huge ( an1(l1) ) end if an1(l2) = alt / al2 an2(l2) = alt / ( alt + 1.0D+00 ) ic1(i) = l2 ic2(i) = l1 end if end if end if ! ! If no re-allocation took place in the last M steps, return. ! if ( icoun == m ) then return end if end do end do end