From 150e165d353397038e3504fda5f681de4550fc04 Mon Sep 17 00:00:00 2001 From: David A Minton Date: Mon, 27 Feb 2023 17:37:39 -0500 Subject: [PATCH] More refinement of efficient kick --- src/swiftest/swiftest_kick.f90 | 99 +++++++++++++++++++--------------- 1 file changed, 56 insertions(+), 43 deletions(-) diff --git a/src/swiftest/swiftest_kick.f90 b/src/swiftest/swiftest_kick.f90 index 26d5c0506..b926a2e7e 100644 --- a/src/swiftest/swiftest_kick.f90 +++ b/src/swiftest/swiftest_kick.f90 @@ -169,36 +169,42 @@ module subroutine swiftest_kick_getacch_int_all_triangular_pl(npl, nplm, r, Gmas real(DP), dimension(:), intent(in), optional :: radius !! Array of massive body radii real(DP), dimension(:,:), intent(inout) :: acc !! Acceleration vector array ! Internals - integer(I4B) :: i, j + integer(I4B) :: i, j, nplt real(DP) :: rji2, rlim2, fac, rx, ry, rz + real(DP), dimension(NDIM,npl) :: ahi, ahj logical :: lmtiny - lmtiny = (nplm < npl) - if (present(radius)) then - !$omp parallel do default(private) schedule(static)& - !$omp shared(npl,nplm, r, Gmass, radius, acc) - do i = 1, nplm - do concurrent(j = 1:npl, j/=i) - rx = r(1,j) - r(1,i) - ry = r(2,j) - r(2,i) - rz = r(3,j) - r(3,i) - rji2 = rx**2 + ry**2 + rz**2 - rlim2 = (radius(i) + radius(j))**2 - if (rji2 > rlim2) then - fac = Gmass(j) / (rji2 * sqrt(rji2)) - acc(1,i) = acc(1,i) + fac * rx - acc(2,i) = acc(2,i) + fac * ry - acc(3,i) = acc(3,i) + fac * rz - end if - end do - end do - !$omp end parallel do + nplt = npl - nplm + lmtiny = (nplt > nplm) + if (present(radius)) then if (lmtiny) then + ahi(:,:) = 0.0_DP + ahj(:,:) = 0.0_DP + !$omp parallel do default(private) schedule(static)& + !$omp shared(npl, nplm, r, Gmass, radius) & + !$omp reduction(+:ahi) & + !$omp reduction(-:ahj) + do i = 1, nplm + do concurrent(j = i+1:npl) + rx = r(1, j) - r(1, i) + ry = r(2, j) - r(2, i) + rz = r(3, j) - r(3, i) + rji2 = rx**2 + ry**2 + rz**2 + rlim2 = (radius(i) + radius(j))**2 + if (rji2 > rlim2) call swiftest_kick_getacch_int_one_pl(rji2, rx, ry, rz, Gmass(i), Gmass(j), & + ahi(1,i), ahi(2,i), ahi(3,i), ahj(1,j), ahj(2,j), ahj(3,j)) + end do + end do + !$omp end parallel do + do concurrent(i = 1:npl) + acc(:,i) = acc(:,i) + ahi(:,i) + ahj(:,i) + end do + else !$omp parallel do default(private) schedule(static)& - !$omp shared(npl,nplm, r, Gmass, radius, acc) - do i = nplm+1,npl - do concurrent(j = 1:nplm, j/=i) + !$omp shared(npl,nplm, r, Gmass, radius, acc) + do i = 1, nplm + do concurrent(j = 1:npl, j/=i) rx = r(1,j) - r(1,i) ry = r(2,j) - r(2,i) rz = r(3,j) - r(3,i) @@ -215,31 +221,38 @@ module subroutine swiftest_kick_getacch_int_all_triangular_pl(npl, nplm, r, Gmas !$omp end parallel do end if else - !$omp parallel do default(private) schedule(static)& - !$omp shared(npl,nplm, r, Gmass, acc) - do i = 1, nplm - do concurrent(j = 1:npl, j/=i) - rx = r(1,j) - r(1,i) - ry = r(2,j) - r(2,i) - rz = r(3,j) - r(3,i) - rji2 = rx**2 + ry**2 + rz**2 - fac = Gmass(j) / (rji2 * sqrt(rji2)) - acc(1,i) = acc(1,i) + fac * rx - acc(2,i) = acc(2,i) + fac * ry - acc(3,i) = acc(3,i) + fac * rz - end do - end do - !$omp end parallel do - if (lmtiny) then + ahi(:,:) = 0.0_DP + ahj(:,:) = 0.0_DP !$omp parallel do default(private) schedule(static)& - !$omp shared(npl,nplm, r, Gmass, acc) - do i = nplm+1,npl - do concurrent(j = 1:nplm, j/=i) + !$omp shared(npl, nplm, r, Gmass) & + !$omp reduction(+:ahi) & + !$omp reduction(-:ahj) + do i = 1, nplm + do concurrent(j = i+1:npl) + rx = r(1, j) - r(1, i) + ry = r(2, j) - r(2, i) + rz = r(3, j) - r(3, i) + rji2 = rx**2 + ry**2 + rz**2 + rlim2 = (radius(i) + radius(j))**2 + call swiftest_kick_getacch_int_one_pl(rji2, rx, ry, rz, Gmass(i), Gmass(j), & + ahi(1,i), ahi(2,i), ahi(3,i), ahj(1,j), ahj(2,j), ahj(3,j)) + end do + end do + !$omp end parallel do + do concurrent(i = 1:npl) + acc(:,i) = acc(:,i) + ahi(:,i) + ahj(:,i) + end do + else + !$omp parallel do default(private) schedule(static)& + !$omp shared(npl,nplm, r, Gmass, acc) + do i = 1, nplm + do concurrent(j = 1:npl, j/=i) rx = r(1,j) - r(1,i) ry = r(2,j) - r(2,i) rz = r(3,j) - r(3,i) rji2 = rx**2 + ry**2 + rz**2 + rlim2 = (radius(i) + radius(j))**2 fac = Gmass(j) / (rji2 * sqrt(rji2)) acc(1,i) = acc(1,i) + fac * rx acc(2,i) = acc(2,i) + fac * ry