From 6e48ee594c2c578d24d12884699b3cb177bb233e Mon Sep 17 00:00:00 2001 From: David Minton Date: Wed, 14 Apr 2021 13:19:30 -0400 Subject: [PATCH] Experiment with swapping inner and outer loop depending on whether ntp > npl or not --- src/whm/whm_getacch.f90 | 42 +++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/src/whm/whm_getacch.f90 b/src/whm/whm_getacch.f90 index 4faeea061..6da84bfd1 100644 --- a/src/whm/whm_getacch.f90 +++ b/src/whm/whm_getacch.f90 @@ -215,25 +215,39 @@ pure subroutine whm_getacch_ah3_tp(cb, pl, tp, xh) ! Internals integer(I4B) :: i, j real(DP) :: rji2, irij3, fac - real(DP), dimension(NDIM) :: dx + real(DP), dimension(NDIM) :: dx, acc real(DP), dimension(:,:), allocatable :: aht - associate(ntp => tp%nbody, npl => pl%nbody, msun => cb%Gmass, Gmpl => pl%Gmass, & - xht => tp%xh) + associate(ntp => tp%nbody, npl => pl%nbody, msun => cb%Gmass, GMpl => pl%Gmass, xht => tp%xh) - allocate(aht, source=tp%ah) if (ntp == 0) return - do j = 1, npl - !$omp simd private(dx,rji2,irij3,fac) reduction(-:aht) - do i = 1, tp%nbody - dx(:) = tp%xh(:, i) - xh(:, j) - rji2 = dot_product(dx(:), dx(:)) - irij3 = 1.0_DP / (rji2 * sqrt(rji2)) - fac = pl%Gmass(j) * irij3 - aht(:, i) = aht(:, i) - fac * dx(:) + if (ntp > npl) then + allocate(aht, source=tp%ah) + do j = 1, npl + !$omp simd private(dx,rji2,irij3,fac) reduction(-:aht) + do i = 1, ntp + dx(:) = xht(:, i) - xh(:, j) + rji2 = dot_product(dx(:), dx(:)) + irij3 = 1.0_DP / (rji2 * sqrt(rji2)) + fac = GMpl(j) * irij3 + aht(:, i) = aht(:, i) - fac * dx(:) + end do end do - end do - call move_alloc(aht, tp%ah) + call move_alloc(aht, tp%ah) + else + do i = 1, ntp + acc(:) = 0.0_DP + !$omp simd private(dx,rji2,irij3,fac) reduction(-:acc) + do j = 1, npl + dx(:) = xht(:, i) - xh(:, j) + rji2 = dot_product(dx(:), dx(:)) + irij3 = 1.0_DP / (rji2 * sqrt(rji2)) + fac = GMpl(j) * irij3 + acc(:) = acc(:) - fac * dx(:) + end do + tp%ah(:, i) = tp%ah(:, i) + acc(:) + end do + end if end associate return end subroutine whm_getacch_ah3_tp