From ed4b2f0610b95fd893de3e81f6769cb532e441eb Mon Sep 17 00:00:00 2001 From: David Minton Date: Tue, 6 Jun 2023 12:54:11 -0400 Subject: [PATCH] Fixed issues that were causing do concurrent to give bad results when OMP_NUM_THREADS>1 when the F2018 locality-spec is used. --- src/helio/helio_gr.f90 | 16 ++++++++-------- src/swiftest/swiftest_gr.f90 | 26 +++++++++++++++----------- src/swiftest/swiftest_module.f90 | 10 +++++----- src/swiftest/swiftest_orbel.f90 | 32 ++++++++++++++++++-------------- src/swiftest/swiftest_util.f90 | 6 ++++-- src/whm/whm_gr.f90 | 16 ++++++++-------- 6 files changed, 58 insertions(+), 48 deletions(-) diff --git a/src/helio/helio_gr.f90 b/src/helio/helio_gr.f90 index 6de300cae..6b43714b7 100644 --- a/src/helio/helio_gr.f90 +++ b/src/helio/helio_gr.f90 @@ -75,14 +75,14 @@ pure module subroutine helio_gr_p4_pl(self, nbody_system, param, dt) if (self%nbody == 0) return - associate(pl => self) + associate(lmask => self%lmask, rh => self%rh, vb => self%vb, inv_c2 => param%inv_c2) npl = self%nbody #ifdef DOCONLOC - do concurrent(i = 1:npl, pl%lmask(i)) shared(param,pl,dt) + do concurrent(i = 1:npl, lmask(i)) shared(inv_c2, lmask, rh, vb, dt) #else - do concurrent(i = 1:npl, pl%lmask(i)) + do concurrent(i = 1:npl, lmask(i)) #endif - call swiftest_gr_p4_pos_kick(param, pl%rh(:, i), pl%vb(:, i), dt) + call swiftest_gr_p4_pos_kick(inv_c2, rh(1,i), rh(2,i), rh(3,i), vb(1,i), vb(2,i), vb(3,i), dt) end do end associate @@ -108,14 +108,14 @@ pure module subroutine helio_gr_p4_tp(self, nbody_system, param, dt) if (self%nbody == 0) return - associate(tp => self) + associate(rh => self%rh, vb => self%vb, lmask => self%lmask, inv_c2 => param%inv_c2) ntp = self%nbody #ifdef DOCONLOC - do concurrent(i = 1:ntp, tp%lmask(i)) shared(param,tp,dt) + do concurrent(i = 1:ntp, lmask(i)) shared(inv_c2, lmask, rh, vb, dt) #else - do concurrent(i = 1:ntp, tp%lmask(i)) + do concurrent(i = 1:ntp, lmask(i)) #endif - call swiftest_gr_p4_pos_kick(param, tp%rh(:, i), tp%vb(:, i), dt) + call swiftest_gr_p4_pos_kick(inv_c2, rh(1,i), rh(2,i), rh(3,i), vb(1,i), vb(2,i), vb(3,i), dt) end do end associate diff --git a/src/swiftest/swiftest_gr.f90 b/src/swiftest/swiftest_gr.f90 index 1985f6dd1..083e5de1b 100644 --- a/src/swiftest/swiftest_gr.f90 +++ b/src/swiftest/swiftest_gr.f90 @@ -87,7 +87,7 @@ pure module subroutine swiftest_gr_kick_getacch(mu, x, lmask, n, inv_c2, agr) end subroutine swiftest_gr_kick_getacch - pure module subroutine swiftest_gr_p4_pos_kick(param, x, v, dt) + pure elemental module subroutine swiftest_gr_p4_pos_kick(inv_c2, rx, ry, rz, vx, vy, vz, dt) !! author: David A. Minton !! !! Position kick due to p**4 term in the post-Newtonian correction @@ -100,17 +100,21 @@ pure module subroutine swiftest_gr_p4_pos_kick(param, x, v, dt) !! Adapted from David A. Minton's Swifter routine gr_whm_p4.f90 implicit none ! Arguments - class(swiftest_parameters), intent(in) :: param !! Current run configuration parameters - real(DP), dimension(:), intent(inout) :: x !! Position vector - real(DP), dimension(:), intent(in) :: v !! Velocity vector - real(DP), intent(in) :: dt !! Step size + real(DP), intent(in) :: inv_c2 !! One over speed of light squared (1/c**2) + real(DP), intent(inout) :: rx, ry, rz !! Position vector + real(DP), intent(in) :: vx, vy, vz !! Velocity vector + real(DP), intent(in) :: dt !! Step size ! Internals - real(DP), dimension(NDIM) :: dr - real(DP) :: vmag2 - - vmag2 = dot_product(v(:), v(:)) - dr(:) = -2 * param%inv_c2 * vmag2 * v(:) - x(:) = x(:) + dr(:) * dt + real(DP) :: drx, dry, drz + real(DP) :: vmag2 + + vmag2 = vx*vx + vy*vy + vz*vz + drx = -2 * inv_c2 * vmag2 * vx + dry = -2 * inv_c2 * vmag2 * vy + drz = -2 * inv_c2 * vmag2 * vz + rx = rx + drx * dt + ry = ry + dry * dt + rz = rz + drz * dt return end subroutine swiftest_gr_p4_pos_kick diff --git a/src/swiftest/swiftest_module.f90 b/src/swiftest/swiftest_module.f90 index 8349c12c1..a54e2351b 100644 --- a/src/swiftest/swiftest_module.f90 +++ b/src/swiftest/swiftest_module.f90 @@ -550,12 +550,12 @@ pure module subroutine swiftest_gr_kick_getacch(mu, x, lmask, n, inv_c2, agr) real(DP), dimension(:,:), intent(out) :: agr !! Accelerations end subroutine swiftest_gr_kick_getacch - pure module subroutine swiftest_gr_p4_pos_kick(param, x, v, dt) + pure elemental module subroutine swiftest_gr_p4_pos_kick(inv_c2, rx, ry, rz, vx, vy, vz, dt) implicit none - class(swiftest_parameters), intent(in) :: param !! Current run configuration parameters - real(DP), dimension(:), intent(inout) :: x !! Position vector - real(DP), dimension(:), intent(in) :: v !! Velocity vector - real(DP), intent(in) :: dt !! Step size + real(DP), intent(in) :: inv_c2 !! One over speed of light squared (1/c**2) + real(DP), intent(inout) :: rx, ry, rz !! Position vector + real(DP), intent(in) :: vx, vy, vz !! Velocity vector + real(DP), intent(in) :: dt !! Step size end subroutine swiftest_gr_p4_pos_kick pure module subroutine swiftest_gr_pseudovel2vel(param, mu, rh, pv, vh) diff --git a/src/swiftest/swiftest_orbel.f90 b/src/swiftest/swiftest_orbel.f90 index 3827c1b59..431d182ab 100644 --- a/src/swiftest/swiftest_orbel.f90 +++ b/src/swiftest/swiftest_orbel.f90 @@ -21,24 +21,28 @@ module subroutine swiftest_orbel_el2xv_vec(self, cb) class(swiftest_body), intent(inout) :: self !! Swiftest body object class(swiftest_cb), intent(inout) :: cb !! Swiftest central body objec ! Internals - integer(I4B) :: i + integer(I4B) :: i, n if (self%nbody == 0) return + n = self%nbody call self%set_mu(cb) + associate(mu => self%mu, a => self%a, e => self%e, inc => self%inc, capom => self%capom, omega => self%omega, & + capm => self%capm, rh => self%rh, vh => self%vh) #ifdef DOCONLOC - do concurrent (i = 1:self%nbody) shared(self) + do concurrent (i = 1:n) shared(mu, a, e, inc, capom, omega, capm, rh, vh) #else - do concurrent (i = 1:self%nbody) + do concurrent (i = 1:n) #endif - call swiftest_orbel_el2xv(self%mu(i), self%a(i), self%e(i), self%inc(i), self%capom(i), & - self%omega(i), self%capm(i), self%rh(:, i), self%vh(:, i)) - end do + call swiftest_orbel_el2xv(mu(i), a(i), e(i), inc(i), capom(i), omega(i), capm(i), & + rh(1,i), rh(2,i), rh(3,i), vh(1,i), vh(2,i), vh(3,i)) + end do + end associate return end subroutine swiftest_orbel_el2xv_vec - pure subroutine swiftest_orbel_el2xv(mu, a, ie, inc, capom, omega, capm, x, v) + pure elemental subroutine swiftest_orbel_el2xv(mu, a, ie, inc, capom, omega, capm, rx, ry, rz, vx, vy, vz) !! author: David A. Minton !! !! Compute osculating orbital elements from relative C)rtesian position and velocity @@ -56,7 +60,7 @@ pure subroutine swiftest_orbel_el2xv(mu, a, ie, inc, capom, omega, capm, x, v) implicit none real(DP), intent(in) :: mu real(DP), intent(in) :: a, ie, inc, capom, omega, capm - real(DP), dimension(:), intent(out) :: x, v + real(DP), intent(out) :: rx, ry, rz, vx, vy, vz integer(I4B) :: iorbit_type real(DP) :: e, cape, capf, zpara, em1 @@ -129,12 +133,12 @@ pure subroutine swiftest_orbel_el2xv(mu, a, ie, inc, capom, omega, capm, x, v) vfac2 = ri * sqgma endif !-- - x(1) = d11 * xfac1 + d21 * xfac2 - x(2) = d12 * xfac1 + d22 * xfac2 - x(3) = d13 * xfac1 + d23 * xfac2 - v(1) = d11 * vfac1 + d21 * vfac2 - v(2) = d12 * vfac1 + d22 * vfac2 - v(3) = d13 * vfac1 + d23 * vfac2 + rx = d11 * xfac1 + d21 * xfac2 + ry = d12 * xfac1 + d22 * xfac2 + rz = d13 * xfac1 + d23 * xfac2 + vx = d11 * vfac1 + d21 * vfac2 + vy = d12 * vfac1 + d22 * vfac2 + vz = d13 * vfac1 + d23 * vfac2 return end subroutine swiftest_orbel_el2xv diff --git a/src/swiftest/swiftest_util.f90 b/src/swiftest/swiftest_util.f90 index c164a365a..f3ce3082f 100644 --- a/src/swiftest/swiftest_util.f90 +++ b/src/swiftest/swiftest_util.f90 @@ -1213,7 +1213,9 @@ module subroutine swiftest_util_get_energy_and_momentum_system(self, param) #else do concurrent (i = 1:npl, pl%lmask(i)) #endif - h(:) = pl%rb(:,i) .cross. pl%vb(:,i) + h(1) = pl%rb(2,i) * pl%vb(3,i) - pl%rb(3,i) * pl%vb(2,i) + h(2) = pl%rb(3,i) * pl%vb(1,i) - pl%rb(1,i) * pl%vb(3,i) + h(3) = pl%rb(1,i) * pl%vb(2,i) - pl%rb(2,i) * pl%vb(1,i) ! Angular momentum from orbit Lplorbit(:,i) = pl%mass(i) * h(:) @@ -1269,7 +1271,7 @@ module subroutine swiftest_util_get_energy_and_momentum_system(self, param) nbody_system%ke_orbit = 0.5_DP * (kecb + sum(kepl(1:npl), pl%lmask(1:npl))) #ifdef DOCONLOC - do concurrent (j = 1:NDIM) shared(nbody_system,pl,Lcborbit,Lplorbit) + do concurrent (j = 1:NDIM) shared(nbody_system,pl,Lcborbit,Lplorbit,npl) #else do concurrent (j = 1:NDIM) #endif diff --git a/src/whm/whm_gr.f90 b/src/whm/whm_gr.f90 index c46a5ce2d..6e23ca21b 100644 --- a/src/whm/whm_gr.f90 +++ b/src/whm/whm_gr.f90 @@ -84,14 +84,14 @@ pure module subroutine whm_gr_p4_pl(self, nbody_system, param, dt) if (self%nbody == 0) return - associate(pl => self) + associate(xj => self%xj, vj => self%vj, lmask => self%lmask, inv_c2 => param%inv_c2) npl = self%nbody #ifdef DOCONLOC - do concurrent(i = 1:npl, pl%lmask(i)) shared(pl,dt) + do concurrent(i = 1:npl, lmask(i)) shared(lmask, inv_c2, xj, vj,dt) #else - do concurrent(i = 1:npl, pl%lmask(i)) + do concurrent(i = 1:npl, lmask(i)) #endif - call swiftest_gr_p4_pos_kick(param, pl%xj(:, i), pl%vj(:, i), dt) + call swiftest_gr_p4_pos_kick(inv_c2, xj(1,i), xj(2,i), xj(3,i), vj(1,i), vj(2,i), vj(3,i), dt) end do end associate @@ -115,15 +115,15 @@ pure module subroutine whm_gr_p4_tp(self, nbody_system, param, dt) ! Internals integer(I4B) :: i, ntp - associate(tp => self) + associate(rh => self%rh, vh => self%vh, lmask => self%lmask, inv_c2 => param%inv_c2) ntp = self%nbody if (ntp == 0) return #ifdef DOCONLOC - do concurrent(i = 1:ntp, tp%lmask(i)) shared(tp,dt) + do concurrent(i = 1:ntp, lmask(i)) shared(lmask, rh, vh, inv_c2, dt) #else - do concurrent(i = 1:ntp, tp%lmask(i)) + do concurrent(i = 1:ntp, lmask(i)) #endif - call swiftest_gr_p4_pos_kick(param, tp%rh(:, i), tp%vh(:, i), dt) + call swiftest_gr_p4_pos_kick(inv_c2, rh(1,i), rh(2,i), rh(3,i), vh(1,i), vh(2,i), vh(3,i), dt) end do end associate