From 6219c3e7685d73131e00d243b07c8d0fb160415c Mon Sep 17 00:00:00 2001 From: David Minton Date: Wed, 14 Apr 2021 17:01:54 -0400 Subject: [PATCH] Rewriting drift functions with SIMD. Still can't get drift to vectorize due to implied flow dependence. Work in progress --- src/driftkick/drift.f90 | 9 ++++++ src/driftkick/kick.f90 | 5 +-- src/modules/swiftest_classes.f90 | 2 ++ src/modules/swiftest_globals.f90 | 12 ++++---- src/whm/whm_drift.f90 | 53 +++++++++++++++++++------------- 5 files changed, 51 insertions(+), 30 deletions(-) diff --git a/src/driftkick/drift.f90 b/src/driftkick/drift.f90 index 6a175c6fb..5998cec56 100644 --- a/src/driftkick/drift.f90 +++ b/src/driftkick/drift.f90 @@ -39,6 +39,7 @@ module pure subroutine drift_one(mu, x, v, dt, iflag) end subroutine drift_one pure subroutine drift_dan(mu, x0, v0, dt0, iflag) + !$omp declare simd(drift_dan) !! author: David A. Minton !! !! Perform Kepler drift, solving Kepler's equation in appropriate variables @@ -108,6 +109,7 @@ pure subroutine drift_dan(mu, x0, v0, dt0, iflag) end subroutine drift_dan pure subroutine drift_kepmd(dm, es, ec, x, s, c) + !$omp declare simd(drift_kepmd) !! author: David A. Minton !! !! Solve Kepler's equation in difference form for an ellipse for small input dm and eccentricity @@ -146,6 +148,7 @@ pure subroutine drift_kepmd(dm, es, ec, x, s, c) end subroutine drift_kepmd pure subroutine drift_kepu(dt,r0,mu,alpha,u,fp,c1,c2,c3,iflag) + !$omp declare simd(drift_kepu) !! author: David A. Minton !! !! Solve Kepler's equation in universal variables @@ -173,6 +176,7 @@ pure subroutine drift_kepu(dt,r0,mu,alpha,u,fp,c1,c2,c3,iflag) end subroutine drift_kepu pure subroutine drift_kepu_fchk(dt, r0, mu, alpha, u, s, f) + !$omp declare simd(drift_kepu_fchk) !! author: David A. Minton !! !! Computes the value of f, the function whose root we are trying to find in universal variables @@ -195,6 +199,7 @@ pure subroutine drift_kepu_fchk(dt, r0, mu, alpha, u, s, f) end subroutine drift_kepu_fchk pure subroutine drift_kepu_guess(dt, r0, mu, alpha, u, s) + !$omp declare simd(drift_kepu_guess) !! author: David A. Minton !! !! Compute initial guess for solving Kepler's equation using universal variables @@ -232,6 +237,7 @@ pure subroutine drift_kepu_guess(dt, r0, mu, alpha, u, s) end subroutine drift_kepu_guess pure subroutine drift_kepu_lag(s, dt, r0, mu, alpha, u, fp, c1, c2, c3, iflag) + !$omp declare simd(drift_kepu_lag) !! author: David A. Minton !! !! Solve Kepler's equation in universal variables using Laguerre's method @@ -276,6 +282,7 @@ pure subroutine drift_kepu_lag(s, dt, r0, mu, alpha, u, fp, c1, c2, c3, iflag) end subroutine drift_kepu_lag pure subroutine drift_kepu_new(s, dt, r0, mu, alpha, u, fp, c1, c2, c3, iflag) + !$omp declare simd(drift_kepu_new) !! author: David A. Minton !! !! Solve Kepler's equation in universal variables using Newton's method @@ -317,6 +324,7 @@ pure subroutine drift_kepu_new(s, dt, r0, mu, alpha, u, fp, c1, c2, c3, iflag) end subroutine drift_kepu_new pure subroutine drift_kepu_p3solve(dt, r0, mu, alpha, u, s, iflag) + !$omp declare simd(drift_kepu_p3solve) !! author: David A. Minton !! !! Computes real root of cubic involved in setting initial guess for solving Kepler's equation in universal variables @@ -360,6 +368,7 @@ pure subroutine drift_kepu_p3solve(dt, r0, mu, alpha, u, s, iflag) end subroutine drift_kepu_p3solve pure subroutine drift_kepu_stumpff(x, c0, c1, c2, c3) + !$omp declare simd(drift_kepu_stumpff) !! author: David A. Minton !! !! Compute Stumpff functions needed for Kepler drift in universal variables diff --git a/src/driftkick/kick.f90 b/src/driftkick/kick.f90 index 32888c9a2..8c428f76c 100644 --- a/src/driftkick/kick.f90 +++ b/src/driftkick/kick.f90 @@ -17,8 +17,9 @@ module subroutine kick_vh_body(self, dt) associate(n => self%nbody, vh => self%vh, ah => self%ah, status => self%status) if (n == 0) return - do concurrent(i = 1:n, status(i) == ACTIVE) - vh(:, i) = vh(:, i) + ah(:, i) * dt + !$omp simd + do i = 1, n + if (status(i) == ACTIVE) vh(:, i) = vh(:, i) + ah(:, i) * dt end do end associate diff --git a/src/modules/swiftest_classes.f90 b/src/modules/swiftest_classes.f90 index 8b9c23b70..342e886b7 100644 --- a/src/modules/swiftest_classes.f90 +++ b/src/modules/swiftest_classes.f90 @@ -350,6 +350,7 @@ module subroutine discard_system(self, config) end subroutine discard_system module pure subroutine drift_one(mu, x, v, dt, iflag) + !$omp declare simd(drift_one) implicit none real(DP), intent(in) :: mu !! G * (Mcb + m), G = gravitational constant, Mcb = mass of central body, m = mass of body to drift real(DP), dimension(:), intent(inout) :: x, v !! Position and velocity of body to drift @@ -592,6 +593,7 @@ module subroutine orbel_el2xv_vec(self, cb) end subroutine orbel_el2xv_vec module pure subroutine orbel_scget(angle, sx, cx) + !$omp declare simd(orbel_scget) implicit none real(DP), intent(in) :: angle real(DP), intent(out) :: sx, cx diff --git a/src/modules/swiftest_globals.f90 b/src/modules/swiftest_globals.f90 index 1a17a0733..b3fe78494 100644 --- a/src/modules/swiftest_globals.f90 +++ b/src/modules/swiftest_globals.f90 @@ -17,12 +17,12 @@ module swiftest_globals integer, parameter :: DP = real64 !! Symbolic name for kind types of double-precision reals integer, parameter :: QP = real128 !! Symbolic name for kind types of quad-precision reals - real(QP), parameter :: PIBY2 = 1.570796326794896619231321691639751442099_QP !! Definition of /(\pi / 2\) - real(QP), parameter :: PI = 3.141592653589793238462643383279502884197_QP !! Definition of /(\pi\) - real(QP), parameter :: PI3BY2 = 4.712388980384689857693965074919254326296_QP !! Definition of /(3 \pi / 2\) - real(QP), parameter :: TWOPI = 6.283185307179586476925286766559005768394_QP !! Definition of 2 \pi - real(QP), parameter :: THIRD = 0.333333333333333333333333333333333333333_QP !! Definition of 1 / 3 - real(QP), parameter :: DEGRAD = 180.0_DP/PI !! Definition of conversion factor from degrees to radians + real(DP), parameter :: PIBY2 = 1.570796326794896619231321691639751442099_DP !! Definition of /(\pi / 2\) + real(DP), parameter :: PI = 3.141592653589793238462643383279502884197_DP !! Definition of /(\pi\) + real(DP), parameter :: PI3BY2 = 4.712388980384689857693965074919254326296_DP !! Definition of /(3 \pi / 2\) + real(DP), parameter :: TWOPI = 6.283185307179586476925286766559005768394_DP !! Definition of 2 \pi + real(DP), parameter :: THIRD = 0.333333333333333333333333333333333333333_DP !! Definition of 1 / 3 + real(DP), parameter :: DEGRAD = 180.0_DP/PI !! Definition of conversion factor from degrees to radians integer(I4B), parameter :: LOWERCASE_BEGIN = iachar('a') !! ASCII character set parameter for lower to upper conversion - start of lowercase integer(I4B), parameter :: LOWERCASE_END = iachar('z') !! ASCII character set parameter for lower to upper conversion - end of lowercase diff --git a/src/whm/whm_drift.f90 b/src/whm/whm_drift.f90 index e595edb7e..85997fa0c 100644 --- a/src/whm/whm_drift.f90 +++ b/src/whm/whm_drift.f90 @@ -16,8 +16,9 @@ module subroutine whm_drift_pl(self, cb, config, dt) real(DP), intent(in) :: dt !! Stepsize ! Internals integer(I4B) :: i - real(DP) :: dtp, energy, vmag2, rmag !! Variables used in GR calculation + real(DP) :: energy, vmag2, rmag !! Variables used in GR calculation integer(I4B), dimension(:), allocatable :: iflag + real(DP), dimension(:), allocatable :: dtp associate(npl => self%nbody, & xj => self%xj, & @@ -29,17 +30,22 @@ module subroutine whm_drift_pl(self, cb, config, dt) allocate(iflag(npl)) iflag(:) = 0 - - do i = 1, npl - if (config%lgr) then + allocate(dtp(npl)) + + if (config%lgr) then + do i = 1,npl rmag = norm2(xj(:, i)) vmag2 = dot_product(vj(:, i), vj(:, i)) energy = 0.5_DP * vmag2 - mu(i) / rmag - dtp = dt * (1.0_DP + 3 * config%inv_c2 * energy) - else - dtp = dt - end if - call drift_one(mu(i), xj(:, i), vj(:, i), dtp, iflag(i)) + dtp(i) = dt * (1.0_DP + 3 * config%inv_c2 * energy) + end do + else + dtp(:) = dt + end if + + !$omp simd + do i = 1, npl + call drift_one(mu(i), xj(:, i), vj(:, i), dtp(i), iflag(i)) end do if (any(iflag(1:npl) /= 0)) then do i = 1, npl @@ -72,8 +78,9 @@ module subroutine whm_drift_tp(self, cb, config, dt) real(DP), intent(in) :: dt !! Stepsize ! Internals integer(I4B) :: i - real(DP) :: dtp, energy, vmag2, rmag !! Variables used in GR calculation + real(DP) :: energy, vmag2, rmag !! Variables used in GR calculation integer(I4B), dimension(:), allocatable :: iflag + real(DP), dimension(:), allocatable :: dtp associate(ntp => self%nbody, & xh => self%xh, & @@ -83,21 +90,23 @@ module subroutine whm_drift_tp(self, cb, config, dt) if (ntp == 0) return allocate(iflag(ntp)) iflag(:) = 0 + allocate(dtp(ntp)) + if (config%lgr) then + do i = 1,ntp + rmag = norm2(xh(:, i)) + vmag2 = dot_product(vh(:, i), vh(:, i)) + energy = 0.5_DP * vmag2 - cb%Gmass / rmag + dtp(i) = dt * (1.0_DP + 3 * config%inv_c2 * energy) + end do + else + dtp(:) = dt + end if + !$omp simd do i = 1,ntp - if (status(i) == ACTIVE) then - if (config%lgr) then - rmag = norm2(xh(:, i)) - vmag2 = dot_product(vh(:, i), vh(:, i)) - energy = 0.5_DP * vmag2 - cb%Gmass / rmag - dtp = dt * (1.0_DP + 3 * config%inv_c2 * energy) - else - dtp = dt - end if - call drift_one(mu(i), xh(:, i), vh(:, i), dtp, iflag(i)) - if (iflag(i) /= 0) status = DISCARDED_DRIFTERR - end if + if (status(i) == ACTIVE) call drift_one(mu(i), xh(:, i), vh(:, i), dtp(i), iflag(i)) end do if (any(iflag(1:ntp) /= 0)) then + where(iflag(:) /= 0) status(:) = DISCARDED_DRIFTERR do i = 1, ntp if (iflag(i) /= 0) write(*, *) "Particle ", self%name(i), " lost due to error in Danby drift" end do