Skip to content
This repository was archived by the owner on Aug 28, 2024. It is now read-only.

Commit

Permalink
Enabled OpenMP loops for interaction accelrations and encounter check…
Browse files Browse the repository at this point in the history
…s. Performance is significantly improved.
  • Loading branch information
daminton committed Aug 22, 2021
1 parent e61118b commit 3b942e1
Show file tree
Hide file tree
Showing 8 changed files with 250 additions and 112 deletions.
10 changes: 5 additions & 5 deletions Makefile.Defines
Original file line number Diff line number Diff line change
Expand Up @@ -67,13 +67,13 @@ GWARNINGS = -Wall -Warray-bounds -Wimplicit-interface -Wextra -Warray-temporari
GPRODUCTION = -O3 -ffree-line-length-none $(GPAR)

#FFLAGS = $(IDEBUG) $(HEAPARR) $(SIMDVEC) $(PAR)
#FFLAGS = $(IPRODUCTION)
#FORTRAN = ifort
FFLAGS = $(IPRODUCTION) $(OPTREPORT)
FORTRAN = ifort
#AR = xiar

FORTRAN = gfortran
#FFLAGS = $(GDEBUG) #$(GMEM) #$(GPAR)
FFLAGS = $(GPRODUCTION)
#FORTRAN = gfortran
#FFLAGS = $(GDEBUG) $(GMEM) $(GPAR)
#FFLAGS = $(GPRODUCTION)
AR = ar

# DO NOT include in CFLAGS the "-c" option to compile object only
Expand Down
4 changes: 2 additions & 2 deletions examples/symba_mars_disk/param.in
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ CB_IN cb.in
PL_IN mars.in
TP_IN tp.in
IN_TYPE ASCII
ISTEP_OUT 100
ISTEP_DUMP 100
ISTEP_OUT 10
ISTEP_DUMP 10
BIN_OUT bin.dat
PARTICLE_OUT particle.dat
OUT_TYPE REAL8
Expand Down
118 changes: 84 additions & 34 deletions src/kick/kick.f90
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
use swiftest
contains

module pure subroutine kick_getacch_int_pl(self)
module subroutine kick_getacch_int_pl(self)
!! author: David A. Minton
!!
!! Compute direct cross (third) term heliocentric accelerations of massive bodies
Expand All @@ -13,33 +13,35 @@ module pure subroutine kick_getacch_int_pl(self)
! Arguments
class(swiftest_pl), intent(inout) :: self
! Internals
integer(I4B) :: k
real(DP) :: rji2, irij3, faci, facj, rlim2
integer(I8B) :: k, nplpl
real(DP) :: rji2, rlim2
real(DP) :: dx, dy, dz
integer(I4B) :: i, j
real(DP), dimension(:,:), pointer :: ah, xh
real(DP), dimension(NDIM,self%nbody) :: ahi, ahj
integer(I4B), dimension(:,:), pointer :: k_plpl
logical, dimension(:), pointer :: lmask
real(DP), dimension(:), pointer :: Gmass

associate(pl => self, npl => self%nbody, nplpl => self%nplpl)
do k = 1, nplpl
associate(i => pl%k_plpl(1, k), j => pl%k_plpl(2, k))
if (pl%lmask(i) .and. pl%lmask(j)) then
dx = pl%xh(1, j) - pl%xh(1, i)
dy = pl%xh(2, j) - pl%xh(2, i)
dz = pl%xh(3, j) - pl%xh(3, i)
rji2 = dx**2 + dy**2 + dz**2
rlim2 = (pl%radius(i) + pl%radius(j))**2
if (rji2 > rlim2) then
irij3 = 1.0_DP / (rji2 * sqrt(rji2))
faci = pl%Gmass(i) * irij3
facj = pl%Gmass(j) * irij3
pl%ah(1, i) = pl%ah(1, i) + facj * dx
pl%ah(2, i) = pl%ah(2, i) + facj * dy
pl%ah(3, i) = pl%ah(3, i) + facj * dz
pl%ah(1, j) = pl%ah(1, j) - faci * dx
pl%ah(2, j) = pl%ah(2, j) - faci * dy
pl%ah(3, j) = pl%ah(3, j) - faci * dz
end if
end if
end associate
associate(ah => self%ah, xh => self%xh, k_plpl => self%k_plpl, lmask => self%lmask, Gmass => self%Gmass)
nplpl = self%nplpl
ahi(:,:) = 0.0_DP
ahj(:,:) = 0.0_DP
!$omp parallel do default(shared)&
!$omp private(k, i, j, dx, dy, dz, rji2) &
!$omp reduction(+:ahi) &
!$omp reduction(-:ahj)
do k = 1_I8B, nplpl
i = k_plpl(1,k)
j = k_plpl(2,k)
dx = xh(1, j) - xh(1, i)
dy = xh(2, j) - xh(2, i)
dz = xh(3, j) - xh(3, i)
rji2 = dx**2 + dy**2 + dz**2
if (lmask(i) .and. lmask(j)) call kick_getacch_int_one_pl(rji2, dx, dy, dz, Gmass(i), Gmass(j), ahi(1,i), ahi(2,i), ahi(3,i), ahj(1,j), ahj(2,j), ahj(3,j))
end do
!$omp end parallel do
ah(:,:) = ah(:,:) + ahi(:,:) + ahj(:,:)
end associate

return
Expand All @@ -61,23 +63,71 @@ module pure subroutine kick_getacch_int_tp(self, GMpl, xhp, npl)
integer(I4B), intent(in) :: npl !! Number of active massive bodies
! Internals
integer(I4B) :: i, j
real(DP) :: rji2, irij3, fac, r2
real(DP), dimension(NDIM) :: dx
!real(DP), dimension(:,:), allocatable :: aht

if ((self%nbody == 0) .or. (npl == 0)) return

associate(tp => self, ntp => self%nbody)
do concurrent(i = 1:ntp, tp%lmask(i))
do j = 1, npl
dx(:) = tp%xh(:,i) - xhp(:, j)
r2 = dot_product(dx(:), dx(:))
fac = GMpl(j) / (r2 * sqrt(r2))
tp%ah(:, i) = tp%ah(:, i) - fac * dx(:)
end do
do i = 1, ntp
if (tp%lmask(i)) then
do j = 1, npl
block
real(DP) :: rji2
real(DP) :: dx, dy, dz
dx = tp%xh(1, i) - xhp(1, j)
dy = tp%xh(2, i) - xhp(1, j)
dz = tp%xh(3, i) - xhp(1, j)
rji2 = dx**2 + dy**2 + dz**2
call kick_getacch_int_one_tp(rji2, dx, dy, dz, GMpl(i), tp%ah(1,i), tp%ah(2,i), tp%ah(3,i))
end block
end do
end if
end do
!call move_alloc(aht, tp%ah)
end associate

return
end subroutine kick_getacch_int_tp

module pure elemental subroutine kick_getacch_int_one_pl(rji2, dx, dy, dz, Gmi, Gmj, axi, ayi, azi, axj, ayj, azj)
implicit none
real(DP), intent(in) :: rji2
real(DP), intent(in) :: dx, dy, dz
real(DP), intent(in) :: Gmi
real(DP), intent(in) :: Gmj
real(DP), intent(inout) :: axi, ayi, azi
real(DP), intent(inout) :: axj, ayj, azj
! Internals
real(DP) :: faci, facj, irij3

irij3 = 1.0_DP / (rji2 * sqrt(rji2))
faci = Gmi * irij3
facj = Gmj * irij3
axi = axi + facj * dx
ayi = ayi + facj * dy
azi = azi + facj * dz
axj = axj - faci * dx
ayj = ayj - faci * dy
azj = azj - faci * dz
return
end subroutine kick_getacch_int_one_pl


!module pure elemental subroutine kick_getacch_int_one_tp(rji2, dx, dy, dz, GMpl, ax, ay, az)
module pure subroutine kick_getacch_int_one_tp(rji2, dx, dy, dz, GMpl, ax, ay, az)
implicit none
real(DP), intent(in) :: rji2
real(DP), intent(in) :: dx, dy, dz
real(DP), intent(in) :: GMpl
real(DP), intent(inout) :: ax, ay, az
! Internals
real(DP) :: fac

fac = GMpl / (rji2 * sqrt(rji2))
ax = ax - fac * dx
ay = ay - fac * dy
az = az - fac * dz
return
end subroutine kick_getacch_int_one_tp

end submodule s_kick
32 changes: 31 additions & 1 deletion src/modules/swiftest_classes.f90
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,8 @@ module swiftest_classes
integer(I4B), dimension(:), allocatable :: isperi !! Perihelion passage flag
real(DP), dimension(:), allocatable :: peri !! Perihelion distance
real(DP), dimension(:), allocatable :: atp !! Semimajor axis following perihelion passage
integer(I4B), dimension(:,:), allocatable :: k_pltp !! Index array used to convert flattened the body-body comparison upper triangular matrix
integer(I8B) :: npltp !! Number of pl-tp comparisons in the flattened upper triangular matrix
!! Note to developers: If you add components to this class, be sure to update methods and subroutines that traverse the
!! component list, such as setup_tp and util_spill_tp
contains
Expand All @@ -261,6 +263,7 @@ module swiftest_classes
procedure :: vb2vh => util_coord_vb2vh_tp !! Convert test particles from barycentric to heliocentric coordinates (velocity only)
procedure :: vh2vb => util_coord_vh2vb_tp !! Convert test particles from heliocentric to barycentric coordinates (velocity only)
procedure :: xh2xb => util_coord_xh2xb_tp !! Convert test particles from heliocentric to barycentric coordinates (position only)
!procedure :: index => util_index_eucl_pltp !! Sets up the (i, j) -> k indexing used for the single-loop blocking Euclidean distance matrix
procedure :: fill => util_fill_tp !! "Fills" bodies from one object into another depending on the results of a mask (uses the UNPACK intrinsic)
procedure :: get_peri => util_peri_tp !! Determine system pericenter passages for test particles
procedure :: resize => util_resize_tp !! Checks the current size of a Swiftest body against the requested size and resizes it if it is too small.
Expand Down Expand Up @@ -458,6 +461,13 @@ module subroutine util_index_eucl_plpl(self, param)
class(swiftest_parameters), intent(in) :: param !! Current run configuration parameters
end subroutine

module subroutine util_index_eucl_pltp(self, pl, param)
implicit none
class(swiftest_tp), intent(inout) :: self !! Swiftest test particle object
class(swiftest_pl), intent(in) :: pl !! Swiftest massive body object
class(swiftest_parameters), intent(in) :: param !! Current run configuration parameters
end subroutine

module subroutine fragmentation_initialize(system, param, family, x, v, L_spin, Ip, mass, radius, &
nfrag, Ip_frag, m_frag, rad_frag, xb_frag, vb_frag, rot_frag, Qloss, lfailure)
implicit none
Expand Down Expand Up @@ -701,7 +711,27 @@ module subroutine io_write_frame_system(self, iu, param)
class(swiftest_parameters), intent(in) :: param !! Current run configuration parameters
end subroutine io_write_frame_system

module pure subroutine kick_getacch_int_pl(self)
module pure elemental subroutine kick_getacch_int_one_pl(rji2, dx, dy, dz, Gmi, Gmj, axi, ayi, azi, axj, ayj, azj)
implicit none
real(DP), intent(in) :: rji2
real(DP), intent(in) :: dx, dy, dz
real(DP), intent(in) :: Gmi
real(DP), intent(in) :: Gmj
real(DP), intent(inout) :: axi, ayi, azi
real(DP), intent(inout) :: axj, ayj, azj
end subroutine kick_getacch_int_one_pl

!module pure elemental subroutine kick_getacch_int_one_tp(rji2, dx, dy, dz, Gmpl, ax, ay, az)
module pure subroutine kick_getacch_int_one_tp(rji2, dx, dy, dz, Gmpl, ax, ay, az)
!$omp declare simd(kick_getacch_int_one_tp)
implicit none
real(DP), intent(in) :: rji2
real(DP), intent(in) :: dx, dy, dz
real(DP), intent(in) :: Gmpl
real(DP), intent(inout) :: ax, ay, az
end subroutine kick_getacch_int_one_tp

module subroutine kick_getacch_int_pl(self)
implicit none
class(swiftest_pl), intent(inout) :: self
end subroutine kick_getacch_int_pl
Expand Down
2 changes: 1 addition & 1 deletion src/modules/symba_classes.f90
Original file line number Diff line number Diff line change
Expand Up @@ -406,7 +406,7 @@ module subroutine symba_io_read_particle(system, param)
class(symba_parameters), intent(inout) :: param !! Current run configuration parameters with SyMBA extensions
end subroutine symba_io_read_particle

module pure subroutine symba_kick_getacch_int_pl(self)
module subroutine symba_kick_getacch_int_pl(self)
implicit none
class(symba_pl), intent(inout) :: self
end subroutine symba_kick_getacch_int_pl
Expand Down
63 changes: 38 additions & 25 deletions src/symba/symba_encounter_check.f90
Original file line number Diff line number Diff line change
Expand Up @@ -16,24 +16,37 @@ module function symba_encounter_check_pl(self, system, dt, irec) result(lany_enc
! Result
logical :: lany_encounter !! Returns true if there is at least one close encounter
! Internals
integer(I8B) :: k
integer(I4B) :: nenc
real(DP), dimension(NDIM) :: xr, vr
logical, dimension(:), allocatable :: lencounter, loc_lvdotr
integer(I8B) :: k, nplplm
integer(I4B) :: i, j, nenc
real(DP) :: xr, yr, zr, vxr, vyr, vzr, rhill1, rhill2
logical, dimension(self%nplplm) :: lencounter, loc_lvdotr
real(DP), dimension(:,:), pointer :: xh, vh
real(DP), dimension(:), pointer :: rhill
integer(I4B), dimension(:,:), pointer :: k_plpl

if (self%nbody == 0) return

associate(pl => self, npl => self%nbody, nplplm => self%nplplm)
allocate(lencounter(nplplm), loc_lvdotr(nplplm))
associate(pl => self, xh => self%xh, vh => self%vh, rhill => self%rhill, npl => self%nbody, k_plpl => self%k_plpl)
nplplm = self%nplplm
lencounter(:) = .false.

do k = 1, nplplm
associate(i => pl%k_plpl(1, k), j => pl%k_plpl(2, k))
xr(:) = pl%xh(:, j) - pl%xh(:, i)
vr(:) = pl%vh(:, j) - pl%vh(:, i)
call symba_encounter_check_one(xr(1), xr(2), xr(3), vr(1), vr(2), vr(3), pl%rhill(i), pl%rhill(j), dt, irec, lencounter(k), loc_lvdotr(k))
end associate
loc_lvdotr(:) = .false.

!$omp parallel do default(shared)&
!$omp private(k, i, j, xr, yr, zr, vxr, vyr, vzr, rhill1, rhill2)
do k = 1_I8B, nplplm
i = k_plpl(1, k)
j = k_plpl(2, k)
xr = xh(1, j) - xh(1, i)
yr = xh(2, j) - xh(2, i)
zr = xh(3, j) - xh(3, i)
vxr = vh(1, j) - vh(1, i)
vyr = vh(2, j) - vh(2, i)
vzr = vh(3, j) - vh(3, i)
rhill1 = rhill(i)
rhill2 = rhill(j)
call symba_encounter_check_one(xr, yr, zr, vxr, vyr, vzr, rhill1, rhill2, dt, irec, lencounter(k), loc_lvdotr(k))
end do
!$omp end parallel do

nenc = count(lencounter(:))
lany_encounter = nenc > 0
Expand All @@ -46,18 +59,18 @@ module function symba_encounter_check_pl(self, system, dt, irec) result(lany_enc
plplenc_list%id1(1:nenc) = pl%id(plplenc_list%index1(1:nenc))
plplenc_list%id2(1:nenc) = pl%id(plplenc_list%index2(1:nenc))
do k = 1, nenc
associate(i => plplenc_list%index1(k), j => plplenc_list%index2(k))
plplenc_list%status(k) = ACTIVE
plplenc_list%level(k) = irec
pl%lencounter(i) = .true.
pl%lencounter(j) = .true.
pl%levelg(i) = irec
pl%levelm(i) = irec
pl%levelg(j) = irec
pl%levelm(j) = irec
pl%nplenc(i) = pl%nplenc(i) + 1
pl%nplenc(j) = pl%nplenc(j) + 1
end associate
i = plplenc_list%index1(k)
j = plplenc_list%index2(k)
plplenc_list%status(k) = ACTIVE
plplenc_list%level(k) = irec
pl%lencounter(i) = .true.
pl%lencounter(j) = .true.
pl%levelg(i) = irec
pl%levelm(i) = irec
pl%levelg(j) = irec
pl%levelm(j) = irec
pl%nplenc(i) = pl%nplenc(i) + 1
pl%nplenc(j) = pl%nplenc(j) + 1
end do
end associate
end if
Expand Down
Loading

0 comments on commit 3b942e1

Please sign in to comment.