Skip to content
This repository was archived by the owner on Aug 28, 2024. It is now read-only.

Commit

Permalink
Improved the performance of the code significantly using SIMD and OpenMP
Browse files Browse the repository at this point in the history
  • Loading branch information
daminton committed Sep 13, 2021
1 parent 03c9926 commit 5c5d498
Show file tree
Hide file tree
Showing 12 changed files with 168 additions and 486 deletions.
24 changes: 11 additions & 13 deletions Makefile.Defines
Original file line number Diff line number Diff line change
Expand Up @@ -46,37 +46,35 @@ COLLRESOLVE_HOME = $(ROOT_DIR)/collresolve/
# DO NOT include in FFLAGS the "-c" option to compile object only
# this is done explicitly as needed in the Makefile
ADVIXE_DIR = /apps/cent7/intel/advisor_2019
ADVIXE_FLAGS = -g -O2 -qopt-report=5 -vec -vecabi=cmdtarget -simd -shared-intel -debug inline-debug-info -DTBB_DEBUG -DTBB_USE_THREADING_TOOLS -fp-model no-except -mp1 -xhost -traceback
ADVIXE_FLAGS = -g -O2 -qopt-report=5 -vecabi=cmdtarget -simd -shared-intel -debug inline-debug-info -DTBB_DEBUG -DTBB_USE_THREADING_TOOLS -xhost -traceback

VTUNE_FLAGS = -g -O2 -vec -simd -shared-intel -qopenmp -debug inline-debug-info -parallel-source-info=2 -parallel -DTBB_DEBUG -DTBB_USE_THREADING_TOOLS -qopenmp -fp-model no-except -mp1 -xhost -traceback
VTUNE_FLAGS = -g -O2 -qopt-report=5 -simd -shared-intel -qopenmp -debug inline-debug-info -parallel-source-info=2 -parallel -DTBB_DEBUG -DTBB_USE_THREADING_TOOLS -qopenmp -fp-model no-except -mp1 -xhost -traceback
#Be sure to set the environment variable KMP_FORKJOIN_FRAMES=1 for OpenMP debugging in VTune

IDEBUG = -O0 -init=snan,arrays -nogen-interfaces -no-pie -no-ftz -fpe-all=0 -g -traceback -mp1 -fp-model strict -fpe0 -debug all -align all -pad -ip -prec-div -prec-sqrt -assume protect-parens -CB -no-wrap-margin
STRICTREAL = -fp-model strict -fp-model no-except -prec-div -prec-sqrt -assume protect-parens
SIMDVEC = -simd -xhost -align all -assume contiguous_assumed_shape -vecabi=cmdtarget -prec-div -prec-sqrt -assume protect-parens
STRICTREAL = -fp-model strict -prec-div -prec-sqrt -assume protect-parens
SIMDVEC = -simd -xhost -align all -assume contiguous_assumed_shape -vecabi=cmdtarget -fp-model no-except
PAR = -qopenmp #-parallel #Something goes wrong in SyMBA at the moment with auto-parallelization enabled
HEAPARR = -heap-arrays 1048576
HEAPARR = -heap-arrays 4194304
OPTREPORT = -qopt-report=5
IPRODUCTION = -init=snan,arrays -no-wrap-margin -O3 $(STRICTREAL) $(PAR) $(SIMDVEC) $(HEAPARR)
IPRODUCTION = -init=snan,arrays -no-wrap-margin -O3 $(PAR) $(SIMDVEC) -fp-model fast #$(STRICTREAL) #$(HEAPARR)

#gfortran flags
GDEBUG = -g -Og -fbacktrace -fbounds-check -ffree-line-length-none
GPAR = -fopenmp #-ftree-parallelize-loops=4
GMEM = -fsanitize-address-use-after-scope -fstack-check -fsanitize=bounds-strict -fsanitize=undefined -fsanitize=signed-integer-overflow -fsanitize=object-size -fstack-protector-all
GWARNINGS = -Wall -Warray-bounds -Wimplicit-interface -Wextra -Warray-temporaries
GPRODUCTION = -O3 -ffree-line-length-none $(GPAR)
GPRODUCTION = -O2 -ffree-line-length-none $(GPAR)

#FFLAGS = $(IDEBUG) $(HEAPARR) $(SIMDVEC) $(PAR)
FFLAGS = $(IPRODUCTION) $(OPTREPORT)
#FFLAGS = $(IPRODUCTION) $(OPTREPORT)
#FFLAGS = $(IPRODUCTION) $(OPTREPORT) $(ADVIXE_FLAGS)
FFLAGS = -O3 $(PAR) $(OPTREPORT) $(SIMDVEC) -shared-intel -debug inline-debug-info -DTBB_DEBUG -DTBB_USE_THREADING_TOOLS -traceback -g
FORTRAN = ifort
#AR = xiar

#FORTRAN = gfortran
#FFLAGS = $(GDEBUG) $(GMEM) $(GPAR)
#FFLAGS = $(GPRODUCTION) -g -fbacktrace #-fcheck=all #-Wall
AR = ar

# DO NOT include in CFLAGS the "-c" option to compile object only
#FFLAGS = $(GPRODUCTION) -g -fbacktrace #-fcheck=all #-Wall
AR = ar

# DO NOT include in CFLAGS the "-c" option to compile object only
# this is done explicitly as needed in the Makefile

CC = icc
Expand Down
5 changes: 1 addition & 4 deletions docs/src/rmvs_encounter_check.f90
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,7 @@ module function rmvs_encounter_check_tp(self, system, dt) result(lencounter)
if ((.not.tp%lmask(i)).or.(tp%plencP(i) /= 0)) cycle
xr(:) = tp%xh(:, i) - pl%xbeg(:, j)
vr(:) = tp%vh(:, i) - pl%vbeg(:, j)
r2 = dot_product(xr(:), xr(:))
v2 = dot_product(vr(:), vr(:))
vdotr = dot_product(vr(:), xr(:))
lflag = rmvs_chk_ind(r2, v2, vdotr, dt, r2crit(j))
lflag = rmvs_chk_ind(xr(1), xr(2), xr(3), vr(1), vr(2), vr(3), dt, r2crit(j))
if (lflag) tp%plencP(i) = j
end do
pl%nenc(j) = count(tp%plencP(:) == j)
Expand Down
26 changes: 13 additions & 13 deletions docs/src/symba_encounter_check.f90
Original file line number Diff line number Diff line change
Expand Up @@ -202,20 +202,20 @@ module pure elemental subroutine symba_encounter_check_one(xr, yr, zr, vxr, vyr,
!! Adapted from Hal Levison's Swift routine symba5_chk.f
implicit none
! Arguments
real(DP), intent(in) :: xr, yr, zr, vxr, vyr, vzr
real(DP), intent(in) :: rhill1, rhill2, dt
integer(I4B), intent(in) :: irec
logical, intent(out) :: lencounter, lvdotr
real(DP), intent(in) :: xr, yr, zr !! Relative distance vector components
real(DP), intent(in) :: vxr, vyr, vzr !! Relative velocity vector components
real(DP), intent(in) :: rhill1, rhill2 !! Hill spheres of the two bodies
real(DP), intent(in) :: dt !! Step size
integer(I4B), intent(in) :: irec !! Current SyMBA recursion level
real(DP), intent(in) :: r2crit !! Square of the critical encounter distance
logical, intent(out) :: lencounter !! Flag indicating that an encounter has occurred
logical, intent(out) :: lvdotr !! Logical flag indicating the sign of v .dot. r
! Internals
real(DP) :: r2, v2, rcrit, r2crit, vdotr

rcrit = (rhill1 + rhill2)*RHSCALE*(RSHELL**(irec))
r2crit = rcrit**2
r2 = xr**2 + yr**2 + zr**2
v2 = vxr**2 + vyr**2 + vzr**2
vdotr = xr * vxr + yr * vyr + zr * vzr
lencounter = rmvs_chk_ind(r2, v2, vdotr, dt, r2crit)
lvdotr = (vdotr < 0.0_DP)
real(DP) :: r2crit

r2crit = (rhill1 + rhill2)*RHSCALE*(RSHELL**(irec))
r2crit = r2crit**2
call rmvs_chk_ind(xr, yr, zr, vxr, vyr, vzr, dt, r2crit, lencounter, lvdotr)

return
end subroutine symba_encounter_check_one
Expand Down
Loading

0 comments on commit 5c5d498

Please sign in to comment.