Skip to content
This repository was archived by the owner on Aug 28, 2024. It is now read-only.

Commit

Permalink
Made a number of changes to improve performance, such as using the In…
Browse files Browse the repository at this point in the history
…tel MKL libraries and compiling some subroutines with fast math options
  • Loading branch information
daminton committed Sep 14, 2021
1 parent 4bfcd59 commit 3d2a916
Show file tree
Hide file tree
Showing 7 changed files with 71 additions and 35 deletions.
56 changes: 41 additions & 15 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -59,25 +59,30 @@ SWIFTEST_MODULES = swiftest_globals.f90 \

include Makefile.Defines

MKL_ROOT = /apps/spack/bell/apps/intel-parallel-studio/cluster.2019.5-intel-19.0.5-4brgqlf/mkl/lib
IMKL = -I$(MKLROOT)/include
LMKL = -L$(MKLROOT)/lib/intel64 -qopt-matmul

MODULES = $(SWIFTEST_MODULES) $(USER_MODULES)

.PHONY : all mod lib libdir drivers bin clean force
.PHONY : all mod lib libdir fast drivers bin clean force

% : %.f90 force
$(FORTRAN) $(FFLAGS) -I$(SWIFTEST_HOME)/include -I$(NETCDF_FORTRAN_HOME)/include $< -o $@ \
-L$(SWIFTEST_HOME)/lib -lswiftest -L$(NETCDF_FORTRAN_HOME)/lib -lnetcdf -lnetcdff
$(FORTRAN) $(FFLAGS) -I$(SWIFTEST_HOME)/include -I$(NETCDF_FORTRAN_HOME)/include $(IMKL) $< -o $@ \
-L$(SWIFTEST_HOME)/lib -lswiftest -L$(NETCDF_FORTRAN_HOME)/lib -lnetcdf -lnetcdff $(LMKL)
$(INSTALL_PROGRAM) $@ $(SWIFTEST_HOME)/bin
rm -f $@

all:
cd $(SWIFTEST_HOME); \
make mod; \
make lib; \
make fast; \
make drivers; \

mod:
cd $(SWIFTEST_HOME)/src/modules/; \
$(FORTRAN) $(FFLAGS) -I$(SWIFTEST_HOME)/include -I$(NETCDF_FORTRAN_HOME)/include -c $(MODULES); \
$(FORTRAN) $(FFLAGS) -I$(SWIFTEST_HOME)/include -I$(NETCDF_FORTRAN_HOME)/include $(IMKL) -c $(MODULES); \
$(AR) rv $(SWIFTEST_HOME)/lib/libswiftest.a *.o; \
$(INSTALL_DATA) *.mod *.smod $(SWIFTEST_HOME)/include; \
rm -f *.o *.mod *.smod
Expand All @@ -93,11 +98,6 @@ lib:
ln -s $(SWIFTEST_HOME)/Makefile.Defines .; \
ln -s $(SWIFTEST_HOME)/Makefile .; \
make libdir
cd $(SWIFTEST_HOME)/src/fraggle; \
rm -f Makefile.Defines Makefile; \
ln -s $(SWIFTEST_HOME)/Makefile.Defines .; \
ln -s $(SWIFTEST_HOME)/Makefile .; \
make libdir
cd $(SWIFTEST_HOME)/src/gr; \
rm -f Makefile.Defines Makefile; \
ln -s $(SWIFTEST_HOME)/Makefile.Defines .; \
Expand Down Expand Up @@ -143,11 +143,6 @@ lib:
ln -s $(SWIFTEST_HOME)/Makefile.Defines .; \
ln -s $(SWIFTEST_HOME)/Makefile .; \
make libdir
cd $(SWIFTEST_HOME)/src/util; \
rm -f Makefile.Defines Makefile; \
ln -s $(SWIFTEST_HOME)/Makefile.Defines .; \
ln -s $(SWIFTEST_HOME)/Makefile .; \
make libdir
cd $(SWIFTEST_HOME)/src/whm; \
rm -f Makefile.Defines Makefile; \
ln -s $(SWIFTEST_HOME)/Makefile.Defines .; \
Expand All @@ -174,8 +169,39 @@ lib:
ln -s $(SWIFTEST_HOME)/Makefile .; \
make libdir

fast:
cd $(SWIFTEST_HOME)/src/fraggle; \
rm -f Makefile.Defines Makefile; \
ln -s $(SWIFTEST_HOME)/Makefile.Defines .; \
ln -s $(SWIFTEST_HOME)/Makefile .; \
make fastdir

cd $(SWIFTEST_HOME)/src/util; \
rm -f Makefile.Defines Makefile; \
ln -s $(SWIFTEST_HOME)/Makefile.Defines .; \
ln -s $(SWIFTEST_HOME)/Makefile .; \
make fastdir

cd $(SWIFTEST_HOME)/src/rmvs; \
$(FORTRAN) $(FFASTFLAGS) -I$(SWIFTEST_HOME)/include -I$(NETCDF_FORTRAN_HOME)/include $(IMKL) -c rmvs_encounter_check.f90; \
$(AR) rv $(SWIFTEST_HOME)/lib/libswiftest.a *.o *.smod; \
$(INSTALL_DATA) *.smod $(SWIFTEST_HOME)/include; \
rm -f *.o *.smod

cd $(SWIFTEST_HOME)/src/symba; \
$(FORTRAN) $(FFASTFLAGS) -I$(SWIFTEST_HOME)/include -I$(NETCDF_FORTRAN_HOME)/include $(IMKL) -c symba_encounter_check.f90; \
$(AR) rv $(SWIFTEST_HOME)/lib/libswiftest.a *.o *.smod; \
$(INSTALL_DATA) *.smod $(SWIFTEST_HOME)/include; \
rm -f *.o *.smod

libdir:
$(FORTRAN) $(FFLAGS) -I$(SWIFTEST_HOME)/include -I$(NETCDF_FORTRAN_HOME)/include -c *.f90; \
$(FORTRAN) $(FFLAGS) -I$(SWIFTEST_HOME)/include -I$(NETCDF_FORTRAN_HOME)/include $(IMKL) -c *.f90; \
$(AR) rv $(SWIFTEST_HOME)/lib/libswiftest.a *.o *.smod; \
$(INSTALL_DATA) *.smod $(SWIFTEST_HOME)/include; \
rm -f *.o *.smod

fastdir:
$(FORTRAN) $(FFASTFLAGS) -I$(SWIFTEST_HOME)/include -I$(NETCDF_FORTRAN_HOME)/include $(IMKL) -c *.f90; \
$(AR) rv $(SWIFTEST_HOME)/lib/libswiftest.a *.o *.smod; \
$(INSTALL_DATA) *.smod $(SWIFTEST_HOME)/include; \
rm -f *.o *.smod
Expand Down
8 changes: 5 additions & 3 deletions Makefile.Defines
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,10 @@ VTUNE_FLAGS = -g -O2 -qopt-report=5 -simd -shared-intel -qopenmp -debug inline-d
IDEBUG = -O0 -init=snan,arrays -nogen-interfaces -no-pie -no-ftz -fpe-all=0 -g -traceback -mp1 -fp-model strict -fpe0 -debug all -align all -pad -ip -prec-div -prec-sqrt -assume protect-parens -CB -no-wrap-margin
STRICTREAL = -fp-model strict -prec-div -prec-sqrt -assume protect-parens
SIMDVEC = -simd -xhost -align all -assume contiguous_assumed_shape -vecabi=cmdtarget -fp-model no-except
PAR = -qopenmp #-parallel #Something goes wrong in SyMBA at the moment with auto-paralellization enabled
PAR = -qopenmp -parallel #Something goes wrong in SyMBA at the moment with auto-paralellization enabled
HEAPARR = -heap-arrays 4194304
OPTREPORT = -qopt-report=5
IPRODUCTION = -init=snan,arrays -no-wrap-margin -O3 $(PAR) $(SIMDVEC) $(STRICTREAL) #$(HEAPARR)
IPRODUCTION = -no-wrap-margin -O3 $(PAR) $(SIMDVEC) #$(HEAPARR)

#gfortran flags
GDEBUG = -g -Og -fbacktrace -fbounds-check -ffree-line-length-none
Expand All @@ -66,8 +66,10 @@ GMEM = -fsanitize-address-use-after-scope -fstack-check -fsanitize=bounds-stri
GWARNINGS = -Wall -Warray-bounds -Wimplicit-interface -Wextra -Warray-temporaries
GPRODUCTION = -O2 -ffree-line-length-none $(GPAR)


#FFLAGS = $(IDEBUG) $(SIMDVEC) $(PAR)
FFLAGS = $(IPRODUCTION) $(OPTREPORT) #$(ADVIXE_FLAGS)
FFLAGS = $(IPRODUCTION) $(OPTREPORT) $(STRICTREAL) $(ADVIXE_FLAGS)
FFASTFLAGS = -no-wrap-margin -O3 -fp-model fast $(SIMDVEC) $(OPTREPORT) $(PAR)
FORTRAN = ifort
#AR = xiar

Expand Down
10 changes: 5 additions & 5 deletions examples/symba_mars_disk/param.in
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
!Parameter file for the SyMBA-RINGMOONS test
T0 0.0
TSTOP 1.0e12
TSTOP 60000.0
DT 600.0
CB_IN cb.in
PL_IN mars.in
TP_IN tp.in
IN_TYPE ASCII
ISTEP_OUT 1
ISTEP_DUMP 1
ISTEP_OUT 100
ISTEP_DUMP 100
BIN_OUT bin.nc
PARTICLE_OUT particle.dat
OUT_TYPE NETCDF_DOUBLE
OUT_FORM XVEL
OUT_TYPE REAL8
OUT_FORM XV
OUT_STAT REPLACE
CHK_CLOSE yes
CHK_RMIN 3389500.0
Expand Down

Large diffs are not rendered by default.

10 changes: 7 additions & 3 deletions src/fraggle/fraggle_generate.f90
Original file line number Diff line number Diff line change
Expand Up @@ -545,14 +545,18 @@ function radial_objective_function(v_r_mag_input) result(fval)
integer(I4B) :: i
real(DP), dimension(:,:), allocatable :: v_shift
real(DP), dimension(frag%nbody) :: kearr
real(DP) :: keo, ke_radial
real(DP) :: keo, ke_radial, rotmag2, vmag2

associate(nfrag => frag%nbody)
allocate(v_shift, mold=frag%vb)
v_shift(:,:) = fraggle_util_vmag_to_vb(v_r_mag_input, frag%v_r_unit, frag%v_t_mag, frag%v_t_unit, frag%mass, frag%vbcom)
do concurrent(i = 1:nfrag)
kearr(i) = frag%mass(i) * (frag%Ip(3, i) * frag%radius(i)**2 * dot_product(frag%rot(:, i), frag%rot(:, i)) + dot_product(v_shift(:, i), v_shift(:, i)))
!$omp do simd
do i = 1,nfrag
rotmag2 = frag%rot(1,i)**2 + frag%rot(2,i)**2 + frag%rot(3,i)**2
vmag2 = v_shift(1,i)**2 + v_shift(2,i)**2 + v_shift(3,i)**2
kearr(i) = frag%mass(i) * (frag%Ip(3, i) * frag%radius(i)**2 * rotmag2 + vmag2)
end do
!$omp end do simd
keo = 2 * frag%ke_budget - sum(kearr(:))
ke_radial = frag%ke_budget - frag%ke_orbit - frag%ke_spin
! The following ensures that fval = 0 is a local minimum, which is what the BFGS method is searching for
Expand Down
5 changes: 2 additions & 3 deletions src/kick/kick.f90
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ module subroutine kick_getacch_int_all_pl(npl, nplpl, k_plpl, x, Gmass, radius,
ahi(:,:) = 0.0_DP
ahj(:,:) = 0.0_DP

!$omp parallel do simd default(private) schedule(static)&
!$omp parallel do default(private) schedule(static)&
!$omp shared(nplpl, k_plpl, x, Gmass, radius) &
!$omp lastprivate(rji2, rlim2, xr, yr, zr) &
!$omp reduction(+:ahi) &
Expand All @@ -81,8 +81,7 @@ module subroutine kick_getacch_int_all_pl(npl, nplpl, k_plpl, x, Gmass, radius,
rlim2 = (radius(i) + radius(j))**2
if (rji2 > rlim2) call kick_getacch_int_one_pl(rji2, xr, yr, zr, Gmass(i), Gmass(j), ahi(1,i), ahi(2,i), ahi(3,i), ahj(1,j), ahj(2,j), ahj(3,j))
end do
!$omp end parallel do simd

!$omp end parallel do

do concurrent(i = 1:npl)
acc(:,i) = acc(:,i) + ahi(:,i) + ahj(:,i)
Expand Down
9 changes: 7 additions & 2 deletions src/symba/symba_encounter_check.f90
Original file line number Diff line number Diff line change
Expand Up @@ -284,9 +284,14 @@ module pure subroutine symba_encounter_check_one(xr, yr, zr, vxr, vyr, vzr, rhil
integer(I4B), intent(in) :: irec
logical, intent(out) :: lencounter, lvdotr
! Internals
real(DP) :: r2crit
real(DP) :: r2crit, rshell_irec
integer(I4B) :: i

r2crit = (rhill1 + rhill2)*RHSCALE*(RSHELL**(irec))
rshell_irec = 1._DP
do i = 1, irec
rshell_irec = rshell_irec * RSHELL
end do
r2crit = (rhill1 + rhill2) * RHSCALE * rshell_irec
r2crit = r2crit**2
call rmvs_chk_ind(xr, yr, zr, vxr, vyr, vzr, dt, r2crit, lencounter, lvdotr)

Expand Down

0 comments on commit 3d2a916

Please sign in to comment.