From 406bb80fd30799bce679b77386758d7ee861da2c Mon Sep 17 00:00:00 2001 From: David A Minton Date: Tue, 21 Sep 2021 20:05:18 -0400 Subject: [PATCH 1/2] Fixed compiler flags to allow vectorized procedures --- Makefile | 17 +++++++++++++- Makefile.Defines | 2 +- examples/symba_mars_disk/param.in | 2 +- src/orbel/orbel.f90 | 38 +++++++++---------------------- 4 files changed, 29 insertions(+), 30 deletions(-) diff --git a/Makefile b/Makefile index c3cce41c2..5f3459f1a 100644 --- a/Makefile +++ b/Makefile @@ -170,12 +170,21 @@ fast: ln -s $(SWIFTEST_HOME)/Makefile.Defines .; \ ln -s $(SWIFTEST_HOME)/Makefile .; \ make fastdir - cd $(SWIFTEST_HOME)/src/util; \ rm -f Makefile.Defines Makefile; \ ln -s $(SWIFTEST_HOME)/Makefile.Defines .; \ ln -s $(SWIFTEST_HOME)/Makefile .; \ make fastdir + cd $(SWIFTEST_HOME)/src/orbel; \ + rm -f Makefile.Defines Makefile; \ + ln -s $(SWIFTEST_HOME)/Makefile.Defines .; \ + ln -s $(SWIFTEST_HOME)/Makefile .; \ + make fastdir + cd $(SWIFTEST_HOME)/src/drift; \ + rm -f Makefile.Defines Makefile; \ + ln -s $(SWIFTEST_HOME)/Makefile.Defines .; \ + ln -s $(SWIFTEST_HOME)/Makefile .; \ + make fastdir cd $(SWIFTEST_HOME)/src/orbel; \ rm -f Makefile.Defines Makefile; \ @@ -207,6 +216,12 @@ fast: $(INSTALL_DATA) *.smod $(SWIFTEST_HOME)/include; \ rm -f *.o *.smod + cd $(SWIFTEST_HOME)/src/helio; \ + $(FORTRAN) $(FFASTFLAGS) -I$(SWIFTEST_HOME)/include -I$(NETCDF_FORTRAN_HOME)/include $(IMKL) -c helio_drift.f90; \ + $(AR) rv $(SWIFTEST_HOME)/lib/libswiftest.a *.o *.smod; \ + $(INSTALL_DATA) *.smod $(SWIFTEST_HOME)/include; \ + rm -f *.o *.smod + libdir: $(FORTRAN) $(FFLAGS) -I$(SWIFTEST_HOME)/include -I$(NETCDF_FORTRAN_HOME)/include $(IMKL) -c *.f90; \ $(AR) rv $(SWIFTEST_HOME)/lib/libswiftest.a *.o *.smod; \ diff --git a/Makefile.Defines b/Makefile.Defines index 16ce3afc3..ea2772b04 100644 --- a/Makefile.Defines +++ b/Makefile.Defines @@ -57,7 +57,7 @@ SIMDVEC = -simd -xhost -align all -assume contiguous_assumed_shape -vecabi=cmdta PAR = -qopenmp -parallel HEAPARR = -heap-arrays 4194304 OPTREPORT = -qopt-report=5 -IPRODUCTION = -no-wrap-margin -O3 -ipo -qopt-prefetch=0 -sox $(PAR) $(SIMDVEC) #$(HEAPARR) +IPRODUCTION = -no-wrap-margin -O3 -qopt-prefetch=0 -sox $(PAR) $(SIMDVEC) #$(HEAPARR) #gfortran flags GDEBUG = -g -Og -fbacktrace -fbounds-check -ffree-line-length-none diff --git a/examples/symba_mars_disk/param.in b/examples/symba_mars_disk/param.in index 023f31647..fd0cc9134 100644 --- a/examples/symba_mars_disk/param.in +++ b/examples/symba_mars_disk/param.in @@ -1,6 +1,6 @@ !Parameter file for the SyMBA-RINGMOONS test T0 0.0 -TSTOP 1200.0 +TSTOP 12000.0 DT 600.0 CB_IN cb.in PL_IN mars.in diff --git a/src/orbel/orbel.f90 b/src/orbel/orbel.f90 index a815b92c7..5de45e296 100644 --- a/src/orbel/orbel.f90 +++ b/src/orbel/orbel.f90 @@ -254,40 +254,24 @@ real(DP) pure function orbel_flon(e,icapn) biga = (-0.5_DP * b + sq)**(1.0_DP / 3.0_DP) bigb = -(+0.5_DP * b + sq)**(1.0_DP / 3.0_DP) x = biga + bigb - ! write(6,*) 'cubic = ',x**3 +a*x +b orbel_flon = x ! If capn is VSMALL (or zero) no need to go further than cubic even for ! e =1. - if( capn < VSMALL) go to 100 - - do i = 1,IMAX - x2 = x * x - f = a0 + x * (a1 + x2 * (a3 + x2 * (a5 + x2 * (a7 + x2 * (a9 + x2 * (a11 + x2)))))) - fp = b1 + x2 * (b3 + x2 * (b5 + x2 * (b7 + x2 * (b9 + x2 * (b11 + 13 * x2))))) - dx = -f / fp - ! write(6,*) 'i,dx,x,f : ' - ! write(6,432) i,dx,x,f - 432 format(1x,i3,3(2x,1p1e22.15)) - orbel_flon = x + dx - ! if we have converged here there's no point in going on - if(abs(dx) <= VSMALL) go to 100 + if( capn >= VSMALL) then + do i = 1,IMAX + x2 = x**2 + f = a0 + x * (a1 + x2 * (a3 + x2 * (a5 + x2 * (a7 + x2 * (a9 + x2 * (a11 + x2)))))) + fp = b1 + x2 * (b3 + x2 * (b5 + x2 * (b7 + x2 * (b9 + x2 * (b11 + 13 * x2))))) + dx = -f / fp + orbel_flon = x + dx + ! if we have converged here there's no point in going on + if(abs(dx) <= VSMALL) exit x = orbel_flon - end do - - ! abnormal return here - we've gone thru the loop - ! imax times without convergence - if(iflag == 1) then - orbel_flon = -orbel_flon - capn = -capn + end do end if - !write(*,*) 'flon : returning without complete convergence' - diff = e * sinh(orbel_flon) - orbel_flon - capn - !write(*,*) 'n, f, ecc*sinh(f) - f - n : ' - !write(*,*) capn,orbel_flon,diff - return ! normal return here, but check if capn was originally negative - 100 if(iflag == 1) then + if(iflag == 1) then orbel_flon = -orbel_flon capn = -capn end if From 3b6854b82ba7b64277198108adb5a1cfab5891a0 Mon Sep 17 00:00:00 2001 From: David A Minton Date: Tue, 21 Sep 2021 22:51:15 -0400 Subject: [PATCH 2/2] Vectorized the operators --- Makefile | 12 +++++----- Makefile.Defines | 4 ++-- src/modules/swiftest_operators.f90 | 10 +++++++++ src/operators/operator_cross.f90 | 35 ++++++++++++------------------ src/operators/operator_mag.f90 | 2 ++ 5 files changed, 34 insertions(+), 29 deletions(-) diff --git a/Makefile b/Makefile index 5f3459f1a..ef02313c2 100644 --- a/Makefile +++ b/Makefile @@ -65,7 +65,7 @@ LMKL = -L$(MKLROOT)/lib/intel64 -qopt-matmul MODULES = $(SWIFTEST_MODULES) $(USER_MODULES) -.PHONY : all mod lib libdir fast drivers bin clean force +.PHONY : all mod lib fast drivers bin clean force % : %.f90 force $(FORTRAN) $(FFLAGS) -I$(SWIFTEST_HOME)/include -I$(NETCDF_FORTRAN_HOME)/include $(IMKL) $< -o $@ \ @@ -118,11 +118,6 @@ lib: ln -s $(SWIFTEST_HOME)/Makefile.Defines .; \ ln -s $(SWIFTEST_HOME)/Makefile .; \ make libdir - cd $(SWIFTEST_HOME)/src/operators; \ - rm -f Makefile.Defines Makefile; \ - ln -s $(SWIFTEST_HOME)/Makefile.Defines .; \ - ln -s $(SWIFTEST_HOME)/Makefile .; \ - make libdir cd $(SWIFTEST_HOME)/src/setup; \ rm -f Makefile.Defines Makefile; \ ln -s $(SWIFTEST_HOME)/Makefile.Defines .; \ @@ -165,6 +160,11 @@ lib: make libdir fast: + cd $(SWIFTEST_HOME)/src/operators; \ + rm -f Makefile.Defines Makefile; \ + ln -s $(SWIFTEST_HOME)/Makefile.Defines .; \ + ln -s $(SWIFTEST_HOME)/Makefile .; \ + make fastdir cd $(SWIFTEST_HOME)/src/fraggle; \ rm -f Makefile.Defines Makefile; \ ln -s $(SWIFTEST_HOME)/Makefile.Defines .; \ diff --git a/Makefile.Defines b/Makefile.Defines index ea2772b04..838038524 100644 --- a/Makefile.Defines +++ b/Makefile.Defines @@ -69,8 +69,8 @@ GPRODUCTION = -O2 -ffree-line-length-none $(GPAR) #FFLAGS = $(IDEBUG) #$(SIMDVEC) $(PAR) #FFASTFLAGS = $(IDEBUG) #$(SIMDVEC) $(PAR) -FFLAGS = $(IPRODUCTION) $(STRICTREAL) #$(ADVIXE_FLAGS) -FFASTFLAGS = $(IPRODUCTION) -fp-model fast #$(ADVIXE_FLAGS) +FFLAGS = $(IPRODUCTION) $(STRICTREAL) $(OPTREPORT) #$(ADVIXE_FLAGS) +FFASTFLAGS = $(IPRODUCTION) -fp-model fast $(OPTREPORT) #$(ADVIXE_FLAGS) FORTRAN = ifort AR = xiar diff --git a/src/modules/swiftest_operators.f90 b/src/modules/swiftest_operators.f90 index 2c982f09c..13cb57839 100644 --- a/src/modules/swiftest_operators.f90 +++ b/src/modules/swiftest_operators.f90 @@ -15,42 +15,49 @@ module swiftest_operators interface operator(.cross.) module pure function operator_cross_sp(A, B) result(C) + !$omp declare simd(operator_cross_sp) implicit none real(SP), dimension(:), intent(in) :: A, B real(SP), dimension(3) :: C end function operator_cross_sp module pure function operator_cross_dp(A, B) result(C) + !$omp declare simd(operator_cross_dp) implicit none real(DP), dimension(:), intent(in) :: A, B real(DP), dimension(3) :: C end function operator_cross_dp module pure function operator_cross_qp(A, B) result(C) + !$omp declare simd(operator_cross_qp) implicit none real(QP), dimension(:), intent(in) :: A, B real(QP), dimension(3) :: C end function operator_cross_qp module pure function operator_cross_i1b(A, B) result(C) + !$omp declare simd(operator_cross_i1b) implicit none integer(I1B), dimension(:), intent(in) :: A, B integer(I1B), dimension(3) :: C end function operator_cross_i1b module pure function operator_cross_i2b(A, B) result(C) + !$omp declare simd(operator_cross_i2b) implicit none integer(I2B), dimension(:), intent(in) :: A, B integer(I2B), dimension(3) :: C end function operator_cross_i2b module pure function operator_cross_i4b(A, B) result(C) + !$omp declare simd(operator_cross_i4b) implicit none integer(I4B), dimension(:), intent(in) :: A, B integer(I4B), dimension(3) :: C end function operator_cross_i4b module pure function operator_cross_i8b(A, B) result(C) + !$omp declare simd(operator_cross_i8b) implicit none integer(I8B), dimension(:), intent(in) :: A, B integer(I8B), dimension(3) :: C @@ -105,18 +112,21 @@ end function operator_cross_el_i8b interface operator(.mag.) module pure function operator_mag_sp(A) result(B) + !$omp declare simd(operator_mag_sp) implicit none real(SP), dimension(:), intent(in) :: A real(SP) :: B end function operator_mag_sp module pure function operator_mag_dp(A) result(B) + !$omp declare simd(operator_mag_dp) implicit none real(DP), dimension(:), intent(in) :: A real(DP) :: B end function operator_mag_dp module pure function operator_mag_qp(A) result(B) + !$omp declare simd(operator_mag_qp) implicit none real(QP), dimension(:), intent(in) :: A real(QP) :: B diff --git a/src/operators/operator_cross.f90 b/src/operators/operator_cross.f90 index 736dc2696..99de730fe 100644 --- a/src/operators/operator_cross.f90 +++ b/src/operators/operator_cross.f90 @@ -8,6 +8,7 @@ contains module pure function operator_cross_sp(A, B) result(C) + !$omp declare simd(operator_cross_sp) implicit none real(SP), dimension(:), intent(in) :: A, B real(SP), dimension(3) :: C @@ -18,6 +19,7 @@ module pure function operator_cross_sp(A, B) result(C) end function operator_cross_sp module pure function operator_cross_dp(A, B) result(C) + !$omp declare simd(operator_cross_dp) implicit none real(DP), dimension(:), intent(in) :: A, B real(DP), dimension(3) :: C @@ -28,6 +30,7 @@ module pure function operator_cross_dp(A, B) result(C) end function operator_cross_dp module pure function operator_cross_qp(A, B) result(C) + !$omp declare simd(operator_cross_qp) implicit none real(QP), dimension(:), intent(in) :: A, B real(QP), dimension(3) :: C @@ -38,6 +41,7 @@ module pure function operator_cross_qp(A, B) result(C) end function operator_cross_qp module pure function operator_cross_i1b(A, B) result(C) + !$omp declare simd(operator_cross_i1b) implicit none integer(I1B), dimension(:), intent(in) :: A, B integer(I1B), dimension(3) :: C @@ -48,6 +52,7 @@ module pure function operator_cross_i1b(A, B) result(C) end function operator_cross_i1b module pure function operator_cross_i2b(A, B) result(C) + !$omp declare simd(operator_cross_i2b) implicit none integer(I2B), dimension(:), intent(in) :: A, B integer(I2B), dimension(3) :: C @@ -58,6 +63,7 @@ module pure function operator_cross_i2b(A, B) result(C) end function operator_cross_i2b module pure function operator_cross_i4b(A, B) result(C) + !$omp declare simd(operator_cross_i4b) implicit none integer(I4B), dimension(:), intent(in) :: A, B integer(I4B), dimension(3) :: C @@ -68,6 +74,7 @@ module pure function operator_cross_i4b(A, B) result(C) end function operator_cross_i4b module pure function operator_cross_i8b(A, B) result(C) + !$omp declare simd(operator_cross_i8b) implicit none integer(I8B), dimension(:), intent(in) :: A, B integer(I8B), dimension(3) :: C @@ -86,9 +93,7 @@ module pure function operator_cross_el_sp(A, B) result(C) if (allocated(C)) deallocate(C) allocate(C, mold = A) do concurrent (i = 1:n) - C(1, i) = A(2, i) * B(3, i) - A(3, i) * B(2, i) - C(2, i) = A(3, i) * B(1, i) - A(1, i) * B(3, i) - C(3, i) = A(1, i) * B(2, i) - A(2, i) * B(1, i) + C(:,i) = operator_cross_sp(A(:,i), B(:,i)) end do return end function operator_cross_el_sp @@ -102,9 +107,7 @@ module pure function operator_cross_el_dp(A, B) result(C) if (allocated(C)) deallocate(C) allocate(C, mold = A) do concurrent (i = 1:n) - C(1, i) = A(2, i) * B(3, i) - A(3, i) * B(2, i) - C(2, i) = A(3, i) * B(1, i) - A(1, i) * B(3, i) - C(3, i) = A(1, i) * B(2, i) - A(2, i) * B(1, i) + C(:,i) = operator_cross_dp(A(:,i), B(:,i)) end do return end function operator_cross_el_dp @@ -118,9 +121,7 @@ module pure function operator_cross_el_qp(A, B) result(C) if (allocated(C)) deallocate(C) allocate(C, mold = A) do concurrent (i = 1:n) - C(1, i) = A(2, i) * B(3, i) - A(3, i) * B(2, i) - C(2, i) = A(3, i) * B(1, i) - A(1, i) * B(3, i) - C(3, i) = A(1, i) * B(2, i) - A(2, i) * B(1, i) + C(:,i) = operator_cross_qp(A(:,i), B(:,i)) end do return end function operator_cross_el_qp @@ -134,9 +135,7 @@ module pure function operator_cross_el_i1b(A, B) result(C) if (allocated(C)) deallocate(C) allocate(C, mold = A) do concurrent (i = 1:n) - C(1, i) = A(2, i) * B(3, i) - A(3, i) * B(2, i) - C(2, i) = A(3, i) * B(1, i) - A(1, i) * B(3, i) - C(3, i) = A(1, i) * B(2, i) - A(2, i) * B(1, i) + C(:,i) = operator_cross_i1b(A(:,i), B(:,i)) end do return end function operator_cross_el_i1b @@ -150,9 +149,7 @@ module pure function operator_cross_el_i2b(A, B) result(C) if (allocated(C)) deallocate(C) allocate(C, mold = A) do concurrent (i = 1:n) - C(1, i) = A(2, i) * B(3, i) - A(3, i) * B(2, i) - C(2, i) = A(3, i) * B(1, i) - A(1, i) * B(3, i) - C(3, i) = A(1, i) * B(2, i) - A(2, i) * B(1, i) + C(:,i) = operator_cross_i2b(A(:,i), B(:,i)) end do return end function operator_cross_el_i2b @@ -166,9 +163,7 @@ module pure function operator_cross_el_i4b(A, B) result(C) if (allocated(C)) deallocate(C) allocate(C, mold = A) do concurrent (i = 1:n) - C(1, i) = A(2, i) * B(3, i) - A(3, i) * B(2, i) - C(2, i) = A(3, i) * B(1, i) - A(1, i) * B(3, i) - C(3, i) = A(1, i) * B(2, i) - A(2, i) * B(1, i) + C(:,i) = operator_cross_i4b(A(:,i), B(:,i)) end do return end function operator_cross_el_i4b @@ -182,9 +177,7 @@ module pure function operator_cross_el_i8b(A, B) result(C) if (allocated(C)) deallocate(C) allocate(C, mold = A) do concurrent (i = 1:n) - C(1, i) = A(2, i) * B(3, i) - A(3, i) * B(2, i) - C(2, i) = A(3, i) * B(1, i) - A(1, i) * B(3, i) - C(3, i) = A(1, i) * B(2, i) - A(2, i) * B(1, i) + C(:,i) = operator_cross_i8b(A(:,i), B(:,i)) end do return end function operator_cross_el_i8b diff --git a/src/operators/operator_mag.f90 b/src/operators/operator_mag.f90 index 5a054d5ce..f74555775 100644 --- a/src/operators/operator_mag.f90 +++ b/src/operators/operator_mag.f90 @@ -7,6 +7,7 @@ contains module pure function operator_mag_sp(A) result(B) + !$omp declare simd(operator_mag_sp) implicit none real(SP), dimension(:), intent(in) :: A real(SP) :: B @@ -15,6 +16,7 @@ module pure function operator_mag_sp(A) result(B) end function operator_mag_sp module pure function operator_mag_dp(A) result(B) + !$omp declare simd(operator_mag_dp) implicit none real(DP), dimension(:), intent(in) :: A real(DP) :: B