Merge branch 'debug' into IntelAdvisor

MintonGroup · Oct 14, 2021 · f80aea4 · f80aea4
2 parents 1d2e63c + 385b215
commit f80aea4
Show file tree

Hide file tree

Showing 33 changed files with 1,060 additions and 236 deletions.
diff --git a/Makefile b/Makefile
@@ -60,12 +60,6 @@ SWIFTEST_MODULES =   swiftest_globals.f90 \
 
 include Makefile.Defines
 
-MKL_ROOT = /apps/spack/bell/apps/intel-parallel-studio/cluster.2019.5-intel-19.0.5-4brgqlf/mkl/lib
-IMKL = -I$(MKLROOT)/include
-LMKL = -L$(MKLROOT)/lib/intel64 -qopt-matmul 
-IADVIXE = -I$(ADVISOR_2019_DIR)/include/ia64
-LADVIXE = -L$(ADVISOR_2019_DIR)/lib64
-
 MODULES         = $(SWIFTEST_MODULES) $(USER_MODULES) 
 
 .PHONY : all mod fast strict drivers bin clean force 

diff --git a/Makefile.Defines b/Makefile.Defines
@@ -58,12 +58,14 @@ IPRODUCTION = -no-wrap-margin -O3 -qopt-prefetch=0 -sox $(PAR) $(SIMDVEC) #$(HEA
 
 #gfortran flags
 GDEBUG = -g -Og -fbacktrace -fbounds-check -ffree-line-length-none
-GPAR = -fopenmp #-ftree-parallelize-loops=4
+GPAR = -fopenmp -ftree-parallelize-loops=4
 GMEM =  -fsanitize-address-use-after-scope -fstack-check  -fsanitize=bounds-strict -fsanitize=undefined -fsanitize=signed-integer-overflow -fsanitize=object-size -fstack-protector-all
 GWARNINGS = -Wall -Warray-bounds  -Wimplicit-interface -Wextra -Warray-temporaries
-GPRODUCTION = -O2 -ffree-line-length-none $(GPAR)
-
+GPRODUCTION = -O3 -ffree-line-length-none $(GPAR)
 
+MKL_ROOT = /apps/spack/bell/apps/intel-parallel-studio/cluster.2019.5-intel-19.0.5-4brgqlf/mkl/lib
+IMKL = -I$(MKLROOT)/include
+LMKL = -L$(MKLROOT)/lib/intel64 -qopt-matmul 
 #FFLAGS  = $(IDEBUG)  #$(SIMDVEC) $(PAR)
 #FFASTFLAGS  = $(IDEBUG) #$(SIMDVEC) $(PAR)
 FSTRICTFLAGS = $(ADVIXE_FLAGS) $(STRICTREAL) $(SIMDVEC) $(PAR)
@@ -77,8 +79,8 @@ AR     = xiar
 #AR     = ar # DO NOT include in CFLAGS the "-c" option to compile object only
 # this is done explicitly as needed in the Makefile
 
-CC              = icc
-#CC              = cc
+#CC              = icc
+CC              = cc
 CFLAGS          = -O3 -w -m64 -std=c99
 
 64_BIT_REALS    = -r8

diff --git a/src/discard/discard.f90 b/src/discard/discard.f90
@@ -76,9 +76,9 @@ module subroutine discard_tp(self, system, param)
       integer(I4B) :: npl, ntp
 
       associate(tp => self, cb => system%cb, pl => system%pl)
-         if ((ntp == 0) .or. (npl ==0)) return 
          ntp = tp%nbody
          npl = pl%nbody
+         if ((ntp == 0) .or. (npl ==0)) return 
 
          if ((param%rmin >= 0.0_DP) .or. (param%rmax >= 0.0_DP) .or. &
              (param%rmaxu >= 0.0_DP) .or. ((param%qmin >= 0.0_DP) .and. (param%qmin_coord == "BARY"))) then

diff --git a/src/drift/drift.f90 b/src/drift/drift.f90
@@ -39,6 +39,8 @@ module subroutine drift_body(self, system, param, dt)
          end if
       end associate
 
+      deallocate(iflag)
+
       return
    end subroutine drift_body
 
@@ -77,18 +79,20 @@ module subroutine drift_all(mu, x, v, n, param, dt, lmask, iflag)
       else
          where(lmask(1:n)) dtp(1:n) = dt
       end if 
+
       !$omp simd
       do i = 1, n
          if (lmask(i)) call drift_one(mu(i), x(1,i), x(2,i), x(3,i), v(1,i), v(2,i), v(3,i), dtp(i), iflag(i))
       end do
       !$omp end simd
 
+      deallocate(dtp)
+
       return
    end subroutine drift_all
 
 
    module pure elemental subroutine drift_one(mu, px, py, pz, vx, vy, vz, dt, iflag) 
-      !$omp declare simd(drift_one)
       !! author: The Purdue Swiftest Team - David A. Minton, Carlisle A. Wishard, Jennifer L.L. Pouplin, and Jacob R. Elliott
       !!
       !! Perform Danby drift for one body, redoing drift with smaller substeps if original accuracy is insufficient
@@ -119,7 +123,6 @@ end subroutine drift_one
 
 
    pure subroutine drift_dan(mu, px0, py0, pz0, vx0, vy0, vz0, dt0, iflag)
-      !$omp declare simd(drift_dan)
       !! author: David A. Minton
       !!
       !! Perform Kepler drift, solving Kepler's equation in appropriate variables
@@ -197,7 +200,6 @@ end subroutine drift_dan
 
 
    pure subroutine drift_kepmd(dm, es, ec, x, s, c)
-      !$omp declare simd(drift_kepmd)
       !! author: David A. Minton
       !!
       !! Solve Kepler's equation in difference form for an ellipse for small input dm and eccentricity
@@ -243,7 +245,6 @@ end subroutine drift_kepmd
 
 
    pure subroutine drift_kepu(dt,r0,mu,alpha,u,fp,c1,c2,c3,iflag)
-      !$omp declare simd(drift_kepu)
       !! author: David A. Minton
       !!
       !! Solve Kepler's equation in universal variables
@@ -272,7 +273,6 @@ end subroutine drift_kepu
 
 
    pure subroutine drift_kepu_fchk(dt, r0, mu, alpha, u, s, f)
-      !$omp declare simd(drift_kepu_fchk)
       !! author: David A. Minton
       !!
       !! Computes the value of f, the function whose root we are trying to find in universal variables
@@ -303,7 +303,6 @@ end subroutine drift_kepu_fchk
 
 
    pure subroutine drift_kepu_guess(dt, r0, mu, alpha, u, s)
-      !$omp declare simd(drift_kepu_guess)
       !! author: David A. Minton
       !!
       !! Compute initial guess for solving Kepler's equation using universal variables
@@ -348,7 +347,6 @@ end subroutine drift_kepu_guess
 
 
    pure subroutine drift_kepu_lag(s, dt, r0, mu, alpha, u, fp, c1, c2, c3, iflag)
-      !$omp declare simd(drift_kepu_lag)
       !! author: David A. Minton
       !!
       !! Solve Kepler's equation in universal variables using Laguerre's method
@@ -403,7 +401,6 @@ end subroutine drift_kepu_lag
 
 
    pure subroutine drift_kepu_new(s, dt, r0, mu, alpha, u, fp, c1, c2, c3, iflag)
-      !$omp declare simd(drift_kepu_new)
       !! author: David A. Minton
       !!
       !! Solve Kepler's equation in universal variables using Newton's method
@@ -455,7 +452,6 @@ end subroutine drift_kepu_new
 
 
    pure subroutine drift_kepu_p3solve(dt, r0, mu, alpha, u, s, iflag)
-      !$omp declare simd(drift_kepu_p3solve)
       !! author: David A. Minton
       !!
       !! Computes real root of cubic involved in setting initial guess for solving Kepler's equation in universal variables
@@ -506,7 +502,6 @@ end subroutine drift_kepu_p3solve
 
 
    pure subroutine drift_kepu_stumpff(x, c0, c1, c2, c3)
-      !$omp declare simd(drift_kepu_stumpff)
       !! author: David A. Minton
       !!
       !! Compute Stumpff functions needed for Kepler drift in universal variables

diff --git a/src/encounter/encounter_check.f90 b/src/encounter/encounter_check.f90
@@ -403,6 +403,7 @@ subroutine encounter_check_all_sort_and_sweep_plpl(npl, x, v, renc, dt, lvdotr,
          call encounter_check_all(nenc, index1, index2, x, v, x, v, renc, renc, dt, lencounter, lvdotr)
 
          call encounter_check_reduce_broadphase(npl, nenc, index1, index2, lencounter, lvdotr)
+         deallocate(lencounter)
       end if
 
       return
@@ -770,7 +771,6 @@ end subroutine encounter_check_all_triangular_pltp
 
 
    module pure subroutine encounter_check_one(xr, yr, zr, vxr, vyr, vzr, renc, dt, lencounter, lvdotr)
-      !$omp declare simd(encounter_check_one)
       !! author: David A. Minton
       !!
       !! Determine whether a test particle and planet are having or will have an encounter within the next time step
@@ -868,17 +868,18 @@ module subroutine encounter_check_collapse_ragged_list(ragged_list, n1, nenc, in
    end subroutine encounter_check_collapse_ragged_list
 
 
-   pure subroutine encounter_check_make_ragged_list(lencounteri, ind_arr, lenci)
+   pure subroutine encounter_check_make_ragged_list(lencounteri, ind_arr, nenc,index2)
       implicit none
       ! Arguments
       logical, dimension(:), intent(in) :: lencounteri
       integer(I4B), dimension(:), intent(in) :: ind_arr
-      type(encounter_list), intent(out) :: lenci
+      integer(I4B),                            intent(out) :: nenc
+      integer(I4B), dimension(:), allocatable, intent(out) :: index2
 
-      lenci%nenc = count(lencounteri(:))
-      if (lenci%nenc > 0) then
-         allocate(lenci%index2(lenci%nenc))
-         lenci%index2(:) = pack(ind_arr(:), lencounteri(:)) 
+      nenc = count(lencounteri(:))
+      if (nenc > 0) then
+         allocate(index2(nenc))
+         index2(:) = pack(ind_arr(:), lencounteri(:)) 
       end if
 
       return
@@ -1017,7 +1018,7 @@ subroutine encounter_check_sweep_aabb_all_double_list(n1, n2, ext_ind, ibegx, ie
             ibegyi = ibegy(i) 
             iendyi = iendy(i)
             call encounter_check_sweep_aabb_one_double_list(i, n1, n2, ntot, ext_ind(:), ibegxi, iendxi, ibegyi, iendyi, ibegx(:), iendx(:), ibegy(:), iendy(:), lencounteri(:))
-            call encounter_check_make_ragged_list(lencounteri(:), ind_arr(:), lenc(i))
+            call encounter_check_make_ragged_list(lencounteri(:), ind_arr(:), lenc(i)%nenc, lenc(i)%index2)
          else
             lenc(i)%nenc = 0
          end if
@@ -1055,7 +1056,7 @@ subroutine encounter_check_sweep_aabb_all_single_list(n, ext_ind, ibegx, iendx,
             ibegyi = ibegy(i) 
             iendyi = iendy(i)
             call encounter_check_sweep_aabb_one_single_list(n, ext_ind(:), ibegxi, iendxi, ibegyi, iendyi, ibegx(:), iendx(:), ibegy(:), iendy(:), lencounteri(:))
-            call encounter_check_make_ragged_list(lencounteri(:), ind_arr(:), lenc(i))
+            call encounter_check_make_ragged_list(lencounteri(:), ind_arr(:), lenc(i)%nenc, lenc(i)%index2)
          else
             lenc(i)%nenc = 0
          end if
@@ -1067,7 +1068,6 @@ end subroutine encounter_check_sweep_aabb_all_single_list
 
 
    pure subroutine encounter_check_sweep_aabb_one_double_list(i, n1, n2, ntot, ext_ind, ibegxi, iendxi, ibegyi, iendyi, ibegx, iendx, ibegy, iendy, lencounteri)
-      !$omp declare simd(encounter_check_sweep_aabb_one_double_list)
       !! author: David A. Minton
       !!
       !! Performs a sweep operation on a single body. Encounters from the same lists not allowed (e.g. pl-tp encounters only)
@@ -1100,7 +1100,6 @@ end subroutine encounter_check_sweep_aabb_one_double_list
 
 
    pure subroutine encounter_check_sweep_aabb_one_single_list(n, ext_ind, ibegxi, iendxi, ibegyi, iendyi, ibegx, iendx, ibegy, iendy, lencounteri)
-      !$omp declare simd(encounter_check_sweep_aabb_one_single_list)
       !! author: David A. Minton
       !!
       !! Performs a sweep operation on a single body. Mutual encounters allowed (e.g. pl-pl)

diff --git a/src/encounter/encounter_util.f90 b/src/encounter/encounter_util.f90
@@ -62,6 +62,74 @@ module subroutine encounter_util_copy_list(self, source)
    end subroutine encounter_util_copy_list
 
 
+   module subroutine encounter_util_dealloc_aabb(self)
+      !! author: David A. Minton
+      !!
+      !! Deallocates all allocatables
+      implicit none
+      ! Arguments
+      class(encounter_bounding_box_1D), intent(inout) :: self
+
+      if (allocated(self%ind)) deallocate(self%ind)
+      if (allocated(self%ibeg)) deallocate(self%ibeg)
+      if (allocated(self%iend)) deallocate(self%iend)
+
+      return
+   end subroutine encounter_util_dealloc_aabb
+
+
+   module subroutine encounter_util_dealloc_list(self)
+      !! author: David A. Minton
+      !!
+      !! Deallocates all allocatables
+      implicit none
+      ! Arguments
+      class(encounter_list), intent(inout) :: self
+
+      if (allocated(self%lvdotr)) deallocate(self%lvdotr)
+      if (allocated(self%status)) deallocate(self%status)
+      if (allocated(self%index1)) deallocate(self%index1)
+      if (allocated(self%index2)) deallocate(self%index2)
+      if (allocated(self%id1)) deallocate(self%id1)
+      if (allocated(self%id2)) deallocate(self%id2)
+      if (allocated(self%x1)) deallocate(self%x1)
+      if (allocated(self%x2)) deallocate(self%x2)
+      if (allocated(self%v1)) deallocate(self%v1)
+      if (allocated(self%v2)) deallocate(self%v2)
+      if (allocated(self%t)) deallocate(self%t)
+
+      return
+   end subroutine encounter_util_dealloc_list
+
+
+   module subroutine encounter_util_final_aabb(self)
+      !! author: David A. Minton
+      !!
+      !! Finalize the axis aligned bounding box (1D) - deallocates all allocatables
+      implicit none
+      ! Arguments
+      type(encounter_bounding_box_1D), intent(inout) :: self
+
+      call self%dealloc()
+
+      return
+   end subroutine encounter_util_final_aabb
+
+
+   module subroutine encounter_util_final_list(self)
+      !! author: David A. Minton
+      !!
+      !! Finalize the encounter list - deallocates all allocatables
+      implicit none
+      ! Arguments
+      type(encounter_list), intent(inout) :: self
+
+      call self%dealloc()
+
+      return
+   end subroutine encounter_util_final_list
+
+
    module subroutine encounter_util_resize_list(self, nnew)
       !! author: David A. Minton
       !!

diff --git a/src/fraggle/fraggle_generate.f90 b/src/fraggle/fraggle_generate.f90
@@ -547,21 +547,19 @@ function radial_objective_function(v_r_mag_input) result(fval)
             real(DP), dimension(frag%nbody)       :: kearr
             real(DP)                              :: keo, ke_radial, rotmag2, vmag2
 
-            associate(nfrag => frag%nbody)
-               allocate(v_shift, mold=frag%vb)
-               v_shift(:,:) = fraggle_util_vmag_to_vb(v_r_mag_input, frag%v_r_unit, frag%v_t_mag, frag%v_t_unit, frag%mass, frag%vbcom) 
-               !$omp do simd firstprivate(nfrag) lastprivate(rotmag2, vmag2) 
-               do i = 1,nfrag
-                  rotmag2 = frag%rot(1,i)**2 + frag%rot(2,i)**2 + frag%rot(3,i)**2
-                  vmag2 = v_shift(1,i)**2 + v_shift(2,i)**2 + v_shift(3,i)**2
-                  kearr(i) = frag%mass(i) * (frag%Ip(3, i) * frag%radius(i)**2 * rotmag2 + vmag2) 
-               end do
-               !$omp end do simd
-               keo = 2 * frag%ke_budget - sum(kearr(:))
-               ke_radial = frag%ke_budget - frag%ke_orbit - frag%ke_spin
-               ! The following ensures that fval = 0 is a local minimum, which is what the BFGS method is searching for
-               fval = (keo / (2 * ke_radial))**2
-            end associate
+            allocate(v_shift, mold=frag%vb)
+            v_shift(:,:) = fraggle_util_vmag_to_vb(v_r_mag_input, frag%v_r_unit, frag%v_t_mag, frag%v_t_unit, frag%mass, frag%vbcom) 
+            !$omp do simd firstprivate(frag)
+            do i = 1,frag%nbody
+               rotmag2 = frag%rot(1,i)**2 + frag%rot(2,i)**2 + frag%rot(3,i)**2
+               vmag2 = v_shift(1,i)**2 + v_shift(2,i)**2 + v_shift(3,i)**2
+               kearr(i) = frag%mass(i) * (frag%Ip(3, i) * frag%radius(i)**2 * rotmag2 + vmag2) 
+            end do
+            !$omp end do simd
+            keo = 2 * frag%ke_budget - sum(kearr(:))
+            ke_radial = frag%ke_budget - frag%ke_orbit - frag%ke_spin
+            ! The following ensures that fval = 0 is a local minimum, which is what the BFGS method is searching for
+            fval = (keo / (2 * ke_radial))**2
 
             return
          end function radial_objective_function

diff --git a/src/helio/helio_drift.f90 b/src/helio/helio_drift.f90
@@ -19,7 +19,7 @@ module subroutine helio_drift_body(self, system, param, dt)
       integer(I4B) :: i !! Loop counter
       real(DP) :: rmag, vmag2, energy
       integer(I4B), dimension(:),allocatable :: iflag !! Vectorized error code flag
-      real(DP), dimension(:), allocatable    :: dtp, mu
+      real(DP), dimension(:), allocatable    :: mu
 
       if (self%nbody == 0) return
 
@@ -37,6 +37,7 @@ module subroutine helio_drift_body(self, system, param, dt)
          end if
       end associate
 
+      deallocate(iflag, mu)
       return
    end subroutine helio_drift_body
 
@@ -76,7 +77,6 @@ end subroutine helio_drift_tp
 
 
    pure elemental subroutine helio_drift_linear_one(xhx, xhy, xhz, ptx, pty, ptz, dt)
-      !$omp declare simd(helio_drift_linear_one)
       implicit none
       real(DP), intent(inout) :: xhx, xhy, xhz
       real(DP), intent(in) :: ptx, pty, ptz, dt