diff --git a/src/collision/collision_check.f90 b/src/collision/collision_check.f90 index 936963caa..0e69032bc 100644 --- a/src/collision/collision_check.f90 +++ b/src/collision/collision_check.f90 @@ -101,7 +101,11 @@ module subroutine collision_check_plpl(self, nbody_system, param, t, dt, irec, l lcollision(:) = .false. self%lclosest(:) = .false. +#ifdef DOCONLOC + do concurrent(k = 1:nenc, lmask(k)) shared(self,pl,lmask, dt, lcollision) local(i,j,xr,vr,rlim,Gmtot) +#else do concurrent(k = 1:nenc, lmask(k)) +#endif i = self%index1(k) j = self%index2(k) xr(:) = pl%rh(:, i) - pl%rh(:, j) @@ -204,8 +208,11 @@ module subroutine collision_check_pltp(self, nbody_system, param, t, dt, irec, l lcollision(:) = .false. self%lclosest(:) = .false. - +#ifdef DOCONLOC + do concurrent(k = 1:nenc, lmask(k)) shared(self,pl,tp,lmask, dt, lcollision) local(i,j,xr,vr) +#else do concurrent(k = 1:nenc, lmask(k)) +#endif i = self%index1(k) j = self%index2(k) xr(:) = pl%rh(:, i) - tp%rh(:, j) diff --git a/src/collision/collision_regime.f90 b/src/collision/collision_regime.f90 index a612eb37a..0c81eca14 100644 --- a/src/collision/collision_regime.f90 +++ b/src/collision/collision_regime.f90 @@ -111,7 +111,11 @@ subroutine collision_regime_LS12(collider, nbody_system, param) if (impactors%regime == COLLRESOLVE_REGIME_MERGE) then volume = 4._DP / 3._DP * PI * sum(impactors%radius(:)**3) radius = (3._DP * volume / (4._DP * PI))**(THIRD) +#ifdef DOCONLOC + do concurrent(i = 1:NDIM) shared(impactors,Ip,L_spin) +#else do concurrent(i = 1:NDIM) +#endif Ip(i) = sum(impactors%mass(:) * impactors%Ip(i,:)) L_spin(i) = sum(impactors%L_orbit(i,:) + impactors%L_spin(i,:)) end do diff --git a/src/collision/collision_resolve.f90 b/src/collision/collision_resolve.f90 index befc4ba8a..7aa76166c 100644 --- a/src/collision/collision_resolve.f90 +++ b/src/collision/collision_resolve.f90 @@ -387,7 +387,11 @@ module subroutine collision_resolve_mergeaddsub(nbody_system, param, t, status) ! plnew%tlag = pl%tlag(ibiggest) ! end if +#ifdef DOCONLOC + do concurrent(i = 1:nfrag) shared(plnew,fragments) local(volume) +#else do concurrent(i = 1:nfrag) +#endif volume = 4.0_DP/3.0_DP * PI * plnew%radius(i)**3 plnew%density(i) = fragments%mass(i) / volume end do diff --git a/src/collision/collision_util.f90 b/src/collision/collision_util.f90 index 45f4f516d..179fb289c 100644 --- a/src/collision/collision_util.f90 +++ b/src/collision/collision_util.f90 @@ -36,7 +36,11 @@ module subroutine collision_util_add_fragments_to_collider(self, nbody_system, p pl%mass(npl_before+1:npl_after) = fragments%mass(1:nfrag) pl%Gmass(npl_before+1:npl_after) = fragments%mass(1:nfrag) * param%GU pl%radius(npl_before+1:npl_after) = fragments%radius(1:nfrag) +#ifdef DOCONLOC + do concurrent (i = 1:nfrag) shared(cb,pl,fragments) +#else do concurrent (i = 1:nfrag) +#endif pl%rb(:,npl_before+i) = fragments%rb(:,i) pl%vb(:,npl_before+i) = fragments%vb(:,i) pl%rh(:,npl_before+i) = fragments%rb(:,i) - cb%rb(:) @@ -169,7 +173,11 @@ module subroutine collision_util_get_energy_and_momentum(self, nbody_system, par associate(fragments => self%fragments, impactors => self%impactors, pl => nbody_system%pl, cb => nbody_system%cb) nfrag = self%fragments%nbody if (phase_val == 1) then +#ifdef DOCONLOC + do concurrent(i = 1:2) shared(impactors) +#else do concurrent(i = 1:2) +#endif impactors%ke_orbit(i) = 0.5_DP * impactors%mass(i) * dot_product(impactors%vc(:,i), impactors%vc(:,i)) impactors%ke_spin(i) = 0.5_DP * impactors%mass(i) * impactors%radius(i)**2 * impactors%Ip(3,i) * dot_product(impactors%rot(:,i), impactors%rot(:,i)) impactors%be(i) = -3 * impactors%Gmass(i) * impactors%mass(i) / (5 * impactors%radius(i)) @@ -185,7 +193,11 @@ module subroutine collision_util_get_energy_and_momentum(self, nbody_system, par call swiftest_util_get_potential_energy(2, [(.true., i = 1, 2)], 0.0_DP, impactors%Gmass, impactors%mass, impactors%rb, self%pe(phase_val)) self%te(phase_val) = self%ke_orbit(phase_val) + self%ke_spin(phase_val) + self%be(phase_val) + self%pe(phase_val) else if (phase_val == 2) then +#ifdef DOCONLOC + do concurrent(i = 1:nfrag) shared(fragments) +#else do concurrent(i = 1:nfrag) +#endif fragments%ke_orbit(i) = 0.5_DP * fragments%mass(i) * dot_product(fragments%vc(:,i), fragments%vc(:,i)) fragments%ke_spin(i) = 0.5_DP * fragments%mass(i) * fragments%radius(i)**2 * fragments%Ip(3,i) * dot_product(fragments%rot(:,i), fragments%rot(:,i)) fragments%L_orbit(:,i) = fragments%mass(i) * fragments%rc(:,i) .cross. fragments%vc(:,i) @@ -918,7 +930,11 @@ module subroutine collision_util_set_original_scale_factors(self) impactors%L_orbit = impactors%L_orbit * collider%Lscale impactors%ke_orbit = impactors%ke_orbit * collider%Escale impactors%ke_spin = impactors%ke_spin * collider%Escale +#ifdef DOCONLOC + do concurrent(i = 1:2) shared(impactors) +#else do concurrent(i = 1:2) +#endif impactors%rot(:,i) = impactors%L_spin(:,i) * (impactors%mass(i) * impactors%radius(i)**2 * impactors%Ip(3,i)) end do diff --git a/src/encounter/encounter_check.f90 b/src/encounter/encounter_check.f90 index a539486b5..b987f8799 100644 --- a/src/encounter/encounter_check.f90 +++ b/src/encounter/encounter_check.f90 @@ -174,7 +174,11 @@ subroutine encounter_check_all_sort_and_sweep_plpl(npl, r, v, renc, dt, nenc, in npl_last = npl end if +#ifdef DOCONLOC + do concurrent (i = 1:npl) shared(r,renc,rmin,rmax) local(rmag) +#else do concurrent (i = 1:npl) +#endif rmag = .mag.r(:,i) rmax(i) = rmag + RSWEEP_FACTOR * renc(i) rmin(i) = rmag - RSWEEP_FACTOR * renc(i) @@ -227,12 +231,20 @@ subroutine encounter_check_all_sort_and_sweep_plplm(nplm, nplt, rplm, vplm, rplt ntot_last = ntot end if +#ifdef DOCONLOC + do concurrent (i = 1:nplm) shared(rmin,rmax,rplm,rencm) local(rmag) +#else do concurrent (i = 1:nplm) +#endif rmag = .mag.rplm(:,i) rmax(i) = rmag + RSWEEP_FACTOR * rencm(i) rmin(i) = rmag - RSWEEP_FACTOR * rencm(i) end do +#ifdef DOCONLOC + do concurrent (i = 1:nplt) shared(rmin,rmax,rplt,renct) local(rmag) +#else do concurrent (i = 1:nplt) +#endif rmag = .mag.rplt(:,i) rmax(nplm+i) = rmag + RSWEEP_FACTOR * renct(i) rmin(nplm+i) = rmag - RSWEEP_FACTOR * renct(i) @@ -287,12 +299,20 @@ subroutine encounter_check_all_sort_and_sweep_pltp(npl, ntp, rpl, vpl, rtp, vtp, renctp(:) = 0.0_DP +#ifdef DOCONLOC + do concurrent (i = 1:npl) shared(rmin,rmax,rpl,rencpl) local(rmag) +#else do concurrent (i = 1:npl) +#endif rmag = .mag.rpl(:,i) rmax(i) = rmag + RSWEEP_FACTOR * rencpl(i) rmin(i) = rmag - RSWEEP_FACTOR * rencpl(i) end do - do concurrent (i = 1:ntp) +#ifdef DOCONLOC + do concurrent (i = 1:ntp) shared(rmin,rmax,rtp,renctp) local(rmag) +#else + do concurrent (i = 1:ntp) +#endif rmag = .mag.rtp(:,i) rmax(npl+i) = rmag + RSWEEP_FACTOR * renctp(i) rmin(npl+i) = rmag - RSWEEP_FACTOR * renctp(i) @@ -335,7 +355,11 @@ pure subroutine encounter_check_all_sweep_one(i, n, xi, yi, zi, vxi, vyi, vzi, x logical, dimension(n) :: lencounteri, lvdotri lencounteri(:) = .false. +#ifdef DOCONLOC + do concurrent(j = 1:n, lgood(j)) shared(lgood,lencounteri,lvdotri,x,y,z,vx,vy,vz,renci,renc) local(xr,yr,zr,vxr,vyr,vzr,renc12) +#else do concurrent(j = 1:n, lgood(j)) +#endif xr = x(j) - xi yr = y(j) - yi zr = z(j) - zi @@ -383,7 +407,7 @@ subroutine encounter_check_all_triangular_one(i, n, xi, yi, zi, vxi, vyi, vzi, x logical, dimension(n) :: lencounteri, lvdotri #ifdef DOCONLOC - do concurrent(j = i+1:n) shared(lencounteri, lvdotri) + do concurrent(j = i+1:n) shared(lencounteri, lvdotri, renci, renc) local(xr,yr,zr,vxr,vyr,vzr,renc12) #else do concurrent(j = i+1:n) #endif @@ -700,7 +724,11 @@ subroutine encounter_check_remove_duplicates(n, nenc, index1, index2, lvdotr) ! Sort on the second index and remove duplicates if (allocated(itmp)) deallocate(itmp) allocate(itmp, source=index2) +#ifdef DOCONLOC + do concurrent(i = 1:n, iend(i) - ibeg(i) > 0_I8B) shared(iend,ibeg,index2,lencounter,itmp) local(klo,khi,nenci,j) +#else do concurrent(i = 1:n, iend(i) - ibeg(i) > 0_I8B) +#endif klo = ibeg(i) khi = iend(i) nenci = khi - klo + 1_I8B @@ -747,7 +775,11 @@ pure module subroutine encounter_check_sort_aabb_1D(self, n, extent_arr) call util_sort(extent_arr, self%ind) +#ifdef DOCONLOC + do concurrent(k = 1_I8B:2_I8B * n) shared(self,n) local(i) +#else do concurrent(k = 1_I8B:2_I8B * n) +#endif i = self%ind(k) if (i <= n) then self%ibeg(i) = k @@ -940,7 +972,11 @@ module subroutine encounter_check_sweep_aabb_single_list(self, n, r, v, renc, dt call encounter_check_collapse_ragged_list(lenc, n, nenc, index1, index2, lvdotr) ! By convention, we always assume that index1 < index2, and so we must swap any that are out of order +#ifdef DOCONLOC + do concurrent(k = 1_I8B:nenc, index1(k) > index2(k)) shared(index1,index2) local(itmp) +#else do concurrent(k = 1_I8B:nenc, index1(k) > index2(k)) +#endif itmp = index1(k) index1(k) = index2(k) index2(k) = itmp diff --git a/src/fraggle/fraggle_generate.f90 b/src/fraggle/fraggle_generate.f90 index ee4466c11..0dec13fde 100644 --- a/src/fraggle/fraggle_generate.f90 +++ b/src/fraggle/fraggle_generate.f90 @@ -68,7 +68,11 @@ module subroutine fraggle_generate(self, nbody_system, param, t) ! Get the energy and momentum of the system before and after the collision call self%get_energy_and_momentum(nbody_system, param, phase="before") nfrag = fragments%nbody +#ifdef DOCONLOC + do concurrent(i = 1:2) shared(fragments,impactors) +#else do concurrent(i = 1:2) +#endif fragments%rc(:,i) = fragments%rb(:,i) - impactors%rbcom(:) fragments%vc(:,i) = fragments%vb(:,i) - impactors%vbcom(:) end do @@ -309,7 +313,11 @@ module subroutine fraggle_generate_merge(self, nbody_system, param, t) mass = sum(impactors%mass(:)) volume = 4._DP / 3._DP * PI * sum(impactors%radius(:)**3) radius = (3._DP * volume / (4._DP * PI))**(THIRD) +#ifdef DOCONLOC + do concurrent(i = 1:NDIM) shared(impactors, Ip, L_spin_new) +#else do concurrent(i = 1:NDIM) +#endif Ip(i) = sum(impactors%mass(:) * impactors%Ip(i,:)) L_spin_new(i) = sum(impactors%L_orbit(i,:) + impactors%L_spin(i,:)) end do @@ -414,7 +422,7 @@ module subroutine fraggle_generate_pos_vec(collider, nbody_system, param, lfailu ! Randomly place the n>2 fragments inside their cloud until none are overlapping #ifdef DOCONLOC - do concurrent(i = istart:nfrag, loverlap(i)) shared(loverlap, mass_rscale, u, phi, theta, lhitandrun) local(j, direction) + do concurrent(i = istart:nfrag, loverlap(i)) shared(fragments, impactors, fragment_cloud_radius, fragment_cloud_center, loverlap, mass_rscale, u, phi, theta, lhitandrun) local(j, direction) #else do concurrent(i = istart:nfrag, loverlap(i)) #endif @@ -457,7 +465,7 @@ module subroutine fraggle_generate_pos_vec(collider, nbody_system, param, lfailu ! when the rest are not, we will randomly walk their position in space so as not to move them too far from their starting position if (all(.not.loverlap(istart:nfrag)) .and. any(loverlap(1:istart-1))) then #ifdef DOCONLOC - do concurrent(i = 1:istart-1,loverlap(i)) shared(loverlap, u, theta, i) local(rwalk, dis) + do concurrent(i = 1:istart-1,loverlap(i)) shared(fragments,loverlap, u, theta, i) local(rwalk, dis) #else do concurrent(i = 1:istart-1,loverlap(i)) #endif @@ -519,7 +527,7 @@ module subroutine fraggle_generate_rot_vec(collider, nbody_system, param) class(swiftest_parameters), intent(inout) :: param !! Current run configuration parameters ! Internals integer(I4B) :: i, nfrag - real(DP), parameter :: frag_rot_fac = 0.1_DP ! Fraction of projectile rotation magnitude to add as random noise to fragment rotation + real(DP), parameter :: FRAG_ROT_FAC = 0.1_DP ! Fraction of projectile rotation magnitude to add as random noise to fragment rotation real(DP), parameter :: hitandrun_momentum_transfer = 0.01_DP ! Fraction of projectile momentum transfered to target in a hit and run real(DP) :: mass_fac real(DP), dimension(NDIM) :: drot, dL @@ -568,9 +576,13 @@ module subroutine fraggle_generate_rot_vec(collider, nbody_system, param) end if call random_number(fragments%rot(:,2:nfrag)) +#ifdef DOCONLOC + do concurrent (i = 2:nfrag) shared(fragments,impactors) local(mass_fac) +#else do concurrent (i = 2:nfrag) +#endif mass_fac = fragments%mass(i) / impactors%mass(2) - fragments%rot(:,i) = mass_fac**(5.0_DP/3.0_DP) * impactors%rot(:,2) + 2 * (fragments%rot(:,i) - 1.0_DP) * frag_rot_fac * .mag.impactors%rot(:,2) + fragments%rot(:,i) = mass_fac**(5.0_DP/3.0_DP) * impactors%rot(:,2) + 2 * (fragments%rot(:,i) - 1.0_DP) * FRAG_ROT_FAC * .mag.impactors%rot(:,2) end do fragments%rotmag(:) = .mag.fragments%rot(:,:) @@ -662,7 +674,11 @@ module subroutine fraggle_generate_vel_vec(collider, nbody_system, param, lfailu ! Scale the magnitude of the velocity by the distance from the impact point ! This will reduce the chances of fragments colliding with each other immediately, and is more physically correct - do concurrent(i = 1:fragments%nbody) +#ifdef DOCONLOC + do concurrent(i = 1:fragments%nbody) shared(fragments,impactors,vscale) local(rimp) +#else + do concurrent(i = 1:fragments%nbody) +#endif rimp(:) = fragments%rc(:,i) - impactors%rcimp(:) vscale(i) = .mag. rimp(:) / sum(impactors%radius(1:2)) end do @@ -673,7 +689,11 @@ module subroutine fraggle_generate_vel_vec(collider, nbody_system, param, lfailu ! Set the velocities of all fragments using all of the scale factors determined above if (istart > 1) fragments%vc(:,1) = impactors%vc(:,1) * impactors%mass(1) / fragments%mass(1) +#ifdef DOCONLOC + do concurrent(i = istart:fragments%nbody) shared(fragments,impactors,lhitandrun, vscale, vesc, vsign) local(j,vrot,vmag,vdisp,rimp,vimp_unit) +#else do concurrent(i = istart:fragments%nbody) +#endif j = fragments%origin_body(i) vrot(:) = impactors%rot(:,j) .cross. (fragments%rc(:,i) - impactors%rc(:,j)) if (lhitandrun) then @@ -704,7 +724,11 @@ module subroutine fraggle_generate_vel_vec(collider, nbody_system, param, lfailu if (nsteps == 1) L_residual_best(:) = L_residual(:) ! Use equipartition of spin kinetic energy to distribution spin angular momentum +#ifdef DOCONLOC + do concurrent(i = istart:fragments%nbody) shared(DLi_mag, fragments) +#else do concurrent(i = istart:fragments%nbody) +#endif dLi_mag(i) = ((fragments%mass(i) / fragments%mass(istart)) * & (fragments%radius(i) / fragments%radius(istart))**2 * & (fragments%Ip(3,i) / fragments%Ip(3,istart)))**(1.5_DP) @@ -816,7 +840,11 @@ module subroutine fraggle_generate_vel_vec(collider, nbody_system, param, lfailu L_residual(:) = (collider%L_total(:,2) - collider%L_total(:,1)) call collision_util_velocity_torque(-L_residual(:), collider%fragments%mtot, impactors%rbcom, impactors%vbcom) +#ifdef DOCONLOC + do concurrent(i = 1:nfrag) shared(collider, impactors) +#else do concurrent(i = 1:nfrag) +#endif collider%fragments%vb(:,i) = collider%fragments%vc(:,i) + impactors%vbcom(:) end do diff --git a/src/fraggle/fraggle_util.f90 b/src/fraggle/fraggle_util.f90 index 3b0c42d6a..351bd05fa 100644 --- a/src/fraggle/fraggle_util.f90 +++ b/src/fraggle/fraggle_util.f90 @@ -44,7 +44,11 @@ module subroutine fraggle_util_restructure(self, nbody_system, param, lfailure) new_fragments%Gmass(1) =sum(old_fragments%Gmass(1:2)) new_fragments%density(1) = new_fragments%mass(1) / volume new_fragments%radius(1) = (3._DP * volume / (4._DP * PI))**(THIRD) +#ifdef DOCONLOC + do concurrent(i = 1:NDIM) shared(old_fragments, new_fragments) +#else do concurrent(i = 1:NDIM) +#endif new_fragments%Ip(i,1) = sum(old_fragments%mass(1:2) * old_fragments%Ip(i,1:2)) end do new_fragments%Ip(:,1) = new_fragments%Ip(:,1) / new_fragments%mass(1) @@ -55,7 +59,11 @@ module subroutine fraggle_util_restructure(self, nbody_system, param, lfailure) new_fragments%Gmass(2:nnew) = old_fragments%Gmass(3:nold) new_fragments%density(2:nnew) = old_fragments%density(3:nold) new_fragments%radius(2:nnew) = old_fragments%radius(3:nold) +#ifdef DOCONLOC + do concurrent(i = 1:NDIM) shared(old_fragments,new_fragments) +#else do concurrent(i = 1:NDIM) +#endif new_fragments%Ip(i,2:nnew) = old_fragments%Ip(i,3:nold) end do new_fragments%origin_body(2:nnew) = old_fragments%origin_body(3:nold) @@ -87,10 +95,10 @@ module subroutine fraggle_util_set_mass_dist(self, param) class(collision_fraggle), intent(inout) :: self !! Fraggle collision system object class(swiftest_parameters), intent(in) :: param !! Current Swiftest run configuration parameters ! Internals - integer(I4B) :: i, j, k, jproj, jtarg, nfrag, istart, nfragmax, nrem + integer(I4B) :: i, j, jproj, jtarg, nfrag, istart, nfragmax real(DP), dimension(2) :: volume real(DP), dimension(NDIM) :: Ip_avg - real(DP) :: mremaining, mtot, mcumul, G, mass_noise, Mslr, x0, x1, ymid, y0, y1, y, yp, eps, Mrat + real(DP) :: mremaining, mtot, mcumul, G, mass_noise, Mslr, Mrat real(DP), dimension(:), allocatable :: mass real(DP) :: beta integer(I4B), parameter :: MASS_NOISE_FACTOR = 5 !! The number of digits of random noise that get added to the minimum mass value to prevent identical masses from being generated in a single run @@ -207,7 +215,11 @@ module subroutine fraggle_util_set_mass_dist(self, param) fragments%density(istart:nfrag) = fragments%mtot / sum(volume(:)) fragments%radius(istart:nfrag) = (3 * fragments%mass(istart:nfrag) / (4 * PI * fragments%density(istart:nfrag)))**(1.0_DP / 3.0_DP) +#ifdef DOCONLOC + do concurrent(i = istart:nfrag) shared(fragments,Ip_avg) +#else do concurrent(i = istart:nfrag) +#endif fragments%Ip(:, i) = Ip_avg(:) end do diff --git a/src/helio/helio_gr.f90 b/src/helio/helio_gr.f90 index def25224d..6de300cae 100644 --- a/src/helio/helio_gr.f90 +++ b/src/helio/helio_gr.f90 @@ -77,7 +77,11 @@ pure module subroutine helio_gr_p4_pl(self, nbody_system, param, dt) associate(pl => self) npl = self%nbody +#ifdef DOCONLOC + do concurrent(i = 1:npl, pl%lmask(i)) shared(param,pl,dt) +#else do concurrent(i = 1:npl, pl%lmask(i)) +#endif call swiftest_gr_p4_pos_kick(param, pl%rh(:, i), pl%vb(:, i), dt) end do end associate @@ -106,7 +110,11 @@ pure module subroutine helio_gr_p4_tp(self, nbody_system, param, dt) associate(tp => self) ntp = self%nbody +#ifdef DOCONLOC + do concurrent(i = 1:ntp, tp%lmask(i)) shared(param,tp,dt) +#else do concurrent(i = 1:ntp, tp%lmask(i)) +#endif call swiftest_gr_p4_pos_kick(param, tp%rh(:, i), tp%vb(:, i), dt) end do end associate diff --git a/src/helio/helio_kick.f90 b/src/helio/helio_kick.f90 index 89c93a7ed..755a385d9 100644 --- a/src/helio/helio_kick.f90 +++ b/src/helio/helio_kick.f90 @@ -117,7 +117,11 @@ module subroutine helio_kick_vb_pl(self, nbody_system, param, t, dt, lbeg) else call pl%set_beg_end(rend = pl%rh) end if +#ifdef DOCONLOC + do concurrent(i = 1:npl, pl%lmask(i)) shared(pl,dt) +#else do concurrent(i = 1:npl, pl%lmask(i)) +#endif pl%vb(1, i) = pl%vb(1, i) + pl%ah(1, i) * dt pl%vb(2, i) = pl%vb(2, i) + pl%ah(2, i) * dt pl%vb(3, i) = pl%vb(3, i) + pl%ah(3, i) * dt @@ -152,7 +156,11 @@ module subroutine helio_kick_vb_tp(self, nbody_system, param, t, dt, lbeg) ntp = self%nbody tp%ah(:, 1:ntp) = 0.0_DP call tp%accel(nbody_system, param, t, lbeg) +#ifdef DOCONLOC + do concurrent(i = 1:ntp, tp%lmask(i)) shared(tp,dt) +#else do concurrent(i = 1:ntp, tp%lmask(i)) +#endif tp%vb(:, i) = tp%vb(:, i) + tp%ah(:, i) * dt end do end associate diff --git a/src/operator/operator_cross.f90 b/src/operator/operator_cross.f90 index f784a1c98..ef80e1fb8 100644 --- a/src/operator/operator_cross.f90 +++ b/src/operator/operator_cross.f90 @@ -104,7 +104,11 @@ pure module function operator_cross_el_sp(A, B) result(C) n = size(A, 2) if (allocated(C)) deallocate(C) allocate(C, mold = A) +#ifdef DOCONLOC + do concurrent (i = 1:n) shared(A,B,C) +#else do concurrent (i = 1:n) +#endif C(:,i) = operator_cross_sp(A(:,i), B(:,i)) end do return @@ -118,7 +122,11 @@ pure module function operator_cross_el_dp(A, B) result(C) n = size(A, 2) if (allocated(C)) deallocate(C) allocate(C, mold = A) +#ifdef DOCONLOC + do concurrent (i = 1:n) shared(A,B,C) +#else do concurrent (i = 1:n) +#endif C(:,i) = operator_cross_dp(A(:,i), B(:,i)) end do return @@ -132,7 +140,11 @@ pure module function operator_cross_el_qp(A, B) result(C) n = size(A, 2) if (allocated(C)) deallocate(C) allocate(C, mold = A) +#ifdef DOCONLOC + do concurrent (i = 1:n) shared(A,B,C) +#else do concurrent (i = 1:n) +#endif C(:,i) = operator_cross_qp(A(:,i), B(:,i)) end do return @@ -146,7 +158,11 @@ pure module function operator_cross_el_i1b(A, B) result(C) n = size(A, 2) if (allocated(C)) deallocate(C) allocate(C, mold = A) +#ifdef DOCONLOC + do concurrent (i = 1:n) shared(A,B,C) +#else do concurrent (i = 1:n) +#endif C(:,i) = operator_cross_i1b(A(:,i), B(:,i)) end do return @@ -160,7 +176,11 @@ pure module function operator_cross_el_i2b(A, B) result(C) n = size(A, 2) if (allocated(C)) deallocate(C) allocate(C, mold = A) +#ifdef DOCONLOC + do concurrent (i = 1:n) shared(A,B,C) +#else do concurrent (i = 1:n) +#endif C(:,i) = operator_cross_i2b(A(:,i), B(:,i)) end do return @@ -174,7 +194,11 @@ pure module function operator_cross_el_i4b(A, B) result(C) n = size(A, 2) if (allocated(C)) deallocate(C) allocate(C, mold = A) +#ifdef DOCONLOC + do concurrent (i = 1:n) shared(A,B,C) +#else do concurrent (i = 1:n) +#endif C(:,i) = operator_cross_i4b(A(:,i), B(:,i)) end do return @@ -188,7 +212,11 @@ pure module function operator_cross_el_i8b(A, B) result(C) n = size(A, 2) if (allocated(C)) deallocate(C) allocate(C, mold = A) +#ifdef DOCONLOC + do concurrent (i = 1:n) shared(A,B,C) +#else do concurrent (i = 1:n) +#endif C(:,i) = operator_cross_i8b(A(:,i), B(:,i)) end do return diff --git a/src/operator/operator_mag.f90 b/src/operator/operator_mag.f90 index 8bf6bff5b..55f653fb9 100644 --- a/src/operator/operator_mag.f90 +++ b/src/operator/operator_mag.f90 @@ -44,7 +44,11 @@ pure module function operator_mag_el_sp(A) result(B) if (allocated(B)) deallocate(B) allocate(B(n)) call ieee_set_halting_mode(ieee_underflow, .false.) - do concurrent (i=1:n) +#ifdef DOCONLOC + do concurrent (i = 1:n) shared(A,B) +#else + do concurrent (i = 1:n) +#endif B(i) = norm2(A(:, i)) end do return @@ -59,7 +63,11 @@ pure module function operator_mag_el_dp(A) result(B) if (allocated(B)) deallocate(B) allocate(B(n)) call ieee_set_halting_mode(ieee_underflow, .false.) - do concurrent (i=1:n) +#ifdef DOCONLOC + do concurrent (i = 1:n) shared(A,B) +#else + do concurrent (i = 1:n) +#endif B(i) = norm2(A(:, i)) end do return @@ -74,7 +82,11 @@ pure module function operator_mag_el_qp(A) result(B) if (allocated(B)) deallocate(B) allocate(B(n)) call ieee_set_halting_mode(ieee_underflow, .false.) - do concurrent (i=1:n) +#ifdef DOCONLOC + do concurrent (i = 1:n) shared(A,B) +#else + do concurrent (i = 1:n) +#endif B(i) = norm2(A(:, i)) end do return diff --git a/src/operator/operator_unit.f90 b/src/operator/operator_unit.f90 index 39f3fce53..a25ee1bb1 100644 --- a/src/operator/operator_unit.f90 +++ b/src/operator/operator_unit.f90 @@ -89,7 +89,11 @@ pure module function operator_unit_el_sp(A) result(B) if (allocated(B)) deallocate(B) allocate(B(NDIM,n)) +#ifdef DOCONLOC + do concurrent (i=1:n) shared(A,B) +#else do concurrent (i=1:n) +#endif B(:,i) = operator_unit_sp(A(:,i)) end do @@ -109,7 +113,11 @@ pure module function operator_unit_el_dp(A) result(B) if (allocated(B)) deallocate(B) allocate(B(NDIM,n)) +#ifdef DOCONLOC + do concurrent (i=1:n) shared(A,B) +#else do concurrent (i=1:n) +#endif B(:,i) = operator_unit_dp(A(:,i)) end do @@ -128,7 +136,11 @@ pure module function operator_unit_el_qp(A) result(B) if (allocated(B)) deallocate(B) allocate(B(NDIM,n)) +#ifdef DOCONLOC + do concurrent (i=1:n) shared(A,B) +#else do concurrent (i=1:n) +#endif B(:,i) = operator_unit_qp(A(:,i)) end do diff --git a/src/rmvs/rmvs_kick.f90 b/src/rmvs/rmvs_kick.f90 index 8fb6c14ce..82d19463e 100644 --- a/src/rmvs/rmvs_kick.f90 +++ b/src/rmvs/rmvs_kick.f90 @@ -61,17 +61,29 @@ module subroutine rmvs_kick_getacch_tp(self, nbody_system, param, t, lbeg) ! Now compute any heliocentric values of acceleration if (tp%lfirst) then +#ifdef DOCONLOC + do concurrent(i = 1:ntp, tp%lmask(i)) shared(tp) +#else do concurrent(i = 1:ntp, tp%lmask(i)) +#endif tp%rheliocentric(:,i) = tp%rh(:,i) + cb%inner(inner_index - 1)%x(:,1) end do else +#ifdef DOCONLOC + do concurrent(i = 1:ntp, tp%lmask(i)) shared(tp) +#else do concurrent(i = 1:ntp, tp%lmask(i)) +#endif tp%rheliocentric(:,i) = tp%rh(:,i) + cb%inner(inner_index )%x(:,1) end do end if ! Swap the planetocentric and heliocentric position vectors and central body masses +#ifdef DOCONLOC + do concurrent(i = 1:ntp, tp%lmask(i)) shared(tp) +#else do concurrent(i = 1:ntp, tp%lmask(i)) +#endif tp%rh(:, i) = tp%rheliocentric(:, i) end do GMcb_original = cb%Gmass diff --git a/src/swiftest/swiftest_drift.f90 b/src/swiftest/swiftest_drift.f90 index 5c0427c62..82239e452 100644 --- a/src/swiftest/swiftest_drift.f90 +++ b/src/swiftest/swiftest_drift.f90 @@ -82,7 +82,11 @@ module subroutine swiftest_drift_all(mu, x, v, n, param, dt, lmask, iflag) allocate(dtp(n)) if (param%lgr) then +#ifdef DOCONLOC + do concurrent(i = 1:n, lmask(i)) shared(param,lmask,x,v,mu,dtp,dt) local(rmag,vmag2,energy) +#else do concurrent(i = 1:n, lmask(i)) +#endif rmag = norm2(x(:, i)) vmag2 = dot_product(v(:, i), v(:, i)) energy = 0.5_DP * vmag2 - mu(i) / rmag diff --git a/src/swiftest/swiftest_gr.f90 b/src/swiftest/swiftest_gr.f90 index 1c37d3da4..1985f6dd1 100644 --- a/src/swiftest/swiftest_gr.f90 +++ b/src/swiftest/swiftest_gr.f90 @@ -73,7 +73,11 @@ pure module subroutine swiftest_gr_kick_getacch(mu, x, lmask, n, inv_c2, agr) real(DP) :: beta, rjmag4 agr(:,:) = 0.0_DP +#ifdef DOCONLOC + do concurrent (i = 1:n, lmask(i)) shared(lmask,x,mu,agr,inv_c2) local(rjmag4,beta) +#else do concurrent (i = 1:n, lmask(i)) +#endif rjmag4 = (dot_product(x(:, i), x(:, i)))**2 beta = -mu(i)**2 * inv_c2 agr(:, i) = 2 * beta * x(:, i) / rjmag4 diff --git a/src/swiftest/swiftest_kick.f90 b/src/swiftest/swiftest_kick.f90 index 751702fa2..82cefd26c 100644 --- a/src/swiftest/swiftest_kick.f90 +++ b/src/swiftest/swiftest_kick.f90 @@ -21,7 +21,6 @@ module subroutine swiftest_kick_getacch_int_pl(self, param) class(swiftest_pl), intent(inout) :: self !! Swiftest massive body object class(swiftest_parameters), intent(inout) :: param !! Current swiftest run configuration parameters ! Internals - logical, save :: lfirst = .true. #ifdef PROFILE type(walltimer), save :: timer #endif @@ -136,7 +135,7 @@ module subroutine swiftest_kick_getacch_int_all_flat_norad_pl(npl, nplpl, k_plpl integer(I8B) :: k real(DP), dimension(NDIM,npl) :: ahi, ahj integer(I4B) :: i, j - real(DP) :: rji2, rlim2 + real(DP) :: rji2 real(DP) :: rx, ry, rz ahi(:,:) = 0.0_DP @@ -197,7 +196,11 @@ module subroutine swiftest_kick_getacch_int_all_tri_rad_pl(npl, nplm, r, Gmass, !$omp reduction(+:ahi) & !$omp reduction(-:ahj) do i = 1, nplm +#ifdef DOCONLOC + do concurrent(j = i+1:npl) shared(i,r,radius,ahi,ahj,Gmass) local(rx,ry,rz,rji2,rlim2) +#else do concurrent(j = i+1:npl) +#endif rx = r(1, j) - r(1, i) ry = r(2, j) - r(2, i) rz = r(3, j) - r(3, i) @@ -208,14 +211,22 @@ module subroutine swiftest_kick_getacch_int_all_tri_rad_pl(npl, nplm, r, Gmass, end do end do !$omp end parallel do +#ifdef DOCONLOC + do concurrent(i = 1:npl) shared(acc,ahi,ahj) +#else do concurrent(i = 1:npl) +#endif acc(:,i) = acc(:,i) + ahi(:,i) + ahj(:,i) end do else !$omp parallel do default(private) schedule(static)& !$omp shared(npl, nplm, r, Gmass, radius, acc) do i = 1, nplm +#ifdef DOCONLOC + do concurrent(j = 1:npl, i/=j) shared(i,r,radius,Gmass,acc) local(rx,ry,rz,rji2,rlim2,fac) +#else do concurrent(j = 1:npl, i/=j) +#endif rx = r(1,j) - r(1,i) ry = r(2,j) - r(2,i) rz = r(3,j) - r(3,i) @@ -235,7 +246,11 @@ module subroutine swiftest_kick_getacch_int_all_tri_rad_pl(npl, nplm, r, Gmass, !$omp parallel do default(private) schedule(static)& !$omp shared(npl, nplm, r, Gmass, radius, acc) do i = nplm+1,npl +#ifdef DOCONLOC + do concurrent(j = 1:nplm) shared(i,r,radius,Gmass,acc) local(rx,ry,rz,rji2,rlim2,fac) +#else do concurrent(j = 1:nplm) +#endif rx = r(1,j) - r(1,i) ry = r(2,j) - r(2,i) rz = r(3,j) - r(3,i) @@ -275,7 +290,7 @@ module subroutine swiftest_kick_getacch_int_all_tri_norad_pl(npl, nplm, r, Gmass real(DP), dimension(:,:), intent(inout) :: acc !! Acceleration vector array ! Internals integer(I4B) :: i, j, nplt - real(DP) :: rji2, rlim2, fac, rx, ry, rz + real(DP) :: rji2, fac, rx, ry, rz real(DP), dimension(NDIM,npl) :: ahi, ahj logical :: lmtiny @@ -290,7 +305,11 @@ module subroutine swiftest_kick_getacch_int_all_tri_norad_pl(npl, nplm, r, Gmass !$omp reduction(+:ahi) & !$omp reduction(-:ahj) do i = 1, nplm +#ifdef DOCONLOC + do concurrent(j = i+1:npl) shared(i,r,Gmass,ahi,ahj) local(rx,ry,rz,rji2) +#else do concurrent(j = i+1:npl) +#endif rx = r(1, j) - r(1, i) ry = r(2, j) - r(2, i) rz = r(3, j) - r(3, i) @@ -300,14 +319,22 @@ module subroutine swiftest_kick_getacch_int_all_tri_norad_pl(npl, nplm, r, Gmass end do end do !$omp end parallel do +#ifdef DOCONLOC + do concurrent(i = 1:npl) shared(acc,ahi,ahj) +#else do concurrent(i = 1:npl) +#endif acc(:,i) = acc(:,i) + ahi(:,i) + ahj(:,i) end do else !$omp parallel do default(private) schedule(static)& !$omp shared(npl, nplm, r, Gmass, acc) do i = 1, nplm +#ifdef DOCONLOC + do concurrent(j = 1:npl, j/=i) shared(i,r,Gmass, acc) local(rx,ry,rz,rji2,fac) +#else do concurrent(j = 1:npl, j/=i) +#endif rx = r(1,j) - r(1,i) ry = r(2,j) - r(2,i) rz = r(3,j) - r(3,i) @@ -324,7 +351,11 @@ module subroutine swiftest_kick_getacch_int_all_tri_norad_pl(npl, nplm, r, Gmass !$omp parallel do default(private) schedule(static)& !$omp shared(npl, nplm, r, Gmass, acc) do i = nplm+1,npl +#ifdef DOCONLOC + do concurrent(j = 1:nplm) shared(i,r,Gmass,acc) local(rx,ry,rz,rji2,fac) +#else do concurrent(j = 1:nplm) +#endif rx = r(1,j) - r(1,i) ry = r(2,j) - r(2,i) rz = r(3,j) - r(3,i) @@ -369,7 +400,11 @@ module subroutine swiftest_kick_getacch_int_all_tp(ntp, npl, rtp, rpl, GMpl, lma !$omp reduction(-:acc) do i = 1, ntp if (lmask(i)) then +#ifdef DOCONLOC + do concurrent (j = 1:npl) shared(rtp,rpl,GMpl,acc) local(rx,ry,rz,rji2) +#else do concurrent (j = 1:npl) +#endif rx = rtp(1, i) - rpl(1, j) ry = rtp(2, i) - rpl(2, j) rz = rtp(3, i) - rpl(3, j) diff --git a/src/swiftest/swiftest_obl.f90 b/src/swiftest/swiftest_obl.f90 index 85f88bc7d..21f4363ae 100644 --- a/src/swiftest/swiftest_obl.f90 +++ b/src/swiftest/swiftest_obl.f90 @@ -35,7 +35,11 @@ module subroutine swiftest_obl_acc(n, GMcb, j2rp2, j4rp4, rh, lmask, aobl, GMpl, if (n == 0) return aobl(:,:) = 0.0_DP +#ifdef DOCONLOC + do concurrent(i = 1:n, lmask(i)) shared(lmask,rh,aobl) local(r2,irh,rinv2,t0,t1,t2,t3,fac1,fac2) +#else do concurrent(i = 1:n, lmask(i)) +#endif r2 = dot_product(rh(:, i), rh(:, i)) irh = 1.0_DP / sqrt(r2) rinv2 = irh**2 @@ -81,7 +85,11 @@ module subroutine swiftest_obl_acc_pl(self, nbody_system) npl = self%nbody call swiftest_obl_acc(npl, cb%Gmass, cb%j2rp2, cb%j4rp4, pl%rh, pl%lmask, pl%aobl, pl%Gmass, cb%aobl) +#ifdef DOCONLOC + do concurrent(i = 1:npl, pl%lmask(i)) shared(cb,pl) +#else do concurrent(i = 1:npl, pl%lmask(i)) +#endif pl%ah(:, i) = pl%ah(:, i) + pl%aobl(:, i) - cb%aobl(:) end do end associate @@ -117,7 +125,11 @@ module subroutine swiftest_obl_acc_tp(self, nbody_system) aoblcb = cb%aoblend end if +#ifdef DOCONLOC + do concurrent(i = 1:ntp, tp%lmask(i)) shared(tp,aoblcb) +#else do concurrent(i = 1:ntp, tp%lmask(i)) +#endif tp%ah(:, i) = tp%ah(:, i) + tp%aobl(:, i) - aoblcb(:) end do @@ -148,7 +160,7 @@ module subroutine swiftest_obl_pot_system(self) npl = self%pl%nbody if (.not. any(pl%lmask(1:npl))) return #ifdef DOCONLOC - do concurrent (i = 1:npl, pl%lmask(i)) shared(oblpot_arr) + do concurrent (i = 1:npl, pl%lmask(i)) shared(cb,pl,oblpot_arr) #else do concurrent (i = 1:npl, pl%lmask(i)) #endif diff --git a/src/swiftest/swiftest_orbel.f90 b/src/swiftest/swiftest_orbel.f90 index a3ec6f424..dafb3e293 100644 --- a/src/swiftest/swiftest_orbel.f90 +++ b/src/swiftest/swiftest_orbel.f90 @@ -26,7 +26,11 @@ module subroutine swiftest_orbel_el2xv_vec(self, cb) if (self%nbody == 0) return call self%set_mu(cb) +#ifdef DOCONLOC + do concurrent (i = 1:self%nbody) shared(self) +#else do concurrent (i = 1:self%nbody) +#endif call swiftest_orbel_el2xv(self%mu(i), self%a(i), self%e(i), self%inc(i), self%capom(i), & self%omega(i), self%capm(i), self%rh(:, i), self%vh(:, i)) end do @@ -887,7 +891,11 @@ module subroutine swiftest_orbel_xv2el_vec(self, cb) if (allocated(self%capom)) deallocate(self%capom); allocate(self%capom(self%nbody)) if (allocated(self%omega)) deallocate(self%omega); allocate(self%omega(self%nbody)) if (allocated(self%capm)) deallocate(self%capm); allocate(self%capm(self%nbody)) +#ifdef DOCONLOC + do concurrent (i = 1:self%nbody) shared(self) local(varpi,lam,f,cape,capf) +#else do concurrent (i = 1:self%nbody) +#endif call swiftest_orbel_xv2el(self%mu(i), self%rh(1,i), self%rh(2,i), self%rh(3,i), & self%vh(1,i), self%vh(2,i), self%vh(3,i), & self%a(i), self%e(i), self%inc(i), & @@ -932,7 +940,7 @@ pure module subroutine swiftest_orbel_xv2el(mu, rx, ry, rz, vx, vy, vz, a, e, in real(DP), intent(out) :: capf !! hyperbolic anomaly (hyperbolic orbits) ! Internals integer(I4B) :: iorbit_type - real(DP) :: hx, hy, hz, r, v2, h2, h, rdotv, energy, fac, u, w, cw, sw, face, tmpf, sf, cf, rdot, h_over_r2 + real(DP) :: hx, hy, hz, r, v2, h2, h, rdotv, energy, fac, u, w, cw, sw, face, tmpf, sf, cf, rdot a = 0.0_DP e = 0.0_DP diff --git a/src/swiftest/swiftest_util.f90 b/src/swiftest/swiftest_util.f90 index b3e43c7e2..b063cb418 100644 --- a/src/swiftest/swiftest_util.f90 +++ b/src/swiftest/swiftest_util.f90 @@ -282,7 +282,11 @@ module subroutine swiftest_util_coord_h2b_tp(self, cb) if (self%nbody == 0) return associate(tp => self) ntp = self%nbody +#ifdef DOCONLOC + do concurrent (i = 1:ntp, tp%status(i) /= INACTIVE) shared(cb,tp) +#else do concurrent (i = 1:ntp, tp%status(i) /= INACTIVE) +#endif tp%rb(:, i) = tp%rh(:, i) + cb%rb(:) tp%vb(:, i) = tp%vh(:, i) + cb%vb(:) end do @@ -310,7 +314,11 @@ module subroutine swiftest_util_coord_b2h_pl(self, cb) associate(pl => self) npl = self%nbody +#ifdef DOCONLOC + do concurrent (i = 1:npl, pl%status(i) /= INACTIVE) shared(cb,pl) +#else do concurrent (i = 1:npl, pl%status(i) /= INACTIVE) +#endif pl%rh(:, i) = pl%rb(:, i) - cb%rb(:) pl%vh(:, i) = pl%vb(:, i) - cb%vb(:) end do @@ -338,7 +346,11 @@ module subroutine swiftest_util_coord_b2h_tp(self, cb) associate(tp => self) ntp = self%nbody +#ifdef DOCONLOC + do concurrent(i = 1:ntp, tp%status(i) /= INACTIVE) shared(cb,tp) +#else do concurrent(i = 1:ntp, tp%status(i) /= INACTIVE) +#endif tp%rh(:, i) = tp%rb(:, i) - cb%rb(:) tp%vh(:, i) = tp%vb(:, i) - cb%vb(:) end do @@ -370,7 +382,11 @@ module subroutine swiftest_util_coord_vb2vh_pl(self, cb) do i = npl, 1, -1 if (pl%status(i) /= INACTIVE) cb%vb(:) = cb%vb(:) - pl%Gmass(i) * pl%vb(:, i) / cb%Gmass end do +#ifdef DOCONLOC + do concurrent(i = 1:npl) shared(cb,pl) +#else do concurrent(i = 1:npl) +#endif pl%vh(:, i) = pl%vb(:, i) - cb%vb(:) end do end associate @@ -430,7 +446,11 @@ module subroutine swiftest_util_coord_vh2vb_pl(self, cb) cb%vb(:) = cb%vb(:) - pl%Gmass(i) * pl%vh(:, i) end do cb%vb(:) = cb%vb(:) / Gmtot +#ifdef DOCONLOC + do concurrent(i = 1:npl) shared(cb,pl) +#else do concurrent(i = 1:npl) +#endif pl%vb(:, i) = pl%vh(:, i) + cb%vb(:) end do end associate @@ -518,7 +538,11 @@ module subroutine swiftest_util_coord_rh2rb_tp(self, cb) if (self%nbody == 0) return associate(tp => self) ntp = self%nbody +#ifdef DOCONLOC + do concurrent (i = 1:ntp, tp%status(i) /= INACTIVE) shared(cb,tp) +#else do concurrent (i = 1:ntp, tp%status(i) /= INACTIVE) +#endif tp%rb(:, i) = tp%rh(:, i) + cb%rb(:) end do end associate @@ -1089,7 +1113,11 @@ module subroutine swiftest_util_flatten_eucl_plpl(self, param) if (err /=0) then ! An error occurred trying to allocate this big array. This probably means it's too big to fit in memory, and so we will force the run back into triangular mode param%lflatten_interactions = .false. else +#ifdef DOCONLOC + do concurrent (i=1:npl, j=1:npl, j>i) shared(self) local(k) +#else do concurrent (i=1:npl, j=1:npl, j>i) +#endif call swiftest_util_flatten_eucl_ij_to_k(self%nbody, i, j, k) self%k_plpl(1, k) = i self%k_plpl(2, k) = j @@ -1179,7 +1207,7 @@ module subroutine swiftest_util_get_energy_and_momentum_system(self, param) Lcborbit(:) = cb%mass * (cb%rb(:) .cross. cb%vb(:)) #ifdef DOCONLOC - do concurrent (i = 1:npl, pl%lmask(i)) local(h) shared(Lplorbit, kepl) + do concurrent (i = 1:npl, pl%lmask(i)) shared(pl,Lplorbit,kepl) local(h) #else do concurrent (i = 1:npl, pl%lmask(i)) #endif @@ -1199,7 +1227,7 @@ module subroutine swiftest_util_get_energy_and_momentum_system(self, param) Lcbspin(:) = cb%Ip(3) * cb%mass * cb%radius**2 * cb%rot(:) #ifdef DOCONLOC - do concurrent (i = 1:npl, pl%lmask(i)) shared(Lplspin, kespinpl) + do concurrent (i = 1:npl, pl%lmask(i)) shared(pl,Lplspin,kespinpl) #else do concurrent (i = 1:npl, pl%lmask(i)) #endif @@ -1213,7 +1241,11 @@ module subroutine swiftest_util_get_energy_and_momentum_system(self, param) nbody_system%ke_spin = 0.5_DP * (kespincb + sum(kespinpl(1:npl), pl%lmask(1:npl))) +#ifdef DOCONLOC + do concurrent (j = 1:NDIM) shared(nbody_system,pl,Lplspin,Lcbspin) +#else do concurrent (j = 1:NDIM) +#endif nbody_system%L_spin(j) = Lcbspin(j) + sum(Lplspin(j,1:npl), pl%lmask(1:npl)) end do else @@ -1234,8 +1266,11 @@ module subroutine swiftest_util_get_energy_and_momentum_system(self, param) end if nbody_system%ke_orbit = 0.5_DP * (kecb + sum(kepl(1:npl), pl%lmask(1:npl))) - +#ifdef DOCONLOC + do concurrent (j = 1:NDIM) shared(nbody_system,pl,Lcborbit,Lplorbit) +#else do concurrent (j = 1:NDIM) +#endif nbody_system%L_orbit(j) = Lcborbit(j) + sum(Lplorbit(j,1:npl), pl%lmask(1:npl)) end do @@ -1280,7 +1315,7 @@ module subroutine swiftest_util_get_potential_energy_flat(npl, nplpl, k_plpl, lm end where #ifdef DOCONLOC - do concurrent(i = 1:npl, lmask(i)) shared(pecb) + do concurrent(i = 1:npl, lmask(i)) shared(lmask,pecb,GMcb,mass,rb) #else do concurrent(i = 1:npl, lmask(i)) #endif @@ -1331,7 +1366,7 @@ module subroutine swiftest_util_get_potential_energy_triangular(npl, lmask, GMcb end where #ifdef DOCONLOC - do concurrent(i = 1:npl, lmask(i)) shared(pecb) + do concurrent(i = 1:npl, lmask(i)) shared(lmask, pecb, GMcb, mass, rb, lmask) #else do concurrent(i = 1:npl, lmask(i)) #endif @@ -1346,7 +1381,7 @@ module subroutine swiftest_util_get_potential_energy_triangular(npl, lmask, GMcb do i = 1, npl if (lmask(i)) then #ifdef DOCONLOC - do concurrent(j = i+1:npl, lmask(i) .and. lmask(j)) shared(pepl) + do concurrent(j = i+1:npl, lmask(i) .and. lmask(j)) shared(lmask, pepl, rb, mass, Gmass, lmask) #else do concurrent(j = i+1:npl, lmask(i) .and. lmask(j)) #endif diff --git a/src/symba/symba_encounter_check.f90 b/src/symba/symba_encounter_check.f90 index 895a9f34d..4bc92b79d 100644 --- a/src/symba/symba_encounter_check.f90 +++ b/src/symba/symba_encounter_check.f90 @@ -119,7 +119,11 @@ module function symba_encounter_check_list_plpl(self, param, nbody_system, dt, i eidx(:) = pack([(k, k = 1, self%nenc)], lencmask(:)) lencounter(:) = .false. +#ifdef DOCONLOC + do concurrent(lidx = 1:nenc_enc) shared(self,pl,eidx,lencounter,dt) local(i,j,k,xr,vr,rcrit12,rlim2,rji2) +#else do concurrent(lidx = 1:nenc_enc) +#endif k = eidx(lidx) i = self%index1(k) j = self%index2(k) @@ -188,8 +192,11 @@ module function symba_encounter_check_list_pltp(self, param, nbody_system, dt, i allocate(lencounter(nenc_enc)) eidx(:) = pack([(k, k = 1, self%nenc)], lencmask(:)) lencounter(:) = .false. - +#ifdef DOCONLOC + do concurrent(lidx = 1:nenc_enc) shared(self,pl,tp,eidx,lencounter,dt) local(i,j,k,xr,vr,rlim2,rji2) +#else do concurrent(lidx = 1:nenc_enc) +#endif k = eidx(lidx) i = self%index1(k) j = self%index2(k) diff --git a/src/symba/symba_kick.f90 b/src/symba/symba_kick.f90 index 0c75e1054..dc280ef1c 100644 --- a/src/symba/symba_kick.f90 +++ b/src/symba/symba_kick.f90 @@ -172,7 +172,11 @@ module subroutine symba_kick_list_plpl(self, nbody_system, dt, irec, sgn) allocate(good_idx(ngood)) good_idx(:) = pack([(i, i = 1, nenc)], lgoodlevel(:)) +#ifdef DOCONLOC + do concurrent (k = 1:ngood) shared(self,pl,good_idx) local(i,j) +#else do concurrent (k = 1:ngood) +#endif i = self%index1(good_idx(k)) j = self%index2(good_idx(k)) pl%ah(:,i) = 0.0_DP @@ -282,8 +286,11 @@ module subroutine symba_kick_list_pltp(self, nbody_system, dt, irec, sgn) allocate(good_idx(ngood)) good_idx(:) = pack([(i, i = 1, nenc)], lgoodlevel(:)) - +#ifdef DOCONLOC + do concurrent (k = 1_I8B:ngood) shared(self,tp,good_idx) local(j) +#else do concurrent (k = 1_I8B:ngood) +#endif j = self%index2(good_idx(k)) tp%ah(:,j) = 0.0_DP end do diff --git a/src/whm/whm_gr.f90 b/src/whm/whm_gr.f90 index 1b1716800..c46a5ce2d 100644 --- a/src/whm/whm_gr.f90 +++ b/src/whm/whm_gr.f90 @@ -86,7 +86,11 @@ pure module subroutine whm_gr_p4_pl(self, nbody_system, param, dt) associate(pl => self) npl = self%nbody +#ifdef DOCONLOC + do concurrent(i = 1:npl, pl%lmask(i)) shared(pl,dt) +#else do concurrent(i = 1:npl, pl%lmask(i)) +#endif call swiftest_gr_p4_pos_kick(param, pl%xj(:, i), pl%vj(:, i), dt) end do end associate @@ -114,7 +118,11 @@ pure module subroutine whm_gr_p4_tp(self, nbody_system, param, dt) associate(tp => self) ntp = self%nbody if (ntp == 0) return +#ifdef DOCONLOC + do concurrent(i = 1:ntp, tp%lmask(i)) shared(tp,dt) +#else do concurrent(i = 1:ntp, tp%lmask(i)) +#endif call swiftest_gr_p4_pos_kick(param, tp%rh(:, i), tp%vh(:, i), dt) end do end associate diff --git a/src/whm/whm_kick.f90 b/src/whm/whm_kick.f90 index a1d4b0dc6..bcddf0de7 100644 --- a/src/whm/whm_kick.f90 +++ b/src/whm/whm_kick.f90 @@ -93,13 +93,21 @@ module subroutine whm_kick_getacch_tp(self, nbody_system, param, t, lbeg) if (lbeg) then ah0(:) = whm_kick_getacch_ah0(pl%Gmass(1:npl), pl%rbeg(:, 1:npl), npl) +#ifdef DOCONLOC + do concurrent(i = 1:ntp, tp%lmask(i)) shared(tp,ah0) +#else do concurrent(i = 1:ntp, tp%lmask(i)) +#endif tp%ah(:, i) = tp%ah(:, i) + ah0(:) end do call tp%accel_int(param, pl%Gmass(1:npl), pl%rbeg(:, 1:npl), npl) else ah0(:) = whm_kick_getacch_ah0(pl%Gmass(1:npl), pl%rend(:, 1:npl), npl) +#ifdef DOCONLOC + do concurrent(i = 1:ntp, tp%lmask(i)) shared(tp,ah0) +#else do concurrent(i = 1:ntp, tp%lmask(i)) +#endif tp%ah(:, i) = tp%ah(:, i) + ah0(:) end do call tp%accel_int(param, pl%Gmass(1:npl), pl%rend(:, 1:npl), npl) @@ -157,7 +165,11 @@ pure subroutine whm_kick_getacch_ah1(cb, pl) real(DP), dimension(NDIM) :: ah1h, ah1j npl = pl%nbody +#ifdef DOCONLOC + do concurrent (i = 2:npl, pl%lmask(i)) shared(pl,cb) local(ah1j,ah1h) +#else do concurrent (i = 2:npl, pl%lmask(i)) +#endif ah1j(:) = pl%xj(:, i) * pl%ir3j(i) ah1h(:) = pl%rh(:, i) * pl%ir3h(i) pl%ah(:, i) = pl%ah(:, i) + cb%Gmass * (ah1j(:) - ah1h(:)) @@ -187,10 +199,14 @@ pure subroutine whm_kick_getacch_ah2(cb, pl) ah2(:) = 0.0_DP ah2o(:) = 0.0_DP etaj = cb%Gmass +#ifdef DOCONLOC + do concurrent(i = 2:npl, pl%lmask(i)) shared(pl,cb,ah2,ah2o) local(etaj,fac) +#else do concurrent(i = 2:npl, pl%lmask(i)) +#endif etaj = etaj + pl%Gmass(i - 1) fac = pl%Gmass(i) * cb%Gmass * pl%ir3j(i) / etaj - ah2(:) = ah2o + fac * pl%xj(:, i) + ah2(:) = ah2o(:) + fac * pl%xj(:, i) pl%ah(:,i) = pl%ah(:, i) + ah2(:) ah2o(:) = ah2(:) end do @@ -233,7 +249,11 @@ module subroutine whm_kick_vh_pl(self, nbody_system, param, t, dt, lbeg) call pl%accel(nbody_system, param, t, lbeg) call pl%set_beg_end(rend = pl%rh) end if +#ifdef DOCONLOC + do concurrent(i = 1:npl, pl%lmask(i)) shared(pl,dt) +#else do concurrent(i = 1:npl, pl%lmask(i)) +#endif pl%vh(:, i) = pl%vh(:, i) + pl%ah(:, i) * dt end do end associate @@ -265,19 +285,31 @@ module subroutine whm_kick_vh_tp(self, nbody_system, param, t, dt, lbeg) associate(tp => self) ntp = self%nbody if (tp%lfirst) then +#ifdef DOCONLOC + do concurrent(i = 1:ntp, tp%lmask(i)) shared(tp) +#else do concurrent(i = 1:ntp, tp%lmask(i)) +#endif tp%ah(:, i) = 0.0_DP end do call tp%accel(nbody_system, param, t, lbeg=.true.) tp%lfirst = .false. end if if (.not.lbeg) then +#ifdef DOCONLOC + do concurrent(i = 1:ntp, tp%lmask(i)) shared(tp) +#else do concurrent(i = 1:ntp, tp%lmask(i)) +#endif tp%ah(:, i) = 0.0_DP end do call tp%accel(nbody_system, param, t, lbeg) end if +#ifdef DOCONLOC + do concurrent(i = 1:ntp, tp%lmask(i)) shared(tp) +#else do concurrent(i = 1:ntp, tp%lmask(i)) +#endif tp%vh(:, i) = tp%vh(:, i) + tp%ah(:, i) * dt end do end associate