diff --git a/.gitignore b/.gitignore index 674a2f7f4..5056a889a 100644 --- a/.gitignore +++ b/.gitignore @@ -30,6 +30,13 @@ dump* !docs/*/*/* !README_figs/* +#Docker and Singularity files +!docker/ +!singularity/ +!Dockerfile +!environment.yml + bin/ build/* -disruption_headon/swiftest_driver.sh + + diff --git a/CMakeLists.txt b/CMakeLists.txt index 1a9ed99c0..4d36d9612 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -30,14 +30,10 @@ ENDIF(NOT CMAKE_Fortran_COMPILER_SUPPORTS_F90) # Set some options the user may choose OPTION(USE_COARRAY "Use Coarray Fortran for parallelization of test particles" OFF) OPTION(USE_OPENMP "Use OpenMP for parallelization" ON) +OPTION(USE_SIMD "Use SIMD vectorization" ON) +OPTION(CONTAINERIZE "Compiling for use in a Docker/Singularity container" OFF) +OPTION(BUILD_SHARED_LIBS "Build using shared libraries" ON) -IF (USE_COARRAY) - ADD_DEFINITIONS(-DCOARRAY) -ENDIF() - -# Locate and set parallelization libraries. There are some CMake peculiarities -# taken care of here, such as the fact that the FindOpenMP routine doesn't know -# about Fortran. INCLUDE(${CMAKE_MODULE_PATH}/SetParallelizationLibrary.cmake) INCLUDE(${CMAKE_MODULE_PATH}/SetUpNetCDF.cmake) INCLUDE(${CMAKE_MODULE_PATH}/SetMKL.cmake) @@ -48,7 +44,6 @@ INCLUDE(${CMAKE_MODULE_PATH}/SetFortranFlags.cmake) INCLUDE_DIRECTORIES($ENV{NETCDF_FORTRAN_HOME}/include;$ENV{NETCDF_HOME}/include) - # There is an error in CMAKE with this flag for pgf90. Unset it GET_FILENAME_COMPONENT(FCNAME ${CMAKE_Fortran_COMPILER} NAME) IF(FCNAME STREQUAL "pgf90") @@ -78,3 +73,17 @@ ADD_SUBDIRECTORY(${SRC} ${BIN}) ADD_CUSTOM_TARGET(distclean COMMAND ${CMAKE_COMMAND} -P ${CMAKE_SOURCE_DIR}/distclean.cmake ) + +# Add an automatic test +ADD_CUSTOM_TARGET(Run_Test ALL + COMMAND echo "***Running Python script examples/Basic_Simulation/initial_conditions.py***" + COMMAND python ${CMAKE_SOURCE_DIR}/examples/Basic_Simulation/initial_conditions.py + COMMAND echo "***Saving data to directory examples/Basic_Simulation/simdata***" + COMMAND echo "***Running Python script examples/Basic_Simulation/output_reader.py***" + COMMAND python ${CMAKE_SOURCE_DIR}/examples/Basic_Simulation/output_reader.py + COMMAND echo "***Plotting results as examples/Basic_Simulation/output.eps***" + COMMAND echo "***Calculating errors with Python script examples/Basic_Simulation/errors.py***" + COMMAND python ${CMAKE_SOURCE_DIR}/examples/Basic_Simulation/errors.py + COMMAND echo "***Test Complete***" +) + diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 000000000..ce3e615c3 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,158 @@ +# Copyright 2023 - David Minton +# This file is part of Swiftest. +# Swiftest is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. +# Swiftest is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty +# of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# You should have received a copy of the GNU General Public License along with Swiftest. +# If not, see: https://www.gnu.org/licenses. +# +# This Dockerfile will build the Swiftest driver program with minimal external dependencies using the Intel Oneapi toolkit. +# This is done by building static versions of a minimal set of libraries that NetCDF-Fortran needs (Netcdf-C, HDF5, and Zlib). +# These, along with the Intel runtime libraries, are linked statically to the executable. Only the OS-specific libraries are linked +# dynamically. + +# This build target compiles all dependencies and the swiftest driver itself +FROM intel/oneapi-hpckit:2023.1.0-devel-ubuntu20.04 as build_deps + +ENV INSTALL_DIR="/usr/local" +ENV CC="${ONEAPI_ROOT}/compiler/latest/linux/bin/icx" +ENV FC="${ONEAPI_ROOT}/compiler/latest/linux/bin/ifx" +ENV CXX="${ONEAPI_ROOT}/compiler/latest/linux/bin/icpx" +ENV F77="${FC}" + +# Get the HDF5, NetCDF-C, and NetCDF-Fortran libraries +RUN wget -qO- https://support.hdfgroup.org/ftp/HDF5/releases/hdf5-1.14/hdf5-1.14.1/src/hdf5-1.14.1-2.tar.gz | tar xvz && \ + wget -qO- https://github.com/Unidata/netcdf-c/archive/refs/tags/v4.9.2.tar.gz | tar xvz && \ + wget -qO- https://github.com/Unidata/netcdf-fortran/archive/refs/tags/v4.6.1.tar.gz | tar xvz && \ + wget -qO- https://www.zlib.net/zlib-1.2.13.tar.gz | tar xvz + +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ + m4 && \ + rm -rf /var/lib/apt/lists/* + +RUN cd zlib-1.2.13 && \ + ./configure --prefix=${INSTALL_DIR} --static && \ + make && \ + make install + +RUN cd hdf5-1.14.1-2 && \ + ./configure --disable-shared \ + --enable-build-mode=production \ + --disable-fortran \ + --disable-java \ + --disable-cxx \ + --prefix=${INSTALL_DIR} \ + --with-zlib=${INSTALL_DIR} && \ + make && \ + make install + +RUN cd netcdf-c-4.9.2 && \ + ./configure --disable-shared \ + --disable-dap \ + --disable-libxml2 \ + --disable-byterange \ + --prefix=${INSTALL_DIR} && \ + make && \ + make install + +ENV NCDIR="${INSTALL_DIR}" +ENV NFDIR="${INSTALL_DIR}" +ENV HDF5_ROOT="${INSTALL_DIR}" +ENV HDF5_LIBDIR="${HDF5_ROOT}/lib" +ENV HDF5_INCLUDE_DIR="${HDF5_ROOT}/include" +ENV HDF5_PLUGIN_PATH="${HDF5_LIBDIR}/plugin" + +# NetCDF-Fortran library +ENV CFLAGS="-fPIC" +ENV FCFLAGS="${CFLAGS} -standard-semantics" +ENV FFLAGS=${CFLAGS} +ENV CPPFLAGS="-I${INSTALL_DIR}/include" +ENV LIBS="-L/usr/local/lib -L/usr/lib/x86_64-linux-gnu -lnetcdf -lhdf5_hl -lhdf5 -lm -lz" +RUN cd netcdf-fortran-4.6.1 && \ + ./configure --disable-shared --prefix=${NFDIR} && \ + make && \ + make install + +FROM intel/oneapi-hpckit:2023.1.0-devel-ubuntu20.04 as build_driver +COPY --from=build_deps /usr/local/. /usr/local/ +ENV INSTALL_DIR="/usr/local" +ENV CC="${ONEAPI_ROOT}/compiler/latest/linux/bin/icx" +ENV FC="${ONEAPI_ROOT}/compiler/latest/linux/bin/ifx" +ENV CXX="${ONEAPI_ROOT}/compiler/latest/linux/bin/icpx" +ENV F77="${FC}" + +# The MACHINE_CODE_VALUE argument is a string that is used when compiling the swiftest_driver. It is appended to the "-x" compiler +# option: (-x${MACHINE_CODE_VALUE}). The default value is set to "sse2" which allows for certain SIMD instructions to be used while +# remaining # compatible with a wide range of CPUs. To get the highest performance, you can pass "host" as an argument, but the +# compiled binary # would only run on a CPU with an architecture compatible with the one that the build was performed on. +# For more details and other options, see: +# https://www.intel.com/content/www/us/en/docs/fortran-compiler/developer-guide-reference/2023-1/x-qx.html +ARG MACHINE_CODE_VALUE="sse2" + +# Build type options are DEBUG, RELEASE, PROFILE, or TESTING. +ARG BUILD_TYPE="RELEASE" + +# Swiftest +ENV NETCDF_HOME=${INSTALL_DIR} +ENV NETCDF_FORTRAN_HOME=${NETCDF_HOME} +ENV NETCDF_LIBRARY=${NETCDF_HOME} +ENV FOR_COARRAY_NUM_IMAGES=1 +ENV OMP_NUM_THREADS=1 +ENV FC="${ONEAPI_ROOT}/mpi/latest/bin/mpiifort" +ENV FFLAGS="-fPIC -standard-semantics" +ENV LDFLAGS="-L/usr/local/lib" +ENV LIBS="-lhdf5_hl -lhdf5 -lz" +COPY ./cmake/ /swiftest/cmake/ +COPY ./src/ /swiftest/src/ +COPY ./CMakeLists.txt /swiftest/ +RUN cd swiftest && \ + cmake -S . -B build -DCMAKE_INSTALL_PREFIX="${INSTALL_DIR}" \ + -DMACHINE_CODE_VALUE=${MACHINE_CODE} \ + -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ + -DUSE_COARRAY=OFF \ + -DBUILD_SHARED_LIBS=OFF && \ + cmake --build build && \ + cmake --install build + +# This build target creates a container that executes just the driver program +FROM ubuntu:20.04 as driver +COPY --from=build_driver /usr/local/bin/swiftest_driver /usr/local/bin/ +ENTRYPOINT ["/usr/local/bin/swiftest_driver"] + +# This build target exports the binary to the host +FROM scratch AS export_driver +COPY --from=build_driver /usr/local/bin/swiftest_driver / + +# This build target creates a container with a conda environment with all dependencies needed to run the Python front end and +# analysis tools +FROM continuumio/miniconda3 as python +SHELL ["/bin/bash", "--login", "-c"] +ENV SHELL="/bin/bash" +ENV PATH="/opt/conda/bin:${PATH}" +ENV LD_LIBRARY_PATH="/usr/local/lib" + +RUN conda update --all -y && \ + conda install conda-libmamba-solver -y && \ + conda config --set solver libmamba + +COPY environment.yml . +RUN conda env create -f environment.yml && \ + conda init bash && \ + echo "conda activate swiftest-env" >> ~/.bashrc + +COPY ./python/. /opt/conda/pkgs/ +COPY --from=build_driver /usr/local/bin/swiftest_driver /opt/conda/bin/swiftest_driver + +# Start new shell to activate the environment and install Swiftest +RUN cd /opt/conda/pkgs/swiftest && conda develop . && \ + conda clean --all -y && \ + mkdir -p /.astropy && \ + chmod -R 777 /.astropy && \ + mkdir -p /.cache/matplotlib && \ + mkdir -p /.config/matplotlib && \ + chmod -R 777 /.cache/matplotlib && \ + chmod -R 777 /.config/matplotlib + +ENTRYPOINT ["conda", "run", "--no-capture-output", "-n", "swiftest-env"] \ No newline at end of file diff --git a/README.md b/README.md index 033d86834..f45a812c3 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,9 @@ Swiftest also includes the collisional fragmentation algorithm **Fraggle**, an a #### Installation -**System Requirements** +In order to use Swiftest, you need to have a working `swiftest_driver` executable. Currently, this can be obtained by either compiling the source code on the system you plan to run simulations on (fastest), or by running it from a Docker/Singularity container compiled for an x86_64 CPU using the Intel Fortran compiler (slower) or compiled using the GNU/gfortran compiler (slowest). + +**Building the `swiftest_driver` executable** Swiftest is designed to be downloaded, compiled, and run on a Linux based system. It is untested on Windows systems. @@ -44,7 +46,7 @@ Parallelization in Swiftest is done with OpenMP. Version 3.1.4 or higher is nece The easiest way to get Swiftest on your machine is to clone the GitHub repository. To do so, open a terminal window and type the following: ``` -$ git clone https://github.itap.purdue.edu/MintonGroup/swiftest.git +$ git clone https://github.com/carlislewishard/swiftest.git ``` If your cloned version is not already set to the master branch: @@ -83,7 +85,7 @@ CMake allows the user to specify a set of compiler flags to use during compilati As a general rule, the release flags are fully optimized and best used when running Swiftest with the goal of generating results. This is the default set of flags. When making changes to the Swiftest source code, it is best to compile Swiftest using the debug set of flags. You may also define your own set of compiler flags. -To build Swiftest with the release flags (default), type the following: +To build Swiftest with the release flags (default) using the Intel fortran compiler (ifort), type the following: ``` $ cmake .. ``` @@ -95,14 +97,45 @@ To build with another set of flags, simply replace ```DEBUG``` in the above line Add ```-CMAKE_PREFIX_PATH=/path/to/netcdf/``` to these commands as needed. -After building Swiftest, make the executable using: +If using the GCC fortran compiler (gfortran), add the following flags: +``` +-DCMAKE_Fortran_FLAGS="-I/usr/lib64/gfortran/modules/ -ffree-line-length-512" +``` +You can manually specify the compiler you wish to use with the following flag: +``` +c-DCMAKE_Fortran_COMPILER=$(which ifort) +``` +After building Swiftest, make the executable using: ``` $ make ``` The Swiftest executable, called ```swiftest_driver```, should now be created in the ```/swiftest/bin/``` directory. +**Download the `swiftest_driver` as a Docker or Singularity container.** + +The Swiftest driver is available as a Docker container on DockerHub in two versions: Intel and GNU. The Intel version was compiled for the x86_64 CPU using the Intel classic Fortran compiler. The GNU version was compliled for the x86_64 CPU using gfortran. The Intel version is faster than the GNU version (though not as fast as a native compile to the target CPU that you wish to run it on due to vectorization optimizations that Swiftest takes advantage of), however it is much larger: The Intel version is ~2.7GB while the GNU version is ~300MB. The Singularity container pulls from the same DockerHub container. + +To facilitate installation of the container, we provide a set of shell scripts to help automate the process of installing container versions of the executable. To install the default Intel version of the docker container from within the `swiftest\` project directory + +``` +$ cd docker +$ . ./install.sh +``` + +To install the GNU version: + +``` +$ cd docker +$ . ./install.sh gnu +``` + +The Singularity versions are installed the same way, just replace `cd docker` with `cd singularity` above. + +Whether installing either the Docker or Singularity containers, the install script will copy an executable shell script `swiftest_driver` into `swiftest/bin/`. Not that when installing the Singularity container, the install script will set an environment variable called `SWIFTEST_SIF` that must point to the aboslute path of the container file called `swiftest_driver.sif`. To use the driver script in a future shell, rather than running the install script again, we suggest adding the environment variable definition to your shell startup script (e.g. add `export SWIFTEST_SIF="/path/to/swiftest/singularity/swiftest.sif"` to your `.zshrc`) + + **Swiftest Python Package** Included with Swiftest, in the ```/swiftest/python/swiftest/``` directory, is a Python package designed to facilitate seamless data processing and analysis. The Python package, also called Swiftest, can be used to generate input files, run Swiftest simulations, and process output files in the NetCDF file format. diff --git a/README_tables/add_body_kwargs.md b/README_tables/add_body_kwargs.md index 778bda864..4e341a83d 100644 --- a/README_tables/add_body_kwargs.md +++ b/README_tables/add_body_kwargs.md @@ -17,5 +17,5 @@ | ```rhill``` | Hill Radius value(s) of bodies. Only for massive bodies. | float or array-like of floats | ```rot``` | Rotation rate vector(s) of bodies in radians/sec. Only for massive bodies. Only used if ```rotation``` is set to ```True```. | (n,3) array-like of floats | ```Ip``` | Principal axes moments of inertia vector(s) of bodies. Only for massive bodies. Only used if ```rotation``` is set to ```True```. | (n,3) array-like of floats -| ```J2``` | The J2 term of the central body. | float or array-like of floats -| ```J4``` | The J4 term of the central body. | float or array-like of floats \ No newline at end of file +| ```J2``` | The unitless value of the spherical harmonic term equal to J2*R^2 where R is the radius of the central body. | float or array-like of floats +| ```J4``` | The unitless value of the spherical harmonic term equal to J4*R^4 where R is the radius of the central body. | float or array-like of floats diff --git a/apptainer/.gitignore b/apptainer/.gitignore new file mode 100644 index 000000000..baed94e88 --- /dev/null +++ b/apptainer/.gitignore @@ -0,0 +1,4 @@ +!bin/ +!bin/swiftest_driver +!bin/swiftest +!install.sh diff --git a/apptainer/bin/swiftest b/apptainer/bin/swiftest new file mode 100755 index 000000000..5d91b80d6 --- /dev/null +++ b/apptainer/bin/swiftest @@ -0,0 +1,4 @@ +#!/bin/sh -- +OMP_NUM_THREADS=${OMP_NUM_THREADS:-`nproc --all`} +FOR_COARRAY_NUM_IMAGES=${FOR_COARRAY_NUM_IMAGES:-1} +apptainer run --bind $(pwd):$(pwd) --cleanenv --env OMP_NUM_THREADS=${OMP_NUM_THREADS},FOR_COARRAY_NUM_IMAGES=${FOR_COARRAY_NUM_IMAGES} ${SWIFTEST_SIF} "$@" \ No newline at end of file diff --git a/apptainer/bin/swiftest_driver b/apptainer/bin/swiftest_driver new file mode 100755 index 000000000..e88be2805 --- /dev/null +++ b/apptainer/bin/swiftest_driver @@ -0,0 +1,4 @@ +#!/bin/sh -- +OMP_NUM_THREADS=${OMP_NUM_THREADS:-`nproc --all`} +FOR_COARRAY_NUM_IMAGES=${FOR_COARRAY_NUM_IMAGES:-1} +apptainer exec --bind $(pwd):$(pwd) --env OMP_NUM_THREADS=${OMP_NUM_THREADS},FOR_COARRAY_NUM_IMAGES=${FOR_COARRAY_NUM_IMAGES} ${SWIFTEST_SIF} swiftest_driver "$@" \ No newline at end of file diff --git a/apptainer/install.sh b/apptainer/install.sh new file mode 100755 index 000000000..5d84ca34a --- /dev/null +++ b/apptainer/install.sh @@ -0,0 +1,21 @@ +#!/bin/sh -- +# This will install the Apptainer version of the swiftest_driver in place of the native compiled version into ../bin as +# well as the swiftest_python script that is used to execute a Python input file. +# The swiftest.sif file will be copied to the SIF_DIR directory. The default location is ${HOME}/.apptainer. +# To change this, just set environment variable SIF_DIR prior to running this script. +# +# The script takes an optional argument "tag" if you want to pull a container other than "latest". +# +# In order to use one executable script, the SWIFTEST_SIF environment variable must be set to point to the location of swiftest.sif, +# which requires this script to be called via source: +# $ source ./install.sh +# or +# $ . ./install.sh +TAG=${1:-latest} + +SIF_DIR=${SIF_DIR:-${HOME}/.apptainer} +echo "Installing ${SIF_DIR}/swiftest.sif container from mintongroup/swiftest:${TAG} Docker container" +apptainer pull --force ${SIF_DIR}/swiftest.sif docker://mintongroup/swiftest:${TAG} +cp -rf bin/swiftest ../bin/ +cp -rf bin/swiftest_driver ../bin/ +export SWIFTEST_SIF=${SIF_DIR}/swiftest.sif \ No newline at end of file diff --git a/cmake/Modules/FindNETCDF.cmake b/cmake/Modules/FindNETCDF.cmake index 2355ad900..003d5b195 100644 --- a/cmake/Modules/FindNETCDF.cmake +++ b/cmake/Modules/FindNETCDF.cmake @@ -8,11 +8,14 @@ # If not, see: https://www.gnu.org/licenses. # - Finds the NetCDF libraries -find_path(NETCDF_INCLUDE_DIR NAMES netcdf.mod HINTS ENV NETCDF_FORTRAN_HOME) -find_library(NETCDF_LIBRARY NAMES netcdf HINTS ENV NETCDF_FORTRAN_HOME) -find_library(NETCDF_FORTRAN_LIBRARY NAMES netcdff HINTS ENV NETCDF_FORTRAN_HOME) + +find_path(NETCDF_INCLUDE_DIR NAMES netcdf.mod HINTS ENV NETCDF_FORTRAN_HOME ENV CPATH) +find_library(NETCDF_FORTRAN_LIBRARY NAMES netcdff HINTS ENV NETCDF_FORTRAN_HOME ENV LD_LIBRARY_PATH) +find_library(NETCDF_LIBRARY NAMES netcdf HINTS ENV NETCDF_FORTRAN_HOME ENV LD_LIBRARY_PATH) set(NETCDF_FOUND TRUE) set(NETCDF_INCLUDE_DIRS ${NETCDF_INCLUDE_DIR}) -set(NETCDF_LIBRARIES ${NETCDF_LIBRARY} ${NETCDF_FORTRAN_LIBRARY}) +# Note for posterity: When building static libraries, NETCDF_FORTRAN_LIBRARY must come *before* NETCDF_LIBRARY. Otherwise you get a bunch of "undefined reference to" errors +set(NETCDF_LIBRARIES ${NETCDF_FORTRAN_LIBRARY} ${NETCDF_LIBRARY}) + mark_as_advanced(NETCDF_LIBRARY NETCDF_FORTRAN_LIBRARY NETCDF_INCLUDE_DIR) \ No newline at end of file diff --git a/cmake/Modules/FindOpenMP_Fortran.cmake b/cmake/Modules/FindOpenMP_Fortran.cmake index 32777569e..06d679e7c 100644 --- a/cmake/Modules/FindOpenMP_Fortran.cmake +++ b/cmake/Modules/FindOpenMP_Fortran.cmake @@ -25,24 +25,19 @@ INCLUDE (${CMAKE_ROOT}/Modules/FindPackageHandleStandardArgs.cmake) -SET (OpenMP_Fortran_FLAG_CANDIDATES - #Intel - "-qopenmp" - #Intel windows - "/Qopenmp" - #Gnu - "-fopenmp" - #Portland Group - "-mp" - #Empty, if compiler automatically accepts openmp - " " - #Sun - "-xopenmp" - #HP - "+Oopenmp" - #IBM XL C/c++ - "-qsmp" -) +IF (USE_SIMD) + SET (OpenMP_Fortran_FLAG_CANDIDATES + "-qopenmp" # Intel + "/Qopenmp" # Intel Windows + "-fopenmp" # GNU + ) +ELSE () + SET (OpenMP_Fortran_FLAG_CANDIDATES + "-qopenmp -qno-openmp-simd" # Intel + "/Qopenmp-simd-" # Intel Windows + "-fopenmp" # GNU + ) +ENDIF (USE_SIMD) IF (DEFINED OpenMP_Fortran_FLAGS) SET (OpenMP_Fortran_FLAG_CANDIDATES) diff --git a/cmake/Modules/SetCompileFlag.cmake b/cmake/Modules/SetCompileFlag.cmake index 4141c4773..d094009ed 100644 --- a/cmake/Modules/SetCompileFlag.cmake +++ b/cmake/Modules/SetCompileFlag.cmake @@ -45,6 +45,7 @@ FUNCTION(SET_COMPILE_FLAG FLAGVAR FLAGVAL LANG) SET(FAIL_REGEX "ignoring unknown option" # Intel "invalid argument" # Intel + "not supported" # Intel ifx "unrecognized .*option" # GNU "[Uu]nknown switch" # Portland Group "ignoring unknown option" # MSVC diff --git a/cmake/Modules/SetFortranFlags.cmake b/cmake/Modules/SetFortranFlags.cmake index 76f23f5cf..4c8cc9b85 100644 --- a/cmake/Modules/SetFortranFlags.cmake +++ b/cmake/Modules/SetFortranFlags.cmake @@ -19,36 +19,48 @@ INCLUDE(${CMAKE_MODULE_PATH}/SetCompileFlag.cmake) # Make sure the build type is uppercase STRING(TOUPPER "${CMAKE_BUILD_TYPE}" BT) +SET(BUILD_TYPE_MSG "Choose the type of build, options are DEBUG, RELEASE, PROFILE, or TESTING.") + IF(BT STREQUAL "RELEASE") SET(CMAKE_BUILD_TYPE RELEASE CACHE STRING - "Choose the type of build, options are DEBUG, RELEASE, PROFILE, or TESTING." + ${BUILD_TYPE_MSG} FORCE) ELSEIF(BT STREQUAL "DEBUG") SET (CMAKE_BUILD_TYPE DEBUG CACHE STRING - "Choose the type of build, options are DEBUG, RELEASE, PROFILE, or TESTING." + ${BUILD_TYPE_MSG} FORCE) ELSEIF(BT STREQUAL "TESTING") SET (CMAKE_BUILD_TYPE TESTING CACHE STRING - "Choose the type of build, options are DEBUG, RELEASE, PROFILE, or TESTING." + ${BUILD_TYPE_MSG} FORCE) ELSEIF(BT STREQUAL "PROFILE") SET (CMAKE_BUILD_TYPE PROFILE CACHE STRING - "Choose the type of build, options are DEBUG, RELEASE, PROFILE, or TESTING." - FORCE) + ${BUILD_TYPE_MSG} + FORCE) ELSEIF(NOT BT) SET(CMAKE_BUILD_TYPE RELEASE CACHE STRING - "Choose the type of build, options are DEBUG, RELEASE, PROFILE, or TESTING." + ${BUILD_TYPE_MSG} FORCE) MESSAGE(STATUS "CMAKE_BUILD_TYPE not given, defaulting to RELEASE") ELSE() - MESSAGE(FATAL_ERROR "CMAKE_BUILD_TYPE not valid, choices are DEBUG, RELEASE, PROFILE, or TESTING") + MESSAGE(FATAL_ERROR "CMAKE_BUILD_TYPE not valid! ${BUILD_TYPE_MSG}") ENDIF(BT STREQUAL "RELEASE") +IF (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") + IF (APPLE) + SET(MACHINE_CODE_VALUE "tune=native" CACHE STRING "Tells the compiler which processor features it may target, including which instruction sets and optimizations it may generate.") + ELSE () + SET(MACHINE_CODE_VALUE "arch=native" CACHE STRING "Tells the compiler which processor features it may target, including which instruction sets and optimizations it may generate.") + ENDIF () +ELSE () + SET(MACHINE_CODE_VALUE "host" CACHE STRING "Tells the compiler which processor features it may target, including which instruction sets and optimizations it may generate.") +ENDIF () + ######################################################### # If the compiler flags have already been set, return now ######################################################### -IF(CMAKE_Fortran_FLAGS_RELEASE AND CMAKE_Fortran_FLAGS_TESTING AND CMAKE_Fortran_FLAGS_DEBUG AND CMAKE_Fortran_FLAGS_PROFILE) +IF(CMAKE_Fortran_FLAGS_RELEASE AND CMAKE_Fortran_FLAGS_TESTING AND CMAKE_Fortran_FLAGS_DEBUG AND CMAKE_Fortran_FLAGS_PROFILE ) RETURN () ENDIF(CMAKE_Fortran_FLAGS_RELEASE AND CMAKE_Fortran_FLAGS_TESTING AND CMAKE_Fortran_FLAGS_DEBUG AND CMAKE_Fortran_FLAGS_PROFILE) @@ -64,28 +76,91 @@ ENDIF(CMAKE_Fortran_FLAGS_RELEASE AND CMAKE_Fortran_FLAGS_TESTING AND CMAKE_Fort ### GENERAL FLAGS ### ##################### +# Free form +SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS}" + Fortran "-ffree-form" # GNU + ) + # Don't add underscores in symbols for C-compatability SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS}" - Fortran "-fno-underscoring") + Fortran "-fno-underscoring" # GNU + ) +# Compile code assuming that IEEE signaling NaNs may generate user-visible traps during floating-point operations. +# Setting this option disables optimizations that may change the number of exceptions visible with signaling NaNs. +SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS}" + Fortran "-fsignaling-nans " # GNU + ) + +# Allows for lines longer than 80 characters without truncation +SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS}" + Fortran "-ffree-line-length-none" # GNU (gfortran) + ) -# There is some bug where -march=native doesn't work on Mac -IF(APPLE) - SET(GNUNATIVE "-mtune=native") -ELSE() - SET(GNUNATIVE "-march=native") -ENDIF() -# Optimize for the host's architecture +# Disables right margin wrapping in list-directed output +SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS}" + Fortran "-no-wrap-margin" # Intel + "/wrap-margin-" # Intel Windows + ) + +# Aligns a variable to a specified boundary and offset SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS}" - Fortran "-xhost" # Intel - "/QxHost" # Intel Windows - ${GNUNATIVE} # GNU + Fortran "-align all -align array64byte" # Intel + "/align:all /align:array64byte" # Intel Windows ) +# Enables changing the variable and array memory layout +SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS}" + Fortran "-pad" # Intel + "/Qpad" # Intel Windows + + ) + +IF (NOT BUILD_SHARED_LIBS) + # Use static Intel libraries + SET_COMPILE_FLAG(CMAKE_Fortran_LINK_FLAGS "${CMAKE_Fortran_LINK_FLAGS}" + Fortran "-static-intel" # Intel + ) + # Use static Intel MPI libraries + SET_COMPILE_FLAG(CMAKE_Fortran_LINK_FLAGS "${CMAKE_Fortran_LINK_FLAGS}" + Fortran "-static_mpi" # Intel + ) + + IF (USE_OPENMP) + SET_COMPILE_FLAG(CMAKE_Fortran_LINK_FLAGS "${CMAKE_Fortran_LINK_FLAGS}" + Fortran "-qopenmp-link=static" # Intel + ) + ENDIF (USE_OPENMP) +ENDIF () + +IF (USE_SIMD) + # Enables OpenMP SIMD compilation when OpenMP parallelization is disabled. + IF (NOT USE_OPENMP) + SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS}" + Fortran "-qno-openmp -qopenmp-simd" # Intel + Fortran "/Qopenmp- /Qopenmp-simd" # Intel Windows + ) + ENDIF (NOT USE_OPENMP) + + # Optimize for an old enough processor that it should run on most computers + SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS}" + Fortran "-x${MACHINE_CODE_VALUE}" # Intel + "/Qx${MACHINE_CODE_VALUE}" # Intel Windows + "-m${MACHINE_CODE_VALUE}" # GNU + ) + + # Generate an extended set of vector functions + SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS}" + Fortran "-vecabi=cmdtarget" # Intel + Fortran "/Qvecabi:cmdtarget" # Intel Windows + ) +ENDIF (USE_SIMD) + + + ################### ### DEBUG FLAGS ### ################### -# NOTE: debugging symbols (-g or /debug:full) are already on by default # Disable optimizations SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}" @@ -100,39 +175,61 @@ SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}" "/warn:all" # Intel Windows "-Wall" # GNU ) +# This enables some extra warning flags that are not enabled by -Wall +SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}" + Fortran "-Wextra" # GNU + ) + +# Disable the warning that arrays may be uninitialized, which comes up due to a known bug in gfortran +SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}" + Fortran "-Wno-maybe-uninitialized" # GNU + ) +# Disable the warning about unused dummy arguments. These primarily occur due to interface rules for type-bound procedures used in extendable types. +SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}" + Fortran "-Wno-unused-dummy-argument" # GNU + ) + +# Tells the compiler to issue compile-time messages for nonstandard language elements (Fortran 2018). +SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}" + Fortran "-stand f18" # Intel + "/stand:f18" # Intel Windows + "-fstd=f2018" # GNU + ) # Traceback SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}" Fortran "-traceback" # Intel Group "/traceback" # Intel Windows "-fbacktrace" # GNU (gfortran) - "-ftrace=full" # GNU (g95) ) # Sanitize SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}" - Fortran "-fsanitize=address" # Gnu + Fortran "-fsanitize=address, undefined" # Gnu ) # Check everything SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}" - Fortran "-check" # Intel - "/check" # Intel Windows + Fortran "-check all" # Intel + "/check:all" # Intel Windows "-fcheck=all" # GNU ) SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}" - Fortran "-fstack-check" # GNU + Fortran "-fstack-check" # GNU ) # Initializes matrices/arrays with NaN values SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}" - Fortran "-init=snan,arrays" # Intel + Fortran "-init=snan,arrays" # Intel + "/Qinit:snan,arrays" # Intel Windows + "-finit-real=snan" # GNU ) # Does not generate an interface block for each routine in a source file SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}" Fortran "-nogen-interfaces" # Intel + "/nogen-interfaces" # Intel Windows ) # Does not generate aposition independent executable @@ -143,74 +240,48 @@ SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}" # Does not set denormal results from floating-point calculations to zero SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}" Fortran "-no-ftz" # Intel + "/Qftz-" # Intel Windows ) # Enables floating-point invalid, divide-by-zero, and overflow exceptions SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}" - Fortran "-fpe-all=0" # Intel + Fortran "-fpe-all=0" # Intel + "/fpe-all:0" # Intel Windows "-ffpe-trap=zero,overflow,underflow" # GNU ) -# Improves floating-point precision and consistency +# List of floating-point exceptions, whose flag status is printed to ERROR_UNIT when invoking STOP and ERROR STOP SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}" - Fortran "-mp1" # Intel - ) - -# Strict model for floating-point calculations (precise and except) -SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}" - Fortran "-fp-model=strict" # Intel + Fortran "-ffpe-summary=all" # GNU ) # Enables floating-point invalid, divide-by-zero, and overflow exceptions SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}" - Fortran "-fpe0" # Intel + Fortran "-fpe0" # Intel + "/fpe:0" # Intel Windows ) # Enables debug info SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}" Fortran "-debug all" # Intel + "/debug:all" # Intel Windows ) -# Aligns a variable to a specified boundary and offset -SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}" - Fortran "-align all -align array64byte" # Intel - ) - -# Enables changing the variable and array memory layout -SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}" - Fortran "-pad" # Intel - ) - -# Enables additional interprocedural optimizations for a single file cimpilation -SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}" - Fortran "-ip" # Intel - ) - -# Improves precision when dividing floating-points -SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}" - Fortran "-prec-div" # Intel - ) - -# Improves precision when taking the square root of floating-points +# Disables additional interprocedural optimizations for a single file compilation SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}" - Fortran "-prec-sqrt" # Intel + Fortran "-no-ip" # Intel + "/Qip-" # Intel Windows ) -# Treat parentheses in accordance with the Fortran standard (ifort 10 only) -SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}" - Fortran "-assume protect-parens" # Intel - ) - -# Checks the bounds of arrays at run-time +# Disables prefetch insertion optimization SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}" - Fortran "-CB" # Intel + Fortran "-qno-opt-prefetch" # Intel + "/Qopt-prefetch-" # Intel Windows ) - -# Allows for lines longer than 80 characters without truncation + SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}" - Fortran "-no-wrap-margin" # Intel - "-ffree-line-length-none" # GNU (gfortran) - ) + Fortran "-fstack-check" # GNU + ) ##################### ### TESTING FLAGS ### @@ -218,8 +289,8 @@ SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}" # Optimizations SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_TESTING "${CMAKE_Fortran_FLAGS_DEBUG}" - Fortran REQUIRED "-O3" # All compilers not on Windows - "/O3" # Intel Windows + Fortran REQUIRED "-O3" # All compilers not on Windows + "/O3" # Intel Windows ) ##################### @@ -229,9 +300,9 @@ SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_TESTING "${CMAKE_Fortran_FLAGS_DEBUG}" # Unroll loops SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE}" - Fortran "-unroll" # Intel - "/unroll" # Intel Windows - "-funroll-loops" # GNU + Fortran "-unroll" # Intel + "/unroll" # Intel Windows + "-funroll-loops" # GNU ) # Inline functions @@ -241,91 +312,103 @@ SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE}" "-finline-functions" # GNU ) - -# Allows for lines longer than 80 characters without truncation -SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE}" - Fortran "-no-wrap-margin" # Intel - "-ffree-line-length-none" # GNU (gfortran) - ) - -# Disables prefetch insertion optimization -SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE}" - Fortran "-qopt-prefetch=0" # Intel - ) - # Calls the Matrix Multiply library SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE}" - Fortran "-qopt-matmul" # Intel + Fortran "-qopt-matmul" # Intel + "/Qopt-matmul" # Intel Windows ) # Saves the compiler options and version number to the executable SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE}" - Fortran "-sox" # Intel - ) - -# Enforces vectorization of loops -SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE}" - Fortran "-simd" # Intel + Fortran "-sox" # Intel ) # Aligns a variable to a specified boundary and offset SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE}" - Fortran "-align all" # Intel - ) - -# Generate an extended set of vector functions -SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE}" - Fortran "-vecabi=cmdtarget" # Intel + Fortran "-align all" # Intel + "/align:all" # Intel Windows ) # No floating-point exceptions SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE}" - Fortran "-fp-model no-except" # Intel + Fortran "-fp-model no-except" # Intel + "/fp:no-except" # Intel Windows ) # Generate fused multiply-add instructions SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE}" - Fortran "-fma" # Intel - ) + Fortran "-fma" # Intel + "/Qfma" # Intel Windows + ) -# Generate fused multiply-add instructions +# Tells the compiler to link to certain libraries in the Intel oneAPI Math Kernel Library (oneMKL). SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE}" - Fortran "-qmkl=cluster" # Intel - Fortran "-qmkl" # Intel - Fortran "-mkl" # Old Intel - ) + Fortran "-qmkl=cluster" # Intel + "-qmkl" # Intel + "/Qmkl:cluster" # Intel Windows + "/Qmkl" # Intel Windows + ) + +# Enables additional interprocedural optimizations for a single file compilation +SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE}" + Fortran "-ip" # Intel + "/Qip" # Intel Windows + ) + ##################### ### MATH FLAGS ### ##################### # Some subroutines require more strict floating point operation optimizations for repeatability SET_COMPILE_FLAG(STRICTMATH_FLAGS "${STRICTMATH_FLAGS}" - Fortran "-fp-model=precise -prec-div -prec-sqrt -assume protect-parens" # Intel - "/fp:precise /Qprec-div /Qprec-sqrt /assume:protect-parens" # Intel Windows - ) + Fortran "-fp-model=precise" # Intel + "/fp:precise" # Intel Windows + "-fno-unsafe-math-optimizations" # GNU + ) +# Disable transformations and optimizations that assume default floating-point rounding behavior. +SET_COMPILE_FLAG(STRICTMATH_FLAGS "${STRICTMATH_FLAGS}" + Fortran "-frounding-math" + ) + +SET_COMPILE_FLAG(STRICTMATH_FLAGS "${STRICTMATH_FLAGS}" + Fortran "-prec-div" # Intel + "/Qprec-div" # Intel Windows + ) + +SET_COMPILE_FLAG(STRICTMATH_FLAGS "${STRICTMATH_FLAGS}" + Fortran "-prec-sqrt" # Intel + "/Qprec-sqrt" # Intel Windows + ) + +SET_COMPILE_FLAG(STRICTMATH_FLAGS "${STRICTMATH_FLAGS}" + Fortran "-assume protect-parens" # Intel + "/assume:protect-parens" # Intel Windows + ) + +# Improves floating-point precision and consistency +SET_COMPILE_FLAG(STRICTMATH_FLAGS "${STRICTMATH_FLAGS}" + Fortran "-mp1" # Intel + "/Qprec" # Intel Windows + ) # Most subroutines can use aggressive optimization of floating point operations without problems. SET_COMPILE_FLAG(FASTMATH_FLAGS "${FASTMATH_FLAGS}" - Fortran "-fp-model=fast" - "/fp:fast" - ) + Fortran "-fp-model=fast" # Intel + "/fp:fast" # Intel Windows + "-ffast-math" # GNU + ) + + +SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}" + Fortran ${STRICTMATH_FLAGS} + ) ##################### ### PROFILE FLAGS ### ##################### # Enables the optimization reports to be generated SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_PROFILE "${CMAKE_Fortran_FLAGS_RELEASE}" - Fortran "-O2 -pg -qopt-report=5 -traceback -p -g3" # Intel - "/O2 /Qopt-report:5 /traceback -g3" # Windows Intel - "-O2 -pg -fbacktrace" + Fortran "-O2 -pg -qopt-report=5 -traceback -p -g3" # Intel + "/O2 /Qopt-report:5 /traceback /Z7" # Intel Windows + "-O2 -pg -fbacktrace" # GNU ) - -# Sanitize -SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}" - Fortran "-fsanitize=address,undefined" # Gnu - ) - - -SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}" - Fortran "-fstack-check" # GNU - ) diff --git a/docker/.gitignore b/docker/.gitignore new file mode 100644 index 000000000..5c73deb60 --- /dev/null +++ b/docker/.gitignore @@ -0,0 +1,6 @@ +* +!.gitignore +!install.sh +!bin +!bin/swiftest +!bin/swiftest_driver diff --git a/docker/bin/swiftest b/docker/bin/swiftest new file mode 100755 index 000000000..8c985e065 --- /dev/null +++ b/docker/bin/swiftest @@ -0,0 +1,4 @@ +#!/bin/sh -- +OMP_NUM_THREADS=${OMP_NUM_THREADS:-`nproc --all`} +FOR_COARRAY_NUM_IMAGES=${FOR_COARRAY_NUM_IMAGES:-1} +docker run -v $(pwd):$(pwd) -w $(pwd) --user "$(id -u):$(id -g)" -ti -e OMP_NUM_THREADS -e FOR_COARRAY_NUM_IMAGES mintongroup/swiftest "$@" \ No newline at end of file diff --git a/docker/bin/swiftest_driver b/docker/bin/swiftest_driver new file mode 100755 index 000000000..146509c36 --- /dev/null +++ b/docker/bin/swiftest_driver @@ -0,0 +1,4 @@ +#!/bin/sh -- +OMP_NUM_THREADS=${OMP_NUM_THREADS:-`nproc --all`} +FOR_COARRAY_NUM_IMAGES=${FOR_COARRAY_NUM_IMAGES:-1} +docker run -v $(pwd):$(pwd) -w $(pwd) --user "$(id -u):$(id -g)" -ti --entrypoint /usr/local/bin/swiftest_driver -e OMP_NUM_THREADS -e FOR_COARRAY_NUM_IMAGES MintonGroup/swiftest "$@" \ No newline at end of file diff --git a/docker/install.sh b/docker/install.sh new file mode 100755 index 000000000..296223149 --- /dev/null +++ b/docker/install.sh @@ -0,0 +1,6 @@ +#!/bin/sh -- +tag=${1:-latest} +echo "Installing swiftest:${tag} Docker container and executable script" +docker pull mintongroup/swiftest:${tag} +cp -rf bin/swiftest ../bin/ +cp -rf bin/swiftest_driver ../bin/ \ No newline at end of file diff --git a/environment.yml b/environment.yml new file mode 100644 index 000000000..0e1dbb387 --- /dev/null +++ b/environment.yml @@ -0,0 +1,22 @@ +name: swiftest-env + +channels: + - conda-forge + - defaults + +dependencies: + - numpy + - scipy + - matplotlib + - pandas + - xarray + - h5netcdf + - netcdf4 + - dask + - bottleneck + - astropy + - astroquery + - tqdm + - x264 + - ffmpeg + - conda-build \ No newline at end of file diff --git a/examples/Basic_Simulation/.gitignore b/examples/Basic_Simulation/.gitignore index 728fe8873..9e4d02aad 100644 --- a/examples/Basic_Simulation/.gitignore +++ b/examples/Basic_Simulation/.gitignore @@ -2,4 +2,5 @@ !.gitignore !initial_conditions.py !output_reader.py +!errors.py !README.txt diff --git a/examples/Basic_Simulation/README.txt b/examples/Basic_Simulation/README.txt index 9db2d5f54..ae626a9c5 100644 --- a/examples/Basic_Simulation/README.txt +++ b/examples/Basic_Simulation/README.txt @@ -11,13 +11,14 @@ README.txt Swiftest Example : Basic_Simulation Author : Carlisle Wishard and David Minton -Date : December 6, 2022 +Date : June 27, 2023 Included in the Basic_Simulation example directory are the following files: - README.txt : This file - initial_conditions.py : A Python Script that generates and runs a set of initial conditions. - output_reader.py : A Python Script that processes out.nc and generates output.eps + - errors.py : A Python Script that processes out.nc and reports the simulation errors to the terminal This example is intended to be run with Swiftest SyMBA. For details on how to generate, run, and analyze this example, -see the Swiftest User Manual. \ No newline at end of file +see the Swiftest User Manual. diff --git a/examples/Basic_Simulation/errors.py b/examples/Basic_Simulation/errors.py new file mode 100644 index 000000000..5cbdfaa55 --- /dev/null +++ b/examples/Basic_Simulation/errors.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python3 + +""" + Copyright 2023 - David Minton, Carlisle Wishard, Jennifer Pouplin, Jake Elliott, & Dana Singh + This file is part of Swiftest. + Swiftest is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License + as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + Swiftest is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty + of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + You should have received a copy of the GNU General Public License along with Swiftest. + If not, see: https://www.gnu.org/licenses. +""" + +""" +Reads and processes a Swiftest output file. + +Input +------ +data.nc : A NetCDF file containing the simulation output. + +Output +------ +None +""" + +import swiftest +import xarray as xr +import matplotlib.pyplot as plt + +# Read in the simulation output and store it as an Xarray dataset. +ds = swiftest.Simulation(read_data=True) + +# Calculate the angular momentum error +ds.data['L_tot'] = ds.data['L_orbit'] + ds.data['L_spin'] + ds.data['L_escape'] +ds.data['DL'] = ds.data['L_tot'] - ds.data['L_tot'].isel(time=0) +ds.data['L_error'] = swiftest.tool.magnitude(ds.data,'DL') / swiftest.tool.magnitude(ds.data.isel(time=0), 'L_tot') +L_final = ds.data['L_error'][-1].values + +# Calculate the energy error +ds.data['E_error'] = (ds.data['TE'] - ds.data['TE'].isel(time=0)) / ds.data['TE'].isel(time=0) +E_final = ds.data['E_error'][-1].values + +# Calculate the mass error +ds.data['GMtot'] = ds.data['Gmass'].sum(dim='name',skipna=True) + ds.data['GMescape'] +ds.data['GM_error'] = (ds.data['GMtot'] - ds.data['GMtot'].isel(time=0)) / ds.data['GMtot'].isel(time=0) +GM_final = ds.data['GM_error'][-1].values + +# Print the final errors +print("Final Angular Momentum Error: ", L_final) +print("Final Energy Error: ", E_final) +print("Final Mass Error: ", GM_final) + +# Determine if the errors are within bounds +L_limit = 1e-10 +E_limit = 1e-5 +GM_limit = 1e-14 + +lerror = 0 +if (L_final > L_limit): + lerror = 1 + print("Angular Momentum Error of", L_final, " higher than threshold value of", L_limit, ". Test failed.") +if (E_final > E_limit): + lerror = 1 + print("Energy Error of", E_final, " higher than threshold value of", E_limit, ". Test failed.") +if (GM_final > GM_limit): + lerror = 1 + print("Mass Error of", GM_final, " higher than threshold value of", GM_limit, ". Test failed.") +if (lerror == 0): + print("Test successful.") diff --git a/examples/Basic_Simulation/initial_conditions.py b/examples/Basic_Simulation/initial_conditions.py index 564c2ebee..83614fb07 100755 --- a/examples/Basic_Simulation/initial_conditions.py +++ b/examples/Basic_Simulation/initial_conditions.py @@ -39,6 +39,7 @@ #sim = swiftest.Simulation(fragmentation=True, minimum_fragment_mass = 2.5e-11, mtiny=2.5e-8) sim = swiftest.Simulation() sim.clean() +rng = default_rng(seed=123) # Add the modern planets and the Sun using the JPL Horizons Database. sim.add_solar_system_body(["Sun","Mercury","Venus","Earth","Mars","Jupiter","Saturn","Uranus","Neptune","Pluto"]) @@ -47,37 +48,37 @@ npl = 5 density_pl = 3000.0 / (sim.param['MU2KG'] / sim.param['DU2M'] ** 3) -name_pl = ["MassiveBody_01", "MassiveBody_02", "MassiveBody_03", "MassiveBody_04", "MassiveBody_05"] -a_pl = default_rng().uniform(0.3, 1.5, npl) -e_pl = default_rng().uniform(0.0, 0.3, npl) -inc_pl = default_rng().uniform(0.0, 90, npl) -capom_pl = default_rng().uniform(0.0, 360.0, npl) -omega_pl = default_rng().uniform(0.0, 360.0, npl) -capm_pl = default_rng().uniform(0.0, 360.0, npl) -GM_pl = (np.array([6e23, 8e23, 1e24, 3e24, 5e24]) / sim.param['MU2KG']) * sim.GU -R_pl = np.full(npl, (3 * (GM_pl / sim.GU) / (4 * np.pi * density_pl)) ** (1.0 / 3.0)) -Rh_pl = a_pl * ((GM_pl) / (3 * sim.GU)) ** (1.0 / 3.0) -Ip_pl = np.full((npl,3),0.4,) -rot_pl = np.zeros((npl,3)) +name_pl = ["SemiBody_01", "SemiBody_02", "SemiBody_03", "SemiBody_04", "SemiBody_05"] +a_pl = rng.uniform(0.3, 1.5, npl) +e_pl = rng.uniform(0.0, 0.2, npl) +inc_pl = rng.uniform(0.0, 10, npl) +capom_pl = rng.uniform(0.0, 360.0, npl) +omega_pl = rng.uniform(0.0, 360.0, npl) +capm_pl = rng.uniform(0.0, 360.0, npl) +M_pl = np.array([6e20, 8e20, 1e21, 3e21, 5e21]) * sim.KG2MU +R_pl = np.full(npl, (3 * M_pl/ (4 * np.pi * density_pl)) ** (1.0 / 3.0)) +Ip_pl = np.full((npl,3),0.4,) +rot_pl = np.zeros((npl,3)) +mtiny = 1.1 * np.max(M_pl) -sim.add_body(name=name_pl, a=a_pl, e=e_pl, inc=inc_pl, capom=capom_pl, omega=omega_pl, capm=capm_pl, Gmass=GM_pl, radius=R_pl, rhill=Rh_pl, Ip=Ip_pl, rot=rot_pl) +sim.add_body(name=name_pl, a=a_pl, e=e_pl, inc=inc_pl, capom=capom_pl, omega=omega_pl, capm=capm_pl, mass=M_pl, radius=R_pl, Ip=Ip_pl, rot=rot_pl) # Add 10 user-defined test particles. ntp = 10 name_tp = ["TestParticle_01", "TestParticle_02", "TestParticle_03", "TestParticle_04", "TestParticle_05", "TestParticle_06", "TestParticle_07", "TestParticle_08", "TestParticle_09", "TestParticle_10"] -a_tp = default_rng().uniform(0.3, 1.5, ntp) -e_tp = default_rng().uniform(0.0, 0.3, ntp) -inc_tp = default_rng().uniform(0.0, 90, ntp) -capom_tp = default_rng().uniform(0.0, 360.0, ntp) -omega_tp = default_rng().uniform(0.0, 360.0, ntp) -capm_tp = default_rng().uniform(0.0, 360.0, ntp) +a_tp = rng.uniform(0.3, 1.5, ntp) +e_tp = rng.uniform(0.0, 0.2, ntp) +inc_tp = rng.uniform(0.0, 10, ntp) +capom_tp = rng.uniform(0.0, 360.0, ntp) +omega_tp = rng.uniform(0.0, 360.0, ntp) +capm_tp = rng.uniform(0.0, 360.0, ntp) sim.add_body(name=name_tp, a=a_tp, e=e_tp, inc=inc_tp, capom=capom_tp, omega=omega_tp, capm=capm_tp) - -# Run the simulation. Arguments may be defined here or thorugh the swiftest.Simulation() method. -#sim.run(tstart=0.0, tstop=1.0e3, dt=0.01, istep_out=100, dump_cadence=10) -sim.set_parameter(tstart=0.0, tstop=1.0e3, dt=0.01, istep_out=100, dump_cadence=0) +sim.set_parameter(tstart=0.0, tstop=1.0e3, dt=0.01, istep_out=100, dump_cadence=0, compute_conservation_values=True, mtiny=mtiny) # Display the run configuration parameters. sim.get_parameter() sim.save() + +# Run the simulation. Arguments may be defined here or thorugh the swiftest.Simulation() method. +sim.run() diff --git a/examples/Basic_Simulation/output_reader.py b/examples/Basic_Simulation/output_reader.py index 851384bf3..a46b6cefc 100644 --- a/examples/Basic_Simulation/output_reader.py +++ b/examples/Basic_Simulation/output_reader.py @@ -39,8 +39,7 @@ ax.set(xlabel="Semimajor Axis (AU)", ylabel="Eccentricity", title="Simulation Initial Conditions (t=0)") ax.scatter(sim.data['a'].isel(time=0), sim.data['e'].isel(time=0), c=colors, s=sizes, edgecolor='black') ax.set_xlim(0, 2.0) -ax.set_ylim(0, 0.4) -ax.text(1.5, 0.35, f"t = {sim.data['time'].isel(time=0).values} years", size=10, ha="left") +ax.set_ylim(0, 0.25) +ax.text(1.5, 0.2, f"t = {sim.data['time'].isel(time=0).values} years", size=10, ha="left") plt.tight_layout() -plt.show() -fig.savefig("output.eps", dpi=300, facecolor='white', transparent=False, bbox_inches="tight") +fig.savefig("../examples/Basic_Simulation/output.eps", dpi=300, facecolor='white', transparent=False, bbox_inches="tight") diff --git a/examples/rmvs_swifter_comparison/8pl_16tp_encounters/init_cond.py b/examples/rmvs_swifter_comparison/8pl_16tp_encounters/init_cond.py index d59c068d6..725c123b1 100755 --- a/examples/rmvs_swifter_comparison/8pl_16tp_encounters/init_cond.py +++ b/examples/rmvs_swifter_comparison/8pl_16tp_encounters/init_cond.py @@ -17,7 +17,7 @@ for i,n in enumerate(pl.name): pli = pl.sel(name=n) rstart = 2 * pli['radius'].data[0] # Start the test particles at a multiple of the planet radius away - vstart = 1.5 * np.sqrt(2 * pli['Gmass'].data[0]) / rstart # Start the test particle velocities at a multiple of the escape speed + vstart = 1.5 * np.sqrt(2 * pli['Gmass'].data[0] / rstart) # Start the test particle velocities at a multiple of the escape speed rstart_vec = np.array([rstart / np.sqrt(2.0), rstart / np.sqrt(2.0), 0.0]) vstart_vec = np.array([vstart, 0.0, 0.0]) rp = pli['rh'].data[0] diff --git a/paper/paper.md b/paper/paper.md index 7dc8b772c..f5c8c6310 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -17,21 +17,15 @@ authors: equal-contrib: true affiliation: 1 - name: Jennifer Pouplin - affiliation: "1, 2" + affiliation: "1" - name: Jake Elliott - affiliation: "1, 3" + affiliation: "1" - name: Dana Singh - affiliation: "1, 4" + affiliation: "1" affiliations: - name: Department of Earth, Atmospheric, and Planetary Sciences, Purdue University, USA index: 1 - - name: Metrea Orbital Effects, USA - index: 2 - - name: Verisk, USA - index: 3 - - name: SAIC, USA - index: 4 -date: 03 March 2023 +date: 05 April 2023 bibliography: paper.bib --- @@ -59,4 +53,4 @@ Modeling the behavior of thousands of fully interacting bodies over long timesca `Swiftest` was developed at Purdue University and was funded under the NASA Emerging Worlds and Solar System Workings programs. Active development by the Purdue Swiftest Team is ongoing and contributions from the community are highly encouraged. -# References \ No newline at end of file +# References diff --git a/python/swiftest/swiftest/init_cond.py b/python/swiftest/swiftest/init_cond.py index efaa6429f..1b140f119 100644 --- a/python/swiftest/swiftest/init_cond.py +++ b/python/swiftest/swiftest/init_cond.py @@ -283,7 +283,7 @@ def vec2xr(param: Dict, **kwargs: Any): # Check for valid keyword arguments kwargs = {k:kwargs[k] for k,v in kwargs.items() if v is not None} - if "rotation" in param and param['rotation'] == True: + if "ROTATION" in param and param['ROTATION'] == True: if "rot" not in kwargs and "Gmass" in kwargs: kwargs['rot'] = np.zeros((len(kwargs['Gmass']),3)) if "Ip" not in kwargs and "Gmass" in kwargs: diff --git a/python/swiftest/swiftest/simulation_class.py b/python/swiftest/swiftest/simulation_class.py index 804eee651..399ab0d38 100644 --- a/python/swiftest/swiftest/simulation_class.py +++ b/python/swiftest/swiftest/simulation_class.py @@ -404,9 +404,9 @@ def _run_swiftest_driver(self): """ # Get current environment variables - env = os.environ.copy() cmd = f"{env['SHELL']} -l {self.driver_script}" + def _type_scrub(output_data): int_vars = ["ILOOP","NPL","NTP","NPLM"] @@ -470,6 +470,9 @@ def _type_scrub(output_data): sys.exit() except: warnings.warn(f"Error executing main swiftest_driver program", stacklevel=2) + res = p.communicate() + for line in res[1]: + print(line, end='') sys.exit() pbar.close() @@ -502,11 +505,9 @@ def run(self,dask: bool = False, **kwargs): if not self.binary_source.exists(): msg = "Path to swiftest_driver has not been set!" - msg += f"\nMake sure swiftest_driver is compiled and the executable is in {str(self.binary_path)}" + msg += f"\nMake sure swiftest_driver is compiled and the executable is in {str(self.binary_source.parent)}" warnings.warn(msg,stacklevel=2) return - else: - shutil.copy2(self.binary_source, self.driver_executable) if not self.restart: self.clean() @@ -829,7 +830,7 @@ def set_parameter(self, verbose: bool = True, **kwargs): "restart": False, "encounter_save" : "NONE", "coarray" : False, - "simdir" : self.simdir + "simdir" : self.simdir, } param_file = kwargs.pop("param_file",None) @@ -891,7 +892,7 @@ def get_parameter(self, **kwargs): return param_dict def set_integrator(self, - codename: Literal["Swiftest", "Swifter", "Swift"] | None = None, + codename: None | Literal["Swiftest", "Swifter", "Swift"] = "Swiftest", integrator: Literal["symba","rmvs","whm","helio"] | None = None, mtiny: float | None = None, gmtiny: float | None = None, @@ -926,7 +927,7 @@ def set_integrator(self, # TODO: Improve how it finds the executable binary update_list = [] - + if codename is not None: valid_codename = ["Swiftest", "Swifter", "Swift"] if codename.title() not in valid_codename: @@ -942,16 +943,15 @@ def set_integrator(self, update_list.append("codename") if self.codename == "Swiftest": self.binary_source = Path(_pyfile).parent.parent.parent.parent / "bin" / "swiftest_driver" - self.binary_path = self.simdir.resolve() - self.driver_executable = self.binary_path / "swiftest_driver" + self.driver_executable = self.binary_source if not self.binary_source.exists(): - warnings.warn(f"Cannot find the Swiftest driver in {str(self.binary_path)}",stacklevel=2) + warnings.warn(f"Cannot find the Swiftest driver at {str(self.binary_source)}",stacklevel=2) self.driver_executable = None else: - if self.binary_path.exists(): + if self.binary_source.exists(): self.driver_executable.resolve() else: - self.binary_path = "NOT IMPLEMENTED FOR THIS CODE" + self.binary_source = "NOT IMPLEMENTED FOR THIS CODE" self.driver_executable = None update_list.append("driver_executable") @@ -1016,7 +1016,8 @@ def get_integrator(self,arg_list: str | List[str] | None = None, verbose: bool | valid_instance_vars = {"codename": self.codename, "integrator": self.integrator, "param_file": str(self.param_file), - "driver_executable": str(self.driver_executable)} + "driver_executable": str(self.driver_executable) + } try: self.integrator @@ -1199,8 +1200,6 @@ def set_feature(self, msg = f"Cannot create the {self.simdir.resolve()} directory: File exists." msg += "\nDelete the file or change the location of param_file" raise NotADirectoryError(msg) - self.binary_path = self.simdir.resolve() - self.driver_executable = self.binary_path / "swiftest_driver" self.param_file = Path(kwargs.pop("param_file","param.in")) if self.codename == "Swiftest": @@ -2753,7 +2752,6 @@ def write_param(self, self.driver_script = os.path.join(self.simdir, "swiftest_driver.sh") with open(self.driver_script, 'w') as f: f.write(f"#{self._shell_full}\n") - f.write(f"source ~/.{self._shell}rc\n") f.write(f"cd {self.simdir}\n") f.write(f"{str(self.driver_executable)} {self.integrator} {str(self.param_file)} compact\n") @@ -2990,11 +2988,9 @@ def save(self, self.write_param(param_file=param_file,**kwargs) if not self.binary_source.exists(): msg = "Path to swiftest_driver has not been set!" - msg += f"\nMake sure swiftest_driver is compiled and the executable is in {str(self.binary_path)}" + msg += f"\nMake sure swiftest_driver is compiled and the executable is in {str(self.binary_source.parent)}" warnings.warn(msg,stacklevel=2) return - else: - shutil.copy2(self.binary_source, self.driver_executable) elif codename == "Swifter": swifter_param = io.swiftest2swifter_param(param) if "rhill" in self.data: diff --git a/singularity/.gitignore b/singularity/.gitignore new file mode 100644 index 000000000..baed94e88 --- /dev/null +++ b/singularity/.gitignore @@ -0,0 +1,4 @@ +!bin/ +!bin/swiftest_driver +!bin/swiftest +!install.sh diff --git a/singularity/bin/swiftest b/singularity/bin/swiftest new file mode 100755 index 000000000..a384860fe --- /dev/null +++ b/singularity/bin/swiftest @@ -0,0 +1,4 @@ +#!/bin/sh -- +OMP_NUM_THREADS=${OMP_NUM_THREADS:-`nproc --all`} +FOR_COARRAY_NUM_IMAGES=${FOR_COARRAY_NUM_IMAGES:-1} +singularity run --bind $(pwd):$(pwd) --cleanenv --env OMP_NUM_THREADS=${OMP_NUM_THREADS},FOR_COARRAY_NUM_IMAGES=${FOR_COARRAY_NUM_IMAGES} ${SWIFTEST_SIF} "$@" \ No newline at end of file diff --git a/singularity/bin/swiftest_driver b/singularity/bin/swiftest_driver new file mode 100755 index 000000000..024e93115 --- /dev/null +++ b/singularity/bin/swiftest_driver @@ -0,0 +1,4 @@ +#!/bin/sh -- +OMP_NUM_THREADS=${OMP_NUM_THREADS:-`nproc --all`} +FOR_COARRAY_NUM_IMAGES=${FOR_COARRAY_NUM_IMAGES:-1} +singularity exec --bind $(pwd):$(pwd) --env OMP_NUM_THREADS=${OMP_NUM_THREADS},FOR_COARRAY_NUM_IMAGES=${FOR_COARRAY_NUM_IMAGES} ${SWIFTEST_SIF} swiftest_driver "$@" \ No newline at end of file diff --git a/singularity/install.sh b/singularity/install.sh new file mode 100755 index 000000000..233ee187f --- /dev/null +++ b/singularity/install.sh @@ -0,0 +1,21 @@ +#!/bin/sh -- +# This will install the Singularity version of the swiftest_driver in place of the native compiled version into ../bin as +# well as the swiftest_python script that is used to execute a Python input file. +# The swiftest.sif file will be copied to the SIF_DIR directory. The default location is ${HOME}/.singularity. +# To change this, just set environment variable SIF_DIR prior to running this script. +# +# The script takes an optional argument "tag" if you want to pull a container other than "latest". +# +# In order to use one executable script, the SWIFTEST_SIF environment variable must be set to point to the location of swiftest.sif, +# which requires this script to be called via source: +# $ source ./install.sh +# or +# $ . ./install.sh +TAG=${1:-latest} + +SIF_DIR=${SIF_DIR:-${HOME}/.singularity} +echo "Installing ${SIF_DIR}/swiftest.sif container from mintongroup/swiftest:${TAG} Docker container" +singularity pull --force ${SIF_DIR}/swiftest.sif docker://mintongroup/swiftest:${TAG} +cp -rf bin/swiftest ../bin/ +cp -rf bin/swiftest_driver ../bin/ +export SWIFTEST_SIF=${SIF_DIR}/swiftest.sif \ No newline at end of file diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index a0ae5a554..f360bcdbc 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -14,6 +14,8 @@ # Add the source files SET(STRICT_MATH_FILES ${SRC}/collision/collision_generate.f90 + ${SRC}/collision/collision_io.f90 + ${SRC}/collision/collision_util.f90 ${SRC}/fraggle/fraggle_generate.f90 ${SRC}/fraggle/fraggle_util.f90 ${SRC}/fraggle/fraggle_module.f90 @@ -23,6 +25,7 @@ SET(STRICT_MATH_FILES ${SRC}/helio/helio_step.f90 ${SRC}/misc/lambda_function_module.f90 ${SRC}/misc/solver_module.f90 + ${SRC}/netcdf_io/netcdf_io_implementations.f90 ${SRC}/operator/operator_module.f90 ${SRC}/operator/operator_cross.f90 ${SRC}/operator/operator_mag.f90 @@ -31,6 +34,7 @@ SET(STRICT_MATH_FILES ${SRC}/rmvs/rmvs_step.f90 ${SRC}/swiftest/swiftest_drift.f90 ${SRC}/swiftest/swiftest_gr.f90 + ${SRC}/swiftest/swiftest_io.f90 ${SRC}/swiftest/swiftest_kick.f90 ${SRC}/swiftest/swiftest_user.f90 ${SRC}/swiftest/swiftest_obl.f90 @@ -60,20 +64,16 @@ SET(FAST_MATH_FILES ${SRC}/helio/helio_module.f90 ${SRC}/symba/symba_module.f90 ${SRC}/collision/collision_check.f90 - ${SRC}/collision/collision_io.f90 ${SRC}/collision/collision_regime.f90 ${SRC}/collision/collision_resolve.f90 - ${SRC}/collision/collision_util.f90 ${SRC}/encounter/encounter_check.f90 ${SRC}/encounter/encounter_io.f90 ${SRC}/encounter/encounter_util.f90 ${SRC}/helio/helio_util.f90 - ${SRC}/netcdf_io/netcdf_io_implementations.f90 ${SRC}/rmvs/rmvs_discard.f90 ${SRC}/rmvs/rmvs_encounter_check.f90 ${SRC}/rmvs/rmvs_util.f90 ${SRC}/swiftest/swiftest_discard.f90 - ${SRC}/swiftest/swiftest_io.f90 ${SRC}/swiftest/swiftest_util.f90 ${SRC}/symba/symba_discard.f90 ${SRC}/symba/symba_encounter_check.f90 @@ -108,19 +108,17 @@ SET_SOURCE_FILES_PROPERTIES(${SWIFTEST_src} PROPERTIES Fortran_PREPROCESS ON) # Add the needed libraries and special compiler flags ##################################################### -# # Uncomment if you need to link to BLAS and LAPACK -TARGET_LINK_LIBRARIES(${SWIFTEST_DRIVER} ${NETCDF_LIBRARIES} ${NETCDF_FORTRAN_LIBRARIES}) +TARGET_LINK_LIBRARIES(${SWIFTEST_DRIVER} PRIVATE ${NETCDF_FORTRAN_LIBRARIES} ${NETCDF_LIBRARIES} $ENV{LIBS}) IF(USE_OPENMP) - SET_TARGET_PROPERTIES(${SWIFTEST_DRIVER} PROPERTIES - COMPILE_FLAGS "${OpenMP_Fortran_FLAGS}" - LINK_FLAGS "${OpenMP_Fortran_FLAGS}") + SET_PROPERTY(TARGET ${SWIFTEST_DRIVER} APPEND_STRING PROPERTY COMPILE_FLAGS "${OpenMP_Fortran_FLAGS} ") + SET_PROPERTY(TARGET ${SWIFTEST_DRIVER} APPEND_STRING PROPERTY LINK_FLAGS "${OpenMP_Fortran_FLAGS} ") ENDIF(USE_OPENMP) IF(USE_COARRAY) - SET_TARGET_PROPERTIES(${SWIFTEST_DRIVER} PROPERTIES - COMPILE_FLAGS "${Coarray_Fortran_FLAGS}" - LINK_FLAGS "${Coarray_Fortran_FLAGS}") + TARGET_COMPILE_DEFINITIONS(${SWIFTEST_DRIVER} PRIVATE -DCOARRAY) + SET_PROPERTY(TARGET ${SWIFTEST_DRIVER} APPEND_STRING PROPERTY COMPILE_FLAGS "${Coarray_Fortran_FLAGS} ") + SET_PROPERTY(TARGET ${SWIFTEST_DRIVER} APPEND_STRING PROPERTY LINK_FLAGS "${Coarray_Fortran_FLAGS} ") ENDIF(USE_COARRAY) @@ -147,4 +145,25 @@ IF(BT STREQUAL "DEBUG") ADD_DEFINITIONS(-DDEBUG) ELSEIF(BT STREQUAL "PROFILE") ADD_DEFINITIONS(-DPROFILE) -ENDIF() \ No newline at end of file +ENDIF() + +# Check to see if the compiler allows for local-spec in do concurrent statements. Set a preprocessor variable if it does +IF (USE_OPENMP) + SET(TESTFILE "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}") + SET(TESTFILE "${TESTFILE}/CMakeTmp/testFortranDoConcurrentLoc.f90") + FILE(WRITE "${TESTFILE}" +" +program TestDoConcurrentLoc +integer :: i +real,dimension(10) :: a +do concurrent(i = 1:10) shared(a) + a(i) = i +end do +end program TestDoConcurrentLoc +") + TRY_COMPILE(DOCONLOC_WORKS ${CMAKE_BINARY_DIR} ${TESTFILE} + COMPILE_DEFINITIONS "${CMAKE_Fortran_FLAGS}" OUTPUT_VARIABLE OUTPUT) + IF (DOCONLOC_WORKS) + TARGET_COMPILE_DEFINITIONS(${SWIFTEST_DRIVER} PRIVATE -DDOCONLOC) + ENDIF (DOCONLOC_WORKS) +ENDIF (USE_OPENMP) diff --git a/src/base/base_module.f90 b/src/base/base_module.f90 index 01c111661..51266238f 100644 --- a/src/base/base_module.f90 +++ b/src/base/base_module.f90 @@ -22,90 +22,100 @@ module base !> User defined parameters that are read in from the parameters input file. !> Each paramter is initialized to a default values. type, abstract :: base_parameters - character(STRMAX) :: integrator !! Name of the nbody integrator used - character(STRMAX) :: param_file_name !! The name of the parameter file - real(DP) :: t0 = 0.0_DP !! Integration reference time - real(DP) :: tstart = -1.0_DP !! Integration start time - real(DP) :: tstop = -1.0_DP !! Integration stop time - real(DP) :: dt = -1.0_DP !! Time step - integer(I8B) :: iloop = 0_I8B !! Main loop counter - integer(I8B) :: nloops = 0_I8B !! Total number of loops to execute - integer(I8B) :: istart = 0_I8B !! Starting index for loop counter - integer(I4B) :: iout = 0 !! Output cadence counter - integer(I4B) :: idump = 0 !! Dump cadence counter - integer(I4B) :: nout = 0 !! Current output step - integer(I4B) :: istep = 0 !! Current value of istep (used for time stretching) - character(STRMAX) :: incbfile = CB_INFILE !! Name of input file for the central body - character(STRMAX) :: inplfile = PL_INFILE !! Name of input file for massive bodies - character(STRMAX) :: intpfile = TP_INFILE !! Name of input file for test particles - character(STRMAX) :: nc_in = NC_INFILE !! Name of system input file for NetCDF input - character(STRMAX) :: in_type = "NETCDF_DOUBLE" !! Data representation type of input data files - character(STRMAX) :: in_form = "XV" !! Format of input data files ("EL" or ["XV"]) - integer(I4B) :: istep_out = -1 !! Number of time steps between saved outputs - integer(I4B) :: nstep_out = -1 !! Total number of saved outputs - real(DP) :: fstep_out = 1.0_DP !! The output step time stretching factor - logical :: ltstretch = .false. !! Whether to employ time stretching or not - character(STRMAX) :: outfile = BIN_OUTFILE !! Name of output binary file - character(STRMAX) :: out_type = "NETCDF_DOUBLE" !! Binary format of output file - character(STRMAX) :: out_form = "XVEL" !! Data to write to output file - character(STRMAX) :: out_stat = 'NEW' !! Open status for output binary file - integer(I4B) :: dump_cadence = 10 !! Number of output steps between dumping simulation data to file - real(DP) :: rmin = -1.0_DP !! Minimum heliocentric radius for test particle - real(DP) :: rmax = -1.0_DP !! Maximum heliocentric radius for test particle - real(DP) :: rmaxu = -1.0_DP !! Maximum unbound heliocentric radius for test particle - real(DP) :: qmin = -1.0_DP !! Minimum pericenter distance for test particle - character(STRMAX) :: qmin_coord = "HELIO" !! Coordinate frame to use for qmin (["HELIO"] or "BARY") - real(DP) :: qmin_alo = -1.0_DP !! Minimum semimajor axis for qmin - real(DP) :: qmin_ahi = -1.0_DP !! Maximum semimajor axis for qmin - real(QP) :: MU2KG = -1.0_QP !! Converts mass units to grams - real(QP) :: TU2S = -1.0_QP !! Converts time units to seconds - real(QP) :: DU2M = -1.0_QP !! Converts distance unit to centimeters - real(DP) :: GU = -1.0_DP !! Universal gravitational constant in the system units - real(DP) :: inv_c2 = -1.0_DP !! Inverse speed of light squared in the system units - real(DP) :: GMTINY = -1.0_DP !! Smallest G*mass that is fully gravitating - real(DP) :: min_GMfrag = -1.0_DP !! Smallest G*mass that can be produced in a fragmentation event - real(DP) :: nfrag_reduction = 30.0_DP !! Reduction factor for limiting the number of fragments in a collision - integer(I4B), dimension(:), allocatable :: seed !! Random seeds for fragmentation modeling - logical :: lmtiny_pl = .false. !! Include semi-interacting massive bodies - character(STRMAX) :: collision_model = "MERGE" !! The Coll - character(STRMAX) :: encounter_save = "NONE" !! Indicate if and how encounter data should be saved - logical :: lenc_save_trajectory = .false. !! Indicates that when encounters are saved, the full trajectory through recursion steps are saved - logical :: lenc_save_closest = .false. !! Indicates that when encounters are saved, the closest approach distance between pairs of bodies is saved - character(NAMELEN) :: interaction_loops = "ADAPTIVE" !! Method used to compute interaction loops. Options are "TRIANGULAR", "FLAT", or "ADAPTIVE" - character(NAMELEN) :: encounter_check_plpl = "ADAPTIVE" !! Method used to compute pl-pl encounter checks. Options are "TRIANGULAR", "SORTSWEEP", or "ADAPTIVE" - character(NAMELEN) :: encounter_check_pltp = "ADAPTIVE" !! Method used to compute pl-tp encounter checks. Options are "TRIANGULAR", "SORTSWEEP", or "ADAPTIVE" - logical :: lcoarray = .false. !! Use Coarrays for test particle parallelization. - - ! The following are used internally, and are not set by the user, but instead are determined by the input value of INTERACTION_LOOPS + character(STRMAX) :: integrator !! Name of the nbody integrator used + character(STRMAX) :: param_file_name !! The name of the parameter file + real(DP) :: t0 = 0.0_DP !! Integration reference time + real(DP) :: tstart = -1.0_DP !! Integration start time + real(DP) :: tstop = -1.0_DP !! Integration stop time + real(DP) :: dt = -1.0_DP !! Time step + integer(I8B) :: iloop = 0_I8B !! Main loop counter + integer(I8B) :: nloops = 0_I8B !! Total number of loops to execute + integer(I8B) :: istart = 0_I8B !! Starting index for loop counter + integer(I4B) :: iout = 0 !! Output cadence counter + integer(I4B) :: idump = 0 !! Dump cadence counter + integer(I4B) :: nout = 0 !! Current output step + integer(I4B) :: istep = 0 !! Current value of istep (used for time stretching) + character(STRMAX) :: incbfile = CB_INFILE !! Name of input file for the central body + character(STRMAX) :: inplfile = PL_INFILE !! Name of input file for massive bodies + character(STRMAX) :: intpfile = TP_INFILE !! Name of input file for test particles + character(STRMAX) :: nc_in = NC_INFILE !! Name of system input file for NetCDF input + character(STRMAX) :: in_type = "NETCDF_DOUBLE" !! Data representation type of input data files + character(STRMAX) :: in_form = "XV" !! Format of input data files ("EL" or ["XV"]) + integer(I4B) :: istep_out = -1 !! Number of time steps between saved outputs + integer(I4B) :: nstep_out = -1 !! Total number of saved outputs + real(DP) :: fstep_out = 1.0_DP !! The output step time stretching factor + logical :: ltstretch = .false. !! Whether to employ time stretching or not + character(STRMAX) :: outfile = BIN_OUTFILE !! Name of output binary file + character(STRMAX) :: out_type = "NETCDF_DOUBLE" !! Binary format of output file + character(STRMAX) :: out_form = "XVEL" !! Data to write to output file + character(STRMAX) :: out_stat = 'NEW' !! Open status for output binary file + integer(I4B) :: dump_cadence = 10 !! Number of output steps between dumping simulation data to file + real(DP) :: rmin = -1.0_DP !! Minimum heliocentric radius for test particle + real(DP) :: rmax = -1.0_DP !! Maximum heliocentric radius for test particle + real(DP) :: rmaxu = -1.0_DP !! Maximum unbound heliocentric radius for test particle + real(DP) :: qmin = -1.0_DP !! Minimum pericenter distance for test particle + character(STRMAX) :: qmin_coord = "HELIO" !! Coordinate frame to use for qmin (["HELIO"] or "BARY") + real(DP) :: qmin_alo = -1.0_DP !! Minimum semimajor axis for qmin + real(DP) :: qmin_ahi = -1.0_DP !! Maximum semimajor axis for qmin + real(QP) :: MU2KG = -1.0_QP !! Converts mass units to grams + real(QP) :: TU2S = -1.0_QP !! Converts time units to seconds + real(QP) :: DU2M = -1.0_QP !! Converts distance unit to centimeters + real(DP) :: GU = -1.0_DP !! Universal gravitational constant in the system units + real(DP) :: inv_c2 = -1.0_DP !! Inverse speed of light squared in the system units + real(DP) :: GMTINY = -1.0_DP !! Smallest G*mass that is fully gravitating + real(DP) :: min_GMfrag = -1.0_DP !! Smallest G*mass that can be produced in a fragmentation event + real(DP) :: nfrag_reduction = 30.0_DP !! Reduction factor for limiting the number of collision fragments + integer(I4B), dimension(:), allocatable :: seed !! Random seeds for fragmentation modeling + logical :: lmtiny_pl = .false. !! Include semi-interacting massive bodies + character(STRMAX) :: collision_model = "MERGE" !! The Coll + character(STRMAX) :: encounter_save = "NONE" !! Indicate if and how encounter data should be saved + logical :: lenc_save_trajectory = .false. !! Indicates that when encounters are saved, the full trajectory + !! through recursion steps are saved + logical :: lenc_save_closest = .false. !! Indicates that when encounters are saved, the closest approach + !! distance between pairs of bodies is saved + character(NAMELEN):: interaction_loops = "ADAPTIVE" !! Method used to compute interaction loops. + !! Options are "TRIANGULAR", "FLAT", or "ADAPTIVE" + character(NAMELEN):: encounter_check_plpl = "ADAPTIVE" !! Method used to compute pl-pl encounter checks. + !! Options are "TRIANGULAR", "SORTSWEEP", or "ADAPTIVE" + character(NAMELEN):: encounter_check_pltp = "ADAPTIVE" !! Method used to compute pl-tp encounter checks. + !! Options are "TRIANGULAR", "SORTSWEEP", or "ADAPTIVE" + logical :: lcoarray = .false. !! Use Coarrays for test particle parallelization. + + ! The following are not set by the user, but instead are determined by the input value of INTERACTION_LOOPS logical :: lflatten_interactions = .false. !! Use the flattened upper triangular matrix for pl-pl interaction loops - logical :: lencounter_sas_plpl = .false. !! Use the Sort and Sweep algorithm to prune the encounter list before checking for close encounters - logical :: lencounter_sas_pltp = .false. !! Use the Sort and Sweep algorithm to prune the encounter list before checking for close encounters + logical :: lencounter_sas_plpl = .false. !! Use the Sort and Sweep algorithm to prune the encounter list before checking + !! for close encounters + logical :: lencounter_sas_pltp = .false. !! Use the Sort and Sweep algorithm to prune the encounter list before checking + !! for close encounters ! Logical flags to turn on or off various features of the code - logical :: lrhill_present = .false. !! Hill radii are given as an input rather than calculated by the code (can be used to inflate close encounter regions manually) + logical :: lrhill_present = .false. !! Hill radii are given as an input rather than calculated by the code (can be used to + !! inflate close encounter regions manually) logical :: lextra_force = .false. !! User defined force function turned on logical :: lbig_discard = .false. !! Save big bodies on every discard logical :: lclose = .false. !! Turn on close encounters logical :: lenergy = .false. !! Track the total energy of the system - logical :: loblatecb = .false. !! Calculate acceleration from oblate central body (automatically turns true if nonzero J2 is input) + logical :: loblatecb = .false. !! Calculate acceleration from oblate central body (automatically turns true if nonzero J2 + !! is input) logical :: lrotation = .false. !! Include rotation states of big bodies logical :: ltides = .false. !! Include tidal dissipation - ! Initial values to pass to the energy report subroutine (usually only used in the case of a restart, otherwise these will be updated with initial conditions values) - real(DP) :: E_orbit_orig = 0.0_DP !! Initial orbital energy + ! Initial values to pass to the energy report subroutine (usually only used in the case of a restart, otherwise these will be + ! updated with initial conditions values) + real(DP) :: E_orbit_orig = 0.0_DP !! Initial orbital energy real(DP) :: GMtot_orig = 0.0_DP !! Initial system mass - real(DP), dimension(NDIM) :: L_total_orig = 0.0_DP !! Initial total angular momentum vector - real(DP), dimension(NDIM) :: L_orbit_orig = 0.0_DP !! Initial orbital angular momentum - real(DP), dimension(NDIM) :: L_spin_orig = 0.0_DP !! Initial spin angular momentum vector - real(DP), dimension(NDIM) :: L_escape = 0.0_DP !! Angular momentum of bodies that escaped the system (used for bookeeping) + real(DP), dimension(NDIM) :: L_total_orig = 0.0_DP !! Initial total angular momentum vector + real(DP), dimension(NDIM) :: L_orbit_orig = 0.0_DP !! Initial orbital angular momentum + real(DP), dimension(NDIM) :: L_spin_orig = 0.0_DP !! Initial spin angular momentum vector + real(DP), dimension(NDIM) :: L_escape = 0.0_DP !! Angular momentum of escaped bodies (used for bookeeping) real(DP) :: GMescape = 0.0_DP !! Mass of bodies that escaped the system (used for bookeeping) - real(DP) :: E_collisions = 0.0_DP !! Energy lost from system due to collisions - real(DP) :: E_untracked = 0.0_DP !! Energy gained from system due to escaped bodies + real(DP) :: E_collisions = 0.0_DP !! Energy lost from system due to collisions + real(DP) :: E_untracked = 0.0_DP !! Energy gained from system due to escaped bodies logical :: lfirstenergy = .true. !! This is the first time computing energe logical :: lfirstkick = .true. !! Initiate the first kick in a symplectic step logical :: lrestart = .false. !! Indicates whether or not this is a restarted run - character(NAMELEN) :: display_style !! Style of the output display {"STANDARD", "COMPACT"}). Default is "STANDARD" + character(NAMELEN) :: display_style !! Style of the output display {["STANDARD"], "COMPACT"}). integer(I4B) :: display_unit !! File unit number for display (either to stdout or to a log file) logical :: log_output = .false. !! Logs the output to file instead of displaying it on the terminal @@ -137,8 +147,10 @@ subroutine abstract_io_param_reader(self, unit, iotype, v_list, iostat, iomsg) implicit none class(base_parameters), intent(inout) :: self !! Collection of parameters integer(I4B), intent(in) :: unit !! File unit number - character(len=*), intent(in) :: iotype !! Dummy argument passed to the input/output procedure contains the text from the char-literal-constant, prefixed with DT. - !! If you do not include a char-literal-constant, the iotype argument contains only DT. + character(len=*), intent(in) :: iotype !! Dummy argument passed to the input/output procedure contains the + !! text from the char-literal-constant, prefixed with DT. If you do + !! not include a char-literal-constant, the iotype argument contains + !! only DT. character(len=*), intent(in) :: v_list(:) !! The first element passes the integrator code to the reader integer(I4B), intent(out) :: iostat !! IO status code character(len=*), intent(inout) :: iomsg !! Message to pass if iostat /= 0 @@ -149,8 +161,10 @@ subroutine abstract_io_param_writer(self, unit, iotype, v_list, iostat, iomsg) implicit none class(base_parameters), intent(in) :: self !! Collection of parameters integer(I4B), intent(in) :: unit !! File unit number - character(len=*), intent(in) :: iotype !! Dummy argument passed to the input/output procedure contains the text from the char-literal-constant, prefixed with DT. - !! If you do not include a char-literal-constant, the iotype argument contains only DT. + character(len=*), intent(in) :: iotype !! Dummy argument passed to the input/output procedure contains the + !! text from the char-literal-constant, prefixed with DT. If you do + !! not include a char-literal-constant, the iotype argument contains + !! only DT. integer(I4B), intent(in) :: v_list(:) !! Not used in this procedure integer(I4B), intent(out) :: iostat !! IO status code character(len=*), intent(inout) :: iomsg !! Message to pass if iostat /= 0 @@ -168,7 +182,8 @@ end subroutine abstract_io_read_in_param type :: base_storage_frame class(*), allocatable :: item contains - procedure :: store => base_util_copy_store !! Stores a snapshot of the nbody system so that later it can be retrieved for saving to file. + procedure :: store => base_util_copy_store !! Stores a snapshot of the nbody system so that later it can be + !! retrieved for saving to file. generic :: assignment(=) => store final :: base_final_storage_frame end type @@ -179,25 +194,26 @@ end subroutine abstract_io_read_in_param integer(I4B) :: nframes !! Total number of frames that can be stored !! An class that establishes the pattern for various storage objects - type(base_storage_frame), dimension(:), allocatable :: frame !! Array of stored frames - integer(I4B) :: iframe = 0 !! Index of the last frame stored in the system - integer(I4B) :: nid !! Number of unique id values in all saved snapshots - integer(I4B), dimension(:), allocatable :: idvals !! The set of unique id values contained in the snapshots - integer(I4B), dimension(:), allocatable :: idmap !! The id value -> index map - integer(I4B) :: nt !! Number of unique time values in all saved snapshots - real(DP), dimension(:), allocatable :: tvals !! The set of unique time values contained in the snapshots - integer(I4B), dimension(:), allocatable :: tmap !! The t value -> index map + type(base_storage_frame), dimension(:), allocatable :: frame !! Array of stored frames + integer(I4B) :: iframe = 0 !! Index of the last frame stored in the system + integer(I4B) :: nid !! Number of unique id values in all saved snapshots + integer(I4B), dimension(:), allocatable :: idvals !! The set of unique id values contained in the snapshots + integer(I4B), dimension(:), allocatable :: idmap !! The id value -> index map + integer(I4B) :: nt !! Number of unique time values in all saved snapshots + real(DP), dimension(:), allocatable :: tvals !! The set of unique time values contained in the snapshots + integer(I4B), dimension(:), allocatable :: tmap !! The t value -> index map contains procedure :: dealloc => base_util_dealloc_storage !! Deallocates all allocatables - procedure :: reset => base_util_reset_storage !! Resets the storage object back to its original state by removing all of the saved items from the storage frames + procedure :: reset => base_util_reset_storage !! Resets the storage object back to its original state by removing all of + !! the saved items from the storage frames procedure :: resize => base_util_resize_storage !! Resizes storage if it is too small procedure :: setup => base_util_setup_storage !! Sets up a storage system with a set number of frames procedure :: save => base_util_snapshot_save !! Takes a snapshot of the current system end type base_storage - !> Class definition for the particle origin information object. This object is used to track time, location, and collisional regime - !> of fragments produced in collisional events. + !> Class definition for the particle origin information object. This object is used to track time, location, and collisional + !> regime of fragments produced in collisional events. type, abstract :: base_particle_info end type base_particle_info @@ -291,13 +307,15 @@ end subroutine abstract_util_dealloc_object subroutine base_util_append_arr_char_string(arr, source, nold, lsource_mask) !! author: David A. Minton !! - !! Append a single array of character string type onto another. If the destination array is not allocated, or is not big enough, this will allocate space for it. + !! Append a single array of character string type onto another. If the destination array is not allocated, or is not big + !! enough, this will allocate space for it. implicit none ! Arguments - character(len=STRMAX), dimension(:), allocatable, intent(inout) :: arr !! Destination array - character(len=STRMAX), dimension(:), allocatable, intent(in) :: source !! Array to append - integer(I4B), intent(in), optional :: nold !! Extent of original array. If passed, the source array will begin at arr(nold+1). Otherwise, the size of arr will be used. - logical, dimension(:), intent(in), optional :: lsource_mask !! Logical mask indicating which elements to append to + character(len=STRMAX), dimension(:), allocatable, intent(inout) :: arr !! Destination array + character(len=STRMAX), dimension(:), allocatable, intent(in) :: source !! Array to append + integer(I4B), intent(in), optional :: nold !! Extent of original array. If passed, the source array will begin at + !! arr(nold+1). Otherwise, the size of arr will be used. + logical, dimension(:), intent(in), optional :: lsource_mask !! Logical mask indicating which elements to append to ! Internals integer(I4B) :: nnew, nsrc, nend_orig @@ -336,13 +354,15 @@ end subroutine base_util_append_arr_char_string subroutine base_util_append_arr_DP(arr, source, nold, lsource_mask) !! author: David A. Minton !! - !! Append a single array of double precision type onto another. If the destination array is not allocated, or is not big enough, this will allocate space for it. + !! Append a single array of double precision type onto another. If the destination array is not allocated, or is not big + !! enough, this will allocate space for it. implicit none ! Arguments - real(DP), dimension(:), allocatable, intent(inout) :: arr !! Destination array - real(DP), dimension(:), allocatable, intent(in) :: source !! Array to append - integer(I4B), intent(in), optional :: nold !! Extent of original array. If passed, the source array will begin at arr(nold+1). Otherwise, the size of arr will be used. - logical, dimension(:), intent(in), optional :: lsource_mask !! Logical mask indicating which elements to append to + real(DP), dimension(:), allocatable, intent(inout) :: arr !! Destination array + real(DP), dimension(:), allocatable, intent(in) :: source !! Array to append + integer(I4B), intent(in), optional :: nold !! Extent of original array. If passed, the source array will begin at + !! arr(nold+1). Otherwise, the size of arr will be used. + logical, dimension(:), intent(in), optional :: lsource_mask !! Logical mask indicating which elements to append to ! Internals integer(I4B) :: nnew, nsrc, nend_orig @@ -381,13 +401,15 @@ end subroutine base_util_append_arr_DP subroutine base_util_append_arr_DPvec(arr, source, nold, lsource_mask) !! author: David A. Minton !! - !! Append a single array of double precision vector type of size (NDIM, n) onto another. If the destination array is not allocated, or is not big enough, this will allocate space for it. + !! Append a single array of double precision vector type of size (NDIM, n) onto another. If the destination array is not + !! allocated, or is not big enough, this will allocate space for it. implicit none ! Arguments - real(DP), dimension(:,:), allocatable, intent(inout) :: arr !! Destination array - real(DP), dimension(:,:), allocatable, intent(in) :: source !! Array to append - integer(I4B), intent(in), optional :: nold !! Extent of original array. If passed, the source array will begin at arr(nold+1). Otherwise, the size of arr will be used. - logical, dimension(:), intent(in), optional :: lsource_mask !! Logical mask indicating which elements to append to + real(DP), dimension(:,:), allocatable, intent(inout) :: arr !! Destination array + real(DP), dimension(:,:), allocatable, intent(in) :: source !! Array to append + integer(I4B), intent(in), optional :: nold !! Extent of original array. If passed, the source array will begin at + !! arr(nold+1). Otherwise, the size of arr will be used. + logical, dimension(:), intent(in), optional :: lsource_mask !! Logical mask indicating which elements to append to ! Internals integer(I4B) :: nnew, nsrc, nend_orig @@ -428,13 +450,15 @@ end subroutine base_util_append_arr_DPvec subroutine base_util_append_arr_I4B(arr, source, nold, lsource_mask) !! author: David A. Minton !! - !! Append a single array of integer(I4B) onto another. If the destination array is not allocated, or is not big enough, this will allocate space for it. + !! Append a single array of integer(I4B) onto another. If the destination array is not allocated, or is not big enough, + !! this will allocate space for it. implicit none ! Arguments - integer(I4B), dimension(:), allocatable, intent(inout) :: arr !! Destination array - integer(I4B), dimension(:), allocatable, intent(in) :: source !! Array to append - integer(I4B), intent(in), optional :: nold !! Extent of original array. If passed, the source array will begin at arr(nold+1). Otherwise, the size of arr will be used. - logical, dimension(:), intent(in), optional :: lsource_mask !! Logical mask indicating which elements to append to + integer(I4B), dimension(:), allocatable, intent(inout) :: arr !! Destination array + integer(I4B), dimension(:), allocatable, intent(in) :: source !! Array to append + integer(I4B), intent(in), optional :: nold !! Extent of original array. If passed, the source array will begin at + !! arr(nold+1). Otherwise, the size of arr will be used. + logical, dimension(:), intent(in), optional :: lsource_mask !! Logical mask indicating which elements to append to ! Internals integer(I4B) :: nnew, nsrc, nend_orig @@ -473,13 +497,15 @@ end subroutine base_util_append_arr_I4B subroutine base_util_append_arr_logical(arr, source, nold, lsource_mask) !! author: David A. Minton !! - !! Append a single array of logical type onto another. If the destination array is not allocated, or is not big enough, this will allocate space for it. + !! Append a single array of logical type onto another. If the destination array is not allocated, or is not big enough, + !! this will allocate space for it. implicit none ! Arguments - logical, dimension(:), allocatable, intent(inout) :: arr !! Destination array - logical, dimension(:), allocatable, intent(in) :: source !! Array to append - integer(I4B), intent(in), optional :: nold !! Extent of original array. If passed, the source array will begin at arr(nold+1). Otherwise, the size of arr will be used. - logical, dimension(:), intent(in), optional :: lsource_mask !! Logical mask indicating which elements to append to + logical, dimension(:), allocatable, intent(inout) :: arr !! Destination array + logical, dimension(:), allocatable, intent(in) :: source !! Array to append + integer(I4B), intent(in), optional :: nold !! Extent of original array. If passed, the source array will begin at + !! arr(nold+1). Otherwise, the size of arr will be used. + logical, dimension(:), intent(in), optional :: lsource_mask !! Logical mask indicating which elements to append to ! Internals integer(I4B) :: nnew, nsrc, nend_orig @@ -574,7 +600,8 @@ subroutine base_util_exit(code) character(*), parameter :: BAR = '("------------------------------------------------")' character(*), parameter :: SUCCESS_MSG = '(/, "Normal termination of Swiftest (version ", f3.1, ")")' character(*), parameter :: FAIL_MSG = '(/, "Terminating Swiftest (version ", f3.1, ") due to error!!")' - character(*), parameter :: USAGE_MSG = '("Usage: swiftest [bs|helio|ra15|rmvs|symba|tu4|whm] [standard|compact|progress|NONE]")' + character(*), parameter :: USAGE_MSG = '("Usage: swiftest ' // & + '[{standard}|compact|progress]")' character(*), parameter :: HELP_MSG = USAGE_MSG select case(code) @@ -605,7 +632,8 @@ subroutine base_util_fill_arr_char_string(keeps, inserts, lfill_list) ! Arguments character(len=STRMAX), dimension(:), allocatable, intent(inout) :: keeps !! Array of values to keep character(len=STRMAX), dimension(:), allocatable, intent(in) :: inserts !! Array of values to insert into keep - logical, dimension(:), intent(in) :: lfill_list !! Logical array of bodies to merge into the keeps + logical, dimension(:), intent(in) :: lfill_list !! Logical array of bodies to merge into the + !! keeps if (.not.allocated(keeps) .or. .not.allocated(inserts)) return @@ -702,7 +730,8 @@ end subroutine base_util_fill_arr_logical subroutine base_util_reset_storage(self) !! author: David A. Minton !! - !! Resets the storage object back to its original state by removing all of the saved items from the storage frames, but does not deallocate the frames + !! Resets the storage object back to its original state by removing all of the saved items from the storage frames, but + !! does not deallocate the frames implicit none ! Arguments class(base_storage), intent(inout) :: self @@ -736,7 +765,7 @@ subroutine base_util_resize_arr_char_string(arr, nnew) character(len=STRMAX), dimension(:), allocatable, intent(inout) :: arr !! Array to resize integer(I4B), intent(in) :: nnew !! New size ! Internals - character(len=STRMAX), dimension(:), allocatable :: tmp !! Temporary storage array in case the input array is already allocated + character(len=STRMAX), dimension(:), allocatable :: tmp !! Temp. storage array in case the input array is already allocated integer(I4B) :: nold !! Old size if (nnew < 0) return @@ -1008,10 +1037,12 @@ end subroutine base_util_setup_storage subroutine base_util_snapshot_save(self, snapshot) !! author: David A. Minton !! - !! Checks the current size of the storage object against the required size and extends it by a factor of 2 more than requested if it is too small. - !! Note: The reason to extend it by a factor of 2 is for performance. When there are many enounters per step, resizing every time you want to add an - !! encounter takes significant computational effort. Resizing by a factor of 2 is a tradeoff between performance (fewer resize calls) and memory managment - !! Memory usage grows by a factor of 2 each time it fills up, but no more. + !! Checks the current size of the storage object against the required size and extends it by a factor of 2 more than + !! requested if it is too small. + !! Note: The reason to extend it by a factor of 2 is for performance. When there are many enounters per step, resizing + !! every time you want to add an encounter takes significant computational effort. Resizing by a factor of 2 is a tradeoff + !! between performance (fewer resize calls) and memory managment. Memory usage grows by a factor of 2 each time it fills + !! up, but no more. implicit none ! Arguments class(base_storage), intent(inout) :: self !! Storage encounter storage object @@ -1043,8 +1074,11 @@ subroutine base_util_spill_arr_char_string(keeps, discards, lspill_list, ldestru ! Arguments character(len=STRMAX), dimension(:), allocatable, intent(inout) :: keeps !! Array of values to keep character(len=STRMAX), dimension(:), allocatable, intent(inout) :: discards !! Array of discards - logical, dimension(:), intent(in) :: lspill_list !! Logical array of bodies to spill into the discards - logical, intent(in) :: ldestructive !! Logical flag indicating whether or not this operation should alter the keeps array or not + logical, dimension(:), intent(in) :: lspill_list !! Logical array of bodies to spill into + !! the discards + logical, intent(in) :: ldestructive !! Logical flag indicating whether or not + !! this operation should alter the keeps + !! array or not ! Internals integer(I4B) :: nspill, nkeep, nlist character(len=STRMAX), dimension(:), allocatable :: tmp !! Array of values to keep @@ -1087,7 +1121,8 @@ subroutine base_util_spill_arr_DP(keeps, discards, lspill_list, ldestructive) real(DP), dimension(:), allocatable, intent(inout) :: keeps !! Array of values to keep real(DP), dimension(:), allocatable, intent(inout) :: discards !! Array of discards logical, dimension(:), intent(in) :: lspill_list !! Logical array of bodies to spill into the discardss - logical, intent(in) :: ldestructive !! Logical flag indicating whether or not this operation should alter the keeps array or not + logical, intent(in) :: ldestructive !! Logical flag indicating whether or not this operation + !! should alter the keeps array or not ! Internals integer(I4B) :: nspill, nkeep, nlist real(DP), dimension(:), allocatable :: tmp !! Array of values to keep @@ -1130,7 +1165,8 @@ subroutine base_util_spill_arr_DPvec(keeps, discards, lspill_list, ldestructive) real(DP), dimension(:,:), allocatable, intent(inout) :: keeps !! Array of values to keep real(DP), dimension(:,:), allocatable, intent(inout) :: discards !! Array discards logical, dimension(:), intent(in) :: lspill_list !! Logical array of bodies to spill into the discards - logical, intent(in) :: ldestructive !! Logical flag indicating whether or not this operation should alter the keeps array or not + logical, intent(in) :: ldestructive !! Logical flag indicating whether or not this + !! operation should alter the keeps array or not ! Internals integer(I4B) :: i, nspill, nkeep, nlist real(DP), dimension(:,:), allocatable :: tmp !! Array of values to keep @@ -1174,13 +1210,14 @@ subroutine base_util_spill_arr_I4B(keeps, discards, lspill_list, ldestructive) !! This is the inverse of a spill operation implicit none ! Arguments - integer(I4B), dimension(:), allocatable, intent(inout) :: keeps !! Array of values to keep - integer(I4B), dimension(:), allocatable, intent(inout) :: discards !! Array of discards - logical, dimension(:), intent(in) :: lspill_list !! Logical array of bodies to spill into the discards - logical, intent(in) :: ldestructive !! Logical flag indicating whether or not this operation should alter the keeps array or not + integer(I4B), dimension(:), allocatable, intent(inout) :: keeps !! Array of values to keep + integer(I4B), dimension(:), allocatable, intent(inout) :: discards !! Array of discards + logical, dimension(:), intent(in) :: lspill_list !! Logical array of bodies to spill into the discards + logical, intent(in) :: ldestructive!! Logical flag indicating whether or not this + !! operation should alter the keeps array or not ! Internals integer(I4B) :: nspill, nkeep, nlist - integer(I4B), dimension(:), allocatable :: tmp !! Array of values to keep + integer(I4B), dimension(:), allocatable :: tmp !! Array of values to keep nkeep = count(.not.lspill_list(:)) nspill = count(lspill_list(:)) @@ -1217,10 +1254,11 @@ subroutine base_util_spill_arr_I8B(keeps, discards, lspill_list, ldestructive) !! This is the inverse of a spill operation implicit none ! Arguments - integer(I8B), dimension(:), allocatable, intent(inout) :: keeps !! Array of values to keep - integer(I8B), dimension(:), allocatable, intent(inout) :: discards !! Array of discards - logical, dimension(:), intent(in) :: lspill_list !! Logical array of bodies to spill into the discards - logical, intent(in) :: ldestructive !! Logical flag indicating whether or not this operation should alter the keeps array or not + integer(I8B), dimension(:), allocatable, intent(inout) :: keeps !! Array of values to keep + integer(I8B), dimension(:), allocatable, intent(inout) :: discards !! Array of discards + logical, dimension(:), intent(in) :: lspill_list !! Logical array of bodies to spill into the discards + logical, intent(in) :: ldestructive!! Logical flag indicating whether or not this + !! operation should alter the keeps array or not ! Internals integer(I4B) :: nspill, nkeep, nlist integer(I8B), dimension(:), allocatable :: tmp !! Array of values to keep @@ -1263,7 +1301,8 @@ subroutine base_util_spill_arr_logical(keeps, discards, lspill_list, ldestructiv logical, dimension(:), allocatable, intent(inout) :: keeps !! Array of values to keep logical, dimension(:), allocatable, intent(inout) :: discards !! Array of discards logical, dimension(:), intent(in) :: lspill_list !! Logical array of bodies to spill into the discards - logical, intent(in) :: ldestructive !! Logical flag indicating whether or not this operation should alter the keeps array or no + logical, intent(in) :: ldestructive !! Logical flag indicating whether or not this operation + !! should alter the keeps array or no ! Internals integer(I4B) :: nspill, nkeep, nlist logical, dimension(:), allocatable :: tmp !! Array of values to keep @@ -1918,7 +1957,7 @@ pure subroutine base_util_sort_rearrange_arr_char_string(arr, ind, n) integer(I4B), dimension(:), intent(in) :: ind !! Index to rearrange against integer(I4B), intent(in) :: n !! Number of elements in arr and ind to rearrange ! Internals - character(len=STRMAX), dimension(:), allocatable :: tmp !! Temporary copy of arry used during rearrange operation + character(len=STRMAX), dimension(:), allocatable :: tmp !! Temporary copy of arry used during rearrange operation if (.not. allocated(arr) .or. n <= 0) return allocate(tmp, mold=arr) @@ -2062,7 +2101,9 @@ subroutine base_util_unique_DP(input_array, output_array, index_map) ! Arguments real(DP), dimension(:), intent(in) :: input_array !! Unsorted input array real(DP), dimension(:), allocatable, intent(out) :: output_array !! Sorted array of unique values - integer(I4B), dimension(:), allocatable, intent(out) :: index_map !! An array of the same size as input_array that such that any for any index i, output_array(index_map(i)) = input_array(i) + integer(I4B), dimension(:), allocatable, intent(out) :: index_map !! An array of the same size as input_array that such + !! that any for any index i, + !! output_array(index_map(i)) = input_array(i) ! Internals real(DP), dimension(:), allocatable :: unique_array integer(I4B) :: n @@ -2078,7 +2119,7 @@ subroutine base_util_unique_DP(input_array, output_array, index_map) n = n + 1 lo = minval(input_array(:), mask=input_array(:) > lo) unique_array(n) = lo - where(input_array(:) == lo) index_map(:) = n + where(abs(input_array(:) - lo) < epsilon(1.0_DP) * lo) index_map(:) = n if (lo >= hi) exit enddo allocate(output_array(n), source=unique_array(1:n)) @@ -2095,7 +2136,9 @@ subroutine base_util_unique_I4B(input_array, output_array, index_map) ! Arguments integer(I4B), dimension(:), intent(in) :: input_array !! Unsorted input array integer(I4B), dimension(:), allocatable, intent(out) :: output_array !! Sorted array of unique values - integer(I4B), dimension(:), allocatable, intent(out) :: index_map !! An array of the same size as input_array that such that any for any index i, output_array(index_map(i)) = input_array(i) + integer(I4B), dimension(:), allocatable, intent(out) :: index_map !! An array of the same size as input_array that such + !! that any for any index i, + !! output_array(index_map(i)) = input_array(i) ! Internals integer(I4B), dimension(:), allocatable :: unique_array integer(I4B) :: n, lo, hi diff --git a/src/coarray/coarray_clone.f90 b/src/coarray/coarray_clone.f90 index 9f7e1ea1a..893cff147 100644 --- a/src/coarray/coarray_clone.f90 +++ b/src/coarray/coarray_clone.f90 @@ -71,7 +71,7 @@ module subroutine coarray_component_clone_DP(var,src_img) sync all if (this_image() == si) then do img = 1, num_images() - tmp[img] = var + tmp[img] = var end do sync images(*) else @@ -117,7 +117,7 @@ module subroutine coarray_component_clone_DP_arr1D(var,src_img) allocate(tmp(n[si])[*]) if (this_image() == si) then do img = 1, num_images() - tmp(:)[img] = var + tmp(:)[img] = var end do sync images(*) else @@ -167,7 +167,7 @@ module subroutine coarray_component_clone_DP_arr2D(var,src_img) allocate(tmp(n1[si],n2[si])[*]) if (this_image() == si) then do img = 1, num_images() - tmp(:,:)[img] = var(:,:) + tmp(:,:)[img] = var(:,:) end do sync images(*) else @@ -252,7 +252,7 @@ module subroutine coarray_component_clone_DP_vec2D(var,src_img) allocate(tmp(NDIM,n[si])[*]) if (this_image() == si) then do img = 1, num_images() - tmp(:,:)[img] = var(:,:) + tmp(:,:)[img] = var(:,:) end do sync images(*) else diff --git a/src/collision/collision_check.f90 b/src/collision/collision_check.f90 index 936963caa..ebb4a3671 100644 --- a/src/collision/collision_check.f90 +++ b/src/collision/collision_check.f90 @@ -77,7 +77,8 @@ module subroutine collision_check_plpl(self, nbody_system, param, t, dt, irec, l ! Internals logical, dimension(:), allocatable :: lcollision, lmask real(DP), dimension(NDIM) :: xr, vr - integer(I4B) :: i, j, k, nenc + integer(I4B) :: i, j + integer(I8B) :: k, nenc real(DP) :: rlim, Gmtot logical :: lany_closest @@ -101,7 +102,11 @@ module subroutine collision_check_plpl(self, nbody_system, param, t, dt, irec, l lcollision(:) = .false. self%lclosest(:) = .false. - do concurrent(k = 1:nenc, lmask(k)) +#ifdef DOCONLOC + do concurrent(k = 1_I8B:nenc, lmask(k)) shared(self,pl,lmask, dt, lcollision) local(i,j,xr,vr,rlim,Gmtot) +#else + do concurrent(k = 1_I8B:nenc, lmask(k)) +#endif i = self%index1(k) j = self%index2(k) xr(:) = pl%rh(:, i) - pl%rh(:, j) @@ -116,7 +121,7 @@ module subroutine collision_check_plpl(self, nbody_system, param, t, dt, irec, l if (lany_collision .or. lany_closest) then call pl%rh2rb(nbody_system%cb) ! Update the central body barycenteric position vector to get us out of DH and into bary - do k = 1, nenc + do k = 1_I8B, nenc if (.not.lcollision(k) .and. .not. self%lclosest(k)) cycle i = self%index1(k) j = self%index2(k) @@ -174,7 +179,8 @@ module subroutine collision_check_pltp(self, nbody_system, param, t, dt, irec, l ! Internals logical, dimension(:), allocatable :: lcollision, lmask real(DP), dimension(NDIM) :: xr, vr - integer(I4B) :: i, j, k, nenc + integer(I4B) :: i, j + integer(I8B) :: k, nenc logical :: lany_closest character(len=STRMAX) :: timestr, idstri, idstrj, message class(collision_list_pltp), allocatable :: tmp @@ -190,13 +196,13 @@ module subroutine collision_check_pltp(self, nbody_system, param, t, dt, irec, l nenc = self%nenc allocate(lmask(nenc)) - lmask(:) = (self%status(1:nenc) == ACTIVE) + lmask(:) = (self%status(1:nenc) == ACTIVE) select type(pl) class is (symba_pl) - select type(tp) - class is (symba_tp) - lmask(:) = lmask(:) .and. (tp%levelg(self%index2(1:nenc)) >= irec) - end select + select type(tp) + class is (symba_tp) + lmask(:) = lmask(:) .and. (tp%levelg(self%index2(1:nenc)) >= irec) + end select end select if (.not.any(lmask(:))) return @@ -204,8 +210,11 @@ module subroutine collision_check_pltp(self, nbody_system, param, t, dt, irec, l lcollision(:) = .false. self%lclosest(:) = .false. - - do concurrent(k = 1:nenc, lmask(k)) +#ifdef DOCONLOC + do concurrent(k = 1_I8B:nenc, lmask(k)) shared(self,pl,tp,lmask, dt, lcollision) local(i,j,xr,vr) +#else + do concurrent(k = 1_I8B:nenc, lmask(k)) +#endif i = self%index1(k) j = self%index2(k) xr(:) = pl%rh(:, i) - tp%rh(:, j) diff --git a/src/collision/collision_generate.f90 b/src/collision/collision_generate.f90 index f0fc6d9e7..7f529ba02 100644 --- a/src/collision/collision_generate.f90 +++ b/src/collision/collision_generate.f90 @@ -46,7 +46,6 @@ module subroutine collision_generate_bounce(self, nbody_system, param, t) real(DP), intent(in) :: t !! The time of the collision ! Internals integer(I4B) :: i,j,nimp - real(DP), dimension(NDIM) :: rcom, vcom, rnorm logical, dimension(:), allocatable :: lmask select type(nbody_system) @@ -171,7 +170,8 @@ module subroutine collision_generate_merge(self, nbody_system, param, t) class(base_parameters), intent(inout) :: param !! Current run configuration parameters real(DP), intent(in) :: t !! The time of the collision ! Internals - integer(I4B) :: i, j, k, ibiggest + integer(I4B) :: i, j, ibiggest + integer(I8B) :: k real(DP), dimension(NDIM) :: L_spin_new, L_residual real(DP) :: volume character(len=STRMAX) :: message @@ -232,7 +232,7 @@ module subroutine collision_generate_merge(self, nbody_system, param, t) call collision_util_velocity_torque(-L_residual(:), fragments%mass(1), fragments%rb(:,1), fragments%vb(:,1)) ! Update any encounter lists that have the removed bodies in them so that they instead point to the new body - do k = 1, nbody_system%plpl_encounter%nenc + do k = 1_I8B, nbody_system%plpl_encounter%nenc do j = 1, impactors%ncoll i = impactors%id(j) if (i == ibiggest) cycle diff --git a/src/collision/collision_io.f90 b/src/collision/collision_io.f90 index d0e8f30ae..cb4a544a0 100644 --- a/src/collision/collision_io.f90 +++ b/src/collision/collision_io.f90 @@ -365,7 +365,7 @@ module subroutine collision_io_netcdf_write_frame_snapshot(self, history, param) class(encounter_storage), intent(inout) :: history !! Collision history object class(base_parameters), intent(inout) :: param !! Current run configuration parameters ! Internals - integer(I4B) :: i, idslot, old_mode, npl, stage + integer(I4B) :: i, idslot, old_mode, npl, stage, tmp character(len=NAMELEN) :: charstring class(swiftest_pl), allocatable :: pl @@ -427,7 +427,7 @@ module subroutine collision_io_netcdf_write_frame_snapshot(self, history, param) call netcdf_io_check( nf90_put_var(nc%id, nc%L_spin_varid, collider%L_spin(:,:), start=[1, 1, eslot], count=[NDIM, 2, 1]), "collision_io_netcdf_write_frame_snapshot nf90_put_var L_spin_varid before" ) end if - call netcdf_io_check( nf90_set_fill(nc%id, old_mode, old_mode) ) + call netcdf_io_check( nf90_set_fill(nc%id, old_mode, tmp) ) end associate end select return diff --git a/src/collision/collision_module.f90 b/src/collision/collision_module.f90 index 82bec250b..a06d6d2c1 100644 --- a/src/collision/collision_module.f90 +++ b/src/collision/collision_module.f90 @@ -126,7 +126,7 @@ module collision real(DP), dimension(:), allocatable :: rmag !! Array of radial distance magnitudes of individual fragments in the collisional coordinate frame real(DP), dimension(:), allocatable :: vmag !! Array of radial distance magnitudes of individual fragments in the collisional coordinate frame real(DP), dimension(:), allocatable :: rotmag !! Array of rotation magnitudes of individual fragments - integer(I1B), dimension(:), allocatable :: origin_body !! Array of indices indicating which impactor body (1 or 2) the fragment originates from + integer(I4B), dimension(:), allocatable :: origin_body !! Array of indices indicating which impactor body (1 or 2) the fragment originates from real(DP), dimension(NDIM) :: L_orbit_tot !! Orbital angular momentum vector of all fragments real(DP), dimension(NDIM) :: L_spin_tot !! Spin angular momentum vector of all fragments real(DP), dimension(:,:), allocatable :: L_orbit !! Orbital angular momentum vector of each individual fragment diff --git a/src/collision/collision_regime.f90 b/src/collision/collision_regime.f90 index a612eb37a..0c81eca14 100644 --- a/src/collision/collision_regime.f90 +++ b/src/collision/collision_regime.f90 @@ -111,7 +111,11 @@ subroutine collision_regime_LS12(collider, nbody_system, param) if (impactors%regime == COLLRESOLVE_REGIME_MERGE) then volume = 4._DP / 3._DP * PI * sum(impactors%radius(:)**3) radius = (3._DP * volume / (4._DP * PI))**(THIRD) +#ifdef DOCONLOC + do concurrent(i = 1:NDIM) shared(impactors,Ip,L_spin) +#else do concurrent(i = 1:NDIM) +#endif Ip(i) = sum(impactors%mass(:) * impactors%Ip(i,:)) L_spin(i) = sum(impactors%L_orbit(i,:) + impactors%L_spin(i,:)) end do diff --git a/src/collision/collision_resolve.f90 b/src/collision/collision_resolve.f90 index dadde1dfd..0166711ab 100644 --- a/src/collision/collision_resolve.f90 +++ b/src/collision/collision_resolve.f90 @@ -191,8 +191,9 @@ module subroutine collision_resolve_extract_plpl(self, nbody_system, param) logical, dimension(:), allocatable :: lplpl_collision logical, dimension(:), allocatable :: lplpl_unique_parent integer(I4B), dimension(:), pointer :: plparent - integer(I4B), dimension(:), allocatable :: collision_idx, unique_parent_idx - integer(I4B) :: i, index_coll, ncollisions, nunique_parent, nplplenc + integer(I4B) :: nunique_parent + integer(I8B) :: ncollisions, index_coll, k, nplplenc + integer(I8B), dimension(:), allocatable :: unique_parent_idx, collision_idx select type(nbody_system) class is (swiftest_nbody_system) @@ -201,14 +202,14 @@ module subroutine collision_resolve_extract_plpl(self, nbody_system, param) associate(idx1 => self%index1, idx2 => self%index2, plparent => pl%kin%parent) nplplenc = self%nenc allocate(lplpl_collision(nplplenc)) - lplpl_collision(:) = self%status(1:nplplenc) == COLLIDED + lplpl_collision(:) = self%status(1_I8B:nplplenc) == COLLIDED if (.not.any(lplpl_collision)) return ! Collisions have been detected in this step. So we need to determine which of them are between unique bodies. ! Get the subset of pl-pl encounters that lead to a collision ncollisions = count(lplpl_collision(:)) allocate(collision_idx(ncollisions)) - collision_idx = pack([(i, i=1, nplplenc)], lplpl_collision) + collision_idx = pack([(k, k=1_I8B, nplplenc)], lplpl_collision) ! Get the subset of collisions that involve a unique pair of parents allocate(lplpl_unique_parent(ncollisions)) @@ -223,7 +224,7 @@ module subroutine collision_resolve_extract_plpl(self, nbody_system, param) ! due to restructuring of parent/child relationships when there are large numbers of multi-body collisions in a single ! step lplpl_unique_parent(:) = .true. - do index_coll = 1, ncollisions + do index_coll = 1_I8B, ncollisions associate(ip1 => plparent(idx1(collision_idx(index_coll))), ip2 => plparent(idx2(collision_idx(index_coll)))) lplpl_unique_parent(:) = .not. ( any(plparent(idx1(unique_parent_idx(:))) == ip1) .or. & any(plparent(idx2(unique_parent_idx(:))) == ip1) .or. & @@ -350,7 +351,7 @@ module subroutine collision_resolve_mergeaddsub(nbody_system, param, t, status) class is (swiftest_nbody_system) select type(param) class is (swiftest_parameters) - associate(pl => nbody_system%pl, pl_discards => nbody_system%pl_discards, info => nbody_system%pl%info, pl_adds => nbody_system%pl_adds, cb => nbody_system%cb, npl => nbody_system%pl%nbody, & + associate(pl => nbody_system%pl, pl_discards => nbody_system%pl_discards, info => nbody_system%pl%info, pl_adds => nbody_system%pl_adds, cb => nbody_system%cb, & collider => nbody_system%collider, impactors => nbody_system%collider%impactors,fragments => nbody_system%collider%fragments) ! Add the impactors%id bodies to the subtraction list @@ -387,7 +388,11 @@ module subroutine collision_resolve_mergeaddsub(nbody_system, param, t, status) ! plnew%tlag = pl%tlag(ibiggest) ! end if +#ifdef DOCONLOC + do concurrent(i = 1:nfrag) shared(plnew,fragments) local(volume) +#else do concurrent(i = 1:nfrag) +#endif volume = 4.0_DP/3.0_DP * PI * plnew%radius(i)**3 plnew%density(i) = fragments%mass(i) / volume end do @@ -533,7 +538,8 @@ module subroutine collision_resolve_plpl(self, nbody_system, param, t, dt, irec) character(len=STRMAX) :: timestr, idstr integer(I4B), dimension(2) :: idx_parent !! Index of the two bodies considered the "parents" of the collision logical :: lgoodcollision - integer(I4B) :: i, j, nnew, loop, ncollisions + integer(I4B) :: i, j, nnew, loop + integer(I8B) :: k, ncollisions integer(I4B), dimension(:), allocatable :: idnew integer(I4B), parameter :: MAXCASCADE = 1000 @@ -572,9 +578,9 @@ module subroutine collision_resolve_plpl(self, nbody_system, param, t, dt, irec) call swiftest_io_log_one_message(COLLISION_LOG_OUT, "***********************************************************" // & "***********************************************************") - do i = 1, ncollisions - idx_parent(1) = pl%kin(idx1(i))%parent - idx_parent(2) = pl%kin(idx2(i))%parent + do k = 1_I8B, ncollisions + idx_parent(1) = pl%kin(idx1(k))%parent + idx_parent(2) = pl%kin(idx2(k))%parent call impactors%consolidate(nbody_system, param, idx_parent, lgoodcollision) if ((.not. lgoodcollision) .or. any(pl%status(idx_parent(:)) /= COLLIDED)) cycle @@ -595,7 +601,7 @@ module subroutine collision_resolve_plpl(self, nbody_system, param, t, dt, irec) call collision_history%take_snapshot(param,nbody_system, t, "after") - plpl_collision%status(i) = collider%status + plpl_collision%status(k) = collider%status call impactors%dealloc() end do diff --git a/src/collision/collision_util.f90 b/src/collision/collision_util.f90 index 95152c0aa..75c836b4c 100644 --- a/src/collision/collision_util.f90 +++ b/src/collision/collision_util.f90 @@ -21,21 +21,26 @@ module subroutine collision_util_add_fragments_to_collider(self, nbody_system, p class(base_nbody_system), intent(inout) :: nbody_system !! Swiftest nbody system object class(base_parameters), intent(in) :: param !! Current Swiftest run configuration parameters ! Internals - integer(I4B) :: i, npl_before, npl_after + integer(I4B) :: i, npl_before, npl_after, nfrag logical, dimension(:), allocatable :: lexclude select type(nbody_system) class is (swiftest_nbody_system) - associate(fragments => self%fragments, impactors => self%impactors, nfrag => self%fragments%nbody, pl => nbody_system%pl, cb => nbody_system%cb) + associate(fragments => self%fragments, impactors => self%impactors, pl => nbody_system%pl, cb => nbody_system%cb) npl_after = pl%nbody npl_before = npl_after - nfrag + nfrag = self%fragments%nbody allocate(lexclude(npl_after)) pl%status(npl_before+1:npl_after) = ACTIVE pl%mass(npl_before+1:npl_after) = fragments%mass(1:nfrag) pl%Gmass(npl_before+1:npl_after) = fragments%mass(1:nfrag) * param%GU pl%radius(npl_before+1:npl_after) = fragments%radius(1:nfrag) +#ifdef DOCONLOC + do concurrent (i = 1:nfrag) shared(cb,pl,fragments) +#else do concurrent (i = 1:nfrag) +#endif pl%rb(:,npl_before+i) = fragments%rb(:,i) pl%vb(:,npl_before+i) = fragments%vb(:,i) pl%rh(:,npl_before+i) = fragments%rb(:,i) - cb%rb(:) @@ -141,15 +146,17 @@ end subroutine collision_util_get_idvalues_snapshot module subroutine collision_util_get_energy_and_momentum(self, nbody_system, param, phase) !! Author: David A. Minton !! - !! Calculates total system energy in either the pre-collision outcome state (phase = "before") or the post-collision outcome state (lbefore = .false.) + !! Calculates total system energy in either the pre-collision outcome state (phase = "before") or the post-collision outcome + !! state (lbefore = .false.) implicit none ! Arguments class(collision_basic), intent(inout) :: self !! Encounter collision system object class(base_nbody_system), intent(inout) :: nbody_system !! Swiftest nbody system object class(base_parameters), intent(inout) :: param !! Current Swiftest run configuration parameters - character(len=*), intent(in) :: phase !! One of "before" or "after", indicating which phase of the calculation this needs to be done + character(len=*), intent(in) :: phase !! One of "before" or "after", indicating which phase of the + !! calculation this needs to be done ! Internals - integer(I4B) :: i, phase_val + integer(I4B) :: i, phase_val, nfrag select case(phase) case("before") @@ -165,13 +172,24 @@ module subroutine collision_util_get_energy_and_momentum(self, nbody_system, par class is (swiftest_nbody_system) select type(param) class is (swiftest_parameters) - associate(fragments => self%fragments, impactors => self%impactors, nfrag => self%fragments%nbody, pl => nbody_system%pl, cb => nbody_system%cb) + associate(fragments => self%fragments, impactors => self%impactors, pl => nbody_system%pl, cb => nbody_system%cb) + nfrag = self%fragments%nbody if (phase_val == 1) then +#ifdef DOCONLOC + do concurrent(i = 1:2) shared(impactors) +#else do concurrent(i = 1:2) +#endif impactors%ke_orbit(i) = 0.5_DP * impactors%mass(i) * dot_product(impactors%vc(:,i), impactors%vc(:,i)) - impactors%ke_spin(i) = 0.5_DP * impactors%mass(i) * impactors%radius(i)**2 * impactors%Ip(3,i) * dot_product(impactors%rot(:,i), impactors%rot(:,i)) + impactors%ke_spin(i) = 0.5_DP * impactors%mass(i) * impactors%radius(i)**2 * impactors%Ip(3,i) & + * dot_product(impactors%rot(:,i), impactors%rot(:,i)) impactors%be(i) = -3 * impactors%Gmass(i) * impactors%mass(i) / (5 * impactors%radius(i)) - impactors%L_orbit(:,i) = impactors%mass(i) * impactors%rc(:,i) .cross. impactors%vc(:,i) + impactors%L_orbit(1,i) = impactors%mass(i) * (impactors%rc(2,i) * impactors%vc(3,i) & + - impactors%rc(3,i) * impactors%vc(2,i)) + impactors%L_orbit(2,i) = impactors%mass(i) * (impactors%rc(3,i) * impactors%vc(1,i) & + - impactors%rc(1,i) * impactors%vc(3,i)) + impactors%L_orbit(3,i) = impactors%mass(i) * (impactors%rc(1,i) * impactors%vc(2,i) & + - impactors%rc(2,i) * impactors%vc(1,i)) impactors%L_spin(:,i) = impactors%mass(i) * impactors%radius(i)**2 * impactors%Ip(3,i) * impactors%rot(:,i) end do self%L_orbit(:,phase_val) = sum(impactors%L_orbit(:,1:2),dim=2) @@ -180,16 +198,28 @@ module subroutine collision_util_get_energy_and_momentum(self, nbody_system, par self%ke_orbit(phase_val) = sum(impactors%ke_orbit(1:2)) self%ke_spin(phase_val) = sum(impactors%ke_spin(1:2)) self%be(phase_val) = sum(impactors%be(1:2)) - call swiftest_util_get_potential_energy(2, [(.true., i = 1, 2)], 0.0_DP, impactors%Gmass, impactors%mass, impactors%rb, self%pe(phase_val)) + call swiftest_util_get_potential_energy(2, [(.true., i = 1, 2)], 0.0_DP, impactors%Gmass, impactors%mass, & + impactors%rb, self%pe(phase_val)) self%te(phase_val) = self%ke_orbit(phase_val) + self%ke_spin(phase_val) + self%be(phase_val) + self%pe(phase_val) else if (phase_val == 2) then +#ifdef DOCONLOC + do concurrent(i = 1:nfrag) shared(fragments) +#else do concurrent(i = 1:nfrag) +#endif fragments%ke_orbit(i) = 0.5_DP * fragments%mass(i) * dot_product(fragments%vc(:,i), fragments%vc(:,i)) - fragments%ke_spin(i) = 0.5_DP * fragments%mass(i) * fragments%radius(i)**2 * fragments%Ip(3,i) * dot_product(fragments%rot(:,i), fragments%rot(:,i)) - fragments%L_orbit(:,i) = fragments%mass(i) * fragments%rc(:,i) .cross. fragments%vc(:,i) + fragments%ke_spin(i) = 0.5_DP * fragments%mass(i) * fragments%radius(i)**2 * fragments%Ip(3,i) & + * dot_product(fragments%rot(:,i), fragments%rot(:,i)) + fragments%L_orbit(1,i) = fragments%mass(i) * (fragments%rc(2,i) * fragments%vc(3,i) - & + fragments%rc(3,i) * fragments%vc(2,i)) + fragments%L_orbit(2,i) = fragments%mass(i) * (fragments%rc(3,i) * fragments%vc(1,i) - & + fragments%rc(1,i) * fragments%vc(3,i)) + fragments%L_orbit(3,i) = fragments%mass(i) * (fragments%rc(1,i) * fragments%vc(2,i) - & + fragments%rc(2,i) * fragments%vc(1,i)) fragments%L_spin(:,i) = fragments%mass(i) * fragments%radius(i)**2 * fragments%Ip(3,i) * fragments%rot(:,i) end do - call swiftest_util_get_potential_energy(nfrag, [(.true., i = 1, nfrag)], 0.0_DP, fragments%Gmass, fragments%mass, fragments%rb, fragments%pe) + call swiftest_util_get_potential_energy(nfrag, [(.true., i = 1, nfrag)], 0.0_DP, fragments%Gmass, fragments%mass, & + fragments%rb, fragments%pe) fragments%be = sum(-3*fragments%Gmass(1:nfrag)*fragments%mass(1:nfrag)/(5*fragments%radius(1:nfrag))) fragments%L_orbit_tot(:) = sum(fragments%L_orbit(:,1:nfrag),dim=2) fragments%L_spin_tot(:) = sum(fragments%L_spin(:,1:nfrag),dim=2) @@ -527,14 +557,16 @@ end subroutine collision_util_setup_fragments module subroutine collision_util_set_coordinate_collider(self) - + !! author: David A. Minton + !! !! - !! Defines the collisional coordinate nbody_system, including the unit vectors of both the nbody_system and individual fragments. + !! Defines the collisional coordinate nbody_system, including the unit vectors of both the nbody_system and individual + !! fragments. implicit none ! Arguments class(collision_basic), intent(inout) :: self !! Collisional nbody_system - associate(fragments => self%fragments, impactors => self%impactors, nfrag => self%fragments%nbody) + associate(fragments => self%fragments, impactors => self%impactors) call impactors%set_coordinate_system() if (.not.allocated(self%fragments)) return @@ -550,7 +582,8 @@ end subroutine collision_util_set_coordinate_collider module subroutine collision_util_set_coordinate_fragments(self) !! author: David A. Minton !! - !! Defines the collisional coordinate nbody_system, including the unit vectors of both the nbody_system and individual fragments. + !! Defines the collisional coordinate nbody_system, including the unit vectors of both the nbody_system and individual + !! fragments. implicit none ! Arguments class(collision_fragments), intent(inout) :: self !! Collisional nbody_system @@ -576,7 +609,8 @@ end subroutine collision_util_set_coordinate_fragments module subroutine collision_util_set_coordinate_impactors(self) !! author: David A. Minton !! - !! Defines the collisional coordinate nbody_system, including the unit vectors of both the nbody_system and individual fragments. + !! Defines the collisional coordinate nbody_system, including the unit vectors of both the nbody_system and individual + !! fragments. implicit none ! Arguments class(collision_impactors), intent(inout) :: self !! Collisional nbody_system @@ -588,8 +622,8 @@ module subroutine collision_util_set_coordinate_impactors(self) delta_v(:) = impactors%vb(:, 2) - impactors%vb(:, 1) delta_r(:) = impactors%rb(:, 2) - impactors%rb(:, 1) - ! We will initialize fragments on a plane defined by the pre-impact nbody_system, with the z-axis aligned with the angular momentum vector - ! and the y-axis aligned with the pre-impact distance vector. + ! We will initialize fragments on a plane defined by the pre-impact nbody_system, with the z-axis aligned with the angular + ! momentum vector and the y-axis aligned with the pre-impact distance vector. ! y-axis is the separation distance impactors%y_unit(:) = .unit.delta_r(:) @@ -618,14 +652,16 @@ module subroutine collision_util_set_coordinate_impactors(self) impactors%vc(:,1) = impactors%vb(:,1) - impactors%vbcom(:) impactors%vc(:,2) = impactors%vb(:,2) - impactors%vbcom(:) - ! Find the point of impact between the two bodies, defined as the location (in the collisional coordinate system) at the surface of body 1 along the line connecting the two bodies. + ! Find the point of impact between the two bodies, defined as the location (in the collisional coordinate system) at the + ! surface of body 1 along the line connecting the two bodies. impactors%rcimp(:) = impactors%rb(:,1) + impactors%radius(1) * impactors%y_unit(:) - impactors%rbcom(:) ! Set the velocity direction as the "bounce" direction" for disruptions, and body 2's direction for hit and runs if (impactors%regime == COLLRESOLVE_REGIME_HIT_AND_RUN) then impactors%bounce_unit(:) = .unit. impactors%vc(:,2) else - impactors%bounce_unit(:) = .unit. (impactors%vc(:,2) - 2 * dot_product(impactors%vc(:,2),impactors%y_unit(:)) * impactors%y_unit(:)) + impactors%bounce_unit(:) = .unit. (impactors%vc(:,2) - 2 * dot_product(impactors%vc(:,2),impactors%y_unit(:)) & + * impactors%y_unit(:)) end if end associate @@ -728,7 +764,8 @@ module subroutine collision_util_snapshot(self, param, nbody_system, t, arg) class(base_parameters), intent(inout) :: param !! Current run configuration parameters class(base_nbody_system), intent(inout) :: nbody_system !! Swiftest nbody system object to store real(DP), intent(in), optional :: t !! Time of snapshot if different from nbody_system time - character(*), intent(in), optional :: arg !! "before": takes a snapshot just before the collision. "after" takes the snapshot just after the collision. + character(*), intent(in), optional :: arg !! "before": takes a snapshot just before the collision. "after" + !! takes the snapshot just after the collision. ! Arguments class(collision_snapshot), allocatable, save :: snapshot character(len=:), allocatable :: stage @@ -802,8 +839,9 @@ module subroutine collision_util_snapshot(self, param, nbody_system, t, arg) write(message,*) trim(adjustl(plnew%info(i)%name)), " (", trim(adjustl(plnew%info(i)%particle_type)),")" call swiftest_io_log_one_message(COLLISION_LOG_OUT, message) end do - call swiftest_io_log_one_message(COLLISION_LOG_OUT, "***********************************************************" // & - "***********************************************************") + call swiftest_io_log_one_message(COLLISION_LOG_OUT, & + "***********************************************************" // & + "***********************************************************") allocate(after_snap%pl, source=plnew) end select deallocate(after_orig%pl) @@ -916,7 +954,11 @@ module subroutine collision_util_set_original_scale_factors(self) impactors%L_orbit = impactors%L_orbit * collider%Lscale impactors%ke_orbit = impactors%ke_orbit * collider%Escale impactors%ke_spin = impactors%ke_spin * collider%Escale +#ifdef DOCONLOC + do concurrent(i = 1:2) shared(impactors) +#else do concurrent(i = 1:2) +#endif impactors%rot(:,i) = impactors%L_spin(:,i) * (impactors%mass(i) * impactors%radius(i)**2 * impactors%Ip(3,i)) end do diff --git a/src/encounter/encounter_check.f90 b/src/encounter/encounter_check.f90 index 06d0a76d4..deef9bdda 100644 --- a/src/encounter/encounter_check.f90 +++ b/src/encounter/encounter_check.f90 @@ -61,11 +61,6 @@ module subroutine encounter_check_all_plplm(param, nplm, nplt, rplm, vplm, rplt, integer(I4B), dimension(:), allocatable, intent(out) :: index2 !! List of indices for body 2 in each encounter logical, dimension(:), allocatable, intent(out) :: lvdotr !! Logical flag indicating the sign of v .dot. x ! Internals - ! type(interaction_timer), save :: itimer - logical, save :: lfirst = .true. - logical, save :: skipit = .false. - integer(I8B) :: nplplm = 0_I8B - integer(I4B) :: npl logical, dimension(:), allocatable :: plmplt_lvdotr !! Logical flag indicating the sign of v .dot. x in the plm-plt group integer(I4B), dimension(:), allocatable :: plmplt_index1 !! List of indices for body 1 in each encounter in the plm-plt group integer(I4B), dimension(:), allocatable :: plmplt_index2 !! List of indices for body 2 in each encounter in the plm-lt group @@ -179,8 +174,12 @@ subroutine encounter_check_all_sort_and_sweep_plpl(npl, r, v, renc, dt, nenc, in npl_last = npl end if +#ifdef DOCONLOC + do concurrent (i = 1:npl) shared(r,renc,rmin,rmax) local(rmag) +#else do concurrent (i = 1:npl) - rmag = .mag.r(:,i) +#endif + rmag = norm2(r(:,i)) rmax(i) = rmag + RSWEEP_FACTOR * renc(i) rmin(i) = rmag - RSWEEP_FACTOR * renc(i) end do @@ -232,13 +231,21 @@ subroutine encounter_check_all_sort_and_sweep_plplm(nplm, nplt, rplm, vplm, rplt ntot_last = ntot end if +#ifdef DOCONLOC + do concurrent (i = 1:nplm) shared(rmin,rmax,rplm,rencm) local(rmag) +#else do concurrent (i = 1:nplm) - rmag = .mag.rplm(:,i) +#endif + rmag = norm2(rplm(:,i)) rmax(i) = rmag + RSWEEP_FACTOR * rencm(i) rmin(i) = rmag - RSWEEP_FACTOR * rencm(i) end do +#ifdef DOCONLOC + do concurrent (i = 1:nplt) shared(rmin,rmax,rplt,renct) local(rmag) +#else do concurrent (i = 1:nplt) - rmag = .mag.rplt(:,i) +#endif + rmag = norm2(rplt(:,i)) rmax(nplm+i) = rmag + RSWEEP_FACTOR * renct(i) rmin(nplm+i) = rmag - RSWEEP_FACTOR * renct(i) end do @@ -292,13 +299,21 @@ subroutine encounter_check_all_sort_and_sweep_pltp(npl, ntp, rpl, vpl, rtp, vtp, renctp(:) = 0.0_DP +#ifdef DOCONLOC + do concurrent (i = 1:npl) shared(rmin,rmax,rpl,rencpl) local(rmag) +#else do concurrent (i = 1:npl) - rmag = .mag.rpl(:,i) +#endif + rmag = norm2(rpl(:,i)) rmax(i) = rmag + RSWEEP_FACTOR * rencpl(i) rmin(i) = rmag - RSWEEP_FACTOR * rencpl(i) end do - do concurrent (i = 1:ntp) - rmag = .mag.rtp(:,i) +#ifdef DOCONLOC + do concurrent (i = 1:ntp) shared(rmin,rmax,rtp,renctp) local(rmag) +#else + do concurrent (i = 1:ntp) +#endif + rmag = norm2(rtp(:,i)) rmax(npl+i) = rmag + RSWEEP_FACTOR * renctp(i) rmin(npl+i) = rmag - RSWEEP_FACTOR * renctp(i) end do @@ -340,7 +355,11 @@ pure subroutine encounter_check_all_sweep_one(i, n, xi, yi, zi, vxi, vyi, vzi, x logical, dimension(n) :: lencounteri, lvdotri lencounteri(:) = .false. +#ifdef DOCONLOC + do concurrent(j = 1:n, lgood(j)) shared(lgood,lencounteri,lvdotri,x,y,z,vx,vy,vz,renci,renc) local(xr,yr,zr,vxr,vyr,vzr,renc12) +#else do concurrent(j = 1:n, lgood(j)) +#endif xr = x(j) - xi yr = y(j) - yi zr = z(j) - zi @@ -387,7 +406,11 @@ subroutine encounter_check_all_triangular_one(i, n, xi, yi, zi, vxi, vyi, vzi, x real(DP) :: xr, yr, zr, vxr, vyr, vzr, renc12 logical, dimension(n) :: lencounteri, lvdotri +#ifdef DOCONLOC + do concurrent(j = i+1:n) shared(lencounteri, lvdotri, renci, renc) local(xr,yr,zr,vxr,vyr,vzr,renc12) +#else do concurrent(j = i+1:n) +#endif xr = x(j) - xi yr = y(j) - yi zr = z(j) - zi @@ -605,15 +628,15 @@ module subroutine encounter_check_collapse_ragged_list(ragged_list, n1, nenc, in implicit none ! Arguments class(encounter_list), dimension(:), intent(in) :: ragged_list !! The ragged encounter list - integer(I4B), intent(in) :: n1 !! Number of bodies 1 - integer(I8B), intent(out) :: nenc !! Total number of encountersj - integer(I4B), dimension(:), allocatable, intent(out) :: index1 !! Array of indices for body 1 - integer(I4B), dimension(:), allocatable, intent(out) :: index2 !! Array of indices for body 1 - logical, dimension(:), allocatable, intent(out), optional :: lvdotr !! Array indicating which bodies are approaching + integer(I4B), intent(in) :: n1 !! Number of bodies 1 + integer(I8B), intent(out) :: nenc !! Total number of encountersj + integer(I4B), dimension(:), allocatable, intent(out) :: index1 !! Array of indices for body 1 + integer(I4B), dimension(:), allocatable, intent(out) :: index2 !! Array of indices for body 1 + logical, dimension(:), allocatable, intent(out), optional :: lvdotr !! Array indicating which bodies are approaching ! Internals integer(I4B) :: i integer(I8B) :: j1, j0, nenci - integer(I4B), dimension(n1) :: ibeg + integer(I8B), dimension(n1) :: ibeg associate(nenc_arr => ragged_list(:)%nenc) nenc = sum(nenc_arr(:)) @@ -701,7 +724,11 @@ subroutine encounter_check_remove_duplicates(n, nenc, index1, index2, lvdotr) ! Sort on the second index and remove duplicates if (allocated(itmp)) deallocate(itmp) allocate(itmp, source=index2) +#ifdef DOCONLOC + do concurrent(i = 1:n, iend(i) - ibeg(i) > 0_I8B) shared(iend,ibeg,index2,lencounter,itmp) local(klo,khi,nenci,j) +#else do concurrent(i = 1:n, iend(i) - ibeg(i) > 0_I8B) +#endif klo = ibeg(i) khi = iend(i) nenci = khi - klo + 1_I8B @@ -748,7 +775,11 @@ pure module subroutine encounter_check_sort_aabb_1D(self, n, extent_arr) call util_sort(extent_arr, self%ind) +#ifdef DOCONLOC + do concurrent(k = 1_I8B:2_I8B * n) shared(self,n) local(i) +#else do concurrent(k = 1_I8B:2_I8B * n) +#endif i = self%ind(k) if (i <= n) then self%ibeg(i) = k @@ -834,7 +865,7 @@ module subroutine encounter_check_sweep_aabb_double_list(self, n1, n2, r1, v1, r if (loverlap(i)) then ibeg = self%aabb%ibeg(i) + 1_I8B iend = self%aabb%iend(i) - 1_I8B - nbox = iend - ibeg + 1 + nbox = int(iend - ibeg, kind=I4B) + 1 call encounter_check_all_sweep_one(i, nbox, r1(1,i), r1(2,i), r1(3,i), v1(1,i), v1(2,i), v1(3,i), & xind(ibeg:iend), yind(ibeg:iend), zind(ibeg:iend),& vxind(ibeg:iend), vyind(ibeg:iend), vzind(ibeg:iend), & @@ -850,7 +881,7 @@ module subroutine encounter_check_sweep_aabb_double_list(self, n1, n2, r1, v1, r if (loverlap(i)) then ibeg = self%aabb%ibeg(i) + 1_I8B iend = self%aabb%iend(i) - 1_I8B - nbox = iend - ibeg + 1 + nbox = int(iend - ibeg, kind=I4B) + 1 ii = i - n1 call encounter_check_all_sweep_one(ii, nbox, r2(1,ii), r2(2,ii), r2(3,ii), v2(1,ii), v2(2,ii), v2(3,ii), & xind(ibeg:iend), yind(ibeg:iend), zind(ibeg:iend),& @@ -927,7 +958,7 @@ module subroutine encounter_check_sweep_aabb_single_list(self, n, r, v, renc, dt if (loverlap(i)) then ibeg = self%aabb%ibeg(i) + 1_I8B iend = self%aabb%iend(i) - 1_I8B - nbox = int(iend - ibeg + 1, kind=I4B) + nbox = int(iend - ibeg, kind=I4B) + 1 lencounteri(ibeg:iend) = .true. call encounter_check_all_sweep_one(i, nbox, r(1,i), r(2,i), r(3,i), v(1,i), v(2,i), v(3,i), & xind(ibeg:iend), yind(ibeg:iend), zind(ibeg:iend),& @@ -941,7 +972,11 @@ module subroutine encounter_check_sweep_aabb_single_list(self, n, r, v, renc, dt call encounter_check_collapse_ragged_list(lenc, n, nenc, index1, index2, lvdotr) ! By convention, we always assume that index1 < index2, and so we must swap any that are out of order +#ifdef DOCONLOC + do concurrent(k = 1_I8B:nenc, index1(k) > index2(k)) shared(index1,index2) local(itmp) +#else do concurrent(k = 1_I8B:nenc, index1(k) > index2(k)) +#endif itmp = index1(k) index1(k) = index2(k) index2(k) = itmp diff --git a/src/encounter/encounter_io.f90 b/src/encounter/encounter_io.f90 index ceb3194fe..fb5de048f 100644 --- a/src/encounter/encounter_io.f90 +++ b/src/encounter/encounter_io.f90 @@ -231,7 +231,7 @@ module subroutine encounter_io_netcdf_write_frame_snapshot(self, history, param) class(base_parameters), intent(inout) :: param !! Current run configuration parameters ! Internals - integer(I4B) :: i, idslot, old_mode, npl, ntp + integer(I4B) :: i, idslot, old_mode, npl, ntp, tmp character(len=STRMAX) :: charstring select type(param) @@ -284,7 +284,7 @@ module subroutine encounter_io_netcdf_write_frame_snapshot(self, history, param) call netcdf_io_check( nf90_put_var(nc%id, nc%ptype_varid, charstring, start=[1, idslot], count=[NAMELEN, 1]), "encounter_io_netcdf_write_frame_snapshot nf90_put_var tp particle_type_varid" ) end do - call netcdf_io_check( nf90_set_fill(nc%id, old_mode, old_mode) ) + call netcdf_io_check( nf90_set_fill(nc%id, old_mode, tmp) ) end associate end select end select diff --git a/src/encounter/encounter_util.f90 b/src/encounter/encounter_util.f90 index f935afe5a..7307eb9cb 100644 --- a/src/encounter/encounter_util.f90 +++ b/src/encounter/encounter_util.f90 @@ -22,7 +22,7 @@ module subroutine encounter_util_append_list(self, source, lsource_mask) class(encounter_list), intent(in) :: source !! Source object to append logical, dimension(:), intent(in) :: lsource_mask !! Logical mask indicating which elements to append to ! Internals - integer(I4B) :: nold, nsrc + integer(I4B) :: nold nold = int(self%nenc, kind=I4B) call util_append(self%tcollision, source%tcollision, nold, lsource_mask) @@ -100,8 +100,6 @@ module subroutine encounter_util_dealloc_bounding_box(self) implicit none ! Arguments class(encounter_bounding_box), intent(inout) :: self !! Bounding box structure - ! Internals - integer(I4B) :: i call self%aabb%dealloc() @@ -341,7 +339,7 @@ module subroutine encounter_util_setup_aabb(self, n, n_last) integer(I4B), intent(in) :: n !! Number of objects with bounding box extents integer(I4B), intent(in) :: n_last !! Number of objects with bounding box extents the previous time this was called ! Internals - integer(I4B) :: next, next_last, k, dim + integer(I4B) :: next, next_last, k integer(I4B), dimension(:), allocatable :: itmp next = 2 * n diff --git a/src/fraggle/fraggle_generate.f90 b/src/fraggle/fraggle_generate.f90 index 93b4c3431..cd1df7033 100644 --- a/src/fraggle/fraggle_generate.f90 +++ b/src/fraggle/fraggle_generate.f90 @@ -59,7 +59,8 @@ module subroutine fraggle_generate(self, nbody_system, param, t) call self%set_mass_dist(param) call self%disrupt(nbody_system, param, t, lfailure) if (lfailure) then - call swiftest_io_log_one_message(COLLISION_LOG_OUT, "Fraggle failed to find a solution to match energy contraint. Treating this as a merge.") + call swiftest_io_log_one_message(COLLISION_LOG_OUT, & + "Fraggle failed to find a solution to match energy contraint. Treating this as a merge.") call self%merge(nbody_system, param, t) ! Use the default collision model, which is merge return end if @@ -68,7 +69,11 @@ module subroutine fraggle_generate(self, nbody_system, param, t) ! Get the energy and momentum of the system before and after the collision call self%get_energy_and_momentum(nbody_system, param, phase="before") nfrag = fragments%nbody +#ifdef DOCONLOC + do concurrent(i = 1:2) shared(fragments,impactors) +#else do concurrent(i = 1:2) +#endif fragments%rc(:,i) = fragments%rb(:,i) - impactors%rbcom(:) fragments%vc(:,i) = fragments%vb(:,i) - impactors%vbcom(:) end do @@ -127,8 +132,9 @@ module subroutine fraggle_generate_disrupt(self, nbody_system, param, t, lfailur real(DP), parameter :: fail_scale_initial = 1.0003_DP integer(I4B) :: nfrag_start - ! The minimization and linear solvers can sometimes lead to floating point exceptions. Rather than halting the code entirely if this occurs, we - ! can simply fail the attempt and try again. So we need to turn off any floating point exception halting modes temporarily + ! The minimization and linear solvers can sometimes lead to floating point exceptions. Rather than halting the code entirely + ! if this occurs, we can simply fail the attempt and try again. So we need to turn off any floating point exception halting + ! modes temporarily call ieee_get_halting_mode(IEEE_ALL,fpe_halting_modes) ! Save the current halting modes so we can turn them off temporarily fpe_quiet_modes(:) = .false. call ieee_set_halting_mode(IEEE_ALL,fpe_quiet_modes) @@ -164,7 +170,8 @@ module subroutine fraggle_generate_disrupt(self, nbody_system, param, t, lfailur if (.not.lfailure) then if (self%fragments%nbody /= nfrag_start) then write(message,*) self%fragments%nbody - call swiftest_io_log_one_message(COLLISION_LOG_OUT, "Fraggle found a solution with " // trim(adjustl(message)) // " fragments" ) + call swiftest_io_log_one_message(COLLISION_LOG_OUT, "Fraggle found a solution with " // trim(adjustl(message)) & + // " fragments" ) end if call self%get_energy_and_momentum(nbody_system, param, phase="after") @@ -172,7 +179,8 @@ module subroutine fraggle_generate_disrupt(self, nbody_system, param, t, lfailur dE = self%te(2) - self%te(1) call swiftest_io_log_one_message(COLLISION_LOG_OUT, "All quantities in collision system natural units") - call swiftest_io_log_one_message(COLLISION_LOG_OUT, "* Conversion factors (collision system units / nbody system units):") + call swiftest_io_log_one_message(COLLISION_LOG_OUT, & + "* Conversion factors (collision system units / nbody system units):") write(message,*) "* Mass: ", self%mscale call swiftest_io_log_one_message(COLLISION_LOG_OUT, message) write(message,*) "* Distance: ", self%dscale @@ -196,7 +204,8 @@ module subroutine fraggle_generate_disrupt(self, nbody_system, param, t, lfailur end if call self%set_original_scale() - self%max_rot = MAX_ROT_SI * param%TU2S ! Re-compute the spin limit from scratch so it doesn't drift due to floating point errors every time we convert + self%max_rot = MAX_ROT_SI * param%TU2S ! Re-compute the spin limit from scratch so it doesn't drift due to floating point + ! errors every time we convert ! Restore the big array if (lk_plpl) call pl%flatten(param) @@ -244,7 +253,8 @@ module subroutine fraggle_generate_hitandrun(self, nbody_system, param, t) end if ! The Fraggle disruption model (and its extended types allow for non-pure hit and run. - if (impactors%mass_dist(2) > 0.9_DP * impactors%mass(jproj)) then ! Pure hit and run, so we'll just keep the two bodies untouched + if (impactors%mass_dist(2) > 0.9_DP * impactors%mass(jproj)) then ! Pure hit and run, so we'll just keep the two bodies + ! untouched call swiftest_io_log_one_message(COLLISION_LOG_OUT, "Pure hit and run. No new fragments generated.") call self%collision_basic%hitandrun(nbody_system, param, t) return @@ -299,7 +309,7 @@ module subroutine fraggle_generate_merge(self, nbody_system, param, t) real(DP), intent(in) :: t !! The time of the collision ! Internals - integer(I4B) :: i, j + integer(I4B) :: i real(DP), dimension(NDIM) :: L_spin_new, Ip, rot real(DP) :: rotmag, mass, volume, radius @@ -309,7 +319,11 @@ module subroutine fraggle_generate_merge(self, nbody_system, param, t) mass = sum(impactors%mass(:)) volume = 4._DP / 3._DP * PI * sum(impactors%radius(:)**3) radius = (3._DP * volume / (4._DP * PI))**(THIRD) +#ifdef DOCONLOC + do concurrent(i = 1:NDIM) shared(impactors, Ip, L_spin_new) +#else do concurrent(i = 1:NDIM) +#endif Ip(i) = sum(impactors%mass(:) * impactors%Ip(i,:)) L_spin_new(i) = sum(impactors%L_orbit(i,:) + impactors%L_spin(i,:)) end do @@ -319,7 +333,8 @@ module subroutine fraggle_generate_merge(self, nbody_system, param, t) if (rotmag < self%max_rot) then call self%collision_basic%merge(nbody_system, param, t) else - call swiftest_io_log_one_message(COLLISION_LOG_OUT, "Merger would break the spin barrier. Converting to pure hit and run" ) + call swiftest_io_log_one_message(COLLISION_LOG_OUT, & + "Merger would break the spin barrier. Converting to pure hit and run" ) impactors%mass_dist(1:2) = impactors%mass(1:2) call self%hitandrun(nbody_system, param, t) end if @@ -334,9 +349,9 @@ module subroutine fraggle_generate_pos_vec(collider, nbody_system, param, lfailu !! Author: Jennifer L.L. Pouplin, Carlisle A. Wishard, and David A. Minton !! !! Initializes the position vectors of the fragments around the center of mass based on the collision style. - !! For hit and run with disruption, the fragments are generated in a random cloud around the smallest of the two colliders (body 2) - !! For disruptive collisions, the fragments are generated in a random cloud around the impact point. Bodies are checked for overlap and - !! regenerated if they overlap. + !! For hit and run with disruption, the fragments are generated in a random cloud around the smallest of the two colliders + !! (body 2). For disruptive collisions, the fragments are generated in a random cloud around the impact point. Bodies are + !! checked for overlap and regenerated if they overlap. implicit none ! Arguments class(collision_fraggle), intent(inout) :: collider !! Fraggle collision system object @@ -350,21 +365,22 @@ module subroutine fraggle_generate_pos_vec(collider, nbody_system, param, lfailu real(DP), dimension(2) :: fragment_cloud_radius logical, dimension(collider%fragments%nbody) :: loverlap real(DP), dimension(collider%fragments%nbody) :: mass_rscale, phi, theta, u - integer(I4B) :: i, j, loop, istart + integer(I4B) :: i, j, loop, istart, nfrag, npl, ntp logical :: lsupercat, lhitandrun integer(I4B), parameter :: MAXLOOP = 10000 - real(DP), parameter :: cloud_size_scale_factor = 3.0_DP ! Scale factor to apply to the size of the cloud relative to the distance from the impact point. - ! A larger value puts more space between fragments initially real(DP), parameter :: rbuffer = 1.01_DP ! Body radii are inflated by this scale factor to prevent secondary collisions real(DP), parameter :: pack_density = 0.5236_DP ! packing density of loose spheres - associate(fragments => collider%fragments, impactors => collider%impactors, nfrag => collider%fragments%nbody, & - pl => nbody_system%pl, tp => nbody_system%tp, npl => nbody_system%pl%nbody, ntp => nbody_system%tp%nbody) + associate(fragments => collider%fragments, impactors => collider%impactors, pl => nbody_system%pl, tp => nbody_system%tp) + nfrag = collider%fragments%nbody + npl = nbody_system%pl%nbody + ntp = nbody_system%tp%nbody lsupercat = (impactors%regime == COLLRESOLVE_REGIME_SUPERCATASTROPHIC) lhitandrun = (impactors%regime == COLLRESOLVE_REGIME_HIT_AND_RUN) ! We will treat the first two fragments of the list as special cases. - ! Place the first two bodies at the centers of the two fragment clouds, but be sure they are sufficiently far apart to avoid overlap + ! Place the first two bodies at the centers of the two fragment clouds, but be sure they are sufficiently far apart to + ! avoid overlap if (lhitandrun) then rdistance = impactors%radius(2) istart = 2 @@ -377,11 +393,14 @@ module subroutine fraggle_generate_pos_vec(collider, nbody_system, param, lfailu end if mass_rscale(1:istart-1) = 1.0_DP - ! Give the fragment positions a random value that is scaled with fragment mass so that the more massive bodies tend to be closer to the impact point - ! Later, velocities will be scaled such that the farther away a fragment is placed from the impact point, the higher will its velocity be. + ! Give the fragment positions a random value that is scaled with fragment mass so that the more massive bodies tend to be + ! closer to the impact point. Later, velocities will be scaled such that the farther away a fragment is placed from the + ! impact point, the higher will its velocity be. call random_number(mass_rscale(istart:nfrag)) mass_rscale(istart:nfrag) = (mass_rscale(istart:nfrag) + 1.0_DP) / 2 - mass_rscale(istart:nfrag) = mass_rscale(istart:nfrag) * (sum(fragments%mass(istart:nfrag)) / fragments%mass(istart:nfrag))**(0.125_DP) ! The power is arbitrary. It just gives the velocity a small mass dependence + ! The power of 0.125 in the scaling below is arbitrary. It just gives the velocity a small mass dependence + mass_rscale(istart:nfrag) = mass_rscale(istart:nfrag) * (sum(fragments%mass(istart:nfrag)) & + / fragments%mass(istart:nfrag))**(0.125_DP) mass_rscale(istart:nfrag) = mass_rscale(istart:nfrag) / minval(mass_rscale(istart:nfrag)) loverlap(:) = .true. @@ -394,8 +413,10 @@ module subroutine fraggle_generate_pos_vec(collider, nbody_system, param, lfailu fragment_cloud_center(:,2) = impactors%rc(:,2) fragments%rc(:,1) = fragment_cloud_center(:,1) else ! Keep the first and second bodies at approximately their original location, but so as not to be overlapping - fragment_cloud_center(:,1) = impactors%rcimp(:) - rbuffer * max(fragments%radius(1),impactors%radius(1)) * impactors%y_unit(:) - fragment_cloud_center(:,2) = impactors%rcimp(:) + rbuffer * max(fragments%radius(2),impactors%radius(2)) * impactors%y_unit(:) + fragment_cloud_center(:,1) = impactors%rcimp(:) - rbuffer * max(fragments%radius(1),& + impactors%radius(1)) * impactors%y_unit(:) + fragment_cloud_center(:,2) = impactors%rcimp(:) + rbuffer * max(fragments%radius(2), & + impactors%radius(2)) * impactors%y_unit(:) fragment_cloud_radius(:) = rdistance / pack_density fragments%rc(:,1:2) = fragment_cloud_center(:,1:2) end if @@ -411,7 +432,12 @@ module subroutine fraggle_generate_pos_vec(collider, nbody_system, param, lfailu end do ! Randomly place the n>2 fragments inside their cloud until none are overlapping +#ifdef DOCONLOC + do concurrent(i = istart:nfrag, loverlap(i)) shared(fragments, impactors, fragment_cloud_radius, fragment_cloud_center,& + loverlap, mass_rscale, u, phi, theta, lhitandrun) local(j, direction) +#else do concurrent(i = istart:nfrag, loverlap(i)) +#endif j = fragments%origin_body(i) ! Scale the cloud size @@ -430,13 +456,15 @@ module subroutine fraggle_generate_pos_vec(collider, nbody_system, param, lfailu ! Stretch out the hit and run cloud along the flight trajectory if (lhitandrun) then - fragments%rc(:,i) = fragments%rc(:,i) * (1.0_DP + 2 * fragment_cloud_radius(j) * mass_rscale(i) * impactors%bounce_unit(:)) + fragments%rc(:,i) = fragments%rc(:,i) * (1.0_DP + 2 * fragment_cloud_radius(j) * mass_rscale(i) & + * impactors%bounce_unit(:)) end if fragments%rc(:,i) = fragments%rc(:,i) + fragment_cloud_center(:,j) if (lhitandrun) then - fragments%rc(:,i) = fragments%rc(:,i) + 2 * fragment_cloud_radius(j) * mass_rscale(i) * impactors%bounce_unit(:) ! Shift the stretched cloud downrange + ! Shift the stretched cloud downrange + fragments%rc(:,i) = fragments%rc(:,i) + 2 * fragment_cloud_radius(j) * mass_rscale(i) * impactors%bounce_unit(:) else ! Make sure that the fragments are positioned away from the impact point direction = dot_product(fragments%rc(:,i) - impactors%rcimp(:), fragment_cloud_center(:,j) - impactors%rcimp(:)) @@ -448,9 +476,14 @@ module subroutine fraggle_generate_pos_vec(collider, nbody_system, param, lfailu end do ! Because body 1 and 2 are initialized near the original impactor positions, then if these bodies are still overlapping - ! when the rest are not, we will randomly walk their position in space so as not to move them too far from their starting position + ! when the rest are not, we will randomly walk their position in space so as not to move them too far from their + ! starting position if (all(.not.loverlap(istart:nfrag)) .and. any(loverlap(1:istart-1))) then +#ifdef DOCONLOC + do concurrent(i = 1:istart-1,loverlap(i)) shared(fragments,loverlap, u, theta, i) local(rwalk, dis) +#else do concurrent(i = 1:istart-1,loverlap(i)) +#endif dis = 0.1_DP * fragments%radius(i) * u(i)**(THIRD) rwalk(1) = fragments%rmag(i) * sin(theta(i)) * cos(phi(i)) rwalk(2) = fragments%rmag(i) * sin(theta(i)) * sin(phi(i)) @@ -508,25 +541,29 @@ module subroutine fraggle_generate_rot_vec(collider, nbody_system, param) class(swiftest_nbody_system), intent(inout) :: nbody_system !! Swiftest nbody system object class(swiftest_parameters), intent(inout) :: param !! Current run configuration parameters ! Internals - integer(I4B) :: i - real(DP), parameter :: frag_rot_fac = 0.1_DP ! Fraction of projectile rotation magnitude to add as random noise to fragment rotation - real(DP), parameter :: hitandrun_momentum_transfer = 0.01_DP ! Fraction of projectile momentum transfered to target in a hit and run + integer(I4B) :: i, nfrag + real(DP), parameter :: FRAG_ROT_FAC = 0.1_DP ! Fraction of projectile rotation magnitude to add as random noise to fragment + ! rotation + real(DP), parameter :: hitandrun_momentum_transfer = 0.01_DP ! Fraction of projectile momentum transfered to target in a hit + ! and run real(DP) :: mass_fac real(DP), dimension(NDIM) :: drot, dL integer(I4B), parameter :: MAXLOOP = 10 logical :: lhitandrun - associate(fragments => collider%fragments, impactors => collider%impactors, nfrag => collider%fragments%nbody) + associate(fragments => collider%fragments, impactors => collider%impactors) + nfrag = collider%fragments%nbody lhitandrun = (impactors%regime == COLLRESOLVE_REGIME_HIT_AND_RUN) - ! Initialize fragment rotations and velocities to be pre-impact rotation for body 1, and randomized for bodies >1 and scaled to the original rotation. - ! This will get updated later when conserving angular momentum + ! Initialize fragment rotations and velocities to be pre-impact rotation for body 1, and randomized for bodies >1 and + ! scaled to the original rotation. This will get updated later when conserving angular momentum mass_fac = fragments%mass(1) / impactors%mass(1) fragments%rot(:,1) = mass_fac**(5.0_DP/3.0_DP) * impactors%rot(:,1) ! If mass was added, also add spin angular momentum if (mass_fac > 1.0_DP) then - dL(:) = (fragments%mass(1) - impactors%mass(1)) * (impactors%rc(:,2) - impactors%rc(:,1)) .cross. (impactors%vc(:,2) - impactors%vc(:,1)) + dL(:) = (fragments%mass(1) - impactors%mass(1)) * (impactors%rc(:,2) - impactors%rc(:,1)) & + .cross. (impactors%vc(:,2) - impactors%vc(:,1)) drot(:) = dL(:) / (fragments%mass(1) * fragments%radius(1)**2 * fragments%Ip(3,1)) ! Check to make sure we haven't broken the spin barrier. Reduce the rotation change if so do i = 1, MAXLOOP @@ -542,7 +579,8 @@ module subroutine fraggle_generate_rot_vec(collider, nbody_system, param) end if if (lhitandrun) then - dL(:) = hitandrun_momentum_transfer * impactors%mass(2) * (impactors%rc(:,2) - impactors%rc(:,1)) .cross. (impactors%vc(:,2) - impactors%vc(:,1)) + dL(:) = hitandrun_momentum_transfer * impactors%mass(2) * (impactors%rc(:,2) - impactors%rc(:,1)) & + .cross. (impactors%vc(:,2) - impactors%vc(:,1)) drot(:) = dL(:) / (fragments%mass(1) * fragments%radius(1)**2 * fragments%Ip(3,1)) do i = 1, MAXLOOP if (.mag.(fragments%rot(:,1) + drot(:)) < collider%max_rot) exit @@ -557,9 +595,14 @@ module subroutine fraggle_generate_rot_vec(collider, nbody_system, param) end if call random_number(fragments%rot(:,2:nfrag)) +#ifdef DOCONLOC + do concurrent (i = 2:nfrag) shared(fragments,impactors) local(mass_fac) +#else do concurrent (i = 2:nfrag) +#endif mass_fac = fragments%mass(i) / impactors%mass(2) - fragments%rot(:,i) = mass_fac**(5.0_DP/3.0_DP) * impactors%rot(:,2) + 2 * (fragments%rot(:,i) - 1.0_DP) * frag_rot_fac * .mag.impactors%rot(:,2) + fragments%rot(:,i) = mass_fac**(5.0_DP/3.0_DP) * impactors%rot(:,2) + 2 * (fragments%rot(:,i) - 1.0_DP) * & + FRAG_ROT_FAC * norm2(impactors%rot(:,2)) end do fragments%rotmag(:) = .mag.fragments%rot(:,:) @@ -582,17 +625,22 @@ module subroutine fraggle_generate_vel_vec(collider, nbody_system, param, lfailu class(swiftest_parameters), intent(inout) :: param !! Current run configuration parameters logical, intent(out) :: lfailure !! Did the velocity computation fail? ! Internals - real(DP), parameter :: ENERGY_SUCCESS_METRIC = 0.1_DP !! Relative energy error to accept as a success (success also must be energy-losing in addition to being within the metric amount) + real(DP), parameter :: ENERGY_SUCCESS_METRIC = 0.1_DP !! Relative energy error to accept as a success (success also must be + !! energy-losing in addition to being within the metric amount) real(DP), parameter :: ENERGY_CONVERGENCE_TOL = 1e-3_DP !! Relative change in error before giving up on energy convergence - real(DP) :: MOMENTUM_SUCCESS_METRIC = 10*epsilon(1.0_DP) !! Relative angular momentum error to accept as a success (should be *much* stricter than energy) + real(DP) :: MOMENTUM_SUCCESS_METRIC = 10*epsilon(1.0_DP) !! Relative angular momentum error to accept as a success + !! (should be *much* stricter than energy) integer(I4B) :: i, j, loop, try, istart, nfrag, nsteps, nsteps_best, posloop logical :: lhitandrun, lsupercat - real(DP), dimension(NDIM) :: vimp_unit, rimp, vrot, vdisp, L_residual, L_residual_unit, L_residual_best, dL, drot, rot_new, dL_metric - real(DP) :: vimp, vmag, vesc, dE, E_residual, E_residual_best, E_residual_last, ke_avail, ke_remove, dE_best, fscale, dE_metric, mfrag, rn, dL1_mag, dE_conv + real(DP), dimension(NDIM) :: vimp_unit, rimp, vrot, vdisp, L_residual, L_residual_unit, L_residual_best, dL, drot, rot_new + real(DP), dimension(NDIM) :: dL_metric + real(DP) :: vimp, vmag, vesc, dE, E_residual, E_residual_best, E_residual_last, ke_avail, ke_remove, dE_best, fscale + real(DP) :: dE_metric, mfrag, rn, dL1_mag, dE_conv, vumag integer(I4B), dimension(:), allocatable :: vsign real(DP), dimension(:), allocatable :: vscale real(DP), dimension(:), allocatable :: dLi_mag - ! For the initial "guess" of fragment velocities, this is the minimum and maximum velocity relative to escape velocity that the fragments will have + ! For the initial "guess" of fragment velocities, this is the minimum and maximum velocity relative to escape velocity that + ! the fragments will have real(DP), parameter :: hitandrun_vscale = 0.25_DP real(DP) :: vmin_guess real(DP) :: vmax_guess @@ -651,7 +699,11 @@ module subroutine fraggle_generate_vel_vec(collider, nbody_system, param, lfailu ! Scale the magnitude of the velocity by the distance from the impact point ! This will reduce the chances of fragments colliding with each other immediately, and is more physically correct - do concurrent(i = 1:fragments%nbody) +#ifdef DOCONLOC + do concurrent(i = 1:fragments%nbody) shared(fragments,impactors,vscale) local(rimp) +#else + do concurrent(i = 1:fragments%nbody) +#endif rimp(:) = fragments%rc(:,i) - impactors%rcimp(:) vscale(i) = .mag. rimp(:) / sum(impactors%radius(1:2)) end do @@ -662,22 +714,35 @@ module subroutine fraggle_generate_vel_vec(collider, nbody_system, param, lfailu ! Set the velocities of all fragments using all of the scale factors determined above if (istart > 1) fragments%vc(:,1) = impactors%vc(:,1) * impactors%mass(1) / fragments%mass(1) +#ifdef DOCONLOC + do concurrent(i = istart:fragments%nbody) shared(fragments,impactors,lhitandrun, vscale, vesc, vsign) & + local(j,vrot,vmag,vdisp,rimp,vimp_unit, vumag) +#else do concurrent(i = istart:fragments%nbody) +#endif j = fragments%origin_body(i) - vrot(:) = impactors%rot(:,j) .cross. (fragments%rc(:,i) - impactors%rc(:,j)) + vrot(1) = impactors%rot(2,j) * (fragments%rc(3,i) - impactors%rc(3,j)) & + - impactors%rot(3,j) * (fragments%rc(2,i) - impactors%rc(2,j)) + vrot(2) = impactors%rot(3,j) * (fragments%rc(1,i) - impactors%rc(1,j)) & + - impactors%rot(1,j) * (fragments%rc(3,i) - impactors%rc(3,j)) + vrot(3) = impactors%rot(1,j) * (fragments%rc(2,i) - impactors%rc(2,j)) & + - impactors%rot(2,j) * (fragments%rc(1,i) - impactors%rc(1,j)) if (lhitandrun) then - vdisp(:) = .unit.(fragments%rc(:,i) - impactors%rc(:,2)) * vesc + vumag = norm2(fragments%rc(:,i) - impactors%rc(:,2)) + vdisp(:) = (fragments%rc(:,i) - impactors%rc(:,2)) / vumag * vesc fragments%vc(:,i) = vsign(i) * impactors%bounce_unit(:) * vscale(i) + vrot(:) + vdisp(:) else vmag = vscale(i) rimp(:) = fragments%rc(:,i) - impactors%rcimp(:) - vimp_unit(:) = .unit. (rimp(:) + vsign(i) * impactors%bounce_unit(:)) + vumag = norm2(rimp(:) + vsign(i) * impactors%bounce_unit(:)) + vimp_unit(:) = (rimp(:) + vsign(i) * impactors%bounce_unit(:)) / vumag fragments%vc(:,i) = vmag * vimp_unit(:) + vrot(:) end if end do fragments%vmag(:) = .mag. fragments%vc(:,:) - ! Every time the collision-frame velocities are altered, we need to be sure to shift everything back to the center-of-mass frame + ! Every time the collision-frame velocities are altered, we need to be sure to shift everything back to the + ! center-of-mass frame call collision_util_shift_vector_to_origin(fragments%mass, fragments%vc) call fragments%set_coordinate_system() @@ -686,14 +751,19 @@ module subroutine fraggle_generate_vel_vec(collider, nbody_system, param, lfailu nsteps = nsteps + 1 mfrag = sum(fragments%mass(istart:fragments%nbody)) - ! Try to put residual angular momentum into the spin, but if this would go past the spin barrier, then put it into velocity shear instead + ! Try to put residual angular momentum into the spin, but if this would go past the spin barrier, then put it into + ! velocity shear instead call collider_local%get_energy_and_momentum(nbody_system, param, phase="after") L_residual(:) = (collider_local%L_total(:,2) - collider_local%L_total(:,1)) L_residual_unit(:) = .unit. L_residual(:) if (nsteps == 1) L_residual_best(:) = L_residual(:) ! Use equipartition of spin kinetic energy to distribution spin angular momentum +#ifdef DOCONLOC + do concurrent(i = istart:fragments%nbody) shared(DLi_mag, fragments) +#else do concurrent(i = istart:fragments%nbody) +#endif dLi_mag(i) = ((fragments%mass(i) / fragments%mass(istart)) * & (fragments%radius(i) / fragments%radius(istart))**2 * & (fragments%Ip(3,i) / fragments%Ip(3,istart)))**(1.5_DP) @@ -707,7 +777,8 @@ module subroutine fraggle_generate_vel_vec(collider, nbody_system, param, lfailu if (.mag.rot_new(:) < collider_local%max_rot) then fragments%rot(:,i) = rot_new(:) fragments%rotmag(i) = .mag.fragments%rot(:,i) - else ! We would break the spin barrier here. Add a random component of rotation that is less than what would break the limit. The rest will go in velocity shear + else ! We would break the spin barrier here. Add a random component of rotation that is less than what would + ! break the limit. The rest will go in velocity shear call random_number(drot) call random_number(rn) drot(:) = (rn * collider_local%max_rot - fragments%rotmag(i)) * 2 * (drot(:) - 0.5_DP) @@ -752,7 +823,8 @@ module subroutine fraggle_generate_vel_vec(collider, nbody_system, param, lfailu ! Check if we've converged on our constraints if (all(dL_metric(:) <= 1.0_DP)) then - if ((abs(E_residual) < abs(E_residual_best)) .or. ((dE < 0.0_DP) .and. (dE_best >= 0.0_DP))) then ! This is our best case so far. Save it for posterity + if ((abs(E_residual) < abs(E_residual_best)) .or. ((dE < 0.0_DP) .and. (dE_best >= 0.0_DP))) then + ! This is our best case so far. Save it for posterity E_residual_best = E_residual L_residual_best(:) = L_residual(:) dE_best = dE @@ -766,7 +838,8 @@ module subroutine fraggle_generate_vel_vec(collider, nbody_system, param, lfailu if (dE_conv < ENERGY_CONVERGENCE_TOL) exit inner end if - ! Remove a constant amount of velocity from the bodies so we don't shift the center of mass and screw up the momentum + ! Remove a constant amount of velocity from the bodies so we don't shift the center of mass and screw up the + ! momentum ke_avail = 0.0_DP do i = fragments%nbody, 1, -1 ke_avail = ke_avail + 0.5_DP * fragments%mass(i) * max(fragments%vmag(i) - vesc / try,0.0_DP)**2 @@ -797,15 +870,21 @@ module subroutine fraggle_generate_vel_vec(collider, nbody_system, param, lfailu write(message, *) nsteps if (lfailure) then - call swiftest_io_log_one_message(COLLISION_LOG_OUT, "Fraggle velocity calculation failed to converge after " // trim(adjustl(message)) // " steps. The best solution found had:") + call swiftest_io_log_one_message(COLLISION_LOG_OUT, "Fraggle velocity calculation failed to converge after " & + // trim(adjustl(message)) // " steps. The best solution found had:") else - call swiftest_io_log_one_message(COLLISION_LOG_OUT,"Fraggle velocity calculation converged after " // trim(adjustl(message)) // " steps.") + call swiftest_io_log_one_message(COLLISION_LOG_OUT,"Fraggle velocity calculation converged after " & + // trim(adjustl(message)) // " steps.") call collider%get_energy_and_momentum(nbody_system, param, phase="after") L_residual(:) = (collider%L_total(:,2) - collider%L_total(:,1)) call collision_util_velocity_torque(-L_residual(:), collider%fragments%mtot, impactors%rbcom, impactors%vbcom) - do concurrent(i = 1:collider%fragments%nbody) +#ifdef DOCONLOC + do concurrent(i = 1:nfrag) shared(collider, impactors) +#else + do concurrent(i = 1:nfrag) +#endif collider%fragments%vb(:,i) = collider%fragments%vc(:,i) + impactors%vbcom(:) end do diff --git a/src/fraggle/fraggle_util.f90 b/src/fraggle/fraggle_util.f90 index 3b0c42d6a..19752442c 100644 --- a/src/fraggle/fraggle_util.f90 +++ b/src/fraggle/fraggle_util.f90 @@ -44,7 +44,11 @@ module subroutine fraggle_util_restructure(self, nbody_system, param, lfailure) new_fragments%Gmass(1) =sum(old_fragments%Gmass(1:2)) new_fragments%density(1) = new_fragments%mass(1) / volume new_fragments%radius(1) = (3._DP * volume / (4._DP * PI))**(THIRD) +#ifdef DOCONLOC + do concurrent(i = 1:NDIM) shared(old_fragments, new_fragments) +#else do concurrent(i = 1:NDIM) +#endif new_fragments%Ip(i,1) = sum(old_fragments%mass(1:2) * old_fragments%Ip(i,1:2)) end do new_fragments%Ip(:,1) = new_fragments%Ip(:,1) / new_fragments%mass(1) @@ -55,7 +59,11 @@ module subroutine fraggle_util_restructure(self, nbody_system, param, lfailure) new_fragments%Gmass(2:nnew) = old_fragments%Gmass(3:nold) new_fragments%density(2:nnew) = old_fragments%density(3:nold) new_fragments%radius(2:nnew) = old_fragments%radius(3:nold) +#ifdef DOCONLOC + do concurrent(i = 1:NDIM) shared(old_fragments,new_fragments) +#else do concurrent(i = 1:NDIM) +#endif new_fragments%Ip(i,2:nnew) = old_fragments%Ip(i,3:nold) end do new_fragments%origin_body(2:nnew) = old_fragments%origin_body(3:nold) @@ -87,10 +95,10 @@ module subroutine fraggle_util_set_mass_dist(self, param) class(collision_fraggle), intent(inout) :: self !! Fraggle collision system object class(swiftest_parameters), intent(in) :: param !! Current Swiftest run configuration parameters ! Internals - integer(I4B) :: i, j, k, jproj, jtarg, nfrag, istart, nfragmax, nrem + integer(I4B) :: i, j, jproj, jtarg, nfrag, istart, nfragmax real(DP), dimension(2) :: volume real(DP), dimension(NDIM) :: Ip_avg - real(DP) :: mremaining, mtot, mcumul, G, mass_noise, Mslr, x0, x1, ymid, y0, y1, y, yp, eps, Mrat + real(DP) :: mremaining, mtot, mcumul, G, mass_noise, Mslr, Mrat real(DP), dimension(:), allocatable :: mass real(DP) :: beta integer(I4B), parameter :: MASS_NOISE_FACTOR = 5 !! The number of digits of random noise that get added to the minimum mass value to prevent identical masses from being generated in a single run @@ -100,7 +108,6 @@ module subroutine fraggle_util_set_mass_dist(self, param) integer(I4B), parameter :: iMrem = 3 integer(I4B), parameter :: NFRAGMIN = iMrem + 2 !! Minimum number of fragments that can be generated integer(I4B), dimension(:), allocatable :: ind - integer(I4B), parameter :: MAXLOOP = 20 logical :: flipper logical, dimension(size(IEEE_ALL)) :: fpe_halting_modes @@ -166,7 +173,7 @@ module subroutine fraggle_util_set_mass_dist(self, param) mass(2) = impactors%mass_dist(iMslr) ! Recompute the slope parameter beta so that we span the complete size range - if (Mslr == min_mfrag) Mslr = Mslr + impactors%mass_dist(iMrem) / nfrag + if (abs(Mslr - min_mfrag) < epsilon(min_mfrag) * min_mfrag) Mslr = Mslr + impactors%mass_dist(iMrem) / nfrag mremaining = impactors%mass_dist(iMrem) ! The mass will be distributed in a power law where N>M=(M/Mslr)**(-beta/3) @@ -207,14 +214,18 @@ module subroutine fraggle_util_set_mass_dist(self, param) fragments%density(istart:nfrag) = fragments%mtot / sum(volume(:)) fragments%radius(istart:nfrag) = (3 * fragments%mass(istart:nfrag) / (4 * PI * fragments%density(istart:nfrag)))**(1.0_DP / 3.0_DP) +#ifdef DOCONLOC + do concurrent(i = istart:nfrag) shared(fragments,Ip_avg) +#else do concurrent(i = istart:nfrag) +#endif fragments%Ip(:, i) = Ip_avg(:) end do ! For catastrophic impacts, we will assign each of the n>2 fragments to one of the two original bodies so that the fragment cloud occupies ! roughly the same space as both original bodies. For all other disruption cases, we use body 2 as the center of the cloud. - fragments%origin_body(1) = 1 - fragments%origin_body(2) = 2 + fragments%origin_body(1) = 1_I1B + fragments%origin_body(2) = 2_I1B if (impactors%regime == COLLRESOLVE_REGIME_SUPERCATASTROPHIC) then mcumul = fragments%mass(1) flipper = .true. diff --git a/src/globals/globals_module.f90 b/src/globals/globals_module.f90 index fd26b3404..dd58d6dae 100644 --- a/src/globals/globals_module.f90 +++ b/src/globals/globals_module.f90 @@ -37,9 +37,12 @@ module globals real(DP), parameter :: GC = 6.6743E-11_DP !! Universal gravitational constant in SI units real(DP), parameter :: einsteinC = 299792458.0_DP !! Speed of light in SI units - integer(I4B), parameter :: LOWERCASE_BEGIN = iachar('a') !! ASCII character set parameter for lower to upper conversion - start of lowercase - integer(I4B), parameter :: LOWERCASE_END = iachar('z') !! ASCII character set parameter for lower to upper conversion - end of lowercase - integer(I4B), parameter :: UPPERCASE_OFFSET = iachar('A') - iachar('a') !! ASCII character set parameter for lower to upper conversion - offset between upper and lower + integer(I4B), parameter :: LOWERCASE_BEGIN = iachar('a') !! ASCII character set parameter for lower to upper conversion - start + !! of lowercase + integer(I4B), parameter :: LOWERCASE_END = iachar('z') !! ASCII character set parameter for lower to upper conversion - end of + !! lowercase + integer(I4B), parameter :: UPPERCASE_OFFSET = iachar('A') - iachar('a') !! ASCII character set parameter for lower to upper + !! conversion - offset between upper and lower real(SP), parameter :: VERSION_NUMBER = 1.0_SP !! Swiftest version @@ -103,9 +106,11 @@ module globals integer(I4B), parameter :: NDUMPFILES = 2 character(*), parameter :: PARAM_RESTART_FILE = "param.restart.in" #ifdef COARRAY - character(STRMAX) :: SWIFTEST_LOG_FILE !! Name of file to use to log output when using "COMPACT" or "PROGRESS" display style (each co-image gets its own log file) + character(STRMAX) :: SWIFTEST_LOG_FILE !! Name of file to use to log output when using "COMPACT" or + !! "PROGRESS" display style (each co-image gets its own log file) #else - character(*), parameter :: SWIFTEST_LOG_FILE = "swiftest.log" !! Name of file to use to log output when using "COMPACT" or "PROGRESS" display style + character(*), parameter :: SWIFTEST_LOG_FILE = "swiftest.log" !! Name of file to use to log output when using "COMPACT" or + !! "PROGRESS" display style #endif integer(I4B), parameter :: SWIFTEST_LOG_OUT = 33 !! File unit for log file when using "COMPACT" display style @@ -117,7 +122,8 @@ module globals character(*), parameter :: BIN_OUTFILE = 'data.nc' integer(I4B), parameter :: BINUNIT = 20 !! File unit number for the binary output file integer(I4B), parameter :: PARTICLEUNIT = 44 !! File unit number for the binary particle info output file - integer(I4B), parameter :: LUN = 42 !! File unit number for files that are opened and closed within a single subroutine call, and therefore should not collide + integer(I4B), parameter :: LUN = 42 !! File unit number for files that are opened and closed within a single + !! subroutine call, and therefore should not collide !> Miscellaneous constants: integer(I4B), parameter :: NDIM = 3 !! Number of dimensions in our reality diff --git a/src/helio/helio_gr.f90 b/src/helio/helio_gr.f90 index 2092fca6a..6b43714b7 100644 --- a/src/helio/helio_gr.f90 +++ b/src/helio/helio_gr.f90 @@ -71,13 +71,18 @@ pure module subroutine helio_gr_p4_pl(self, nbody_system, param, dt) class(swiftest_parameters), intent(in) :: param !! Current run configuration parameters real(DP), intent(in) :: dt !! Step size ! Internals - integer(I4B) :: i + integer(I4B) :: i, npl if (self%nbody == 0) return - associate(pl => self, npl => self%nbody) - do concurrent(i = 1:npl, pl%lmask(i)) - call swiftest_gr_p4_pos_kick(param, pl%rh(:, i), pl%vb(:, i), dt) + associate(lmask => self%lmask, rh => self%rh, vb => self%vb, inv_c2 => param%inv_c2) + npl = self%nbody +#ifdef DOCONLOC + do concurrent(i = 1:npl, lmask(i)) shared(inv_c2, lmask, rh, vb, dt) +#else + do concurrent(i = 1:npl, lmask(i)) +#endif + call swiftest_gr_p4_pos_kick(inv_c2, rh(1,i), rh(2,i), rh(3,i), vb(1,i), vb(2,i), vb(3,i), dt) end do end associate @@ -99,13 +104,18 @@ pure module subroutine helio_gr_p4_tp(self, nbody_system, param, dt) class(swiftest_parameters), intent(in) :: param !! Current run configuration parameters real(DP), intent(in) :: dt !! Step size ! Internals - integer(I4B) :: i + integer(I4B) :: i, ntp if (self%nbody == 0) return - associate(tp => self, ntp => self%nbody) - do concurrent(i = 1:ntp, tp%lmask(i)) - call swiftest_gr_p4_pos_kick(param, tp%rh(:, i), tp%vb(:, i), dt) + associate(rh => self%rh, vb => self%vb, lmask => self%lmask, inv_c2 => param%inv_c2) + ntp = self%nbody +#ifdef DOCONLOC + do concurrent(i = 1:ntp, lmask(i)) shared(inv_c2, lmask, rh, vb, dt) +#else + do concurrent(i = 1:ntp, lmask(i)) +#endif + call swiftest_gr_p4_pos_kick(inv_c2, rh(1,i), rh(2,i), rh(3,i), vb(1,i), vb(2,i), vb(3,i), dt) end do end associate diff --git a/src/helio/helio_kick.f90 b/src/helio/helio_kick.f90 index 6fb4fad43..755a385d9 100644 --- a/src/helio/helio_kick.f90 +++ b/src/helio/helio_kick.f90 @@ -104,11 +104,12 @@ module subroutine helio_kick_vb_pl(self, nbody_system, param, t, dt, lbeg) real(DP), intent(in) :: dt !! Stepsize logical, intent(in) :: lbeg !! Logical flag indicating whether this is the beginning of the half step or not. ! Internals - integer(I4B) :: i + integer(I4B) :: i, npl if (self%nbody == 0) return - associate(pl => self, npl => self%nbody) + associate(pl => self) + npl = self%nbody pl%ah(:, 1:npl) = 0.0_DP call pl%accel(nbody_system, param, t, lbeg) if (lbeg) then @@ -116,7 +117,11 @@ module subroutine helio_kick_vb_pl(self, nbody_system, param, t, dt, lbeg) else call pl%set_beg_end(rend = pl%rh) end if +#ifdef DOCONLOC + do concurrent(i = 1:npl, pl%lmask(i)) shared(pl,dt) +#else do concurrent(i = 1:npl, pl%lmask(i)) +#endif pl%vb(1, i) = pl%vb(1, i) + pl%ah(1, i) * dt pl%vb(2, i) = pl%vb(2, i) + pl%ah(2, i) * dt pl%vb(3, i) = pl%vb(3, i) + pl%ah(3, i) * dt @@ -143,14 +148,19 @@ module subroutine helio_kick_vb_tp(self, nbody_system, param, t, dt, lbeg) real(DP), intent(in) :: dt !! Stepsize logical, intent(in) :: lbeg !! Logical flag indicating whether this is the beginning of the half step or not. ! Internals - integer(I4B) :: i + integer(I4B) :: i, ntp if (self%nbody == 0) return - associate(tp => self, ntp => self%nbody) + associate(tp => self) + ntp = self%nbody tp%ah(:, 1:ntp) = 0.0_DP call tp%accel(nbody_system, param, t, lbeg) +#ifdef DOCONLOC + do concurrent(i = 1:ntp, tp%lmask(i)) shared(tp,dt) +#else do concurrent(i = 1:ntp, tp%lmask(i)) +#endif tp%vb(:, i) = tp%vb(:, i) + tp%ah(:, i) * dt end do end associate diff --git a/src/misc/solver_module.f90 b/src/misc/solver_module.f90 index 410116b2a..872db6428 100644 --- a/src/misc/solver_module.f90 +++ b/src/misc/solver_module.f90 @@ -16,7 +16,7 @@ module solver use lambda_function use, intrinsic :: ieee_exceptions private - public :: solve_linear_system, solve_rkf45, solve_roots + public :: solve_linear_system, solve_roots !, solve_rkf45 interface solve_linear_system module procedure solve_linear_system_dp @@ -424,22 +424,25 @@ end function f ! returns the minimum of two real numbers real(DP) Function Minimum(x1,x2) real(DP) x1,x2,resultat + if (x1 < x2) then - resultat = x1 + resultat = x1 else - resultat = x2 + resultat = x2 endif + Minimum = resultat end function Minimum ! TRUE if x1*x2 negative - integer Function RootBracketed(x1,x2) + logical Function RootBracketed(x1,x2) real(DP) x1,x2 - integer resultat + logical resultat + if ((x1 > 0.and.x2 > 0).or.(x1 < 0.and.x2 < 0)) then - resultat = 0 + resultat = .false. else - resultat = 1 + resultat = .true. endif RootBracketed = resultat end function RootBracketed diff --git a/src/netcdf_io/netcdf_io_implementations.f90 b/src/netcdf_io/netcdf_io_implementations.f90 index 40c561183..b4e7f0b12 100644 --- a/src/netcdf_io/netcdf_io_implementations.f90 +++ b/src/netcdf_io/netcdf_io_implementations.f90 @@ -40,8 +40,12 @@ module subroutine netcdf_io_close(self) character(namelen) :: message if (self%lfile_is_open) then +#ifdef COARRAY write(message,*) this_image() message = "netcdf_io_close on image " // trim(adjustl(message)) +#else + message = "netcdf_io_close" +#endif call netcdf_io_check( nf90_close(self%id), message) self%lfile_is_open = .false. end if @@ -55,6 +59,8 @@ module subroutine netcdf_io_find_tslot(self, t, tslot) !! !! Given an open NetCDF file and a value of time t, finds the index of the time value (aka the time slot) to place a new set of data. !! The returned value of tslot will correspond to the first index value where the value of t is greater than or equal to the saved time value. + use, intrinsic :: ieee_exceptions + use, intrinsic :: ieee_arithmetic implicit none ! Arguments class(netcdf_parameters), intent(inout) :: self !! Parameters used to identify a particular NetCDF dataset @@ -63,7 +69,10 @@ module subroutine netcdf_io_find_tslot(self, t, tslot) ! Internals real(DP), dimension(:), allocatable :: tvals integer(I4B) :: i + logical, dimension(size(IEEE_ALL)) :: fpe_halting_modes + call ieee_get_halting_mode(IEEE_ALL,fpe_halting_modes) ! Save the current halting modes so we can turn them off temporarily + call ieee_set_halting_mode(IEEE_ALL,.false.) if (.not.self%lfile_is_open) return tslot = 0 @@ -72,7 +81,7 @@ module subroutine netcdf_io_find_tslot(self, t, tslot) if (self%max_tslot > 0) then allocate(tvals(self%max_tslot)) call netcdf_io_check( nf90_get_var(self%id, self%time_varid, tvals(:), start=[1]), "netcdf_io_find_tslot get_var" ) - where(tvals(:) /= tvals(:)) tvals(:) = huge(1.0_DP) + where(.not.ieee_is_normal(tvals(:))) tvals(:) = huge(1.0_DP) else allocate(tvals(1)) tvals(1) = huge(1.0_DP) @@ -87,6 +96,8 @@ module subroutine netcdf_io_find_tslot(self, t, tslot) self%max_tslot = max(self%max_tslot, tslot) self%tslot = tslot + call ieee_set_halting_mode(IEEE_ALL,fpe_halting_modes) + return end subroutine netcdf_io_find_tslot diff --git a/src/operator/operator_cross.f90 b/src/operator/operator_cross.f90 index f784a1c98..72c24a176 100644 --- a/src/operator/operator_cross.f90 +++ b/src/operator/operator_cross.f90 @@ -104,7 +104,7 @@ pure module function operator_cross_el_sp(A, B) result(C) n = size(A, 2) if (allocated(C)) deallocate(C) allocate(C, mold = A) - do concurrent (i = 1:n) + do i = 1,n C(:,i) = operator_cross_sp(A(:,i), B(:,i)) end do return @@ -118,7 +118,7 @@ pure module function operator_cross_el_dp(A, B) result(C) n = size(A, 2) if (allocated(C)) deallocate(C) allocate(C, mold = A) - do concurrent (i = 1:n) + do i = 1,n C(:,i) = operator_cross_dp(A(:,i), B(:,i)) end do return @@ -132,7 +132,7 @@ pure module function operator_cross_el_qp(A, B) result(C) n = size(A, 2) if (allocated(C)) deallocate(C) allocate(C, mold = A) - do concurrent (i = 1:n) + do i = 1,n C(:,i) = operator_cross_qp(A(:,i), B(:,i)) end do return @@ -146,7 +146,7 @@ pure module function operator_cross_el_i1b(A, B) result(C) n = size(A, 2) if (allocated(C)) deallocate(C) allocate(C, mold = A) - do concurrent (i = 1:n) + do i = 1,n C(:,i) = operator_cross_i1b(A(:,i), B(:,i)) end do return @@ -160,7 +160,7 @@ pure module function operator_cross_el_i2b(A, B) result(C) n = size(A, 2) if (allocated(C)) deallocate(C) allocate(C, mold = A) - do concurrent (i = 1:n) + do i = 1,n C(:,i) = operator_cross_i2b(A(:,i), B(:,i)) end do return @@ -174,7 +174,7 @@ pure module function operator_cross_el_i4b(A, B) result(C) n = size(A, 2) if (allocated(C)) deallocate(C) allocate(C, mold = A) - do concurrent (i = 1:n) + do i = 1,n C(:,i) = operator_cross_i4b(A(:,i), B(:,i)) end do return @@ -188,7 +188,7 @@ pure module function operator_cross_el_i8b(A, B) result(C) n = size(A, 2) if (allocated(C)) deallocate(C) allocate(C, mold = A) - do concurrent (i = 1:n) + do i = 1,n C(:,i) = operator_cross_i8b(A(:,i), B(:,i)) end do return diff --git a/src/operator/operator_mag.f90 b/src/operator/operator_mag.f90 index 8bf6bff5b..721e4a930 100644 --- a/src/operator/operator_mag.f90 +++ b/src/operator/operator_mag.f90 @@ -44,7 +44,7 @@ pure module function operator_mag_el_sp(A) result(B) if (allocated(B)) deallocate(B) allocate(B(n)) call ieee_set_halting_mode(ieee_underflow, .false.) - do concurrent (i=1:n) + do i = 1,n B(i) = norm2(A(:, i)) end do return @@ -59,7 +59,7 @@ pure module function operator_mag_el_dp(A) result(B) if (allocated(B)) deallocate(B) allocate(B(n)) call ieee_set_halting_mode(ieee_underflow, .false.) - do concurrent (i=1:n) + do i = 1,n B(i) = norm2(A(:, i)) end do return @@ -74,7 +74,7 @@ pure module function operator_mag_el_qp(A) result(B) if (allocated(B)) deallocate(B) allocate(B(n)) call ieee_set_halting_mode(ieee_underflow, .false.) - do concurrent (i=1:n) + do i = 1,n B(i) = norm2(A(:, i)) end do return diff --git a/src/operator/operator_unit.f90 b/src/operator/operator_unit.f90 index 39f3fce53..2a14f6645 100644 --- a/src/operator/operator_unit.f90 +++ b/src/operator/operator_unit.f90 @@ -89,7 +89,7 @@ pure module function operator_unit_el_sp(A) result(B) if (allocated(B)) deallocate(B) allocate(B(NDIM,n)) - do concurrent (i=1:n) + do i=1,n B(:,i) = operator_unit_sp(A(:,i)) end do @@ -109,7 +109,7 @@ pure module function operator_unit_el_dp(A) result(B) if (allocated(B)) deallocate(B) allocate(B(NDIM,n)) - do concurrent (i=1:n) + do i=1,n B(:,i) = operator_unit_dp(A(:,i)) end do @@ -128,7 +128,7 @@ pure module function operator_unit_el_qp(A) result(B) if (allocated(B)) deallocate(B) allocate(B(NDIM,n)) - do concurrent (i=1:n) + do i=1,n B(:,i) = operator_unit_qp(A(:,i)) end do diff --git a/src/rmvs/rmvs_coarray.f90 b/src/rmvs/rmvs_coarray.f90 index 6f7467df8..3a7508159 100644 --- a/src/rmvs/rmvs_coarray.f90 +++ b/src/rmvs/rmvs_coarray.f90 @@ -68,7 +68,6 @@ module subroutine rmvs_coarray_coclone_pl(self) end subroutine rmvs_coarray_coclone_pl - module subroutine rmvs_coarray_coclone_system(self) !! author: David A. Minton !! @@ -141,14 +140,7 @@ module subroutine rmvs_coarray_component_clone_interp_arr1D(var,src_img) do i = 1, n[si] call tmp(i)%coclone() end do - if (this_image() == si) then - do img = 1, num_images() - tmp(:)[img] = var(:) - end do - - sync images(*) - else - sync images(si) + if (this_image() /= si) then if (allocated(var)) deallocate(var) allocate(var, source=tmp) end if diff --git a/src/rmvs/rmvs_kick.f90 b/src/rmvs/rmvs_kick.f90 index 7d113f863..82d19463e 100644 --- a/src/rmvs/rmvs_kick.f90 +++ b/src/rmvs/rmvs_kick.f90 @@ -29,11 +29,13 @@ module subroutine rmvs_kick_getacch_tp(self, nbody_system, param, t, lbeg) class(swiftest_parameters), allocatable :: param_planetocen real(DP), dimension(:, :), allocatable :: rh_original real(DP) :: GMcb_original - integer(I4B) :: i + integer(I4B) :: i, ntp, inner_index if (self%nbody == 0) return - associate(tp => self, ntp => self%nbody, ipleP => self%ipleP, inner_index => self%index) + associate(tp => self, ipleP => self%ipleP) + ntp = self%nbody + inner_index = self%index select type(nbody_system) class is (rmvs_nbody_system) if (nbody_system%lplanetocentric) then ! This is a close encounter step, so any accelerations requiring heliocentric position values @@ -59,17 +61,29 @@ module subroutine rmvs_kick_getacch_tp(self, nbody_system, param, t, lbeg) ! Now compute any heliocentric values of acceleration if (tp%lfirst) then +#ifdef DOCONLOC + do concurrent(i = 1:ntp, tp%lmask(i)) shared(tp) +#else do concurrent(i = 1:ntp, tp%lmask(i)) +#endif tp%rheliocentric(:,i) = tp%rh(:,i) + cb%inner(inner_index - 1)%x(:,1) end do else +#ifdef DOCONLOC + do concurrent(i = 1:ntp, tp%lmask(i)) shared(tp) +#else do concurrent(i = 1:ntp, tp%lmask(i)) +#endif tp%rheliocentric(:,i) = tp%rh(:,i) + cb%inner(inner_index )%x(:,1) end do end if ! Swap the planetocentric and heliocentric position vectors and central body masses +#ifdef DOCONLOC + do concurrent(i = 1:ntp, tp%lmask(i)) shared(tp) +#else do concurrent(i = 1:ntp, tp%lmask(i)) +#endif tp%rh(:, i) = tp%rheliocentric(:, i) end do GMcb_original = cb%Gmass diff --git a/src/rmvs/rmvs_step.f90 b/src/rmvs/rmvs_step.f90 index 11f7d2756..4a5dcf3af 100644 --- a/src/rmvs/rmvs_step.f90 +++ b/src/rmvs/rmvs_step.f90 @@ -541,9 +541,8 @@ subroutine rmvs_peri_tp(tp, pl, t, dt, lfirst, inner_index, ipleP, param) integer(I4B), intent(in) :: ipleP !! index of RMVS planet being closely encountered class(swiftest_parameters), intent(in) :: param !! Current run configuration parameters ! Internals - integer(I4B) :: i, id1, id2 - real(DP) :: r2, mu, rhill2, vdotr, a, peri, capm, tperi, rpl - real(DP), dimension(NDIM) :: rh1, rh2, vh1, vh2 + integer(I4B) :: i + real(DP) :: r2, mu, rhill2, vdotr, a, peri, capm, tperi rhill2 = pl%rhill(ipleP)**2 mu = pl%Gmass(ipleP) diff --git a/src/swiftest/swiftest_coarray.f90 b/src/swiftest/swiftest_coarray.f90 index e97488a0f..79b2de50e 100644 --- a/src/swiftest/swiftest_coarray.f90 +++ b/src/swiftest/swiftest_coarray.f90 @@ -24,23 +24,40 @@ module subroutine swiftest_coarray_balance_system(nbody_system, param) ! Internals integer(I4B), codimension[*], save :: ntp integer(I4B) :: img,ntp_min, ntp_max + character(len=NAMELEN) :: min_str, max_str, diff_str, ni_str ntp = nbody_system%tp%nbody sync all + write(param%display_unit,*) "Checking whether test particles need to be reblanced." ntp_min = huge(1) ntp_max = 0 do img = 1, num_images() if (ntp[img] < ntp_min) ntp_min = ntp[img] if (ntp[img] > ntp_max) ntp_max = ntp[img] end do + write(min_str,*) ntp_min + write(max_str,*) ntp_max + write(diff_str,*) ntp_max - ntp_min + write(ni_str,*) num_images() + write(param%display_unit,*) "ntp_min : " // trim(adjustl(min_str)) + write(param%display_unit,*) "ntp_max : " // trim(adjustl(max_str)) + write(param%display_unit,*) "difference: " // trim(adjustl(diff_str)) + flush(param%display_unit) + sync all if (ntp_max - ntp_min >= num_images()) then + write(param%display_unit,*) trim(adjustl(diff_str)) // ">=" // trim(adjustl(ni_str)) // ": Rebalancing" + flush(param%display_unit) call nbody_system%coarray_collect(param) call nbody_system%coarray_distribute(param) + write(param%display_unit,*) "Rebalancing complete" + else + write(param%display_unit,*) trim(adjustl(diff_str)) // "<" // trim(adjustl(ni_str)) // ": No rebalancing needed" end if - + flush(param%display_unit) return end subroutine swiftest_coarray_balance_system + module subroutine swiftest_coarray_coclone_body(self) !! author: David A. Minton !! @@ -81,6 +98,21 @@ module subroutine swiftest_coarray_coclone_body(self) return end subroutine swiftest_coarray_coclone_body + module subroutine swiftest_coarray_coclone_kin(self) + !! author: David A. Minton + !! + !! Broadcasts the image 1 object to all other images in a coarray + implicit none + ! Arguments + class(swiftest_kinship),intent(inout),codimension[*] :: self !! Swiftest kinship object + + call coclone(self%parent) + call coclone(self%nchild) + call coclone(self%child) + + return + end subroutine swiftest_coarray_coclone_kin + module subroutine swiftest_coarray_coclone_nc(self) !! author: David A. Minton !! @@ -314,10 +346,6 @@ module subroutine swiftest_coarray_coclone_system(self) ! Internals integer(I4B) :: i - call self%cb%coclone() - call self%pl%coclone() - call self%tp%coclone() - call coclone(self%maxid) call coclone(self%t) call coclone(self%GMtot) @@ -461,7 +489,7 @@ module subroutine swiftest_coarray_component_clone_kin_arr1D(var,src_img) integer(I4B), intent(in),optional :: src_img ! Internals type(swiftest_kinship), dimension(:), codimension[:], allocatable :: tmp - integer(I4B) :: img, si + integer(I4B) :: i, img, si integer(I4B), allocatable :: n[:] logical, allocatable :: isalloc[:] @@ -471,21 +499,17 @@ module subroutine swiftest_coarray_component_clone_kin_arr1D(var,src_img) si = 1 end if - allocate(isalloc[*]) - allocate(n[*]) + sync all isalloc = allocated(var) if (isalloc) n = size(var) sync all if (.not. isalloc[si]) return allocate(tmp(n[si])[*]) - if (this_image() == si) then - do img = 1, num_images() - tmp(:)[img] = var - end do - sync images(*) - else - sync images(si) + do i = 1, n[si] + call tmp(i)%coclone() + end do + if (this_image() /= si) then if (allocated(var)) deallocate(var) allocate(var, source=tmp) end if @@ -700,10 +724,8 @@ module subroutine swiftest_coarray_distribute_system(nbody_system, param) write(image_num_char,*) this_image() write(ntp_num_char,*) nbody_system%tp%nbody - if (this_image() /= 1) sync images(this_image() - 1) write(param%display_unit,*) "Image " // trim(adjustl(image_num_char)) // " ntp: " // trim(adjustl(ntp_num_char)) if (param%log_output) flush(param%display_unit) - if (this_image() < num_images()) sync images(this_image() + 1) deallocate(ntp, lspill_list, tmp, cotp) @@ -711,4 +733,4 @@ module subroutine swiftest_coarray_distribute_system(nbody_system, param) end subroutine swiftest_coarray_distribute_system -end submodule s_swiftest_coarray \ No newline at end of file +end submodule s_swiftest_coarray diff --git a/src/swiftest/swiftest_drift.f90 b/src/swiftest/swiftest_drift.f90 index 5c0427c62..82239e452 100644 --- a/src/swiftest/swiftest_drift.f90 +++ b/src/swiftest/swiftest_drift.f90 @@ -82,7 +82,11 @@ module subroutine swiftest_drift_all(mu, x, v, n, param, dt, lmask, iflag) allocate(dtp(n)) if (param%lgr) then +#ifdef DOCONLOC + do concurrent(i = 1:n, lmask(i)) shared(param,lmask,x,v,mu,dtp,dt) local(rmag,vmag2,energy) +#else do concurrent(i = 1:n, lmask(i)) +#endif rmag = norm2(x(:, i)) vmag2 = dot_product(v(:, i), v(:, i)) energy = 0.5_DP * vmag2 - mu(i) / rmag diff --git a/src/swiftest/swiftest_driver.f90 b/src/swiftest/swiftest_driver.f90 index d8e2ae219..6cec24feb 100644 --- a/src/swiftest/swiftest_driver.f90 +++ b/src/swiftest/swiftest_driver.f90 @@ -33,7 +33,9 @@ program swiftest_driver param%integrator = trim(adjustl(integrator)) param%display_style = trim(adjustl(display_style)) call param%read_in(param_file_name) +#ifdef COARRAY if (.not.param%lcoarray .and. (this_image() /= 1)) stop ! Single image mode +#endif associate(t0 => param%t0, & tstart => param%tstart, & diff --git a/src/swiftest/swiftest_gr.f90 b/src/swiftest/swiftest_gr.f90 index 1c37d3da4..083e5de1b 100644 --- a/src/swiftest/swiftest_gr.f90 +++ b/src/swiftest/swiftest_gr.f90 @@ -73,7 +73,11 @@ pure module subroutine swiftest_gr_kick_getacch(mu, x, lmask, n, inv_c2, agr) real(DP) :: beta, rjmag4 agr(:,:) = 0.0_DP +#ifdef DOCONLOC + do concurrent (i = 1:n, lmask(i)) shared(lmask,x,mu,agr,inv_c2) local(rjmag4,beta) +#else do concurrent (i = 1:n, lmask(i)) +#endif rjmag4 = (dot_product(x(:, i), x(:, i)))**2 beta = -mu(i)**2 * inv_c2 agr(:, i) = 2 * beta * x(:, i) / rjmag4 @@ -83,7 +87,7 @@ pure module subroutine swiftest_gr_kick_getacch(mu, x, lmask, n, inv_c2, agr) end subroutine swiftest_gr_kick_getacch - pure module subroutine swiftest_gr_p4_pos_kick(param, x, v, dt) + pure elemental module subroutine swiftest_gr_p4_pos_kick(inv_c2, rx, ry, rz, vx, vy, vz, dt) !! author: David A. Minton !! !! Position kick due to p**4 term in the post-Newtonian correction @@ -96,17 +100,21 @@ pure module subroutine swiftest_gr_p4_pos_kick(param, x, v, dt) !! Adapted from David A. Minton's Swifter routine gr_whm_p4.f90 implicit none ! Arguments - class(swiftest_parameters), intent(in) :: param !! Current run configuration parameters - real(DP), dimension(:), intent(inout) :: x !! Position vector - real(DP), dimension(:), intent(in) :: v !! Velocity vector - real(DP), intent(in) :: dt !! Step size + real(DP), intent(in) :: inv_c2 !! One over speed of light squared (1/c**2) + real(DP), intent(inout) :: rx, ry, rz !! Position vector + real(DP), intent(in) :: vx, vy, vz !! Velocity vector + real(DP), intent(in) :: dt !! Step size ! Internals - real(DP), dimension(NDIM) :: dr - real(DP) :: vmag2 - - vmag2 = dot_product(v(:), v(:)) - dr(:) = -2 * param%inv_c2 * vmag2 * v(:) - x(:) = x(:) + dr(:) * dt + real(DP) :: drx, dry, drz + real(DP) :: vmag2 + + vmag2 = vx*vx + vy*vy + vz*vz + drx = -2 * inv_c2 * vmag2 * vx + dry = -2 * inv_c2 * vmag2 * vy + drz = -2 * inv_c2 * vmag2 * vz + rx = rx + drx * dt + ry = ry + dry * dt + rz = rz + drz * dt return end subroutine swiftest_gr_p4_pos_kick diff --git a/src/swiftest/swiftest_io.f90 b/src/swiftest/swiftest_io.f90 index 96f974362..70ddac5f7 100644 --- a/src/swiftest/swiftest_io.f90 +++ b/src/swiftest/swiftest_io.f90 @@ -123,8 +123,6 @@ module subroutine swiftest_io_conservation_report(self, param, lterminal) real(DP), dimension(NDIM) :: L_total_now, L_orbit_now, L_spin_now real(DP) :: ke_orbit_now, ke_spin_now, pe_now, E_orbit_now, be_now, be_cb_now, be_cb_orig, te_now real(DP) :: GMtot_now - character(len=STRMAX) :: errmsg - integer(I4B), parameter :: EGYIU = 72 character(len=*), parameter :: EGYTERMFMT = '(" DL/L0 = ", ES12.5, "; DE_orbit/|E0| = ", ES12.5, "; DE_total/|E0| = ", ES12.5, "; DM/M0 = ", ES12.5)' associate(nbody_system => self, pl => self%pl, cb => self%cb, npl => self%pl%nbody, display_unit => param%display_unit) @@ -204,10 +202,6 @@ module subroutine swiftest_io_conservation_report(self, param, lterminal) end associate return - - 667 continue - write(*,*) "Error writing energy and momentum tracking file: " // trim(adjustl(errmsg)) - call base_util_exit(FAILURE) end subroutine swiftest_io_conservation_report @@ -226,7 +220,6 @@ module subroutine swiftest_io_display_run_information(self, param, integration_t real(DP) :: tfrac !! Fraction of total simulation time completed type(progress_bar), save :: pbar !! Object used to print out a progress bar character(len=64) :: pbarmessage - character(*), parameter :: symbacompactfmt = '(";NPLM",ES22.15,$)' #ifdef COARRAY character(*), parameter :: co_statusfmt = '("Image: ",I4, "; Time = ", ES12.5, "; fraction done = ", F6.3, ' // & '"; Number of active pl, tp = ", I6, ", ", I6)' @@ -265,7 +258,7 @@ module subroutine swiftest_io_display_run_information(self, param, integration_t if (param%display_style == "PROGRESS") then call pbar%reset(param%nloops) else if (param%display_style == "COMPACT") then - write(param%display_unit,*) "SWIFTEST START " // trim(adjustl(param%integrator)) + write(*,*) "SWIFTEST START " // trim(adjustl(param%integrator)) end if end if #ifdef COARRAY @@ -277,7 +270,7 @@ module subroutine swiftest_io_display_run_information(self, param, integration_t if (this_image() == 1) then #endif write(pbarmessage,fmt=pbarfmt) self%t, param%tstop - call pbar%update(1_I8B,message=pbarmessage) + call pbar%update(param%iloop,message=pbarmessage) #ifdef COARRAY end if !(this_image() == 1) #endif @@ -285,7 +278,7 @@ module subroutine swiftest_io_display_run_information(self, param, integration_t call self%compact_output(param,integration_timer) end if - if (self%pl%nplm > 0) then + if (param%lmtiny_pl) then #ifdef COARRAY if (param%lcoarray) then write(param%display_unit, co_symbastatfmt) this_image(),self%t, tfrac, self%pl%nbody, self%pl%nplm, self%tp%nbody @@ -414,16 +407,18 @@ module subroutine swiftest_io_dump_storage(self, param) class(swiftest_parameters), intent(inout) :: param !! Current run configuration parameters ! Internals integer(I4B) :: i +#ifdef COARRAY type(walltimer) :: iotimer +#endif if (self%iframe == 0) return call self%make_index_map() associate(nc => self%nc) #ifdef COARRAY sync all - if (param%lcoarray .and. (this_image() /= 1)) sync images(this_image() - 1) write(param%display_unit,*) "File output started" call iotimer%start() + critical #endif call nc%open(param) do i = 1, self%iframe @@ -438,10 +433,11 @@ module subroutine swiftest_io_dump_storage(self, param) end do call nc%close() #ifdef COARRAY - if (param%lcoarray .and. (this_image() < num_images())) sync images(this_image() + 1) + end critical call iotimer%stop() - call iotimer%report(message="File output :", unit=param%display_unit) sync all + call iotimer%report(message="File output :", unit=param%display_unit) + flush(param%display_unit) #endif end associate @@ -463,7 +459,6 @@ module subroutine swiftest_io_get_args(integrator, param_file_name, display_styl character(len=STRMAX), dimension(:), allocatable :: arg integer(I4B), dimension(:), allocatable :: ierr integer :: i,narg - character(len=*),parameter :: linefmt = '(A)' narg = command_argument_count() if (narg > 0) then @@ -651,6 +646,7 @@ module subroutine swiftest_io_netcdf_get_t0_values_system(self, nc, param) ! Internals integer(I4B) :: itmax, idmax, tslot real(DP), dimension(:), allocatable :: vals + logical, dimension(:), allocatable :: plmask, tpmask real(DP), dimension(1) :: rtemp real(DP), dimension(NDIM) :: rot0, Ip0, L real(DP) :: mass0 @@ -687,7 +683,8 @@ module subroutine swiftest_io_netcdf_get_t0_values_system(self, nc, param) self%L_total_orig(:) = self%L_orbit_orig(:) + self%L_spin_orig(:) call netcdf_io_check( nf90_get_var(nc%id, nc%Gmass_varid, vals, start=[1,tslot], count=[idmax,1]), "netcdf_io_get_t0_values_system Gmass_varid" ) - self%GMtot_orig = vals(1) + sum(vals(2:idmax), vals(2:idmax) == vals(2:idmax)) + call nc%get_valid_masks(plmask,tpmask) + self%GMtot_orig = vals(1) + sum(vals(2:idmax), plmask(:)) cb%GM0 = vals(1) cb%dGM = cb%Gmass - cb%GM0 @@ -1028,22 +1025,30 @@ module subroutine swiftest_io_netcdf_open(self, param, readonly) end subroutine swiftest_io_netcdf_open - module subroutine swiftest_io_netcdf_get_valid_masks(self, plmask, tpmask) + module subroutine swiftest_io_netcdf_get_valid_masks(self, plmask, tpmask, plmmask, Gmtiny) !! author: David A. Minton !! !! Given an open NetCDF, returns logical masks indicating which bodies in the body arrays are active pl and tp type at the current time. !! Uses the value of tslot stored in the NetCDF parameter object as the definition of current time + use, intrinsic :: ieee_exceptions + use, intrinsic :: ieee_arithmetic implicit none ! Arguments - class(swiftest_netcdf_parameters), intent(inout) :: self !! Parameters used to identify a particular NetCDF dataset - logical, dimension(:), allocatable, intent(out) :: plmask !! Logical mask indicating which bodies are massive bodies - logical, dimension(:), allocatable, intent(out) :: tpmask !! Logical mask indicating which bodies are test particles + class(swiftest_netcdf_parameters), intent(inout) :: self !! Parameters used to identify a particular NetCDF dataset + logical, dimension(:), allocatable, intent(out) :: plmask !! Logical mask indicating which bodies are massive bodies + logical, dimension(:), allocatable, intent(out) :: tpmask !! Logical mask indicating which bodies are test particles + logical, dimension(:), allocatable, intent(out), optional :: plmmask !! Logical mask indicating which bodies are fully interacting massive bodies + real(DP), intent(in), optional :: Gmtiny !! The cutoff G*mass between semi-interacting and fully interacting massive bodies ! Internals real(DP), dimension(:), allocatable :: Gmass, a real(DP), dimension(:,:), allocatable :: rh integer(I4B), dimension(:), allocatable :: body_status logical, dimension(:), allocatable :: lvalid - integer(I4B) :: idmax, status,i + integer(I4B) :: idmax, status + logical, dimension(size(IEEE_ALL)) :: fpe_halting_modes + + call ieee_get_halting_mode(IEEE_ALL,fpe_halting_modes) ! Save the current halting modes so we can turn them off temporarily + call ieee_set_halting_mode(IEEE_ALL,.false.) call netcdf_io_check( nf90_inquire_dimension(self%id, self%name_dimid, len=idmax), "swiftest_io_netcdf_get_valid_masks nf90_inquire_dimension name_dimid" ) @@ -1059,26 +1064,26 @@ module subroutine swiftest_io_netcdf_get_valid_masks(self, plmask, tpmask) if (status == NF90_NOERR) then allocate(body_status(idmax)) call netcdf_io_check( nf90_get_var(self%id, self%status_varid, body_status, start=[1, tslot], count=[idmax,1]), "swiftest_io_netcdf_get_valid_masks nf90_getvar status_varid" ) - lvalid(:) = body_status /= INACTIVE + lvalid(:) = body_status(:) /= INACTIVE else status = nf90_inq_varid(self%id, self%rh_varname, self%rh_varid) if (status == NF90_NOERR) then allocate(rh(NDIM,idmax)) call netcdf_io_check( nf90_get_var(self%id, self%rh_varid, rh, start=[1, 1, tslot], count=[NDIM,idmax,1]), "swiftest_io_netcdf_get_valid_masks nf90_getvar rh_varid" ) - lvalid(:) = rh(1,:) == rh(1,:) + lvalid(:) = ieee_is_normal(rh(1,:)) else status = nf90_inq_varid(self%id, self%a_varname, self%a_varid) if (status == NF90_NOERR) then allocate(a(idmax)) call netcdf_io_check( nf90_get_var(self%id, self%a_varid, a, start=[1, tslot], count=[idmax,1]), "swiftest_io_netcdf_get_valid_masks nf90_getvar a_varid" ) - lvalid(:) = a(:) == a(:) + lvalid(:) = ieee_is_normal(a(:)) else lvalid(:) = .false. end if end if end if - plmask(:) = (Gmass(:) == Gmass(:)) + plmask(:) = ieee_is_normal(Gmass(:)) where(plmask(:)) plmask(:) = Gmass(:) > 0.0_DP tpmask(:) = .not. plmask(:) plmask(1) = .false. ! This is the central body @@ -1087,6 +1092,15 @@ module subroutine swiftest_io_netcdf_get_valid_masks(self, plmask, tpmask) plmask(:) = plmask(:) .and. lvalid(:) tpmask(:) = tpmask(:) .and. lvalid(:) + if (present(plmmask) .and. present(Gmtiny)) then + allocate(plmmask, source=plmask) + where(plmask(:)) + plmmask = Gmass(:) > Gmtiny + endwhere + end if + + call ieee_set_halting_mode(IEEE_ALL,fpe_halting_modes) + end associate return @@ -1307,10 +1321,6 @@ module function swiftest_io_netcdf_read_frame_system(self, nc, param) result(ier ierr = 0 return - - 667 continue - write(*,*) "Error reading nbody_system frame in netcdf_io_read_frame_system" - end function swiftest_io_netcdf_read_frame_system @@ -1326,21 +1336,15 @@ module subroutine swiftest_io_netcdf_read_hdr_system(self, nc, param) class(swiftest_netcdf_parameters), intent(inout) :: nc !! Parameters used to for reading a NetCDF dataset to file class(swiftest_parameters), intent(inout) :: param !! Current run configuration parameters ! Internals - integer(I4B) :: status, idmax + integer(I4B) :: status logical, dimension(:), allocatable :: plmask, tpmask, plmmask - real(DP), dimension(:), allocatable :: Gmtemp associate(tslot => nc%tslot) call netcdf_io_check( nf90_get_var(nc%id, nc%time_varid, self%t, start=[tslot]), "netcdf_io_read_hdr_system nf90_getvar time_varid" ) - call nc%get_valid_masks(plmask, tpmask) - if (param%lmtiny_pl) then - idmax = size(plmask) - allocate(plmmask(idmax)) - allocate(Gmtemp(idmax)) - call netcdf_io_check( nf90_get_var(nc%id, nc%Gmass_varid, Gmtemp, start=[1,tslot], count=[idmax,1]), "netcdf_io_read_hdr_system nf90_getvar Gmass_varid" ) - where(Gmtemp(:) /= Gmtemp(:)) Gmtemp(:) = 0.0_DP - plmmask(:) = plmask(:) .and. Gmtemp(:) > param%GMTINY + call nc%get_valid_masks(plmask, tpmask, plmmask, param%GMTINY) + else + call nc%get_valid_masks(plmask, tpmask) end if status = nf90_inq_varid(nc%id, nc%npl_varname, nc%npl_varid) @@ -1615,11 +1619,14 @@ module subroutine swiftest_io_netcdf_write_frame_body(self, nc, param) class(swiftest_netcdf_parameters), intent(inout) :: nc !! Parameters used to for writing a NetCDF dataset to file class(swiftest_parameters), intent(inout) :: param !! Current run configuration parameters ! Internals - integer(I4B) :: i, j, idslot, old_mode, ntp + integer(I4B) :: i, j, idslot, old_mode, tmp integer(I4B), dimension(:), allocatable :: ind real(DP), dimension(NDIM) :: vh !! Temporary variable to store heliocentric velocity values when converting from pseudovelocity in GR-enabled runs real(DP) :: a, e, inc, omega, capom, capm, varpi, lam, f, cape, capf +#ifdef COARRAY + integer(I4B) :: ntp logical, dimension(:), allocatable :: tpmask, plmask +#endif call self%write_info(nc, param) @@ -1717,7 +1724,7 @@ module subroutine swiftest_io_netcdf_write_frame_body(self, nc, param) end select #endif - call netcdf_io_check( nf90_set_fill(nc%id, old_mode, old_mode), "netcdf_io_write_frame_body nf90_set_fill old_mode" ) + call netcdf_io_check( nf90_set_fill(nc%id, old_mode, tmp), "netcdf_io_write_frame_body nf90_set_fill old_mode" ) return end subroutine swiftest_io_netcdf_write_frame_body @@ -1733,7 +1740,7 @@ module subroutine swiftest_io_netcdf_write_frame_cb(self, nc, param) class(swiftest_netcdf_parameters), intent(inout) :: nc !! Parameters used to for writing a NetCDF dataset to file class(swiftest_parameters), intent(inout) :: param !! Current run configuration parameters ! Internals - integer(I4B) :: idslot, old_mode + integer(I4B) :: idslot, old_mode, tmp associate(tslot => nc%tslot) call self%write_info(nc, param) @@ -1754,7 +1761,7 @@ module subroutine swiftest_io_netcdf_write_frame_cb(self, nc, param) call netcdf_io_check( nf90_put_var(nc%id, nc%rot_varid, self%rot(:) * RAD2DEG, start=[1, idslot, tslot], count=[NDIM,1,1]), "swiftest_io_netcdf_write_frame_cby nf90_put_var cb rot_varid" ) end if - call netcdf_io_check( nf90_set_fill(nc%id, old_mode, old_mode), "swiftest_io_netcdf_write_frame_cb nf90_set_fill old_mode" ) + call netcdf_io_check( nf90_set_fill(nc%id, old_mode, tmp), "swiftest_io_netcdf_write_frame_cb nf90_set_fill old_mode" ) end associate return @@ -1798,7 +1805,6 @@ module subroutine swiftest_io_netcdf_write_hdr_system(self, nc, param) class(swiftest_netcdf_parameters), intent(inout) :: nc !! Parameters used to for writing a NetCDF dataset to file class(swiftest_parameters), intent(inout) :: param !! Current run configuration parameters ! Internals - logical, dimension(:), allocatable :: tpmask, plmask integer(I4B) :: tslot call nc%find_tslot(self%t, tslot) @@ -1837,7 +1843,7 @@ module subroutine swiftest_io_netcdf_write_info_body(self, nc, param) class(swiftest_netcdf_parameters), intent(inout) :: nc !! Parameters used to identify a particular NetCDF dataset class(swiftest_parameters), intent(inout) :: param !! Current run configuration parameters ! Internals - integer(I4B) :: i, j, idslot, old_mode + integer(I4B) :: i, j, idslot, old_mode, tmp integer(I4B), dimension(:), allocatable :: ind character(len=NAMELEN) :: charstring character(len=NAMELEN), dimension(self%nbody) :: origin_type @@ -1881,7 +1887,7 @@ module subroutine swiftest_io_netcdf_write_info_body(self, nc, param) end associate end select - call netcdf_io_check( nf90_set_fill(nc%id, old_mode, old_mode) ) + call netcdf_io_check( nf90_set_fill(nc%id, old_mode, tmp) ) return end subroutine swiftest_io_netcdf_write_info_body @@ -1895,7 +1901,7 @@ module subroutine swiftest_io_netcdf_write_info_cb(self, nc, param) class(swiftest_netcdf_parameters), intent(inout) :: nc !! Parameters used to identify a particular NetCDF dataset class(swiftest_parameters), intent(inout) :: param !! Current run configuration parameters ! Internals - integer(I4B) :: idslot, old_mode + integer(I4B) :: idslot, old_mode, tmp character(len=NAMELEN) :: charstring ! This string of spaces of length NAMELEN is used to clear out any old data left behind inside the string variables @@ -1924,7 +1930,7 @@ module subroutine swiftest_io_netcdf_write_info_cb(self, nc, param) call netcdf_io_check( nf90_put_var(nc%id, nc%discard_rh_varid, self%info%discard_rh(:), start=[1, idslot], count=[NDIM,1]), "netcdf_io_write_info_body nf90_put_var cb discard_rh_varid" ) call netcdf_io_check( nf90_put_var(nc%id, nc%discard_vh_varid, self%info%discard_vh(:), start=[1, idslot], count=[NDIM,1]), "netcdf_io_write_info_body nf90_put_var cb discard_vh_varid" ) end if - call netcdf_io_check( nf90_set_fill(nc%id, old_mode, old_mode) ) + call netcdf_io_check( nf90_set_fill(nc%id, old_mode, tmp) ) return end subroutine swiftest_io_netcdf_write_info_cb @@ -1978,7 +1984,6 @@ module subroutine swiftest_io_param_reader(self, unit, iotype, v_list, iostat, i character(*),parameter :: linefmt = '(A)' !! Format code for simple text string integer(I4B) :: nseeds, nseeds_from_file logical :: seed_set = .false. !! Is the random seed set in the input file? - character(len=:), allocatable :: integrator real(DP) :: tratio, y #ifdef COARRAY type(swiftest_parameters), codimension[*], save :: coparam @@ -2506,9 +2511,6 @@ module subroutine swiftest_io_param_writer(self, unit, iotype, v_list, iostat, i integer, intent(out) :: iostat !! IO status code character(len=*), intent(inout) :: iomsg !! Message to pass if iostat /= 0 ! Internals - character(*),parameter :: Ifmt = '(I0)' !! Format label for integer values - character(*),parameter :: Rfmt = '(ES25.17)' !! Format label for real values - character(*),parameter :: Lfmt = '(L1)' !! Format label for logical values integer(I4B) :: nseeds associate(param => self) @@ -2577,7 +2579,6 @@ module subroutine swiftest_io_param_writer(self, unit, iotype, v_list, iostat, i iomsg = "UDIO not implemented" end associate - 667 continue return end subroutine swiftest_io_param_writer @@ -2924,7 +2925,7 @@ module subroutine swiftest_io_read_in_system(self, nc, param) if (ierr /=0) call base_util_exit(FAILURE) end if - param%loblatecb = ((self%cb%j2rp2 /= 0.0_DP) .or. (self%cb%j4rp4 /= 0.0_DP)) + param%loblatecb = ((abs(self%cb%j2rp2) > 0.0_DP) .or. (abs(self%cb%j4rp4) > 0.0_DP)) if (.not.param%loblatecb) then if (allocated(self%pl%aobl)) deallocate(self%pl%aobl) if (allocated(self%tp%aobl)) deallocate(self%tp%aobl) @@ -3072,7 +3073,6 @@ module subroutine swiftest_io_read_in_param(self, param_file_name) call self%reader(LUN, iotype= "none", v_list=[""], iostat = ierr, iomsg = errmsg) if (ierr == 0) return - 667 continue write(self%display_unit,*) "Error reading parameter file: " // trim(adjustl(errmsg)) call base_util_exit(FAILURE) end subroutine swiftest_io_read_in_param @@ -3166,7 +3166,7 @@ module subroutine swiftest_io_initialize_output_file_system(self, nc, param) class(swiftest_parameters), intent(inout) :: param !! Current run configuration parameters ! Internals logical, save :: lfirst = .true. !! Flag to determine if this is the first call of this method - character(len=STRMAX) :: errmsg + character(len=2*STRMAX) :: errmsg logical :: fileExists associate (pl => self%pl, tp => self%tp, npl => self%pl%nbody, ntp => self%tp%nbody) @@ -3180,13 +3180,13 @@ module subroutine swiftest_io_initialize_output_file_system(self, nc, param) select case(param%out_stat) case('APPEND') if (.not.fileExists) then - errmsg = param%outfile // " not found! You must specify OUT_STAT = NEW, REPLACE, or UNKNOWN" + errmsg = trim(adjustl(param%outfile)) // " not found! You must specify OUT_STAT = NEW, REPLACE, or UNKNOWN" goto 667 end if call nc%open(param) case('NEW') if (fileExists) then - errmsg = param%outfile // " Alread Exists! You must specify OUT_STAT = APPEND, REPLACE, or UNKNOWN" + errmsg = trim(adjustl(param%outfile)) // " Alread Exists! You must specify OUT_STAT = APPEND, REPLACE, or UNKNOWN" goto 667 end if call nc%initialize(param) diff --git a/src/swiftest/swiftest_kick.f90 b/src/swiftest/swiftest_kick.f90 index 751702fa2..82cefd26c 100644 --- a/src/swiftest/swiftest_kick.f90 +++ b/src/swiftest/swiftest_kick.f90 @@ -21,7 +21,6 @@ module subroutine swiftest_kick_getacch_int_pl(self, param) class(swiftest_pl), intent(inout) :: self !! Swiftest massive body object class(swiftest_parameters), intent(inout) :: param !! Current swiftest run configuration parameters ! Internals - logical, save :: lfirst = .true. #ifdef PROFILE type(walltimer), save :: timer #endif @@ -136,7 +135,7 @@ module subroutine swiftest_kick_getacch_int_all_flat_norad_pl(npl, nplpl, k_plpl integer(I8B) :: k real(DP), dimension(NDIM,npl) :: ahi, ahj integer(I4B) :: i, j - real(DP) :: rji2, rlim2 + real(DP) :: rji2 real(DP) :: rx, ry, rz ahi(:,:) = 0.0_DP @@ -197,7 +196,11 @@ module subroutine swiftest_kick_getacch_int_all_tri_rad_pl(npl, nplm, r, Gmass, !$omp reduction(+:ahi) & !$omp reduction(-:ahj) do i = 1, nplm +#ifdef DOCONLOC + do concurrent(j = i+1:npl) shared(i,r,radius,ahi,ahj,Gmass) local(rx,ry,rz,rji2,rlim2) +#else do concurrent(j = i+1:npl) +#endif rx = r(1, j) - r(1, i) ry = r(2, j) - r(2, i) rz = r(3, j) - r(3, i) @@ -208,14 +211,22 @@ module subroutine swiftest_kick_getacch_int_all_tri_rad_pl(npl, nplm, r, Gmass, end do end do !$omp end parallel do +#ifdef DOCONLOC + do concurrent(i = 1:npl) shared(acc,ahi,ahj) +#else do concurrent(i = 1:npl) +#endif acc(:,i) = acc(:,i) + ahi(:,i) + ahj(:,i) end do else !$omp parallel do default(private) schedule(static)& !$omp shared(npl, nplm, r, Gmass, radius, acc) do i = 1, nplm +#ifdef DOCONLOC + do concurrent(j = 1:npl, i/=j) shared(i,r,radius,Gmass,acc) local(rx,ry,rz,rji2,rlim2,fac) +#else do concurrent(j = 1:npl, i/=j) +#endif rx = r(1,j) - r(1,i) ry = r(2,j) - r(2,i) rz = r(3,j) - r(3,i) @@ -235,7 +246,11 @@ module subroutine swiftest_kick_getacch_int_all_tri_rad_pl(npl, nplm, r, Gmass, !$omp parallel do default(private) schedule(static)& !$omp shared(npl, nplm, r, Gmass, radius, acc) do i = nplm+1,npl +#ifdef DOCONLOC + do concurrent(j = 1:nplm) shared(i,r,radius,Gmass,acc) local(rx,ry,rz,rji2,rlim2,fac) +#else do concurrent(j = 1:nplm) +#endif rx = r(1,j) - r(1,i) ry = r(2,j) - r(2,i) rz = r(3,j) - r(3,i) @@ -275,7 +290,7 @@ module subroutine swiftest_kick_getacch_int_all_tri_norad_pl(npl, nplm, r, Gmass real(DP), dimension(:,:), intent(inout) :: acc !! Acceleration vector array ! Internals integer(I4B) :: i, j, nplt - real(DP) :: rji2, rlim2, fac, rx, ry, rz + real(DP) :: rji2, fac, rx, ry, rz real(DP), dimension(NDIM,npl) :: ahi, ahj logical :: lmtiny @@ -290,7 +305,11 @@ module subroutine swiftest_kick_getacch_int_all_tri_norad_pl(npl, nplm, r, Gmass !$omp reduction(+:ahi) & !$omp reduction(-:ahj) do i = 1, nplm +#ifdef DOCONLOC + do concurrent(j = i+1:npl) shared(i,r,Gmass,ahi,ahj) local(rx,ry,rz,rji2) +#else do concurrent(j = i+1:npl) +#endif rx = r(1, j) - r(1, i) ry = r(2, j) - r(2, i) rz = r(3, j) - r(3, i) @@ -300,14 +319,22 @@ module subroutine swiftest_kick_getacch_int_all_tri_norad_pl(npl, nplm, r, Gmass end do end do !$omp end parallel do +#ifdef DOCONLOC + do concurrent(i = 1:npl) shared(acc,ahi,ahj) +#else do concurrent(i = 1:npl) +#endif acc(:,i) = acc(:,i) + ahi(:,i) + ahj(:,i) end do else !$omp parallel do default(private) schedule(static)& !$omp shared(npl, nplm, r, Gmass, acc) do i = 1, nplm +#ifdef DOCONLOC + do concurrent(j = 1:npl, j/=i) shared(i,r,Gmass, acc) local(rx,ry,rz,rji2,fac) +#else do concurrent(j = 1:npl, j/=i) +#endif rx = r(1,j) - r(1,i) ry = r(2,j) - r(2,i) rz = r(3,j) - r(3,i) @@ -324,7 +351,11 @@ module subroutine swiftest_kick_getacch_int_all_tri_norad_pl(npl, nplm, r, Gmass !$omp parallel do default(private) schedule(static)& !$omp shared(npl, nplm, r, Gmass, acc) do i = nplm+1,npl +#ifdef DOCONLOC + do concurrent(j = 1:nplm) shared(i,r,Gmass,acc) local(rx,ry,rz,rji2,fac) +#else do concurrent(j = 1:nplm) +#endif rx = r(1,j) - r(1,i) ry = r(2,j) - r(2,i) rz = r(3,j) - r(3,i) @@ -369,7 +400,11 @@ module subroutine swiftest_kick_getacch_int_all_tp(ntp, npl, rtp, rpl, GMpl, lma !$omp reduction(-:acc) do i = 1, ntp if (lmask(i)) then +#ifdef DOCONLOC + do concurrent (j = 1:npl) shared(rtp,rpl,GMpl,acc) local(rx,ry,rz,rji2) +#else do concurrent (j = 1:npl) +#endif rx = rtp(1, i) - rpl(1, j) ry = rtp(2, i) - rpl(2, j) rz = rtp(3, i) - rpl(3, j) diff --git a/src/swiftest/swiftest_module.f90 b/src/swiftest/swiftest_module.f90 index f7be030d1..b7bbd109c 100644 --- a/src/swiftest/swiftest_module.f90 +++ b/src/swiftest/swiftest_module.f90 @@ -93,10 +93,32 @@ module swiftest integer(I4B), dimension(:), allocatable :: child !! Index of children particles contains procedure :: dealloc => swiftest_util_dealloc_kin !! Deallocates all allocatable arrays +#ifdef COARRAY + procedure :: coclone => swiftest_coarray_coclone_kin !! Clones the image 1 body object to all other images in the coarray structure. +#endif final :: swiftest_final_kin !! Finalizes the Swiftest kinship object - deallocates all allocatables end type swiftest_kinship + type, extends(base_particle_info) :: swiftest_particle_info + character(len=NAMELEN) :: name !! Non-unique name + character(len=NAMELEN) :: particle_type !! String containing a description of the particle type (e.g. Central Body, Massive Body, Test Particle) + character(len=NAMELEN) :: origin_type !! String containing a description of the origin of the particle (e.g. Initial Conditions, Supercatastrophic, Disruption, etc.) + real(DP) :: origin_time !! The time of the particle's formation + integer(I4B) :: collision_id !! The ID of the collision that formed the particle + real(DP), dimension(NDIM) :: origin_rh !! The heliocentric distance vector at the time of the particle's formation + real(DP), dimension(NDIM) :: origin_vh !! The heliocentric velocity vector at the time of the particle's formation + real(DP) :: discard_time !! The time of the particle's discard + character(len=NAMELEN) :: status !! Particle status description: Active, Merged, Fragmented, etc. + real(DP), dimension(NDIM) :: discard_rh !! The heliocentric distance vector at the time of the particle's discard + real(DP), dimension(NDIM) :: discard_vh !! The heliocentric velocity vector at the time of the particle's discard + integer(I4B) :: discard_body_id !! The id of the other body involved in the discard (0 if no other body involved) + contains + procedure :: copy => swiftest_util_copy_particle_info !! Copies one set of information object components into another, component-by-component + procedure :: set_value => swiftest_util_set_particle_info !! Sets one or more values of the particle information metadata object + end type swiftest_particle_info + + !> An abstract class for a generic collection of Swiftest bodies type, abstract, extends(base_object) :: swiftest_body !! Superclass that defines the generic elements of a Swiftest particle @@ -167,25 +189,6 @@ module swiftest end type swiftest_body - type, extends(base_particle_info) :: swiftest_particle_info - character(len=NAMELEN) :: name !! Non-unique name - character(len=NAMELEN) :: particle_type !! String containing a description of the particle type (e.g. Central Body, Massive Body, Test Particle) - character(len=NAMELEN) :: origin_type !! String containing a description of the origin of the particle (e.g. Initial Conditions, Supercatastrophic, Disruption, etc.) - real(DP) :: origin_time !! The time of the particle's formation - integer(I4B) :: collision_id !! The ID of the collision that formed the particle - real(DP), dimension(NDIM) :: origin_rh !! The heliocentric distance vector at the time of the particle's formation - real(DP), dimension(NDIM) :: origin_vh !! The heliocentric velocity vector at the time of the particle's formation - real(DP) :: discard_time !! The time of the particle's discard - character(len=NAMELEN) :: status !! Particle status description: Active, Merged, Fragmented, etc. - real(DP), dimension(NDIM) :: discard_rh !! The heliocentric distance vector at the time of the particle's discard - real(DP), dimension(NDIM) :: discard_vh !! The heliocentric velocity vector at the time of the particle's discard - integer(I4B) :: discard_body_id !! The id of the other body involved in the discard (0 if no other body involved) - contains - procedure :: copy => swiftest_util_copy_particle_info !! Copies one set of information object components into another, component-by-component - procedure :: set_value => swiftest_util_set_particle_info !! Sets one or more values of the particle information metadata object - end type swiftest_particle_info - - type, abstract, extends(base_object) :: swiftest_cb !> An abstract class for a generic central body in a Swiftest simulation class(swiftest_particle_info), allocatable :: info !! Particle metadata information @@ -248,7 +251,7 @@ module swiftest integer(I8B) :: nplpl !! Number of body-body comparisons in the flattened upper triangular matrix type(swiftest_kinship), dimension(:), allocatable :: kin !! Array of merger relationship structures that can account for multiple pairwise mergers in a single step logical, dimension(:), allocatable :: lmtiny !! flag indicating whether this body is below the GMTINY cutoff value - integer(I4B) :: nplm !! number of bodies above the GMTINY limit + integer(I4B) :: nplm = 0 !! number of bodies above the GMTINY limit integer(I8B) :: nplplm !! Number of body (all massive)-body (only those above GMTINY) comparisons in the flattened upper triangular matrix integer(I4B), dimension(:), allocatable :: nplenc !! number of encounters with other planets this time step integer(I4B), dimension(:), allocatable :: ntpenc !! number of encounters with test particles this time step @@ -550,12 +553,12 @@ pure module subroutine swiftest_gr_kick_getacch(mu, x, lmask, n, inv_c2, agr) real(DP), dimension(:,:), intent(out) :: agr !! Accelerations end subroutine swiftest_gr_kick_getacch - pure module subroutine swiftest_gr_p4_pos_kick(param, x, v, dt) + pure elemental module subroutine swiftest_gr_p4_pos_kick(inv_c2, rx, ry, rz, vx, vy, vz, dt) implicit none - class(swiftest_parameters), intent(in) :: param !! Current run configuration parameters - real(DP), dimension(:), intent(inout) :: x !! Position vector - real(DP), dimension(:), intent(in) :: v !! Velocity vector - real(DP), intent(in) :: dt !! Step size + real(DP), intent(in) :: inv_c2 !! One over speed of light squared (1/c**2) + real(DP), intent(inout) :: rx, ry, rz !! Position vector + real(DP), intent(in) :: vx, vy, vz !! Velocity vector + real(DP), intent(in) :: dt !! Step size end subroutine swiftest_gr_p4_pos_kick pure module subroutine swiftest_gr_pseudovel2vel(param, mu, rh, pv, vh) @@ -671,11 +674,13 @@ module subroutine swiftest_io_netcdf_get_t0_values_system(self, nc, param) class(swiftest_parameters), intent(inout) :: param !! Current run configuration parameters end subroutine swiftest_io_netcdf_get_t0_values_system - module subroutine swiftest_io_netcdf_get_valid_masks(self, plmask, tpmask) + module subroutine swiftest_io_netcdf_get_valid_masks(self, plmask, tpmask, plmmask, Gmtiny) implicit none - class(swiftest_netcdf_parameters), intent(inout) :: self !! Parameters used to identify a particular NetCDF dataset - logical, dimension(:), allocatable, intent(out) :: plmask !! Logical mask indicating which bodies are massive bodies - logical, dimension(:), allocatable, intent(out) :: tpmask !! Logical mask indicating which bodies are test particles + class(swiftest_netcdf_parameters), intent(inout) :: self !! Parameters used to identify a particular NetCDF dataset + logical, dimension(:), allocatable, intent(out) :: plmask !! Logical mask indicating which bodies are massive bodies + logical, dimension(:), allocatable, intent(out) :: tpmask !! Logical mask indicating which bodies are test particles + logical, dimension(:), allocatable, intent(out), optional :: plmmask !! Logical mask indicating which bodies are fully interacting massive bodies + real(DP), intent(in), optional :: Gmtiny !! The cutoff G*mass between semi-interacting and fully interacting massive bodies end subroutine swiftest_io_netcdf_get_valid_masks module subroutine swiftest_io_netcdf_initialize_output(self, param) @@ -1774,6 +1779,11 @@ module subroutine swiftest_coarray_coclone_cb(self) class(swiftest_cb),intent(inout),codimension[*] :: self !! Swiftest cb object end subroutine swiftest_coarray_coclone_cb + module subroutine swiftest_coarray_coclone_kin(self) + implicit none + class(swiftest_kinship),intent(inout),codimension[*] :: self !! Swiftest kinship object + end subroutine swiftest_coarray_coclone_kin + module subroutine swiftest_coarray_coclone_nc(self) implicit none class(swiftest_netcdf_parameters),intent(inout),codimension[*] :: self !! Swiftest body object diff --git a/src/swiftest/swiftest_obl.f90 b/src/swiftest/swiftest_obl.f90 index 6bd7480fb..21f4363ae 100644 --- a/src/swiftest/swiftest_obl.f90 +++ b/src/swiftest/swiftest_obl.f90 @@ -35,7 +35,11 @@ module subroutine swiftest_obl_acc(n, GMcb, j2rp2, j4rp4, rh, lmask, aobl, GMpl, if (n == 0) return aobl(:,:) = 0.0_DP +#ifdef DOCONLOC + do concurrent(i = 1:n, lmask(i)) shared(lmask,rh,aobl) local(r2,irh,rinv2,t0,t1,t2,t3,fac1,fac2) +#else do concurrent(i = 1:n, lmask(i)) +#endif r2 = dot_product(rh(:, i), rh(:, i)) irh = 1.0_DP / sqrt(r2) rinv2 = irh**2 @@ -73,14 +77,19 @@ module subroutine swiftest_obl_acc_pl(self, nbody_system) class(swiftest_pl), intent(inout) :: self !! Swiftest massive body object class(swiftest_nbody_system), intent(inout) :: nbody_system !! Swiftest nbody system object ! Internals - integer(I4B) :: i + integer(I4B) :: i, npl if (self%nbody == 0) return - associate(pl => self, npl => self%nbody, cb => nbody_system%cb) + associate(pl => self, cb => nbody_system%cb) + npl = self%nbody call swiftest_obl_acc(npl, cb%Gmass, cb%j2rp2, cb%j4rp4, pl%rh, pl%lmask, pl%aobl, pl%Gmass, cb%aobl) +#ifdef DOCONLOC + do concurrent(i = 1:npl, pl%lmask(i)) shared(cb,pl) +#else do concurrent(i = 1:npl, pl%lmask(i)) +#endif pl%ah(:, i) = pl%ah(:, i) + pl%aobl(:, i) - cb%aobl(:) end do end associate @@ -103,11 +112,12 @@ module subroutine swiftest_obl_acc_tp(self, nbody_system) class(swiftest_nbody_system), intent(inout) :: nbody_system !! Swiftest nbody system object ! Internals real(DP), dimension(NDIM) :: aoblcb - integer(I4B) :: i + integer(I4B) :: i, ntp if (self%nbody == 0) return - associate(tp => self, ntp => self%nbody, cb => nbody_system%cb) + associate(tp => self, cb => nbody_system%cb) + ntp = self%nbody call swiftest_obl_acc(ntp, cb%Gmass, cb%j2rp2, cb%j4rp4, tp%rh, tp%lmask, tp%aobl) if (nbody_system%lbeg) then aoblcb = cb%aoblbeg @@ -115,7 +125,11 @@ module subroutine swiftest_obl_acc_tp(self, nbody_system) aoblcb = cb%aoblend end if +#ifdef DOCONLOC + do concurrent(i = 1:ntp, tp%lmask(i)) shared(tp,aoblcb) +#else do concurrent(i = 1:ntp, tp%lmask(i)) +#endif tp%ah(:, i) = tp%ah(:, i) + tp%aobl(:, i) - aoblcb(:) end do @@ -139,12 +153,17 @@ module subroutine swiftest_obl_pot_system(self) ! Arguments class(swiftest_nbody_system), intent(inout) :: self !! Swiftest nbody system object ! Internals - integer(I4B) :: i + integer(I4B) :: i, npl real(DP), dimension(self%pl%nbody) :: oblpot_arr - associate(nbody_system => self, pl => self%pl, npl => self%pl%nbody, cb => self%cb) + associate(nbody_system => self, pl => self%pl, cb => self%cb) + npl = self%pl%nbody if (.not. any(pl%lmask(1:npl))) return +#ifdef DOCONLOC + do concurrent (i = 1:npl, pl%lmask(i)) shared(cb,pl,oblpot_arr) +#else do concurrent (i = 1:npl, pl%lmask(i)) +#endif oblpot_arr(i) = swiftest_obl_pot_one(cb%Gmass, pl%Gmass(i), cb%j2rp2, cb%j4rp4, pl%rh(3,i), 1.0_DP / norm2(pl%rh(:,i))) end do nbody_system%oblpot = sum(oblpot_arr, pl%lmask(1:npl)) diff --git a/src/swiftest/swiftest_orbel.f90 b/src/swiftest/swiftest_orbel.f90 index a3ec6f424..431d182ab 100644 --- a/src/swiftest/swiftest_orbel.f90 +++ b/src/swiftest/swiftest_orbel.f90 @@ -21,20 +21,28 @@ module subroutine swiftest_orbel_el2xv_vec(self, cb) class(swiftest_body), intent(inout) :: self !! Swiftest body object class(swiftest_cb), intent(inout) :: cb !! Swiftest central body objec ! Internals - integer(I4B) :: i + integer(I4B) :: i, n if (self%nbody == 0) return + n = self%nbody call self%set_mu(cb) - do concurrent (i = 1:self%nbody) - call swiftest_orbel_el2xv(self%mu(i), self%a(i), self%e(i), self%inc(i), self%capom(i), & - self%omega(i), self%capm(i), self%rh(:, i), self%vh(:, i)) - end do + associate(mu => self%mu, a => self%a, e => self%e, inc => self%inc, capom => self%capom, omega => self%omega, & + capm => self%capm, rh => self%rh, vh => self%vh) +#ifdef DOCONLOC + do concurrent (i = 1:n) shared(mu, a, e, inc, capom, omega, capm, rh, vh) +#else + do concurrent (i = 1:n) +#endif + call swiftest_orbel_el2xv(mu(i), a(i), e(i), inc(i), capom(i), omega(i), capm(i), & + rh(1,i), rh(2,i), rh(3,i), vh(1,i), vh(2,i), vh(3,i)) + end do + end associate return end subroutine swiftest_orbel_el2xv_vec - pure subroutine swiftest_orbel_el2xv(mu, a, ie, inc, capom, omega, capm, x, v) + pure elemental subroutine swiftest_orbel_el2xv(mu, a, ie, inc, capom, omega, capm, rx, ry, rz, vx, vy, vz) !! author: David A. Minton !! !! Compute osculating orbital elements from relative C)rtesian position and velocity @@ -52,7 +60,7 @@ pure subroutine swiftest_orbel_el2xv(mu, a, ie, inc, capom, omega, capm, x, v) implicit none real(DP), intent(in) :: mu real(DP), intent(in) :: a, ie, inc, capom, omega, capm - real(DP), dimension(:), intent(out) :: x, v + real(DP), intent(out) :: rx, ry, rz, vx, vy, vz integer(I4B) :: iorbit_type real(DP) :: e, cape, capf, zpara, em1 @@ -125,12 +133,12 @@ pure subroutine swiftest_orbel_el2xv(mu, a, ie, inc, capom, omega, capm, x, v) vfac2 = ri * sqgma endif !-- - x(1) = d11 * xfac1 + d21 * xfac2 - x(2) = d12 * xfac1 + d22 * xfac2 - x(3) = d13 * xfac1 + d23 * xfac2 - v(1) = d11 * vfac1 + d21 * vfac2 - v(2) = d12 * vfac1 + d22 * vfac2 - v(3) = d13 * vfac1 + d23 * vfac2 + rx = d11 * xfac1 + d21 * xfac2 + ry = d12 * xfac1 + d22 * xfac2 + rz = d13 * xfac1 + d23 * xfac2 + vx = d11 * vfac1 + d21 * vfac2 + vy = d12 * vfac1 + d22 * vfac2 + vz = d13 * vfac1 + d23 * vfac2 return end subroutine swiftest_orbel_el2xv @@ -232,7 +240,6 @@ real(DP) pure function swiftest_orbel_flon(e,icapn) real(DP), parameter :: a5 = 51891840._DP, a3 = 1037836800._DP real(DP), parameter :: b11 = 11 * a11, b9 = 9 * a9, b7 = 7 * a7 real(DP), parameter :: b5 = 5 * a5, b3 = 3 * a3 - real(DP), parameter :: THIRD = 1._DP / 3._DP ! Function to solve "Kepler's eqn" for F (here called ! x) for given e and CAPN. Only good for smallish CAPN @@ -721,7 +728,7 @@ pure elemental module subroutine swiftest_orbel_xv2aeq(mu, rx, ry, rz, vx, vy, v hz = rx*vy - ry*vx h2 = hx*hx + hy*hy + hz*hz - if (h2 == 0.0_DP) return + if (h2 < tiny(h2)) return energy = 0.5_DP * v2 - mu / r if (abs(energy * r / mu) < sqrt(TINYVALUE)) then iorbit_type = PARABOLA @@ -787,7 +794,7 @@ pure module subroutine swiftest_orbel_xv2aqt(mu, rx, ry, rz, vx, vy, vz, a, q, c hy = rz*vx - rx*vz hz = rx*vy - ry*vx h2 = hx*hx + hy*hy + hz*hz - if (h2 == 0.0_DP) return + if (h2 < tiny(h2)) return r = sqrt(rx*rx + ry*ry + rz*rz) v2 = vx*vx + vy*vy + vz*vz @@ -887,7 +894,11 @@ module subroutine swiftest_orbel_xv2el_vec(self, cb) if (allocated(self%capom)) deallocate(self%capom); allocate(self%capom(self%nbody)) if (allocated(self%omega)) deallocate(self%omega); allocate(self%omega(self%nbody)) if (allocated(self%capm)) deallocate(self%capm); allocate(self%capm(self%nbody)) +#ifdef DOCONLOC + do concurrent (i = 1:self%nbody) shared(self) local(varpi,lam,f,cape,capf) +#else do concurrent (i = 1:self%nbody) +#endif call swiftest_orbel_xv2el(self%mu(i), self%rh(1,i), self%rh(2,i), self%rh(3,i), & self%vh(1,i), self%vh(2,i), self%vh(3,i), & self%a(i), self%e(i), self%inc(i), & @@ -932,7 +943,7 @@ pure module subroutine swiftest_orbel_xv2el(mu, rx, ry, rz, vx, vy, vz, a, e, in real(DP), intent(out) :: capf !! hyperbolic anomaly (hyperbolic orbits) ! Internals integer(I4B) :: iorbit_type - real(DP) :: hx, hy, hz, r, v2, h2, h, rdotv, energy, fac, u, w, cw, sw, face, tmpf, sf, cf, rdot, h_over_r2 + real(DP) :: hx, hy, hz, r, v2, h2, h, rdotv, energy, fac, u, w, cw, sw, face, tmpf, sf, cf, rdot a = 0.0_DP e = 0.0_DP diff --git a/src/swiftest/swiftest_util.f90 b/src/swiftest/swiftest_util.f90 index b620a7f9d..f3ce3082f 100644 --- a/src/swiftest/swiftest_util.f90 +++ b/src/swiftest/swiftest_util.f90 @@ -277,11 +277,16 @@ module subroutine swiftest_util_coord_h2b_tp(self, cb) class(swiftest_tp), intent(inout) :: self !! Swiftest test particle object class(swiftest_cb), intent(in) :: cb !! Swiftest central body object ! Internals - integer(I4B) :: i + integer(I4B) :: i, ntp if (self%nbody == 0) return - associate(tp => self, ntp => self%nbody) + associate(tp => self) + ntp = self%nbody +#ifdef DOCONLOC + do concurrent (i = 1:ntp, tp%status(i) /= INACTIVE) shared(cb,tp) +#else do concurrent (i = 1:ntp, tp%status(i) /= INACTIVE) +#endif tp%rb(:, i) = tp%rh(:, i) + cb%rb(:) tp%vb(:, i) = tp%vh(:, i) + cb%vb(:) end do @@ -303,12 +308,17 @@ module subroutine swiftest_util_coord_b2h_pl(self, cb) class(swiftest_pl), intent(inout) :: self !! Swiftest massive body object class(swiftest_cb), intent(inout) :: cb !! Swiftest central body object ! Internals - integer(I4B) :: i + integer(I4B) :: i, npl if (self%nbody == 0) return - associate(pl => self, npl => self%nbody) + associate(pl => self) + npl = self%nbody +#ifdef DOCONLOC + do concurrent (i = 1:npl, pl%status(i) /= INACTIVE) shared(cb,pl) +#else do concurrent (i = 1:npl, pl%status(i) /= INACTIVE) +#endif pl%rh(:, i) = pl%rb(:, i) - cb%rb(:) pl%vh(:, i) = pl%vb(:, i) - cb%vb(:) end do @@ -330,12 +340,17 @@ module subroutine swiftest_util_coord_b2h_tp(self, cb) class(swiftest_tp), intent(inout) :: self !! Swiftest massive body object class(swiftest_cb), intent(in) :: cb !! Swiftest central body object ! Internals - integer(I4B) :: i + integer(I4B) :: i, ntp if (self%nbody == 0) return - associate(tp => self, ntp => self%nbody) + associate(tp => self) + ntp = self%nbody +#ifdef DOCONLOC + do concurrent(i = 1:ntp, tp%status(i) /= INACTIVE) shared(cb,tp) +#else do concurrent(i = 1:ntp, tp%status(i) /= INACTIVE) +#endif tp%rh(:, i) = tp%rb(:, i) - cb%rb(:) tp%vh(:, i) = tp%vb(:, i) - cb%vb(:) end do @@ -357,16 +372,21 @@ module subroutine swiftest_util_coord_vb2vh_pl(self, cb) class(swiftest_pl), intent(inout) :: self !! Swiftest massive body object class(swiftest_cb), intent(inout) :: cb !! Swiftest central body object ! Internals - integer(I4B) :: i + integer(I4B) :: i, npl if (self%nbody == 0) return - associate(pl => self, npl => self%nbody) + associate(pl => self) + npl = self%nbody cb%vb(:) = 0.0_DP do i = npl, 1, -1 if (pl%status(i) /= INACTIVE) cb%vb(:) = cb%vb(:) - pl%Gmass(i) * pl%vb(:, i) / cb%Gmass end do +#ifdef DOCONLOC + do concurrent(i = 1:npl) shared(cb,pl) +#else do concurrent(i = 1:npl) +#endif pl%vh(:, i) = pl%vb(:, i) - cb%vb(:) end do end associate @@ -413,19 +433,24 @@ module subroutine swiftest_util_coord_vh2vb_pl(self, cb) class(swiftest_pl), intent(inout) :: self !! Swiftest massive body object class(swiftest_cb), intent(inout) :: cb !! Swiftest central body object ! Internals - integer(I4B) :: i + integer(I4B) :: i, npl real(DP) :: Gmtot if (self%nbody == 0) return - associate(pl => self, npl => self%nbody) + associate(pl => self) + npl = self%nbody Gmtot = cb%Gmass + sum(pl%Gmass(1:npl)) cb%vb(:) = 0.0_DP do i = 1, npl cb%vb(:) = cb%vb(:) - pl%Gmass(i) * pl%vh(:, i) end do cb%vb(:) = cb%vb(:) / Gmtot +#ifdef DOCONLOC + do concurrent(i = 1:npl) shared(cb,pl) +#else do concurrent(i = 1:npl) +#endif pl%vb(:, i) = pl%vh(:, i) + cb%vb(:) end do end associate @@ -508,11 +533,16 @@ module subroutine swiftest_util_coord_rh2rb_tp(self, cb) class(swiftest_tp), intent(inout) :: self !! Swiftest test particle object class(swiftest_cb), intent(in) :: cb !! Swiftest central body object ! Internals - integer(I4B) :: i + integer(I4B) :: i, ntp if (self%nbody == 0) return - associate(tp => self, ntp => self%nbody) + associate(tp => self) + ntp = self%nbody +#ifdef DOCONLOC + do concurrent (i = 1:ntp, tp%status(i) /= INACTIVE) shared(cb,tp) +#else do concurrent (i = 1:ntp, tp%status(i) /= INACTIVE) +#endif tp%rb(:, i) = tp%rh(:, i) + cb%rb(:) end do end associate @@ -1071,20 +1101,25 @@ module subroutine swiftest_util_flatten_eucl_plpl(self, param) class(swiftest_pl), intent(inout) :: self !! Swiftest massive body object class(swiftest_parameters), intent(inout) :: param !! Current run configuration parameters ! Internals - integer(I4B) :: i, j, err - integer(I8B) :: k, npl + integer(I4B) :: err, i, j, npl + integer(I8B) :: k, npl8 - npl = int(self%nbody, kind=I8B) associate(nplpl => self%nplpl) - nplpl = npl * (npl - 1_I8B) / 2_I8B ! number of entries in a strict lower triangle, npl x npl + npl = self%nbody + npl8 = int(npl, kind=I8B) + nplpl = npl8 * (npl8 - 1_I8B) / 2_I8B ! number of entries in a strict lower triangle, npl x npl if (param%lflatten_interactions) then if (allocated(self%k_plpl)) deallocate(self%k_plpl) ! Reset the index array if it's been set previously allocate(self%k_plpl(2, nplpl), stat=err) if (err /=0) then ! An error occurred trying to allocate this big array. This probably means it's too big to fit in memory, and so we will force the run back into triangular mode param%lflatten_interactions = .false. else +#ifdef DOCONLOC + do concurrent (i=1:npl, j=1:npl, j>i) shared(self) local(k) +#else do concurrent (i=1:npl, j=1:npl, j>i) - call swiftest_util_flatten_eucl_ij_to_k(self%nbody, i, j, k) +#endif + call swiftest_util_flatten_eucl_ij_to_k(npl, i, j, k) self%k_plpl(1, k) = i self%k_plpl(2, k) = j end do @@ -1111,17 +1146,18 @@ module subroutine swiftest_util_flatten_eucl_pltp(self, pl, param) class(swiftest_pl), intent(in) :: pl !! Swiftest massive body object class(swiftest_parameters), intent(inout) :: param !! Current run configuration parameters ! Internals - integer(I8B) :: i, j, counter, npl, ntp + integer(I4B) :: i, j + integer(I8B) :: counter, npl8, ntp8 - ntp = int(self%nbody, kind=I8B) - npl = int(pl%nbody, kind=I8B) - associate(npltp => self%npltp) - npltp = npl * ntp + associate(ntp => self%nbody, npl => pl%nbody, npltp => self%npltp) + npl8 = int(npl, kind=I8B) + ntp8 = int(ntp, kind=I8B) + npltp = npl8 * ntp8 if (allocated(self%k_pltp)) deallocate(self%k_pltp) ! Reset the index array if it's been set previously allocate(self%k_pltp(2, npltp)) - do i = 1_I8B, npl - counter = (i - 1_I8B) * npl + 1_I8B - do j = 1_I8B, ntp + counter = 1_I8B + do i = 1, npl + do j = 1, ntp self%k_pltp(1, counter) = i self%k_pltp(2, counter) = j counter = counter + 1_I8B @@ -1145,7 +1181,7 @@ module subroutine swiftest_util_get_energy_and_momentum_system(self, param) class(swiftest_nbody_system), intent(inout) :: self !! Swiftest nbody system object class(swiftest_parameters), intent(in) :: param !! Current run configuration parameters ! Internals - integer(I4B) :: i,j + integer(I4B) :: i,j, npl real(DP) :: kecb, kespincb real(DP), dimension(self%pl%nbody) :: kepl, kespinpl real(DP), dimension(NDIM,self%pl%nbody) :: Lplorbit @@ -1153,7 +1189,8 @@ module subroutine swiftest_util_get_energy_and_momentum_system(self, param) real(DP), dimension(NDIM) :: Lcborbit, Lcbspin real(DP), dimension(NDIM) :: h - associate(nbody_system => self, pl => self%pl, npl => self%pl%nbody, cb => self%cb) + associate(nbody_system => self, pl => self%pl, cb => self%cb) + npl = self%pl%nbody nbody_system%L_orbit(:) = 0.0_DP nbody_system%L_spin(:) = 0.0_DP nbody_system%L_total(:) = 0.0_DP @@ -1171,8 +1208,14 @@ module subroutine swiftest_util_get_energy_and_momentum_system(self, param) nbody_system%be_cb = -3*cb%Gmass * cb%mass / (5 * cb%radius) Lcborbit(:) = cb%mass * (cb%rb(:) .cross. cb%vb(:)) +#ifdef DOCONLOC + do concurrent (i = 1:npl, pl%lmask(i)) shared(pl,Lplorbit,kepl) local(h) +#else do concurrent (i = 1:npl, pl%lmask(i)) - h(:) = pl%rb(:,i) .cross. pl%vb(:,i) +#endif + h(1) = pl%rb(2,i) * pl%vb(3,i) - pl%rb(3,i) * pl%vb(2,i) + h(2) = pl%rb(3,i) * pl%vb(1,i) - pl%rb(1,i) * pl%vb(3,i) + h(3) = pl%rb(1,i) * pl%vb(2,i) - pl%rb(2,i) * pl%vb(1,i) ! Angular momentum from orbit Lplorbit(:,i) = pl%mass(i) * h(:) @@ -1187,7 +1230,11 @@ module subroutine swiftest_util_get_energy_and_momentum_system(self, param) ! For simplicity, we always assume that the rotation pole is the 3rd principal axis Lcbspin(:) = cb%Ip(3) * cb%mass * cb%radius**2 * cb%rot(:) +#ifdef DOCONLOC + do concurrent (i = 1:npl, pl%lmask(i)) shared(pl,Lplspin,kespinpl) +#else do concurrent (i = 1:npl, pl%lmask(i)) +#endif ! Currently we assume that the rotation pole is the 3rd principal axis ! Angular momentum from spin Lplspin(:,i) = pl%mass(i) * pl%Ip(3,i) * pl%radius(i)**2 * pl%rot(:,i) @@ -1198,7 +1245,11 @@ module subroutine swiftest_util_get_energy_and_momentum_system(self, param) nbody_system%ke_spin = 0.5_DP * (kespincb + sum(kespinpl(1:npl), pl%lmask(1:npl))) +#ifdef DOCONLOC + do concurrent (j = 1:NDIM) shared(nbody_system,pl,Lplspin,Lcbspin) +#else do concurrent (j = 1:NDIM) +#endif nbody_system%L_spin(j) = Lcbspin(j) + sum(Lplspin(j,1:npl), pl%lmask(1:npl)) end do else @@ -1219,8 +1270,11 @@ module subroutine swiftest_util_get_energy_and_momentum_system(self, param) end if nbody_system%ke_orbit = 0.5_DP * (kecb + sum(kepl(1:npl), pl%lmask(1:npl))) - +#ifdef DOCONLOC + do concurrent (j = 1:NDIM) shared(nbody_system,pl,Lcborbit,Lplorbit,npl) +#else do concurrent (j = 1:NDIM) +#endif nbody_system%L_orbit(j) = Lcborbit(j) + sum(Lplorbit(j,1:npl), pl%lmask(1:npl)) end do @@ -1264,7 +1318,11 @@ module subroutine swiftest_util_get_potential_energy_flat(npl, nplpl, k_plpl, lm pecb(1:npl) = 0.0_DP end where +#ifdef DOCONLOC + do concurrent(i = 1:npl, lmask(i)) shared(lmask,pecb,GMcb,mass,rb) +#else do concurrent(i = 1:npl, lmask(i)) +#endif pecb(i) = -GMcb * mass(i) / norm2(rb(:,i)) end do @@ -1311,7 +1369,11 @@ module subroutine swiftest_util_get_potential_energy_triangular(npl, lmask, GMcb pecb(1:npl) = 0.0_DP end where +#ifdef DOCONLOC + do concurrent(i = 1:npl, lmask(i)) shared(lmask, pecb, GMcb, mass, rb, lmask) +#else do concurrent(i = 1:npl, lmask(i)) +#endif pecb(i) = -GMcb * mass(i) / norm2(rb(:,i)) end do @@ -1322,7 +1384,11 @@ module subroutine swiftest_util_get_potential_energy_triangular(npl, lmask, GMcb !$omp reduction(+:pe) do i = 1, npl if (lmask(i)) then +#ifdef DOCONLOC + do concurrent(j = i+1:npl, lmask(i) .and. lmask(j)) shared(lmask, pepl, rb, mass, Gmass, lmask) +#else do concurrent(j = i+1:npl, lmask(i) .and. lmask(j)) +#endif pepl(j) = - (Gmass(i) * mass(j)) / norm2(rb(:, i) - rb(:, j)) end do pe = pe + sum(pepl(i+1:npl), lmask(i+1:npl)) @@ -1516,7 +1582,6 @@ module subroutine swiftest_util_peri(n,m, r, v, atp, q, isperi) integer(I4B) :: i real(DP), dimension(n) :: e !! Temporary, just to make use of the xv2aeq subroutine real(DP) :: vdotr - character(len=NAMELEN) :: message do i = 1,n vdotr = dot_product(r(:,i),v(:,i)) @@ -1582,7 +1647,8 @@ module subroutine swiftest_util_rearray_pl(self, nbody_system, param) class(swiftest_parameters), intent(inout) :: param !! Current run configuration parameters ! Internals class(swiftest_pl), allocatable :: tmp !! The discarded body list. - integer(I4B) :: i, k, npl, nadd, nencmin, nenc_old, idnew1, idnew2, idold1, idold2 + integer(I4B) :: i, npl, nadd, idnew1, idnew2, idold1, idold2 + integer(I8B) :: k, nenc_old, nencmin logical, dimension(:), allocatable :: lmask class(encounter_list), allocatable :: plplenc_old logical :: lencounter @@ -1615,7 +1681,10 @@ module subroutine swiftest_util_rearray_pl(self, nbody_system, param) npl = pl%nbody end if - if (npl == 0) return + if (npl == 0) then + if (param%lmtiny_pl) pl%nplm = 0 + return + end if ! Reset all of the status flags for this body pl%status(1:npl) = ACTIVE @@ -1633,6 +1702,7 @@ module subroutine swiftest_util_rearray_pl(self, nbody_system, param) elsewhere pl%info(1:npl)%particle_type = PL_TYPE_NAME end where + pl%nplm = count(.not.pl%lmtiny(1:npl)) end if ! Reindex the new list of bodies @@ -1647,7 +1717,7 @@ module subroutine swiftest_util_rearray_pl(self, nbody_system, param) if (allocated(nbody_system%plpl_encounter)) then ! Store the original plplenc list so we don't remove any of the original encounters nenc_old = nbody_system%plpl_encounter%nenc - if (nenc_old > 0) then + if (nenc_old > 0_I8B) then allocate(plplenc_old, source=nbody_system%plpl_encounter) call plplenc_old%copy(nbody_system%plpl_encounter) end if @@ -1669,10 +1739,10 @@ module subroutine swiftest_util_rearray_pl(self, nbody_system, param) end select ! Re-index the encounter list as the index values may have changed - if (nenc_old > 0) then + if (nenc_old > 0_I8B) then nencmin = min(nbody_system%plpl_encounter%nenc, plplenc_old%nenc) nbody_system%plpl_encounter%nenc = nencmin - do k = 1, nencmin + do k = 1_I8B, nencmin idnew1 = nbody_system%plpl_encounter%id1(k) idnew2 = nbody_system%plpl_encounter%id2(k) idold1 = plplenc_old%id1(k) @@ -1713,7 +1783,7 @@ module subroutine swiftest_util_rearray_pl(self, nbody_system, param) end if nencmin = count(lmask(:)) nbody_system%plpl_encounter%nenc = nencmin - if (nencmin > 0) then + if (nencmin > 0_I8B) then nbody_system%plpl_encounter%index1(1:nencmin) = pack(nbody_system%plpl_encounter%index1(1:nenc_old), lmask(1:nenc_old)) nbody_system%plpl_encounter%index2(1:nencmin) = pack(nbody_system%plpl_encounter%index2(1:nenc_old), lmask(1:nenc_old)) nbody_system%plpl_encounter%id1(1:nencmin) = pack(nbody_system%plpl_encounter%id1(1:nenc_old), lmask(1:nenc_old)) @@ -3264,9 +3334,7 @@ module subroutine swiftest_util_version() "Authors:", //, & " The Purdue University Swiftest Development team ", /, & " Lead by David A. Minton ", /, & - " Single loop blocking by Jacob R. Elliott", /, & - " Fragmentation by Carlisle A. Wishard and", //, & - " Jennifer L. L. Poutplin ", //, & + " Carlisle Wishard, Jennifer Pouplin, Jacob Elliott, Dana Singh." & "Please address comments and questions to:", //, & " David A. Minton", /, & " Department Earth, Atmospheric, & Planetary Sciences ",/, & @@ -3280,22 +3348,6 @@ module subroutine swiftest_util_version() "************************************************", /) - 100 FORMAT(/, "************* SWIFTER: Version ", F3.1, " *************", //, & - "Authors:", //, & - " Martin Duncan: Queen's University", /, & - " Hal Levison : Southwest Research Institute", //, & - "Please address comments and questions to:", //, & - " Hal Levison or David Kaufmann", /, & - " Department of Space Studies", /, & - " Southwest Research Institute", /, & - " 1050 Walnut Street, Suite 400", /, & - " Boulder, Colorado 80302", /, & - " 303-546-0290 (HFL), 720-240-0119 (DEK)", /, & - " 303-546-9687 (fax)", /, & - " hal@gort.boulder.swri.edu (HFL)", /, & - " kaufmann@boulder.swri.edu (DEK)", //, & - "************************************************", /) - return end subroutine swiftest_util_version diff --git a/src/symba/symba_encounter_check.f90 b/src/symba/symba_encounter_check.f90 index 895a9f34d..e66791409 100644 --- a/src/symba/symba_encounter_check.f90 +++ b/src/symba/symba_encounter_check.f90 @@ -96,11 +96,12 @@ module function symba_encounter_check_list_plpl(self, param, nbody_system, dt, i integer(I4B), intent(in) :: irec !! Current recursion level logical :: lany_encounter !! Returns true if there is at least one close encounter ! Internals - integer(I4B) :: i, j, k, lidx, nenc_enc + integer(I4B) :: i, j, nenc_enc + integer(I8B) :: k, lidx real(DP), dimension(NDIM) :: xr, vr real(DP) :: rlim2, rji2, rcrit12 logical, dimension(:), allocatable :: lencmask, lencounter - integer(I4B), dimension(:), allocatable :: eidx + integer(I8B), dimension(:), allocatable :: eidx lany_encounter = .false. if (self%nenc == 0) return @@ -116,10 +117,14 @@ module function symba_encounter_check_list_plpl(self, param, nbody_system, dt, i allocate(eidx(nenc_enc)) allocate(lencounter(nenc_enc)) - eidx(:) = pack([(k, k = 1, self%nenc)], lencmask(:)) + eidx(:) = pack([(k, k = 1_I8B, self%nenc)], lencmask(:)) lencounter(:) = .false. - do concurrent(lidx = 1:nenc_enc) +#ifdef DOCONLOC + do concurrent(lidx = 1_I8B:nenc_enc) shared(self,pl,eidx,lencounter,dt) local(i,j,k,xr,vr,rcrit12,rlim2,rji2) +#else + do concurrent(lidx = 1_I8B:nenc_enc) +#endif k = eidx(lidx) i = self%index1(k) j = self%index2(k) @@ -137,8 +142,8 @@ module function symba_encounter_check_list_plpl(self, param, nbody_system, dt, i lany_encounter = any(lencounter(:)) if (lany_encounter) then nenc_enc = count(lencounter(:)) - eidx(1:nenc_enc) = pack(eidx(:), lencounter(:)) - do lidx = 1, nenc_enc + eidx(1_I8B:nenc_enc) = pack(eidx(:), lencounter(:)) + do lidx = 1_I8B, nenc_enc k = eidx(lidx) i = self%index1(k) j = self%index2(k) @@ -164,11 +169,12 @@ module function symba_encounter_check_list_pltp(self, param, nbody_system, dt, i integer(I4B), intent(in) :: irec !! Current recursion level logical :: lany_encounter !! Returns true if there is at least one close encounter ! Internals - integer(I4B) :: i, j, k, lidx, nenc_enc + integer(I4B) :: i, j, nenc_enc + integer(I8B) :: k, lidx real(DP), dimension(NDIM) :: xr, vr real(DP) :: rlim2, rji2 logical, dimension(:), allocatable :: lencmask, lencounter - integer(I4B), dimension(:), allocatable :: eidx + integer(I8B), dimension(:), allocatable :: eidx lany_encounter = .false. if (self%nenc == 0) return @@ -186,10 +192,13 @@ module function symba_encounter_check_list_pltp(self, param, nbody_system, dt, i allocate(eidx(nenc_enc)) allocate(lencounter(nenc_enc)) - eidx(:) = pack([(k, k = 1, self%nenc)], lencmask(:)) + eidx(:) = pack([(k, k = 1_I8B, self%nenc)], lencmask(:)) lencounter(:) = .false. - - do concurrent(lidx = 1:nenc_enc) +#ifdef DOCONLOC + do concurrent(lidx = 1_I8B:nenc_enc) shared(self,pl,tp,eidx,lencounter,dt) local(i,j,k,xr,vr,rlim2,rji2) +#else + do concurrent(lidx = 1_I8B:nenc_enc) +#endif k = eidx(lidx) i = self%index1(k) j = self%index2(k) @@ -207,8 +216,8 @@ module function symba_encounter_check_list_pltp(self, param, nbody_system, dt, i lany_encounter = any(lencounter(:)) if (lany_encounter) then nenc_enc = count(lencounter(:)) - eidx(1:nenc_enc) = pack(eidx(:), lencounter(:)) - do lidx = 1, nenc_enc + eidx(1_I8B:nenc_enc) = pack(eidx(:), lencounter(:)) + do lidx = 1_I8B, nenc_enc k = eidx(lidx) i = self%index1(k) j = self%index2(k) diff --git a/src/symba/symba_kick.f90 b/src/symba/symba_kick.f90 index 0c75e1054..2db14456c 100644 --- a/src/symba/symba_kick.f90 +++ b/src/symba/symba_kick.f90 @@ -91,7 +91,8 @@ module subroutine symba_kick_getacch_tp(self, nbody_system, param, t, lbeg) real(DP), intent(in) :: t !! Current time logical, intent(in) :: lbeg !! Logical flag that determines whether or not this is the beginning or end of the step ! Internals - integer(I4B) :: i, j, k + integer(I4B) :: i, j + integer(I8B) :: k real(DP) :: rjj, fac real(DP), dimension(NDIM) :: dx @@ -142,7 +143,7 @@ module subroutine symba_kick_list_plpl(self, nbody_system, dt, irec, sgn) real(DP) :: r, rr, ri, ris, rim1, r2, ir3, fac, faci, facj real(DP), dimension(NDIM) :: dx logical, dimension(:), allocatable :: lgoodlevel - integer(I4B), dimension(:), allocatable :: good_idx + integer(I8B), dimension(:), allocatable :: good_idx if (self%nenc == 0) return @@ -170,9 +171,13 @@ module subroutine symba_kick_list_plpl(self, nbody_system, dt, irec, sgn) ngood = count(lgoodlevel(:)) if (ngood > 0_I8B) then allocate(good_idx(ngood)) - good_idx(:) = pack([(i, i = 1, nenc)], lgoodlevel(:)) + good_idx(:) = pack([(k, k = 1_I8B, nenc)], lgoodlevel(:)) +#ifdef DOCONLOC + do concurrent (k = 1:ngood) shared(self,pl,good_idx) local(i,j) +#else do concurrent (k = 1:ngood) +#endif i = self%index1(good_idx(k)) j = self%index2(good_idx(k)) pl%ah(:,i) = 0.0_DP @@ -208,7 +213,7 @@ module subroutine symba_kick_list_plpl(self, nbody_system, dt, irec, sgn) end do ngood = count(lgoodlevel(:)) if (ngood == 0_I8B) return - good_idx(1:ngood) = pack([(i, i = 1, nenc)], lgoodlevel(:)) + good_idx(1:ngood) = pack([(k, k = 1_I8B, nenc)], lgoodlevel(:)) do k = 1, ngood i = self%index1(good_idx(k)) @@ -247,7 +252,7 @@ module subroutine symba_kick_list_pltp(self, nbody_system, dt, irec, sgn) real(DP) :: r, rr, ri, ris, rim1, r2, ir3, fac, faci real(DP), dimension(NDIM) :: dx logical, dimension(:), allocatable :: lgoodlevel - integer(I4B), dimension(:), allocatable :: good_idx + integer(I8B), dimension(:), allocatable :: good_idx if (self%nenc == 0) return @@ -280,10 +285,13 @@ module subroutine symba_kick_list_pltp(self, nbody_system, dt, irec, sgn) if (ngood > 0_I8B) then allocate(good_idx(ngood)) - good_idx(:) = pack([(i, i = 1, nenc)], lgoodlevel(:)) - + good_idx(:) = pack([(k, k = 1_I8B, nenc)], lgoodlevel(:)) +#ifdef DOCONLOC + do concurrent (k = 1_I8B:ngood) shared(self,tp,good_idx) local(j) +#else do concurrent (k = 1_I8B:ngood) +#endif j = self%index2(good_idx(k)) tp%ah(:,j) = 0.0_DP end do @@ -316,7 +324,7 @@ module subroutine symba_kick_list_pltp(self, nbody_system, dt, irec, sgn) end do ngood = count(lgoodlevel(:)) if (ngood == 0_I8B) return - good_idx(1:ngood) = pack([(i, i = 1, nenc)], lgoodlevel(:)) + good_idx(1:ngood) = pack([(k, k = 1_I8B, nenc)], lgoodlevel(:)) do k = 1, ngood j = self%index2(good_idx(k)) diff --git a/src/symba/symba_step.f90 b/src/symba/symba_step.f90 index 6e72c3e95..22cdddeb6 100644 --- a/src/symba/symba_step.f90 +++ b/src/symba/symba_step.f90 @@ -27,7 +27,6 @@ module subroutine symba_step_system(self, param, t, dt) real(DP), intent(in) :: dt !! Current stepsize ! Internals logical :: lencounter - type(walltimer) :: timer1,timer2,timer3 !! Object used for computing elapsed wall time select type(pl => self%pl) class is (symba_pl) diff --git a/src/symba/symba_util.f90 b/src/symba/symba_util.f90 index 94b6aa9f5..c7abd79f3 100644 --- a/src/symba/symba_util.f90 +++ b/src/symba/symba_util.f90 @@ -71,8 +71,6 @@ module subroutine symba_util_dealloc_pl(self) implicit none ! Arguments class(symba_pl), intent(inout) :: self !! SyMBA massive body object - ! Internals - integer(I4B) :: i if (allocated(self%levelg)) deallocate(self%levelg) if (allocated(self%levelm)) deallocate(self%levelm) @@ -352,8 +350,6 @@ module subroutine symba_util_setup_pl(self, n, param) class(symba_pl), intent(inout) :: self !! SyMBA massive body object integer(I4B), intent(in) :: n !! Number of particles to allocate space for class(swiftest_parameters), intent(in) :: param !! Current run configuration parameter - ! Internals - integer(I4B) :: i !> Call allocation method for parent class. call self%helio_pl%setup(n, param) diff --git a/src/whm/whm_gr.f90 b/src/whm/whm_gr.f90 index b0891f006..6e23ca21b 100644 --- a/src/whm/whm_gr.f90 +++ b/src/whm/whm_gr.f90 @@ -25,8 +25,6 @@ pure module subroutine whm_gr_kick_getacch_pl(self, param) ! Internals integer(I4B) :: i real(DP), dimension(NDIM) :: suma - real(DP), dimension(:, :), allocatable :: aj - real(DP) :: beta, rjmag4 if (self%nbody == 0) return @@ -56,8 +54,6 @@ pure module subroutine whm_gr_kick_getacch_tp(self, param) class(whm_tp), intent(inout) :: self !! WHM massive body particle data structure class(swiftest_parameters), intent(in) :: param !! Current run configuration parameters ! Internals - integer(I4B) :: i - real(DP) :: rjmag4, beta if (self%nbody == 0) return @@ -84,12 +80,18 @@ pure module subroutine whm_gr_p4_pl(self, nbody_system, param, dt) class(swiftest_parameters), intent(in) :: param !! Current run configuration parameters real(DP), intent(in) :: dt !! Step size ! Internals - integer(I4B) :: i + integer(I4B) :: i, npl + + if (self%nbody == 0) return - associate(pl => self, npl => self%nbody) - if (npl == 0) return - do concurrent(i = 1:npl, pl%lmask(i)) - call swiftest_gr_p4_pos_kick(param, pl%xj(:, i), pl%vj(:, i), dt) + associate(xj => self%xj, vj => self%vj, lmask => self%lmask, inv_c2 => param%inv_c2) + npl = self%nbody +#ifdef DOCONLOC + do concurrent(i = 1:npl, lmask(i)) shared(lmask, inv_c2, xj, vj,dt) +#else + do concurrent(i = 1:npl, lmask(i)) +#endif + call swiftest_gr_p4_pos_kick(inv_c2, xj(1,i), xj(2,i), xj(3,i), vj(1,i), vj(2,i), vj(3,i), dt) end do end associate @@ -111,12 +113,17 @@ pure module subroutine whm_gr_p4_tp(self, nbody_system, param, dt) class(swiftest_parameters), intent(in) :: param !! Current run configuration parameters real(DP), intent(in) :: dt !! Step size ! Internals - integer(I4B) :: i + integer(I4B) :: i, ntp - associate(tp => self, ntp => self%nbody) + associate(rh => self%rh, vh => self%vh, lmask => self%lmask, inv_c2 => param%inv_c2) + ntp = self%nbody if (ntp == 0) return - do concurrent(i = 1:ntp, tp%lmask(i)) - call swiftest_gr_p4_pos_kick(param, tp%rh(:, i), tp%vh(:, i), dt) +#ifdef DOCONLOC + do concurrent(i = 1:ntp, lmask(i)) shared(lmask, rh, vh, inv_c2, dt) +#else + do concurrent(i = 1:ntp, lmask(i)) +#endif + call swiftest_gr_p4_pos_kick(inv_c2, rh(1,i), rh(2,i), rh(3,i), vh(1,i), vh(2,i), vh(3,i), dt) end do end associate diff --git a/src/whm/whm_kick.f90 b/src/whm/whm_kick.f90 index 403678ed6..bcddf0de7 100644 --- a/src/whm/whm_kick.f90 +++ b/src/whm/whm_kick.f90 @@ -82,22 +82,32 @@ module subroutine whm_kick_getacch_tp(self, nbody_system, param, t, lbeg) real(DP), intent(in) :: t !! Current time logical, intent(in) :: lbeg !! Logical flag that determines whether or not this is the beginning or end of the step ! Internals - integer(I4B) :: i + integer(I4B) :: i, npl, ntp real(DP), dimension(NDIM) :: ah0 - associate(tp => self, ntp => self%nbody, pl => nbody_system%pl, cb => nbody_system%cb, npl => nbody_system%pl%nbody) + associate(tp => self, pl => nbody_system%pl, cb => nbody_system%cb) + npl = nbody_system%pl%nbody + ntp = self%nbody if (ntp == 0 .or. npl == 0) return nbody_system%lbeg = lbeg if (lbeg) then ah0(:) = whm_kick_getacch_ah0(pl%Gmass(1:npl), pl%rbeg(:, 1:npl), npl) +#ifdef DOCONLOC + do concurrent(i = 1:ntp, tp%lmask(i)) shared(tp,ah0) +#else do concurrent(i = 1:ntp, tp%lmask(i)) +#endif tp%ah(:, i) = tp%ah(:, i) + ah0(:) end do call tp%accel_int(param, pl%Gmass(1:npl), pl%rbeg(:, 1:npl), npl) else ah0(:) = whm_kick_getacch_ah0(pl%Gmass(1:npl), pl%rend(:, 1:npl), npl) +#ifdef DOCONLOC + do concurrent(i = 1:ntp, tp%lmask(i)) shared(tp,ah0) +#else do concurrent(i = 1:ntp, tp%lmask(i)) +#endif tp%ah(:, i) = tp%ah(:, i) + ah0(:) end do call tp%accel_int(param, pl%Gmass(1:npl), pl%rend(:, 1:npl), npl) @@ -151,16 +161,19 @@ pure subroutine whm_kick_getacch_ah1(cb, pl) class(swiftest_cb), intent(in) :: cb !! WHM central body object class(whm_pl), intent(inout) :: pl !! WHM massive body object ! Internals - integer(I4B) :: i + integer(I4B) :: i, npl real(DP), dimension(NDIM) :: ah1h, ah1j - associate(npl => pl%nbody) - do concurrent (i = 2:npl, pl%lmask(i)) - ah1j(:) = pl%xj(:, i) * pl%ir3j(i) - ah1h(:) = pl%rh(:, i) * pl%ir3h(i) - pl%ah(:, i) = pl%ah(:, i) + cb%Gmass * (ah1j(:) - ah1h(:)) - end do - end associate + npl = pl%nbody +#ifdef DOCONLOC + do concurrent (i = 2:npl, pl%lmask(i)) shared(pl,cb) local(ah1j,ah1h) +#else + do concurrent (i = 2:npl, pl%lmask(i)) +#endif + ah1j(:) = pl%xj(:, i) * pl%ir3j(i) + ah1h(:) = pl%rh(:, i) * pl%ir3h(i) + pl%ah(:, i) = pl%ah(:, i) + cb%Gmass * (ah1j(:) - ah1h(:)) + end do return end subroutine whm_kick_getacch_ah1 @@ -178,22 +191,25 @@ pure subroutine whm_kick_getacch_ah2(cb, pl) class(swiftest_cb), intent(in) :: cb !! Swiftest central body object class(whm_pl), intent(inout) :: pl !! WHM massive body object ! Internals - integer(I4B) :: i + integer(I4B) :: i, npl real(DP) :: etaj, fac real(DP), dimension(NDIM) :: ah2, ah2o - associate(npl => pl%nbody) - ah2(:) = 0.0_DP - ah2o(:) = 0.0_DP - etaj = cb%Gmass - do concurrent(i = 2:npl, pl%lmask(i)) - etaj = etaj + pl%Gmass(i - 1) - fac = pl%Gmass(i) * cb%Gmass * pl%ir3j(i) / etaj - ah2(:) = ah2o + fac * pl%xj(:, i) - pl%ah(:,i) = pl%ah(:, i) + ah2(:) - ah2o(:) = ah2(:) - end do - end associate + npl = pl%nbody + ah2(:) = 0.0_DP + ah2o(:) = 0.0_DP + etaj = cb%Gmass +#ifdef DOCONLOC + do concurrent(i = 2:npl, pl%lmask(i)) shared(pl,cb,ah2,ah2o) local(etaj,fac) +#else + do concurrent(i = 2:npl, pl%lmask(i)) +#endif + etaj = etaj + pl%Gmass(i - 1) + fac = pl%Gmass(i) * cb%Gmass * pl%ir3j(i) / etaj + ah2(:) = ah2o(:) + fac * pl%xj(:, i) + pl%ah(:,i) = pl%ah(:, i) + ah2(:) + ah2o(:) = ah2(:) + end do return end subroutine whm_kick_getacch_ah2 @@ -215,9 +231,10 @@ module subroutine whm_kick_vh_pl(self, nbody_system, param, t, dt, lbeg) real(DP), intent(in) :: dt !! Stepsize logical, intent(in) :: lbeg !! Logical flag indicating whether this is the beginning of the half step or not. ! Internals - integer(I4B) :: i + integer(I4B) :: i, npl - associate(pl => self, npl => self%nbody, cb => nbody_system%cb) + associate(pl => self, cb => nbody_system%cb) + npl = self%nbody if (npl == 0) return if (lbeg) then if (pl%lfirst) then @@ -232,7 +249,11 @@ module subroutine whm_kick_vh_pl(self, nbody_system, param, t, dt, lbeg) call pl%accel(nbody_system, param, t, lbeg) call pl%set_beg_end(rend = pl%rh) end if +#ifdef DOCONLOC + do concurrent(i = 1:npl, pl%lmask(i)) shared(pl,dt) +#else do concurrent(i = 1:npl, pl%lmask(i)) +#endif pl%vh(:, i) = pl%vh(:, i) + pl%ah(:, i) * dt end do end associate @@ -257,25 +278,38 @@ module subroutine whm_kick_vh_tp(self, nbody_system, param, t, dt, lbeg) real(DP), intent(in) :: dt !! Stepsize logical, intent(in) :: lbeg !! Logical flag indicating whether this is the beginning of the half step or not. ! Internals - integer(I4B) :: i + integer(I4B) :: i, ntp if (self%nbody == 0) return - associate(tp => self, ntp => self%nbody) + associate(tp => self) + ntp = self%nbody if (tp%lfirst) then +#ifdef DOCONLOC + do concurrent(i = 1:ntp, tp%lmask(i)) shared(tp) +#else do concurrent(i = 1:ntp, tp%lmask(i)) +#endif tp%ah(:, i) = 0.0_DP end do call tp%accel(nbody_system, param, t, lbeg=.true.) tp%lfirst = .false. end if if (.not.lbeg) then +#ifdef DOCONLOC + do concurrent(i = 1:ntp, tp%lmask(i)) shared(tp) +#else do concurrent(i = 1:ntp, tp%lmask(i)) +#endif tp%ah(:, i) = 0.0_DP end do call tp%accel(nbody_system, param, t, lbeg) end if +#ifdef DOCONLOC + do concurrent(i = 1:ntp, tp%lmask(i)) shared(tp) +#else do concurrent(i = 1:ntp, tp%lmask(i)) +#endif tp%vh(:, i) = tp%vh(:, i) + tp%ah(:, i) * dt end do end associate