From 922c3f00aac8a56d5d97c732f5062bad2d642b52 Mon Sep 17 00:00:00 2001 From: David Minton Date: Fri, 19 May 2023 12:04:53 -0400 Subject: [PATCH] Reviewed compiler flags and improved the consistency, and added ability to turn off OpenMP SIMD directives when building for CPU-agnostic container --- CMakeLists.txt | 1 + cmake/Modules/FindOpenMP_Fortran.cmake | 31 ++- cmake/Modules/SetFortranFlags.cmake | 272 ++++++++++++++----------- 3 files changed, 167 insertions(+), 137 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0aed20380..a047c0163 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -30,6 +30,7 @@ ENDIF(NOT CMAKE_Fortran_COMPILER_SUPPORTS_F90) # Set some options the user may choose OPTION(USE_COARRAY "Use Coarray Fortran for parallelization of test particles" OFF) OPTION(USE_OPENMP "Use OpenMP for parallelization" ON) +OPTION(USE_SIMD "Use SIMD vectorization" ON) # Locate and set parallelization libraries. There are some CMake peculiarities # taken care of here, such as the fact that the FindOpenMP routine doesn't know diff --git a/cmake/Modules/FindOpenMP_Fortran.cmake b/cmake/Modules/FindOpenMP_Fortran.cmake index 32777569e..06d679e7c 100644 --- a/cmake/Modules/FindOpenMP_Fortran.cmake +++ b/cmake/Modules/FindOpenMP_Fortran.cmake @@ -25,24 +25,19 @@ INCLUDE (${CMAKE_ROOT}/Modules/FindPackageHandleStandardArgs.cmake) -SET (OpenMP_Fortran_FLAG_CANDIDATES - #Intel - "-qopenmp" - #Intel windows - "/Qopenmp" - #Gnu - "-fopenmp" - #Portland Group - "-mp" - #Empty, if compiler automatically accepts openmp - " " - #Sun - "-xopenmp" - #HP - "+Oopenmp" - #IBM XL C/c++ - "-qsmp" -) +IF (USE_SIMD) + SET (OpenMP_Fortran_FLAG_CANDIDATES + "-qopenmp" # Intel + "/Qopenmp" # Intel Windows + "-fopenmp" # GNU + ) +ELSE () + SET (OpenMP_Fortran_FLAG_CANDIDATES + "-qopenmp -qno-openmp-simd" # Intel + "/Qopenmp /Qopenmp-simd-" # Intel Windows + "-fopenmp" # GNU + ) +ENDIF (USE_SIMD) IF (DEFINED OpenMP_Fortran_FLAGS) SET (OpenMP_Fortran_FLAG_CANDIDATES) diff --git
a/cmake/Modules/SetFortranFlags.cmake b/cmake/Modules/SetFortranFlags.cmake index 76f23f5cf..ecf9c34e1 100644 --- a/cmake/Modules/SetFortranFlags.cmake +++ b/cmake/Modules/SetFortranFlags.cmake @@ -64,9 +64,51 @@ ENDIF(CMAKE_Fortran_FLAGS_RELEASE AND CMAKE_Fortran_FLAGS_TESTING AND CMAKE_Fort ### GENERAL FLAGS ### ##################### +# Free form +SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS}" + Fortran "-ffree-form" # GNU + ) + # Don't add underscores in symbols for C-compatability SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS}" - Fortran "-fno-underscoring") + Fortran "-fno-underscoring" # GNU + ) + +# Determines whether the current Fortran Standard behavior of the compiler is fully implemented. +SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS}" + Fortran "-standard-semantics" # Intel + "/standard-semantics" # Intel Windows + ) + +# Tells the compiler to issue compile-time messages for nonstandard language elements (Fortran 2018). +SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS}" + Fortran "-stand f18" # Intel + "/stand:f18" # Intel Windows + "-std=f2018" # GNU + ) + +# Allows for lines longer than 80 characters without truncation +SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS}" + Fortran "-ffree-line-length-none" # GNU (gfortran) + ) + +# Disables right margin wrapping in list-directed output +SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS}" + Fortran "-no-wrap-margin" # Intel + "/wrap-margin-" # Intel Windows + ) + +# Aligns a variable to a specified boundary and offset +SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS}" + Fortran "-align all -align array64byte" # Intel + "/align:all /align:array64byte" # Intel Windows + ) + +# Enables changing the variable and array memory layout +SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS}" + Fortran "-pad" # Intel + "/Qpad" # Intel Windows + ) # There is some bug where -march=native doesn't work on Mac IF(APPLE) @@
-74,18 +116,10 @@ IF(APPLE) ELSE() SET(GNUNATIVE "-march=native") ENDIF() -# Optimize for the host's architecture -SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS}" - Fortran "-xhost" # Intel - "/QxHost" # Intel Windows - ${GNUNATIVE} # GNU - ) - ################### ### DEBUG FLAGS ### ################### -# NOTE: debugging symbols (-g or /debug:full) are already on by default # Disable optimizations SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}" @@ -106,33 +140,34 @@ SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}" Fortran "-traceback" # Intel Group "/traceback" # Intel Windows "-fbacktrace" # GNU (gfortran) - "-ftrace=full" # GNU (g95) ) # Sanitize SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}" - Fortran "-fsanitize=address" # Gnu + Fortran "-fsanitize=address,undefined" # Gnu ) # Check everything SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}" - Fortran "-check" # Intel - "/check" # Intel Windows + Fortran "-check all" # Intel + "/check:all" # Intel Windows "-fcheck=all" # GNU ) SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}" - Fortran "-fstack-check" # GNU + Fortran "-fstack-check" # GNU ) # Initializes matrices/arrays with NaN values SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}" - Fortran "-init=snan,arrays" # Intel + Fortran "-init=snan,arrays" # Intel + "/Qinit:snan,arrays" # Intel Windows ) # Does not generate an interface block for each routine in a source file SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}" Fortran "-nogen-interfaces" # Intel + "/nogen-interfaces" # Intel Windows ) # Does not generate aposition independent executable @@ -143,74 +178,43 @@ SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}" # Does not set denormal results from floating-point calculations to zero SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG
"${CMAKE_Fortran_FLAGS_DEBUG}" Fortran "-no-ftz" # Intel + "/Qftz-" # Intel Windows ) # Enables floating-point invalid, divide-by-zero, and overflow exceptions SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}" - Fortran "-fpe-all=0" # Intel + Fortran "-fpe-all=0" # Intel + "/fpe-all:0" # Intel Windows; NOTE(review): the GNU candidate below traps zero,overflow,underflow (not invalid) - confirm intent "-ffpe-trap=zero,overflow,underflow" # GNU ) -# Improves floating-point precision and consistency -SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}" - Fortran "-mp1" # Intel - ) - -# Strict model for floating-point calculations (precise and except) -SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}" - Fortran "-fp-model=strict" # Intel - ) - # Enables floating-point invalid, divide-by-zero, and overflow exceptions SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}" - Fortran "-fpe0" # Intel + Fortran "-fpe0" # Intel + "/fpe:0" # Intel Windows ) # Enables debug info SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}" Fortran "-debug all" # Intel + "/debug:all" # Intel Windows ) -# Aligns a variable to a specified boundary and offset +# Disables additional interprocedural optimizations for a single file compilation SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}" - Fortran "-align all -align array64byte" # Intel + Fortran "-no-ip" # Intel + "/Qip-" # Intel Windows ) -# Enables changing the variable and array memory layout -SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}" - Fortran "-pad" # Intel - ) - -# Enables additional interprocedural optimizations for a single file cimpilation -SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}" - Fortran "-ip" # Intel - ) - -# Improves precision when dividing floating-points -SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}" - Fortran "-prec-div" # Intel - ) - -# Improves precision when taking the square root of
floating-points -SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}" - Fortran "-prec-sqrt" # Intel - ) - -# Treat parentheses in accordance with the Fortran standard (ifort 10 only) -SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}" - Fortran "-assume protect-parens" # Intel - ) - -# Checks the bounds of arrays at run-time +# Disables prefetch insertion optimization SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}" - Fortran "-CB" # Intel + Fortran "-qno-opt-prefetch" # Intel + "/Qopt-prefetch-" # Intel Windows ) - -# Allows for lines longer than 80 characters without truncation + SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}" - Fortran "-no-wrap-margin" # Intel - "-ffree-line-length-none" # GNU (gfortran) - ) + Fortran "-fstack-check" # GNU; NOTE(review): this flag is already requested for DEBUG earlier in this file - duplicate call, confirm it is intended + ) ##################### ### TESTING FLAGS ### @@ -218,8 +222,8 @@ SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}" # Optimizations SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_TESTING "${CMAKE_Fortran_FLAGS_DEBUG}" - Fortran REQUIRED "-O3" # All compilers not on Windows - "/O3" # Intel Windows + Fortran REQUIRED "-O3" # All compilers not on Windows + "/O3" # Intel Windows ) ##################### @@ -229,9 +233,9 @@ SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_TESTING "${CMAKE_Fortran_FLAGS_DEBUG}" # Unroll loops SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE}" - Fortran "-unroll" # Intel - "/unroll" # Intel Windows - "-funroll-loops" # GNU + Fortran "-unroll" # Intel + "/unroll" # Intel Windows + "-funroll-loops" # GNU ) # Inline functions @@ -241,91 +245,121 @@ SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE}" "-finline-functions" # GNU ) - -# Allows for lines longer than 80 characters without truncation -SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE}" - Fortran "-no-wrap-margin" # Intel - "-ffree-line-length-none" # GNU (gfortran) -
) - -# Disables prefetch insertion optimization -SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE}" - Fortran "-qopt-prefetch=0" # Intel - ) - # Calls the Matrix Multiply library SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE}" - Fortran "-qopt-matmul" # Intel + Fortran "-qopt-matmul" # Intel + "/Qopt-matmul" # Intel Windows ) # Saves the compiler options and version number to the executable SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE}" - Fortran "-sox" # Intel - ) - -# Enforces vectorization of loops -SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE}" - Fortran "-simd" # Intel + Fortran "-sox" # Intel ) # Aligns a variable to a specified boundary and offset SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE}" - Fortran "-align all" # Intel - ) - -# Generate an extended set of vector functions -SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE}" - Fortran "-vecabi=cmdtarget" # Intel + Fortran "-align all" # Intel + "/align:all" # Intel Windows ) # No floating-point exceptions SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE}" - Fortran "-fp-model no-except" # Intel + Fortran "-fp-model no-except" # Intel + "/fp:except-" # Intel Windows ) # Generate fused multiply-add instructions SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE}" - Fortran "-fma" # Intel - ) + Fortran "-fma" # Intel + "/Qfma" # Intel Windows + ) -# Generate fused multiply-add instructions +# Tells the compiler to link to certain libraries in the Intel oneAPI Math Kernel Library (oneMKL).
SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE}" - Fortran "-qmkl=cluster" # Intel - Fortran "-qmkl" # Intel - Fortran "-mkl" # Old Intel - ) + Fortran "-qmkl=cluster" # Intel + "-qmkl" # Intel + "/Qmkl:cluster" # Intel Windows + "/Qmkl" # Intel Windows + ) + +# Enables additional interprocedural optimizations for a single file compilation +SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE}" + Fortran "-ip" # Intel + "/Qip" # Intel Windows + ) + ##################### ### MATH FLAGS ### ##################### # Some subroutines require more strict floating point operation optimizations for repeatability SET_COMPILE_FLAG(STRICTMATH_FLAGS "${STRICTMATH_FLAGS}" - Fortran "-fp-model=precise -prec-div -prec-sqrt -assume protect-parens" # Intel - "/fp:precise /Qprec-div /Qprec-sqrt /assume:protect-parens" # Intel Windows - ) + Fortran "-fp-model=precise" # Intel + "/fp:precise" # Intel Windows + ) + +SET_COMPILE_FLAG(STRICTMATH_FLAGS "${STRICTMATH_FLAGS}" + Fortran "-prec-div" # Intel: improves precision when dividing floating-points + "/Qprec-div" # Intel Windows + ) + +SET_COMPILE_FLAG(STRICTMATH_FLAGS "${STRICTMATH_FLAGS}" + Fortran "-prec-sqrt" # Intel: improves precision when taking the square root of floating-points + "/Qprec-sqrt" # Intel Windows + ) + +SET_COMPILE_FLAG(STRICTMATH_FLAGS "${STRICTMATH_FLAGS}" + Fortran "-assume protect-parens" # Intel: treat parentheses in accordance with the Fortran standard + "/assume:protect-parens" # Intel Windows + ) + +# Improves floating-point precision and consistency +SET_COMPILE_FLAG(STRICTMATH_FLAGS "${STRICTMATH_FLAGS}" + Fortran "-mp1" # Intel + "/Qprec" # Intel Windows + ) # Most subroutines can use aggressive optimization of floating point operations without problems.
SET_COMPILE_FLAG(FASTMATH_FLAGS "${FASTMATH_FLAGS}" - Fortran "-fp-model=fast" - "/fp:fast" - ) + Fortran "-fp-model=fast" # Intel + "/fp:fast" # Intel Windows + "-ffast-math" # GNU + ) + + +SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}" + Fortran ${STRICTMATH_FLAGS} + ) ##################### ### PROFILE FLAGS ### ##################### # Enables the optimization reports to be generated SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_PROFILE "${CMAKE_Fortran_FLAGS_RELEASE}" - Fortran "-O2 -pg -qopt-report=5 -traceback -p -g3" # Intel - "/O2 /Qopt-report:5 /traceback -g3" # Windows Intel - "-O2 -pg -fbacktrace" - ) - -# Sanitize -SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}" - Fortran "-fsanitize=address,undefined" # Gnu - ) - - -SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}" - Fortran "-fstack-check" # GNU - ) + Fortran "-O2 -pg -qopt-report=5 -traceback -p -g3" # Intel + "/O2 /Qopt-report:5 /traceback /Z7" # Intel Windows + "-O2 -pg -fbacktrace" # GNU + ) + +IF (USE_SIMD) + # Enables OpenMP SIMD compilation when OpenMP parallelization is disabled. + IF (NOT USE_OPENMP) + SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS}" + Fortran "-qno-openmp -qopenmp-simd" # Intel + "/Qopenmp- /Qopenmp-simd" # Intel Windows + ) + ENDIF (NOT USE_OPENMP) + + # Optimize for the host's architecture + SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS}" + Fortran "-xhost" # Intel + "/QxHost" # Intel Windows + ${GNUNATIVE} # GNU + ) + + # Generate an extended set of vector functions + SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS}" + Fortran "-vecabi=cmdtarget" # Intel + "/Qvecabi:cmdtarget" # Intel Windows + ) +ENDIF (USE_SIMD) \ No newline at end of file