From 922c3f00aac8a56d5d97c732f5062bad2d642b52 Mon Sep 17 00:00:00 2001
From: David Minton <daminton@purdue.edu>
Date: Fri, 19 May 2023 12:04:53 -0400
Subject: [PATCH] Reviewed compiler flags and improved the consistency, and
 added ability to turn off OpenMP SIMD directives when building for
 CPU-agnostic container

---
 CMakeLists.txt                         |   1 +
 cmake/Modules/FindOpenMP_Fortran.cmake |  31 ++-
 cmake/Modules/SetFortranFlags.cmake    | 272 ++++++++++++++-----------
 3 files changed, 167 insertions(+), 137 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0aed20380..a047c0163 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -30,6 +30,7 @@ ENDIF(NOT CMAKE_Fortran_COMPILER_SUPPORTS_F90)
 # Set some options the user may choose
 OPTION(USE_COARRAY "Use Coarray Fortran for parallelization of test particles" OFF)
 OPTION(USE_OPENMP "Use OpenMP for parallelization" ON)
+OPTION(USE_SIMD "Use SIMD vectorization" ON)
 
 # Locate and set parallelization libraries.  There are some CMake peculiarities
 # taken care of here, such as the fact that the FindOpenMP routine doesn't know
diff --git a/cmake/Modules/FindOpenMP_Fortran.cmake b/cmake/Modules/FindOpenMP_Fortran.cmake
index 32777569e..06d679e7c 100644
--- a/cmake/Modules/FindOpenMP_Fortran.cmake
+++ b/cmake/Modules/FindOpenMP_Fortran.cmake
@@ -25,24 +25,19 @@
 
 INCLUDE (${CMAKE_ROOT}/Modules/FindPackageHandleStandardArgs.cmake)
 
-SET (OpenMP_Fortran_FLAG_CANDIDATES
-     #Intel
-     "-qopenmp" 
-     #Intel windows
-     "/Qopenmp" 
-     #Gnu
-     "-fopenmp"
-     #Portland Group
-     "-mp"
-     #Empty, if compiler automatically accepts openmp
-     " "
-     #Sun
-     "-xopenmp"
-     #HP
-     "+Oopenmp"
-     #IBM XL C/c++
-     "-qsmp"
-)
+IF (USE_SIMD)
+    SET (OpenMP_Fortran_FLAG_CANDIDATES
+        "-qopenmp" # Intel
+        "/Qopenmp" # Intel Windows
+        "-fopenmp" # GNU
+    )
+ELSE () # USE_SIMD is off: keep OpenMP threading but switch off OpenMP SIMD directives where the compiler allows it
+    SET (OpenMP_Fortran_FLAG_CANDIDATES
+        "-qopenmp -qno-openmp-simd"  # Intel
+        "/Qopenmp /Qopenmp-simd-"    # Intel Windows (OpenMP must be enabled explicitly, as on the Intel line above)
+        "-fopenmp"                   # GNU
+    )
+ENDIF (USE_SIMD)
 
 IF (DEFINED OpenMP_Fortran_FLAGS)
     SET (OpenMP_Fortran_FLAG_CANDIDATES)
diff --git a/cmake/Modules/SetFortranFlags.cmake b/cmake/Modules/SetFortranFlags.cmake
index 76f23f5cf..ecf9c34e1 100644
--- a/cmake/Modules/SetFortranFlags.cmake
+++ b/cmake/Modules/SetFortranFlags.cmake
@@ -64,9 +64,51 @@ ENDIF(CMAKE_Fortran_FLAGS_RELEASE AND CMAKE_Fortran_FLAGS_TESTING AND CMAKE_Fort
 ### GENERAL FLAGS ###
 #####################
 
+# Free form
+SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS}"
+                Fortran "-ffree-form" # GNU
+                ) 
+
 # Don't add underscores in symbols for C-compatability
 SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS}"
-                 Fortran "-fno-underscoring")
+                Fortran "-fno-underscoring" # GNU
+                ) 
+
+# Determines whether the current Fortran Standard behavior of the compiler is fully implemented.
+SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS}"
+                Fortran "-standard-semantics" # Intel
+                        "/standard-semantics" # Intel Windows
+                )
+
+# Tells the compiler to issue compile-time messages for nonstandard language elements (Fortran 2018).
+SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS}"
+                Fortran "-stand f18"  # Intel
+                        "/stand:f18"  # Intel Windows
+                        "-fstd=f2018" # GNU -- NOTE(review): gfortran spells this -std=f2018; confirm before relying on it
+                )
+
+# Removes the limit on free-form source line length (gfortran otherwise stops at 132 columns)
+SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS}"
+                Fortran "-ffree-line-length-none" # GNU (gfortran)
+                )
+
+# Disables right margin wrapping in list-directed output
+SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS}"
+                Fortran  "-no-wrap-margin" # Intel
+                         "/wrap-margin-"   # Intel Windows        
+                )
+
+# Aligns a variable to a specified boundary and offset
+SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS}"
+                Fortran "-align all -align array64byte" # Intel
+                        "/align:all /align:array64byte" # Intel Windows
+                )
+
+# Enables changing the variable and array memory layout
+SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS}"
+                Fortran "-pad"  # Intel
+                        "/Qpad" # Intel Windows
+                )
 
 # There is some bug where -march=native doesn't work on Mac
 IF(APPLE)
@@ -74,18 +116,10 @@ IF(APPLE)
 ELSE()
     SET(GNUNATIVE "-march=native")
 ENDIF()
-# Optimize for the host's architecture
-SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS}"
-                 Fortran "-xhost"        # Intel
-                         "/QxHost"       # Intel Windows
-                         ${GNUNATIVE}    # GNU
-                )
-
 
 ###################
 ### DEBUG FLAGS ###
 ###################
-# NOTE: debugging symbols (-g or /debug:full) are already on by default
 
 # Disable optimizations
 SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}"
@@ -106,33 +140,34 @@ SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}"
                  Fortran "-traceback"   # Intel Group
                          "/traceback"   # Intel Windows
                          "-fbacktrace"  # GNU (gfortran)
-                         "-ftrace=full" # GNU (g95)
                 )
 # Sanitize
 SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}"
-                 Fortran "-fsanitize=address"  # Gnu 
+                Fortran "-fsanitize=address,undefined"  # GNU (no space after the comma: the list must be one option)
                 )
                 
 
 # Check everything
 SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}"
-                 Fortran "-check"  # Intel
-                         "/check"  # Intel Windows
+                Fortran "-check all"       # Intel
+                         "/check:all"      # Intel Windows
                          "-fcheck=all" # GNU 
                 )
 SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}"
-                 Fortran "-fstack-check" # GNU 
+                Fortran "-fstack-check" # GNU 
                 )
                 
 
 # Initializes matrices/arrays with NaN values
 SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}"
-                 Fortran "-init=snan,arrays" # Intel
+                Fortran "-init=snan,arrays"  # Intel
+                        "/Qinit:snan,arrays" # Intel Windows
                 )
 
 # Does not generate an interface block for each routine in a source file
 SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}"
                  Fortran "-nogen-interfaces" # Intel
+                         "/nogen-interfaces" # Intel Windows
                 )
 
 # Does not generate aposition independent executable
@@ -143,74 +178,43 @@ SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}"
 # Does not set denormal results from floating-point calculations to zero
 SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}"
                  Fortran "-no-ftz" # Intel
+                         "/Qftz-"  # Intel Windows
                 )
 
 # Enables floating-point invalid, divide-by-zero, and overflow exceptions
 SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}"
-                 Fortran "-fpe-all=0" # Intel
+                 Fortran "-fpe-all=0"                         # Intel
+                         "/fpe-all:0"                         # Intel Windows
                          "-ffpe-trap=zero,overflow,underflow" # GNU
                 )
 
-# Improves floating-point precision and consistency
-SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}"
-                 Fortran "-mp1" # Intel
-                )
-
-# Strict model for floating-point calculations (precise and except)
-SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}"
-                 Fortran "-fp-model=strict" # Intel
-                )
-
 # Enables floating-point invalid, divide-by-zero, and overflow exceptions
 SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}"
-                 Fortran "-fpe0" # Intel
+                 Fortran "-fpe0"  # Intel
+                         "/fpe:0" # Intel Windows
                 )
 
 # Enables debug info
 SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}"
                  Fortran "-debug all" # Intel
+                         "/debug:all" # Intel Windows
                 )
 
-# Aligns a variable to a specified boundary and offset
+# Disables additional interprocedural optimizations for a single file compilation
 SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}"
-                 Fortran "-align all -align array64byte" # Intel
+                 Fortran "-no-ip" # Intel
+                         "/Qip-"  # Intel Windows
                 )
 
-# Enables changing the variable and array memory layout
-SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}"
-                 Fortran "-pad" # Intel
-                )
-
-# Enables additional interprocedural optimizations for a single file cimpilation
-SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}"
-                 Fortran "-ip" # Intel
-                )
-
-# Improves precision when dividing floating-points
-SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}"
-                 Fortran "-prec-div" # Intel
-                )
-
-# Improves precision when taking the square root of floating-points
-SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}"
-                 Fortran "-prec-sqrt" # Intel
-                )
-
-# Treat parentheses in accordance with the Fortran standard (ifort 10 only)
-SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}"
-                 Fortran "-assume protect-parens" # Intel
-                )
-
-# Checks the bounds of arrays at run-time
+# Disables prefetch insertion optimization
 SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}"
-                 Fortran "-CB" # Intel
+                Fortran "-qno-opt-prefetch" # Intel
+                        "/Qopt-prefetch-"   # Intel Windows
                 )
-
-# Allows for lines longer than 80 characters without truncation
+                
 SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}"
-                 Fortran "-no-wrap-margin"         # Intel
-                         "-ffree-line-length-none" # GNU (gfortran)
-                )
+                 Fortran "-fstack-check" # GNU 
+                )   
 
 #####################
 ### TESTING FLAGS ###
@@ -218,8 +222,8 @@ SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}"
 
 # Optimizations
 SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_TESTING "${CMAKE_Fortran_FLAGS_DEBUG}"
-                 Fortran REQUIRED "-O3" # All compilers not on Windows
-                                  "/O3" # Intel Windows
+                Fortran REQUIRED "-O3" # All compilers not on Windows
+                                 "/O3" # Intel Windows
                 )
 
 #####################
@@ -229,9 +233,9 @@ SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_TESTING "${CMAKE_Fortran_FLAGS_DEBUG}"
 
 # Unroll loops
 SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE}"
-                 Fortran "-unroll"        # Intel
-                         "/unroll"        # Intel Windows
-                         "-funroll-loops" # GNU
+                Fortran "-unroll"        # Intel
+                        "/unroll"        # Intel Windows
+                        "-funroll-loops" # GNU
                 )
 
 # Inline functions
@@ -241,91 +245,121 @@ SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE}"
                          "-finline-functions" # GNU
                 )
 
-
-# Allows for lines longer than 80 characters without truncation
-SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE}"
-                 Fortran "-no-wrap-margin"         # Intel
-                         "-ffree-line-length-none" # GNU (gfortran)
-                )
-
-# Disables prefetch insertion optimization
-SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE}"
-                 Fortran "-qopt-prefetch=0" # Intel
-                )
-
 # Calls the Matrix Multiply library
 SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE}"
-                 Fortran "-qopt-matmul" # Intel
+                Fortran "-qopt-matmul" # Intel
+                        "/Qopt-matmul" # Intel Windows
                 )
 
 # Saves the compiler options and version number to the executable 
 SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE}"
-                 Fortran "-sox" # Intel
-                )
-
-# Enforces vectorization of loops
-SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE}"
-                 Fortran "-simd" # Intel
+                Fortran "-sox" # Intel
                 )
 
 # Aligns a variable to a specified boundary and offset
 SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE}"
-                 Fortran "-align all" # Intel
-                )
-
-# Generate an extended set of vector functions
-SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE}"
-                 Fortran "-vecabi=cmdtarget" # Intel
+                Fortran "-align all" # Intel
+                        "/align:all" # Intel Windows
                 )
 
 # No floating-point exceptions
 SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE}"
-                 Fortran "-fp-model no-except" # Intel
+                Fortran "-fp-model no-except" # Intel
+                        "/fp:no-except"       # Intel Windows
                 )
 
 # Generate fused multiply-add instructions
  SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE}"
-                  Fortran "-fma" # Intel
-                 )
+                Fortran "-fma"  # Intel
+                        "/Qfma" # Intel Windows
+                )
 
-# Generate fused multiply-add instructions
+# Tells the compiler to link to certain libraries in the Intel oneAPI Math Kernel Library (oneMKL). 
  SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE}"
-                  Fortran "-qmkl=cluster" # Intel
-                  Fortran "-qmkl" # Intel
-                  Fortran "-mkl" # Old Intel
-                 ) 
+                Fortran "-qmkl=cluster" # Intel
+                        "-qmkl"         # Intel
+                        "/Qmkl:cluster" # Intel Windows
+                        "/Qmkl"         # Intel Windows
+                ) 
+
+# Enables additional interprocedural optimizations for a single file compilation
+SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE}"
+                Fortran "-ip"  # Intel
+                        "/Qip" # Intel Windows
+                )
+
  
 #####################
 ### MATH FLAGS ###
 #####################
 # Some subroutines require more strict floating point operation optimizations for repeatability
 SET_COMPILE_FLAG(STRICTMATH_FLAGS "${STRICTMATH_FLAGS}"
-                  Fortran "-fp-model=precise -prec-div -prec-sqrt -assume protect-parens" # Intel
-                          "/fp:precise /Qprec-div /Qprec-sqrt /assume:protect-parens" # Intel Windows 
-                  )
+                Fortran "-fp-model=precise" # Intel
+                         "/fp:precise" # Intel Windows 
+                )
+
+SET_COMPILE_FLAG(STRICTMATH_FLAGS "${STRICTMATH_FLAGS}"
+                Fortran "-prec-div"  # Intel
+                        "/Qprec-div" # Intel Windows 
+                ) 
+
+SET_COMPILE_FLAG(STRICTMATH_FLAGS "${STRICTMATH_FLAGS}"
+                Fortran "-prec-sqrt"   # Intel
+                         "/Qprec-sqrt" # Intel Windows 
+                )
+
+SET_COMPILE_FLAG(STRICTMATH_FLAGS "${STRICTMATH_FLAGS}"
+                Fortran "-assume protect-parens" # Intel
+                        "/assume:protect-parens" # Intel Windows 
+                ) 
+
+# Improves floating-point precision and consistency
+SET_COMPILE_FLAG(STRICTMATH_FLAGS "${STRICTMATH_FLAGS}"
+                Fortran "-mp1"   # Intel
+                        "/Qprec" # Intel Windows
+                ) 
 
 # Most subroutines can use aggressive optimization of floating point operations without problems.           
 SET_COMPILE_FLAG(FASTMATH_FLAGS "${FASTMATH_FLAGS}"
-                  Fortran "-fp-model=fast"
-                          "/fp:fast"
-                  )
+                Fortran "-fp-model=fast" # Intel
+                        "/fp:fast"       # Intel Windows
+                        "-ffast-math"    # GNU
+                )
+
+# Appends the strict floating-point flags (STRICTMATH_FLAGS) to the debug configuration
+SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}" 
+                Fortran ${STRICTMATH_FLAGS}
+                )
 
 #####################
 ### PROFILE FLAGS ###
 #####################
 # Enables the optimization reports to be generated
 SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_PROFILE "${CMAKE_Fortran_FLAGS_RELEASE}"
-                 Fortran "-O2 -pg -qopt-report=5 -traceback -p -g3" # Intel
-                         "/O2 /Qopt-report:5 /traceback -g3" # Windows Intel
-                         "-O2 -pg -fbacktrace"
-                )
-
-# Sanitize
-SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}"
-                 Fortran "-fsanitize=address,undefined"  # Gnu 
-                )
-                
-
-SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG}"
-                 Fortran "-fstack-check" # GNU 
-                )     
+                Fortran "-O2 -pg -qopt-report=5 -traceback -p -g3" # Intel
+                        "/O2 /Qopt-report:5 /traceback /Z7"        # Intel Windows
+                        "-O2 -pg -fbacktrace"                      # GNU
+                )
+
+IF (USE_SIMD) # CPU-specific vectorization flags; turn USE_SIMD off when building for a CPU-agnostic container
+        # Enables OpenMP SIMD compilation when OpenMP parallelization is disabled.
+        IF (NOT USE_OPENMP)
+                SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS}"
+                        Fortran "-qno-openmp -qopenmp-simd" # Intel
+                                "/Qopenmp- /Qopenmp-simd"   # Intel Windows
+                        )
+        ENDIF (NOT USE_OPENMP)
+
+        # Optimize for the host's architecture
+        SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS}"
+                        Fortran "-xhost"        # Intel
+                                "/QxHost"       # Intel Windows
+                                ${GNUNATIVE}    # GNU
+                        )
+
+        # Generate an extended set of vector functions
+        SET_COMPILE_FLAG(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS}"
+                        Fortran "-vecabi=cmdtarget"  # Intel
+                                "/Qvecabi:cmdtarget" # Intel Windows
+                        )
+ENDIF (USE_SIMD)