From abcc5a8cdc0c9921e907e9bbfb7363587e17e708 Mon Sep 17 00:00:00 2001
From: Matteo Cicuttin <datafl4sh@toxicnet.eu>
Date: Thu, 26 Mar 2020 10:51:59 +0100
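Subject: [PATCH] Added code to disable denormals.

Adds the ENABLE_DAZ_FTZ CMake option (default ON), which defines
DISALLOW_DENORMALS; fd_main.cpp then sets the SSE flush-to-zero and
denormals-are-zero modes at the start of main(). This patch also unmasks
the SSE invalid-operation exception, enables the vectorizer remarks for
AppleClang, and changes the fifth wave_equation_context constructor
argument from 0.001 to 0.0001.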

---
 kokkos-testing/fd_catalog/CMakeLists.txt |  7 ++++++-
 kokkos-testing/fd_catalog/fd_main.cpp    | 13 +++++++++++--
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/kokkos-testing/fd_catalog/CMakeLists.txt b/kokkos-testing/fd_catalog/CMakeLists.txt
index 68d02e1..e385fbd 100644
--- a/kokkos-testing/fd_catalog/CMakeLists.txt
+++ b/kokkos-testing/fd_catalog/CMakeLists.txt
@@ -32,6 +32,11 @@ if (ENABLE_ITERTIME_OUTPUT)
     add_definitions(-DSAVE_ITERTIME)
 endif()
 
+option(ENABLE_DAZ_FTZ "Enable Denormals Are Zero and Flush To Zero flags" ON)
+if (ENABLE_DAZ_FTZ)
+    add_definitions(-DDISALLOW_DENORMALS) # fd_main.cpp tests this macro before setting the FTZ/DAZ modes
+endif()
+
 option(ENABLE_KOKKOS "Enable Kokkos" OFF)
 if (ENABLE_KOKKOS)
     FetchContent_Declare(kokkos
@@ -67,7 +72,7 @@ endif()
 
 option(ENABLE_VECTORIZER_REMARKS "Enable Clang vectorizer remarks" ON)
 if (ENABLE_VECTORIZER_REMARKS)
-    if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
+    if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang") # Apple's Clang reports its own compiler ID
         set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Rpass=loop-vectorize")
     endif()
 endif()
diff --git a/kokkos-testing/fd_catalog/fd_main.cpp b/kokkos-testing/fd_catalog/fd_main.cpp
index d16075a..61abba8 100644
--- a/kokkos-testing/fd_catalog/fd_main.cpp
+++ b/kokkos-testing/fd_catalog/fd_main.cpp
@@ -8,8 +8,27 @@
 #include "fd_wave_cuda.hpp"
 #endif
 
-int main(void)
+// MXCSR control intrinsics exist only on SSE-capable x86 targets; keep other
+// architectures (e.g. arm64) building by compiling them out there.
+#if defined(__SSE__) || defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1)
+#define FD_HAVE_SSE_MXCSR 1
+#include <pmmintrin.h>
+#include <xmmintrin.h>
+#endif
+
+int main(int argc, char **argv)
 {
+#ifdef FD_HAVE_SSE_MXCSR
+#ifdef DISALLOW_DENORMALS
+    // Set the flush-to-zero and denormals-are-zero modes.
+    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
+    _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
+#endif
+    // Clear the invalid-operation bit in the exception mask (unmasks that exception).
+    // NOTE(review): this is applied even without DISALLOW_DENORMALS; confirm intended.
+    _MM_SET_EXCEPTION_MASK(_MM_GET_EXCEPTION_MASK() & ~_MM_MASK_INVALID);
+#endif
+
 #ifdef SINGLE_PRECISION
     using T = float;
     std::ofstream ofs("timings-float.txt");
@@ -34,7 +53,7 @@ int main(void)
 
     for (size_t sz = 128; sz <= 1024; sz *= 2)
     {
-        wave_equation_context<T> wec(sz, sz, 1, 0.1, 0.001, 5000);
+        wave_equation_context<T> wec(sz, sz, 1, 0.1, 0.0001, 5000);
         ofs << sz << "    ";
 
         wec.init();
-- 
GitLab