diff --git a/kokkos-testing/fd_catalog/CMakeLists.txt b/kokkos-testing/fd_catalog/CMakeLists.txt index 68d02e116b830bc4280740915f0fe89fe7fbc53d..e385fbd1a87e47a472a25410121a97dcf669845d 100644 --- a/kokkos-testing/fd_catalog/CMakeLists.txt +++ b/kokkos-testing/fd_catalog/CMakeLists.txt @@ -32,6 +32,11 @@ if (ENABLE_ITERTIME_OUTPUT) add_definitions(-DSAVE_ITERTIME) endif() +option(ENABLE_DAZ_FTZ "Enable Denormals Are Zero and Flush To Zero flags" ON) +if (ENABLE_DAZ_FTZ) + add_definitions(-DDISALLOW_DENORMALS) +endif() + option(ENABLE_KOKKOS "Enable Kokkos" OFF) if (ENABLE_KOKKOS) FetchContent_Declare(kokkos @@ -67,7 +72,7 @@ endif() option(ENABLE_VECTORIZER_REMARKS "Enable Clang vectorizer remarks" ON) if (ENABLE_VECTORIZER_REMARKS) - if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang") + if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Rpass=loop-vectorize") endif() endif() diff --git a/kokkos-testing/fd_catalog/fd_main.cpp b/kokkos-testing/fd_catalog/fd_main.cpp index d16075aa0e106bc729da2833c621b4f6088891d4..61abba8d82d6ced9d77570899c5c835f98507578 100644 --- a/kokkos-testing/fd_catalog/fd_main.cpp +++ b/kokkos-testing/fd_catalog/fd_main.cpp @@ -8,8 +8,17 @@ #include "fd_wave_cuda.hpp" #endif -int main(void) +#include <pmmintrin.h> +#include <xmmintrin.h> + +int main(int argc, char **argv) { +#ifdef DISALLOW_DENORMALS + _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); + _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); +#endif + _MM_SET_EXCEPTION_MASK(_MM_GET_EXCEPTION_MASK() & ~_MM_MASK_INVALID); + #ifdef SINGLE_PRECISION using T = float; std::ofstream ofs("timings-float.txt"); @@ -34,7 +43,7 @@ int main(void) for (size_t sz = 128; sz <= 1024; sz *= 2) { - wave_equation_context<T> wec(sz, sz, 1, 0.1, 0.001, 5000); + wave_equation_context<T> wec(sz, sz, 1, 0.1, 0.0001, 5000); ofs << sz << " "; wec.init();