Reviewer commentary (text before the first "diff --git" line is ignored by
git apply):

  * fd_kokkos.cpp    - includes the SSE control headers; when DISALLOW_DENORMALS
                       is defined, sets FTZ/DAZ in solve_kokkos and in main;
                       main picks T from SINGLE_PRECISION, prints the precision
                       and clears _MM_MASK_INVALID from the SSE exception mask.
  * fd_main.cpp      - moves the SSE includes up and moves the FTZ/DAZ and
                       exception-mask setup below the precision selection,
                       adding the same diagnostic prints.
  * fd_openacc.cpp   - new driver whose solve_openacc is still a stub.
  * fd_wave_cpu.hpp  - flop factor 60 -> 58 in both GFlops/s computations;
                       FTZ/DAZ set inside the worker-thread lambda.
  * test_daz_ftz.cpp - new program printing the current DAZ and FTZ modes.

  Review fixes folded into this patch:
    - fd_kokkos.cpp printed "Precision: single" in the double branch.
    - fd_openacc.cpp did not compile (undeclared `time`, missing `nths`
      argument) and solve_openacc fell off the end of a non-void function.

  NOTE(review): line breaks and indentation were reconstructed from a
  whitespace-collapsed copy of this patch; context-line indentation and blank
  context lines are a best guess. Apply with `git apply --ignore-whitespace`
  or regenerate from the repository. The `index` hashes of fd_kokkos.cpp and
  fd_openacc.cpp predate the review fixes above.

diff --git a/kokkos-testing/fd_catalog/fd_kokkos.cpp b/kokkos-testing/fd_catalog/fd_kokkos.cpp
index 823fb1fbdef6beb0a3d2f1410206920137a91f00..98ddd90a0074d903a6531f27246bbc1aea353a0b 100644
--- a/kokkos-testing/fd_catalog/fd_kokkos.cpp
+++ b/kokkos-testing/fd_catalog/fd_kokkos.cpp
@@ -9,6 +9,9 @@
 #include <silo.h>
 #include <Kokkos_Core.hpp>
 
+#include <pmmintrin.h>
+#include <xmmintrin.h>
+
 #define WAVE_8_HALO_SIZE 4
 
 using namespace Kokkos;
@@ -179,6 +182,11 @@ double solve_kokkos(wave_equation_context_kokkos<T>& wec)
         static const T w3 = 8.0/315.0;
         static const T w4 = -1.0/560.0;
         static const T w[9] = { w4, w3, w2, w1, w0, w1, w2, w3, w4 };
+
+#ifdef DISALLOW_DENORMALS
+        _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
+        _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
+#endif
 
         i += WAVE_8_HALO_SIZE;
         j += WAVE_8_HALO_SIZE;
@@ -226,7 +234,20 @@ double solve_kokkos(wave_equation_context_kokkos<T>& wec)
 
 int main(int argc, char *argv[])
 {
+#ifdef SINGLE_PRECISION
+    using T = float;
+    std::cout << "Precision: single" << std::endl;
+#else
     using T = double;
+    std::cout << "Precision: double" << std::endl;
+#endif
+
+#ifdef DISALLOW_DENORMALS
+    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
+    _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
+    std::cout << "Denormals: FTZ and DAZ" << std::endl;
+#endif
+    _MM_SET_EXCEPTION_MASK(_MM_GET_EXCEPTION_MASK() & ~_MM_MASK_INVALID);
 
     Kokkos::initialize( argc, argv );
 
diff --git a/kokkos-testing/fd_catalog/fd_main.cpp b/kokkos-testing/fd_catalog/fd_main.cpp
index 61abba8d82d6ced9d77570899c5c835f98507578..e0d50d513e0be19e1b81046613364b4a217b33b7 100644
--- a/kokkos-testing/fd_catalog/fd_main.cpp
+++ b/kokkos-testing/fd_catalog/fd_main.cpp
@@ -2,31 +2,36 @@
 #include <fstream>
 #include <cstdio>
 #include <unistd.h>
+
+#include <pmmintrin.h>
+#include <xmmintrin.h>
+
 #include "fd_wave_cpu.hpp"
 
 #ifdef HAVE_CUDA
 #include "fd_wave_cuda.hpp"
 #endif
 
-#include <pmmintrin.h>
-#include <xmmintrin.h>
-
 int main(int argc, char **argv)
 {
-#ifdef DISALLOW_DENORMALS
-    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
-    _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
-#endif
-    _MM_SET_EXCEPTION_MASK(_MM_GET_EXCEPTION_MASK() & ~_MM_MASK_INVALID);
-
 #ifdef SINGLE_PRECISION
     using T = float;
+    std::cout << "Precision: single" << std::endl;
     std::ofstream ofs("timings-float.txt");
 #else
     using T = double;
+    std::cout << "Precision: double" << std::endl;
     std::ofstream ofs("timings-double.txt");
 #endif
 
+#ifdef DISALLOW_DENORMALS
+    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
+    _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
+    std::cout << "Denormals: FTZ and DAZ" << std::endl;
+#endif
+    _MM_SET_EXCEPTION_MASK(_MM_GET_EXCEPTION_MASK() & ~_MM_MASK_INVALID);
+
+
     /* Make header */
     ofs << "\"SIZE\"    \"Seq\"    \"SeqBlk\"    ";
 
diff --git a/kokkos-testing/fd_catalog/fd_openacc.cpp b/kokkos-testing/fd_catalog/fd_openacc.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..9a857a17916ac9cbf7231c0025d980095d2a52b8
--- /dev/null
+++ b/kokkos-testing/fd_catalog/fd_openacc.cpp
@@ -0,0 +1,33 @@
+#include <iostream>
+#include <fstream>
+#include <cstdio>
+#include <unistd.h>
+
+#include "fd_wave_cpu.hpp"
+
+template<typename T>
+double solve_openacc(wave_equation_context<T>& wec, size_t nths)
+{
+    return 0.0; /* TODO: OpenACC solver not implemented yet; placeholder timing */
+}
+
+int main(void)
+{
+#ifdef SINGLE_PRECISION
+    using T = float;
+    std::cout << "Precision: single" << std::endl;
+#else
+    using T = double;
+    std::cout << "Precision: double" << std::endl;
+#endif
+
+    for (size_t sz = 128; sz <= 1024; sz *= 2)
+    {
+        wave_equation_context<T> wec(sz, sz, 1, 0.1, 0.0001, 5000);
+
+        wec.init();
+        double time = solve_openacc(wec, 1); /* nths is ignored by the stub */
+    }
+
+    return 0;
+}
\ No newline at end of file
diff --git a/kokkos-testing/fd_catalog/fd_wave_cpu.hpp b/kokkos-testing/fd_catalog/fd_wave_cpu.hpp
index 96e11c13ec853aab7098e2ebd571e6838982895d..152c2b19ccfc7cc03e3bfb33fbddfa8b2a5f7b88 100644
--- a/kokkos-testing/fd_catalog/fd_wave_cpu.hpp
+++ b/kokkos-testing/fd_catalog/fd_wave_cpu.hpp
@@ -359,7 +359,7 @@ solve_sequential_aux(wave_equation_context<T>& wec)
         std::cout << "[Wave][SeqBlk] Wall Time: " << time << "ms" << std::endl;
 
         double itertime = time/wec.maxiter;
-        double gflops_s = 60*(params.maxrow*params.maxcol)/(1e6*itertime);
+        double gflops_s = 58*(params.maxrow*params.maxcol)/(1e6*itertime);
         std::cout << "[Wave][SeqBlk] GFlops/s: " << gflops_s << std::endl;
     }
     else
@@ -368,7 +368,7 @@ solve_sequential_aux(wave_equation_context<T>& wec)
         std::cout << "[Wave][Sequential] Wall Time: " << time << "ms" << std::endl;
 
         double itertime = time/wec.maxiter;
-        double gflops_s = 60*(params.maxrow*params.maxcol)/(1e6*itertime);
+        double gflops_s = 58*(params.maxrow*params.maxcol)/(1e6*itertime);
         std::cout << "[Wave][Sequential] GFlops/s: " << gflops_s << std::endl;
 
         size_t kernel_bytes = 3*sizeof(T)*(params.maxrow*params.maxcol);
@@ -418,6 +418,10 @@ double solve_multithread(wave_equation_context<T>& wec, size_t nths)
     bool iteration_finished = false;
 
     auto thread_lambda = [&](size_t thread_id, size_t num_threads) {
+#ifdef DISALLOW_DENORMALS
+        _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
+        _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
+#endif
         while (1)
         {
             /* Wait for the producer to notify that there's something to do */
diff --git a/kokkos-testing/test_daz_ftz.cpp b/kokkos-testing/test_daz_ftz.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b127429d7981ec3736eebae516dd057a8932bd96
--- /dev/null
+++ b/kokkos-testing/test_daz_ftz.cpp
@@ -0,0 +1,10 @@
+#include <iostream>
+#include <pmmintrin.h>
+#include <xmmintrin.h>
+
+int main(void)
+{
+    std::cout << "DAZ: " << _MM_GET_DENORMALS_ZERO_MODE() << std::endl;
+    std::cout << "FTZ: " << _MM_GET_FLUSH_ZERO_MODE() << std::endl;
+    return 0;
+}