diff --git a/kokkos-testing/experimental_data/makefigs.sh b/kokkos-testing/experimental_data/makefigs.sh
new file mode 100644
index 0000000000000000000000000000000000000000..e236c4703e0ecd74c45de6060a77604874ba4429
--- /dev/null
+++ b/kokkos-testing/experimental_data/makefigs.sh
@@ -0,0 +1,17 @@
+#!/bin/sh
+
+for file in *.plot; do
+    gnuplot "$file"
+done
+
+for file in *.eps; do
+    ps2pdf "$file"
+done
+
+for file in *.pdf; do
+    pdfcrop "$file"
+done
+
+for file in *-crop.pdf; do
+    mv "$file" "$(echo "$file" | sed 's/-crop//')"
+done
diff --git a/kokkos-testing/fd_catalog/CMakeLists.txt b/kokkos-testing/fd_catalog/CMakeLists.txt
index 2f765c79618dd1674de131666fb6a987899f612a..742d534bc875cb4f0bfbebe74189bf289cb8901b 100644
--- a/kokkos-testing/fd_catalog/CMakeLists.txt
+++ b/kokkos-testing/fd_catalog/CMakeLists.txt
@@ -1,31 +1,65 @@
-cmake_minimum_required(VERSION 3.10 FATAL_ERROR)
+cmake_minimum_required(VERSION 3.11 FATAL_ERROR)
 project(fd_catalog)
 include(CheckLanguage)
 include(FetchContent)
 
+set (CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
+
 set(CMAKE_CXX_STANDARD 14)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 
 set(THREADS_PREFER_PTHREAD_FLAG ON)
 find_package(Threads REQUIRED)
+set(LINK_LIBS ${LINK_LIBS} Threads::Threads)
+
+find_package(SILO)
+if (SILO_FOUND)
+    add_definitions(-DHAVE_SILO)
+    include_directories("${SILO_INCLUDE_DIR}")
+    set(LINK_LIBS ${LINK_LIBS} ${SILO_LIBRARY})
+endif()
+
+option(ENABLE_TIMESTEP_OUTPUT "Save timesteps (don't use during perf meas)" OFF)
+if (ENABLE_TIMESTEP_OUTPUT)
+    if (NOT SILO_FOUND)
+        message(FATAL_ERROR "You need SILO to output iteration data")
+    endif()
+    add_definitions(-DSAVE_TIMESTEPS)
+endif()
+
+option(ENABLE_ITERTIME_OUTPUT "Save iteration times (don't use during perf meas)" OFF)
+if (ENABLE_ITERTIME_OUTPUT)
+    add_definitions(-DSAVE_ITERTIME)
+endif()
 
+# Declared ahead of the Kokkos block, which tests these switches.
+option(ENABLE_SINGLE "Enable single precision build" ON)
+option(ENABLE_DOUBLE "Enable double precision build" ON)
+
-option(ENABLE_KOKKOS "Enable Kokkos" ON)
+option(ENABLE_KOKKOS "Enable Kokkos" OFF)
 if (ENABLE_KOKKOS)
     FetchContent_Declare(kokkos
         GIT_REPOSITORY https://github.com/kokkos/kokkos.git
     )
     FetchContent_MakeAvailable(kokkos)
 
-    add_executable(finite_difference_kokkos finite_difference_kokkos.cpp)
-    target_link_libraries(finite_difference_kokkos Kokkos::kokkos siloh5)
-    #add_definitions(-DHAVE_KOKKOS)
-    #set(HAVE_KOKKOS TRUE)
+    if (ENABLE_SINGLE)
+        add_executable(fd_kokkos_single fd_kokkos.cpp)
+        target_compile_definitions(fd_kokkos_single PUBLIC -DSINGLE_PRECISION)
+        target_link_libraries(fd_kokkos_single ${LINK_LIBS} Kokkos::kokkos)
+    endif()
+
+    if (ENABLE_DOUBLE)
+        add_executable(fd_kokkos_double fd_kokkos.cpp)
+        target_link_libraries(fd_kokkos_double ${LINK_LIBS} Kokkos::kokkos)
+    endif()
+
+    set(HAVE_KOKKOS TRUE)
 endif()
 
 option(ENABLE_CUDA "Enable CUDA if present" ON)
 if (ENABLE_CUDA)
     check_language(CUDA)
-
     if (CMAKE_CUDA_COMPILER)
         enable_language(CUDA)
         set(CMAKE_CUDA_STANDARD 14)
@@ -35,41 +69,47 @@ if (ENABLE_CUDA)
     endif()
 endif()
 
+option(ENABLE_VECTORIZER_REMARKS "Enable Clang vectorizer remarks" ON)
+if (ENABLE_VECTORIZER_REMARKS)
+    if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Rpass=loop-vectorize")
+    endif()
+endif()
+
 set(CMAKE_CXX_FLAGS_DEBUG "-g")
 set(CMAKE_CXX_FLAGS_RELEASE "-O3 -march=native -g -DNDEBUG")
 set(CMAKE_CXX_FLAGS_RELEASEASSERT "-O3 -march=native -g -fpermissive")
 
-option(A_SAVE_TS "Save timesteps" OFF)
-if (A_SAVE_TS)
-    add_definitions(-DHAVE_SILO)
-    add_definitions(-DSAVE_TIMESTEPS)
-endif()
+# setup_fd_catalog_target(<FD_TGT_NAME> <SINGLE_PRECISION>)
+# Adds executable <FD_TGT_NAME> from fd_main.cpp (plus fd_wave_cuda.cu when
+# HAVE_CUDA is set), links it against ${LINK_LIBS} and, when SINGLE_PRECISION
+# is true, compiles it with SINGLE_PRECISION defined.
+function(setup_fd_catalog_target FD_TGT_NAME SINGLE_PRECISION)
+    set(FD_SOURCES "fd_main.cpp")
 
-option(ENABLE_SINGLE "Enable single precision build" ON)
-if (ENABLE_SINGLE)
     if (HAVE_CUDA)
-        add_executable(fd_catalog_single fd_main.cpp fd_wave_cuda.cu)
-        if(APPLE)
-            set_property(TARGET fd_catalog_single PROPERTY BUILD_RPATH /usr/local/cuda/lib)
-        endif()
-    else()
-        add_executable(fd_catalog_single fd_main.cpp)
+        set(FD_SOURCES ${FD_SOURCES} fd_wave_cuda.cu)
+    endif()
+
+    add_executable(${FD_TGT_NAME} ${FD_SOURCES})
+
+    if (HAVE_CUDA AND APPLE)
+        set_property(TARGET ${FD_TGT_NAME} PROPERTY BUILD_RPATH /usr/local/cuda/lib)
+    endif()
+
+    if (SINGLE_PRECISION)
+        target_compile_definitions(${FD_TGT_NAME} PUBLIC -DSINGLE_PRECISION)
     endif()
-    target_compile_definitions(fd_catalog_single PUBLIC -DSINGLE_PRECISION)
-    target_link_libraries(fd_catalog_single siloh5)
-    target_link_libraries(fd_catalog_single Threads::Threads)
+
+    target_link_libraries(${FD_TGT_NAME} ${LINK_LIBS})
+endfunction()
+
+if (ENABLE_SINGLE)
+    setup_fd_catalog_target("fd_catalog_single" TRUE)
 endif()
 
-option(ENABLE_DOUBLE "Enable double precision build" ON)
 if (ENABLE_DOUBLE)
-    if (HAVE_CUDA)
-        add_executable(fd_catalog_double fd_main.cpp fd_wave_cuda.cu)
-        if(APPLE)
-            set_property(TARGET fd_catalog_double PROPERTY BUILD_RPATH /usr/local/cuda/lib)
-        endif()
-    else()
-        add_executable(fd_catalog_double fd_main.cpp)
-    endif()
-    target_link_libraries(fd_catalog_double siloh5)
-    target_link_libraries(fd_catalog_double Threads::Threads)
+    setup_fd_catalog_target("fd_catalog_double" FALSE)
 endif()
+
+
diff --git a/kokkos-testing/fd_catalog/cmake/FindSILO.cmake b/kokkos-testing/fd_catalog/cmake/FindSILO.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..c1144bf762fcd39b2eac0e808576065b3dc98261
--- /dev/null
+++ b/kokkos-testing/fd_catalog/cmake/FindSILO.cmake
@@ -0,0 +1,12 @@
+include(FindPackageHandleStandardArgs)
+
+
+find_path(SILO_INCLUDE_DIR
+    NAMES silo.h
+    PATHS /usr/include /usr/local/include)
+
+find_library(SILO_LIBRARY
+    NAMES silo siloh5
+    PATHS /usr/lib /usr/local/lib)
+
+FIND_PACKAGE_HANDLE_STANDARD_ARGS(SILO DEFAULT_MSG SILO_LIBRARY SILO_INCLUDE_DIR)
diff --git a/kokkos-testing/fd_catalog/finite_difference_kokkos.cpp b/kokkos-testing/fd_catalog/fd_kokkos.cpp
similarity index 100%
rename from kokkos-testing/fd_catalog/finite_difference_kokkos.cpp
rename to kokkos-testing/fd_catalog/fd_kokkos.cpp
diff --git a/kokkos-testing/fd_catalog/fd_main.cpp b/kokkos-testing/fd_catalog/fd_main.cpp
index 458d937dc852cb1ba2418a821ad57116df1a07f5..4f104c3c0e97d64a2560789a5d137643646eb6aa 100644
--- a/kokkos-testing/fd_catalog/fd_main.cpp
+++ b/kokkos-testing/fd_catalog/fd_main.cpp
@@ -12,12 +12,12 @@ int main(void)
 {
 #ifdef SINGLE_PRECISION
     using T = float;
+    std::ofstream ofs("timings-float.txt");
 #else
     using T = double;
+    std::ofstream ofs("timings-double.txt");
 #endif
 
-    std::ofstream ofs("timings.txt");
-
     /* Make header */
     ofs << "\"SIZE\" \"Seq\" \"SeqBlk\" ";
 
@@ -32,9 +32,9 @@ int main(void)
 
     double time;
 
-    for (size_t sz = 128; sz <= 1024; sz *= 2)
+    for (size_t sz = 256; sz <= 256; sz *= 2)
     {
-        wave_equation_context<T> wec(sz, sz, 1, 0.1, 0.0001, 5000);
+        wave_equation_context<T> wec(sz, sz, 1, 0.1, 0.001, 5000);
         ofs << sz << " ";
 
         wec.init();
@@ -51,8 +51,6 @@ int main(void)
         ofs << time << " ";
 #endif
 
-        auto maxthreads = std::thread::hardware_concurrency();
-
         for (size_t threads = 1; threads < maxthreads; threads *= 2)
         {
             wec.init();
diff --git a/kokkos-testing/fd_catalog/fd_wave_cpu.hpp b/kokkos-testing/fd_catalog/fd_wave_cpu.hpp
index 63e60e14184b974bcffc51fb134b1fda76928b16..96e11c13ec853aab7098e2ebd571e6838982895d 100644
--- a/kokkos-testing/fd_catalog/fd_wave_cpu.hpp
+++ b/kokkos-testing/fd_catalog/fd_wave_cpu.hpp
@@ -309,14 +309,14 @@ solve_sequential_aux(wave_equation_context<T>& wec)
 
     double time = 0.0;
 
-    std::ofstream ofs;
-
-    /*
+#ifdef SAVE_ITERTIME
+    std::ofstream ofs;
     if (blocked)
         ofs.open("itertime-blocked.txt");
     else
         ofs.open("itertime-naive.txt");
-    */
+#endif /* SAVE_ITERTIME */
+
     for (size_t i = 0; i < wec.maxiter; i++)
     {
         auto start = std::chrono::high_resolution_clock::now();
@@ -339,13 +339,18 @@ solve_sequential_aux(wave_equation_context<T>& wec)
         if ( (i%100) == 0 )
         {
             std::stringstream ss;
-            ss << "wave_seq_" << i << ".silo";
+            if (blocked)
+                ss << "wave_seqblk_" << i << ".silo";
+            else
+                ss << "wave_seq_" << i << ".silo";
             visit_dump(wec.g_curr, ss.str());
         }
 #endif /* SAVE_TIMESTEPS */
 #endif /* HAVE_SILO */
 
-        //ofs << i << " " << ms.count() << std::endl;
+#ifdef SAVE_ITERTIME
+        ofs << i << " " << ms.count() << std::endl;
+#endif /* SAVE_ITERTIME */
     }
 
     if (blocked)
@@ -370,8 +375,12 @@ solve_sequential_aux(wave_equation_context<T>& wec)
         double gbytes_s = kernel_bytes/(1e6*itertime);
         std::cout << "[Wave][Sequential] Bandwidth: " << gbytes_s << "GB/s" << std::endl;
     }
+
 #ifdef HAVE_SILO
-    visit_dump(wec.g_curr, "wave_sequential_lastiter.silo");
+    if (blocked)
+        visit_dump(wec.g_curr, "wave_seqblk_lastiter.silo");
+    else
+        visit_dump(wec.g_curr, "wave_seq_lastiter.silo");
 #endif /* HAVE_SILO */
 
     return time/wec.maxiter;
@@ -489,7 +498,7 @@ double solve_multithread(wave_equation_context<T>& wec, size_t nths)
     std::cout << time << "ms" << std::endl;
 
 #ifdef HAVE_SILO
-    visit_dump(wec.g_curr, "wave_tiled_lastiter.silo");
+    visit_dump(wec.g_curr, "wave_mt_lastiter.silo");
 #endif /* HAVE_SILO */
 
     return time/wec.maxiter;
diff --git a/kokkos-testing/fd_catalog/fd_wave_cuda.cu b/kokkos-testing/fd_catalog/fd_wave_cuda.cu
index 065444a5b15788f81c3e2878cb0b78b14e9a440a..cb03028e44975510a46c5a8e8f2e3d563e989e5c 100644
--- a/kokkos-testing/fd_catalog/fd_wave_cuda.cu
+++ b/kokkos-testing/fd_catalog/fd_wave_cuda.cu
@@ -232,7 +232,8 @@ double solve_cuda_aux(wave_equation_context<T>& wec)
     std::cout << "[Wave][Cuda] Wall Time: " << milliseconds << "ms" << std::endl;
 
     double itertime = milliseconds/wec.maxiter;
-    double gflops_s = 70*(params.maxrow*params.maxcol)/(1e6*itertime);
+
+    double gflops_s = 70.0*(params.maxrow*params.maxcol)/(1e6*itertime);
     std::cout << "[Wave][Cuda] GFlops/s: " << gflops_s << std::endl;
 
     size_t kb1 = (2*WAVE_8_HALO_SIZE+WAVE_8_KER_ROWS)*(2*WAVE_8_HALO_SIZE+WAVE_8_KER_COLS);