diff --git a/CMakeLists.txt b/CMakeLists.txt index 2aa195cedb185f8550a9c958ba7d8fdc1759a6e3..94a7ce7ee8784bd90d1e534f9477ec99714598a1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -433,6 +433,33 @@ if(ENABLE_MPI) endif() endif() +# Thrust (resolved before the OpenMP section, which tests ENABLE_DDMGPU) +set(Thrust_NVIDIA FALSE) +set(Thrust_ROCm FALSE) +if(ENABLE_DDMGPU) + if(NOT ENABLE_OPENMP) + message(WARNING "DDMGPU requires OPENMP ! Disabling DDMGPU.") + set(ENABLE_DDMGPU OFF) + else() + find_package(Thrust) #Find it the NVIDIA way + if(THRUST_FOUND) + set(Thrust_NVIDIA TRUE) + thrust_create_target(Thrust HOST CPP DEVICE CPP) + set_config_option("DDMGPU" DDMGPU) # -> GPU DDM USED + else() # Try to find rocThrust + find_package(rocprim CONFIG PATHS "/opt/rocm/rocprim") + find_package(rocthrust CONFIG PATHS "/opt/rocm/rocthrust") + if(rocprim_FOUND AND rocthrust_FOUND) + set_config_option("DDMGPU" DDMGPU) # -> GPU DDM USED + set(Thrust_ROCm TRUE) + else() # No Thrust backend: turn DDMGPU off so the OpenMP section below still sets HAVE_OPENMP + message(WARNING "Neither Thrust nor rocThrust was found. Disabling DDMGPU.") + set(ENABLE_DDMGPU OFF) + endif() + endif() + endif() +endif() + # OpenMP if(ENABLE_OPENMP) @@ -454,7 +481,7 @@ if(ENABLE_OPENMP) list(APPEND EXTRA_LIBS "-L/opt/homebrew/opt/libomp/lib -lomp") set_config_option("OpenMP[Homebrew]" HAVE_OPENMP) else() - if(OPENMP_FOUND) + if(OPENMP_FOUND AND NOT ENABLE_DDMGPU) set_config_option("OpenMP" HAVE_OPENMP) endif() endif() @@ -477,30 +504,6 @@ if(ENABLE_ROBINHOOD) endif() endif() -# Thrust -set(Thrust_NVIDIA FALSE) -set(Thrust_ROCm FALSE) -if(ENABLE_DDMGPU) - if(ENABLE_OPENMP) - message(WARNING "DDMGPU and OpenMP are not compatible. 
Disabling DDMGPU.") - set(ENABLE_DDMGPU OFF) - else() - find_package(Thrust) #Find it the NVIDIA way - if(THRUST_FOUND) - set(Thrust_NVIDIA TRUE) - thrust_create_target(Thrust HOST CPP DEVICE CPP) - set_config_option("DDMGPU" DDMGPU) # -> GPU DDM USED - else() # Try to find rocThrust - find_package(rocprim CONFIG PATHS "/opt/rocm/rocprim") - find_package(rocthrust CONFIG PATHS "/opt/rocm/rocthrust") - if(rocprim_FOUND AND rocthrust_FOUND) - set(Thrust_ROC TRUE) - set_config_option("DDMGPU" DDMGPU) # -> GPU DDM USED - set(Thrust_ROCm TRUE) - endif() - endif() - endif() -endif() # Coverage diff --git a/src/function/executionTree/FunctionAllocator.h b/src/function/executionTree/FunctionAllocator.h index f3db33ee14cb4c220d0b27683a26e705b916c634..b645ef52f19ee3a392e34f178b9917a69eaab259 100644 --- a/src/function/executionTree/FunctionAllocator.h +++ b/src/function/executionTree/FunctionAllocator.h @@ -106,9 +106,15 @@ namespace gmshfem::function } T *p = nullptr; if(n * sizeof(value_type) <= GMSHFEM_FUNCTION_MEMORY_ALIGNMENT) { +#ifdef DDMGPU +#pragma omp critical +#endif p = static_cast< T * >(MemoryPoolAllocator::instance()->allocateSmall(n * sizeof(value_type))); } else { +#ifdef DDMGPU +#pragma omp critical +#endif p = static_cast< T * >(MemoryPoolAllocator::instance()->allocate(n * sizeof(value_type))); } @@ -122,9 +128,15 @@ namespace gmshfem::function } if(n * sizeof(value_type) <= GMSHFEM_FUNCTION_MEMORY_ALIGNMENT) { +#ifdef DDMGPU +#pragma omp critical +#endif MemoryPoolAllocator::instance()->deallocateSmall(p, n * sizeof(value_type)); } else { +#ifdef DDMGPU +#pragma omp critical +#endif MemoryPoolAllocator::instance()->deallocate(p, n * sizeof(value_type)); } } @@ -142,6 +154,9 @@ namespace gmshfem::function static T *s_p = nullptr; #ifdef HAVE_OPENMP #pragma omp master +#endif +#ifdef DDMGPU +#pragma omp critical #endif s_p = static_cast< T * >(MemoryPoolAllocator::instance()->allocate(n * sizeof(value_type))); #ifdef HAVE_OPENMP @@ -157,6 +172,9 @@ 
namespace gmshfem::function #pragma omp barrier #pragma omp master +#endif +#ifdef DDMGPU +#pragma omp critical #endif MemoryPoolAllocator::instance()->deallocate(s_p, n * sizeof(value_type)); #ifdef HAVE_OPENMP diff --git a/src/problem/Formulation.cpp b/src/problem/Formulation.cpp index a9bc72f87063a073522534800bf0099bb10edfde..a339319906f2c55c272117d4b9c4220eb5ed6fa9 100644 --- a/src/problem/Formulation.cpp +++ b/src/problem/Formulation.cpp @@ -1108,12 +1108,12 @@ namespace gmshfem::problem } time.tock(); - msg::info << "Done pre-processing in " << time << "s" << msg::endl; + //msg::info << "Done pre-processing in " << time << "s" << msg::endl; if(common::Options::instance()->memory) { auto memory = _A->memory(); for (auto& bi: _multiB) memory += bi.memory(); - msg::info << "Memory footprint of system: " << memory << msg::endl; + //msg::info << "Memory footprint of system: " << memory << msg::endl; } return time;