diff --git a/CMakeLists.txt b/CMakeLists.txt index 2aa195cedb185f8550a9c958ba7d8fdc1759a6e3..9b6ebb68614e4615517291271e4f003ff53b3834 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -147,6 +147,8 @@ if(HAVE_DEPRECATED) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-deprecated-declarations") endif() +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=thread") + # remove "could not create compact unwind" linker warnings on macOS; but breaks # exception handling if(APPLE) @@ -433,6 +435,31 @@ if(ENABLE_MPI) endif() endif() +# Thrust +set(Thrust_NVIDIA FALSE) +set(Thrust_ROCm FALSE) +if(ENABLE_DDMGPU) + if(NOT ENABLE_OPENMP) + message(WARNING "DDMGPU requires OPENMP ! Disabling DDMGPU.") + set(ENABLE_DDMGPU OFF) + else() + find_package(Thrust) #Find it the NVIDIA way + if(THRUST_FOUND) + set(Thrust_NVIDIA TRUE) + thrust_create_target(Thrust HOST CPP DEVICE CPP) + set_config_option("DDMGPU" DDMGPU) # -> GPU DDM USED + else() # Try to find rocThrust + find_package(rocprim CONFIG PATHS "/opt/rocm/rocprim") + find_package(rocthrust CONFIG PATHS "/opt/rocm/rocthrust") + if(rocprim_FOUND AND rocthrust_FOUND) + set(Thrust_ROC TRUE) + set_config_option("DDMGPU" DDMGPU) # -> GPU DDM USED + set(Thrust_ROCm TRUE) + endif() + endif() + endif() +endif() + # OpenMP if(ENABLE_OPENMP) @@ -454,7 +481,7 @@ if(ENABLE_OPENMP) list(APPEND EXTRA_LIBS "-L/opt/homebrew/opt/libomp/lib -lomp") set_config_option("OpenMP[Homebrew]" HAVE_OPENMP) else() - if(OPENMP_FOUND) + if(OPENMP_FOUND AND NOT ENABLE_DDMGPU) set_config_option("OpenMP" HAVE_OPENMP) endif() endif() @@ -477,30 +504,6 @@ if(ENABLE_ROBINHOOD) endif() endif() -# Thrust -set(Thrust_NVIDIA FALSE) -set(Thrust_ROCm FALSE) -if(ENABLE_DDMGPU) - if(ENABLE_OPENMP) - message(WARNING "DDMGPU and OpenMP are not compatible. Disabling DDMGPU.") - set(ENABLE_DDMGPU OFF) - else() - find_package(Thrust) #Find it the NVIDIA way - if(THRUST_FOUND) - set(Thrust_NVIDIA TRUE) - thrust_create_target(Thrust HOST CPP DEVICE CPP) - set_config_option("DDMGPU" DDMGPU) # -> GPU DDM USED - else() # Try to find rocThrust - find_package(rocprim CONFIG PATHS "/opt/rocm/rocprim") - find_package(rocthrust CONFIG PATHS "/opt/rocm/rocthrust") - if(rocprim_FOUND AND rocthrust_FOUND) - set(Thrust_ROC TRUE) - set_config_option("DDMGPU" DDMGPU) # -> GPU DDM USED - set(Thrust_ROCm TRUE) - endif() - endif() - endif() -endif() # Coverage diff --git a/src/function/executionTree/FunctionAllocator.h b/src/function/executionTree/FunctionAllocator.h index f3db33ee14cb4c220d0b27683a26e705b916c634..b645ef52f19ee3a392e34f178b9917a69eaab259 100644 --- a/src/function/executionTree/FunctionAllocator.h +++ b/src/function/executionTree/FunctionAllocator.h @@ -106,9 +106,15 @@ namespace gmshfem::function } T *p = nullptr; if(n * sizeof(value_type) <= GMSHFEM_FUNCTION_MEMORY_ALIGNMENT) { +#ifdef DDMGPU +#pragma omp critical +#endif p = static_cast< T * >(MemoryPoolAllocator::instance()->allocateSmall(n * sizeof(value_type))); } else { +#ifdef DDMGPU +#pragma omp critical +#endif p = static_cast< T * >(MemoryPoolAllocator::instance()->allocate(n * sizeof(value_type))); } @@ -122,9 +128,15 @@ namespace gmshfem::function } if(n * sizeof(value_type) <= GMSHFEM_FUNCTION_MEMORY_ALIGNMENT) { +#ifdef DDMGPU +#pragma omp critical +#endif MemoryPoolAllocator::instance()->deallocateSmall(p, n * sizeof(value_type)); } else { +#ifdef DDMGPU +#pragma omp critical +#endif MemoryPoolAllocator::instance()->deallocate(p, n * sizeof(value_type)); } } @@ -142,6 +154,9 @@ namespace gmshfem::function static T *s_p = nullptr; #ifdef HAVE_OPENMP #pragma omp master +#endif +#ifdef DDMGPU +#pragma omp critical #endif s_p = static_cast< T * >(MemoryPoolAllocator::instance()->allocate(n * sizeof(value_type))); #ifdef HAVE_OPENMP @@ -157,6 +172,9 @@ namespace gmshfem::function #pragma omp barrier #pragma omp master +#endif +#ifdef DDMGPU +#pragma omp critical #endif MemoryPoolAllocator::instance()->deallocate(s_p, n * sizeof(value_type)); #ifdef HAVE_OPENMP diff --git a/src/problem/Formulation.cpp b/src/problem/Formulation.cpp index a9bc72f87063a073522534800bf0099bb10edfde..4f185ec63e338a8ac52a1f9348b97d542a5c302a 100644 --- a/src/problem/Formulation.cpp +++ b/src/problem/Formulation.cpp @@ -976,14 +976,14 @@ namespace gmshfem::problem common::Timer time; time.tick(); - msg::info << "Pre-processing " << _name << "..." << msg::endl; + //msg::info << "Pre-processing " << _name << "..." << msg::endl; if(_A == nullptr || !_solver) { throw common::Exception("This system is not initialized: did you forgot to call 'Formulation::initSystem()'?"); } if(!s_checkFieldsValidity(_unknownFields, _name)) { - msg::info << "Pre-processing aborted" << msg::endl; + //msg::info << "Pre-processing aborted" << msg::endl; time.tock(); return time; } @@ -1019,20 +1019,20 @@ namespace gmshfem::problem prepro.tock(); double bubbleRatio = _dofs.nbrUnknownDofs() == 0 ? 0 : 100. * double(_dofs.nbrBubbleDofs()) / _dofs.nbrUnknownDofs(); - msg::info << _dofs.nbrDofs() << " dofs created in " << prepro << "s:" << msg::endl; - msg::info << " - " << _dofs.nbrUnknownDofs() << " unknown dofs" << msg::endl; - msg::info << " * " << _dofs.nbrBubbleDofs() << " bubble unknown dofs " << msg::fill(40, '.') << " " << msg::precision(3) << bubbleRatio << "%" << msg::endl; + // msg::info << _dofs.nbrDofs() << " dofs created in " << prepro << "s:" << msg::endl; + // msg::info << " - " << _dofs.nbrUnknownDofs() << " unknown dofs" << msg::endl; + // msg::info << " * " << _dofs.nbrBubbleDofs() << " bubble unknown dofs " << msg::fill(40, '.') << " " << msg::precision(3) << bubbleRatio << "%" << msg::endl; if(_dofs.nbrUnknownGlobalDofs()) { - msg::info << " * " << _dofs.nbrUnknownGlobalDofs() << " unknown global dofs" << msg::endl; + //msg::info << " * " << _dofs.nbrUnknownGlobalDofs() << " unknown global dofs" << msg::endl; } - msg::info << " - " << _dofs.nbrFixedDofs() << " fixed dofs" << msg::endl; + //msg::info << " - " << _dofs.nbrFixedDofs() << " fixed dofs" << msg::endl; if(_dofs.nbrFixedGlobalDofs()) { - msg::info << " * " << _dofs.nbrFixedGlobalDofs() << " fixed global dofs" << msg::endl; + //msg::info << " * " << _dofs.nbrFixedGlobalDofs() << " fixed global dofs" << msg::endl; } if(_dofs.nbrLinkedDofs()) { double bubbleLinkedRatio = _dofs.nbrLinkedDofs() == 0 ? 0 : 100. * double(_dofs.nbrBubbleLinkedDofs()) / _dofs.nbrLinkedDofs(); - msg::info << " - " << _dofs.nbrLinkedDofs() << " linked dofs" << msg::endl; - msg::info << " * " << _dofs.nbrBubbleLinkedDofs() << " bubble linked dofs " << msg::fill(40, '.') << " " << msg::precision(3) << bubbleLinkedRatio << "%" << msg::endl; + // msg::info << " - " << _dofs.nbrLinkedDofs() << " linked dofs" << msg::endl; + // msg::info << " * " << _dofs.nbrBubbleLinkedDofs() << " bubble linked dofs " << msg::fill(40, '.') << " " << msg::precision(3) << bubbleLinkedRatio << "%" << msg::endl; } if(common::Options::instance()->memory) { @@ -1040,7 +1040,7 @@ namespace gmshfem::problem for(auto i = 0ULL; i < _unknownFields.size(); ++i) { memory += _unknownFields[i].second->memory(); } - msg::info << "Memory footprint of fields: " << memory << msg::endl; + //msg::info << "Memory footprint of fields: " << memory << msg::endl; } for(auto i = 0ULL; i < _unknownFields.size(); ++i) { @@ -1108,12 +1108,12 @@ namespace gmshfem::problem } time.tock(); - msg::info << "Done pre-processing in " << time << "s" << msg::endl; + //msg::info << "Done pre-processing in " << time << "s" << msg::endl; if(common::Options::instance()->memory) { auto memory = _A->memory(); for (auto& bi: _multiB) memory += bi.memory(); - msg::info << "Memory footprint of system: " << memory << msg::endl; + //msg::info << "Memory footprint of system: " << memory << msg::endl; } return time;