From 458174da5198fb2e6239616e594b955ac6b8e2b0 Mon Sep 17 00:00:00 2001 From: Roland Greffe <r.greffe@uliege.be> Date: Wed, 14 May 2025 12:24:07 +0200 Subject: [PATCH] Faster grouped saving --- src/post/SaveFunction.cpp | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/src/post/SaveFunction.cpp b/src/post/SaveFunction.cpp index ddacf019..2743e81f 100644 --- a/src/post/SaveFunction.cpp +++ b/src/post/SaveFunction.cpp @@ -33,6 +33,9 @@ namespace gmshfem::post // real part #ifdef HAVE_OPENMP #pragma omp for +#endif +#ifdef DDMGPU +#pragma omp parallel for #endif for(auto i = 0ULL; i < gmshElementsTags.size(); ++i) { for(auto j = 0U; j < nbrNodesByElements; ++j) { @@ -47,6 +50,9 @@ namespace gmshfem::post // imaginary part #ifdef HAVE_OPENMP #pragma omp for +#endif +#ifdef DDMGPU +#pragma omp parallel for #endif for(auto i = 0ULL; i < gmshElementsTags.size(); ++i) { gmshData[i].clear(); @@ -62,6 +68,9 @@ namespace gmshfem::post else { #ifdef HAVE_OPENMP #pragma omp for +#endif +#ifdef DDMGPU +#pragma omp parallel for #endif for(auto i = 0ULL; i < gmshElementsTags.size(); ++i) { for(auto j = 0U; j < nbrNodesByElements; ++j) { @@ -84,6 +93,9 @@ namespace gmshfem::post // real part #ifdef HAVE_OPENMP #pragma omp for +#endif +#ifdef DDMGPU +#pragma omp parallel for #endif for(auto i = 0ULL; i < gmshElementsTags.size(); ++i) { gmshData[i].reserve(3); @@ -101,6 +113,9 @@ namespace gmshfem::post // imaginary part #ifdef HAVE_OPENMP #pragma omp for +#endif +#ifdef DDMGPU +#pragma omp parallel for #endif for(auto i = 0ULL; i < gmshElementsTags.size(); ++i) { gmshData[i].clear(); @@ -119,6 +134,9 @@ namespace gmshfem::post else { #ifdef HAVE_OPENMP #pragma omp for +#endif +#ifdef DDMGPU +#pragma omp parallel for #endif for(auto i = 0ULL; i < gmshElementsTags.size(); ++i) { gmshData[i].reserve(3); @@ -144,6 +162,9 @@ namespace gmshfem::post // real part #ifdef HAVE_OPENMP #pragma omp for +#endif +#ifdef DDMGPU +#pragma omp parallel for #endif for(auto i = 0ULL; i < gmshElementsTags.size(); ++i) { gmshData[i].reserve(9); @@ -163,6 +184,9 @@ namespace gmshfem::post // imaginary part #ifdef HAVE_OPENMP #pragma omp for +#endif +#ifdef DDMGPU +#pragma omp parallel for #endif for(auto i = 0ULL; i < gmshElementsTags.size(); ++i) { gmshData[i].clear(); @@ -183,6 +207,9 @@ namespace gmshfem::post else { #ifdef HAVE_OPENMP #pragma omp for +#endif +#ifdef DDMGPU +#pragma omp parallel for #endif for(auto i = 0ULL; i < gmshElementsTags.size(); ++i) { gmshData[i].reserve(9); @@ -242,6 +269,7 @@ namespace gmshfem::post #ifdef HAVE_OPENMP #pragma omp parallel num_threads(omp::getMaxThreads()) #endif + { const unsigned int numThreads = omp::getNumThreads(); const unsigned int myThreadID = omp::getThreadNum(); -- GitLab