From a263fbe60341acb8ddc782dcc93d2b74eb621cf8 Mon Sep 17 00:00:00 2001
From: Christophe Geuzaine <cgeuzaine@uliege.be>
Date: Sun, 20 Feb 2022 08:56:29 +0100
Subject: [PATCH] remove all calls to omp_set_num_threads() and replace them
 with explicit num_threads() clauses; this makes it easier for Gmsh to "play
 nice" with other OpenMP codes that use the Gmsh API

---
 CHANGELOG.txt                           |  3 +-
 contrib/MeshOptimizer/MeshOptimizer.cpp |  8 ++-
 doc/texinfo/opt_general.texi            |  6 +-
 doc/texinfo/opt_mesh.texi               |  6 +-
 src/common/Context.h                    |  2 +
 src/common/DefaultOptions.h             |  8 +--
 src/common/GmshGlobal.cpp               |  5 +-
 src/common/GmshMessage.cpp              | 10 +--
 src/common/GmshMessage.h                |  3 -
 src/common/Options.cpp                  |  7 +-
 src/common/gmsh.cpp                     |  5 +-
 src/fltk/optionWindow.cpp               |  2 +-
 src/geo/GModelIO_MSH4.cpp               |  4 +-
 src/geo/GModelVertexArrays.cpp          |  4 +-
 src/mesh/Generator.cpp                  | 93 ++++++++++---------------
 src/mesh/meshGRegionHxt.cpp             | 28 ++++++--
 src/mesh/meshQuadQuasiStructured.cpp    | 46 ++++++++----
 17 files changed, 126 insertions(+), 114 deletions(-)

diff --git a/CHANGELOG.txt b/CHANGELOG.txt
index 262b9b49ff..66e121b624 100644
--- a/CHANGELOG.txt
+++ b/CHANGELOG.txt
@@ -1,5 +1,6 @@
 (Work-in-progress): dynamic Gmsh library now also only exports public symbols on
-macOS and Linux, like it does on Windows; small bug fixes.
+macOS and Linux, like it does on Windows; better handling of max. thread
+settings; small bug fixes.
 
 * New API function: mesh/getDuplicateNodes
 
diff --git a/contrib/MeshOptimizer/MeshOptimizer.cpp b/contrib/MeshOptimizer/MeshOptimizer.cpp
index 7f0fb1c28f..5843da3563 100644
--- a/contrib/MeshOptimizer/MeshOptimizer.cpp
+++ b/contrib/MeshOptimizer/MeshOptimizer.cpp
@@ -456,7 +456,9 @@ namespace {
     }
     if(par.nCurses) displayResultTable(nbPatchSuccess, toOptimize.size());
 
-//#pragma omp parallel for schedule(dynamic)
+  int nthreads = CTX::instance()->numThreads;
+  if(!nthreads) nthreads = Msg::GetMaxThreads();
+#pragma omp parallel for schedule(dynamic) num_threads(nthreads)
     for(int iPatch = 0; iPatch < toOptimize.size(); ++iPatch) {
       // Initialize optimization and output if asked
       if(par.nCurses) {
@@ -493,7 +495,7 @@ namespace {
       // Evaluate mesh and update it if (partial) success
       opt.updateResults();
 
-//#pragma omp critical
+#pragma omp critical
       {
         if(newObjFunctionRange.size() == 0) {
           newObjFunctionRange = opt.objFunction()->minMax();
@@ -513,7 +515,7 @@ namespace {
 
       if(success >= 0) opt.patch.updateGEntityPositions();
 
-//#pragma omp critical
+#pragma omp critical
       {
         par.success = std::min(par.success, success);
         nbPatchSuccess[success + 1]++;
diff --git a/doc/texinfo/opt_general.texi b/doc/texinfo/opt_general.texi
index be1adcff71..a5402fd824 100644
--- a/doc/texinfo/opt_general.texi
+++ b/doc/texinfo/opt_general.texi
@@ -38,7 +38,7 @@ Saved in: @code{General.OptionsFileName}
 
 @item General.BuildInfo
 Gmsh build information (read-only)@*
-Default value: @code{"Version: 4.9.5-git-f511f1b2c; License: GNU General Public License; Build OS: MacOSX-sdk; Build date: 20220204; Build host: MacBook-Pro-Christophe.local; Build options: 64Bit ALGLIB[contrib] ANN[contrib] Bamg Blossom Cairo Cgns DIntegration Dlopen DomHex Eigen[contrib] Fltk GMP Gmm[contrib] Hxt Jpeg Kbipack MathEx[contrib] Med Mesh Metis[contrib] Mmg Mpeg Netgen ONELAB ONELABMetamodel OpenCASCADE OpenCASCADE-CAF OpenGL OpenMP OptHom Parasolid ParasolidSTEP Parser Plugins Png Post QuadMeshingTools QuadTri Solver TetGen/BR TouchBar Voro++[contrib] WinslowUntangler Zlib; FLTK version: 1.4.0; OCC version: 7.7.0; MED version: 4.1.1; Packaged by: geuzaine; Web site: https://gmsh.info; Issue tracker: https://gitlab.onelab.info/gmsh/gmsh/issues"}@*
+Default value: @code{"Version: 4.9.5-git-a653c2790; License: GNU General Public License; Build OS: MacOSX-sdk; Build date: 20220217; Build host: MacBook-Pro-Christophe.local; Build options: 64Bit ALGLIB[contrib] ANN[contrib] Bamg Blossom Cairo Cgns DIntegration Dlopen DomHex Eigen[contrib] Fltk GMP Gmm[contrib] Hxt Jpeg Kbipack MathEx[contrib] Med Mesh Metis[contrib] Mmg Mpeg Netgen ONELAB ONELABMetamodel OpenCASCADE OpenCASCADE-CAF OpenGL OpenMP OptHom Parasolid ParasolidSTEP Parser Plugins Png Post QuadMeshingTools QuadTri Solver TetGen/BR TouchBar Voro++[contrib] WinslowUntangler Zlib; FLTK version: 1.4.0; OCC version: 7.7.0; MED version: 4.1.1; Packaged by: geuzaine; Web site: https://gmsh.info; Issue tracker: https://gitlab.onelab.info/gmsh/gmsh/issues"}@*
 Saved in: @code{-}
 
 @item General.BuildOptions
@@ -168,7 +168,7 @@ Saved in: @code{General.SessionFileName}
 
 @item General.Version
 Gmsh version (read-only)@*
-Default value: @code{"4.9.5-git-f511f1b2c"}@*
+Default value: @code{"4.9.5-git-a653c2790"}@*
 Saved in: @code{-}
 
 @item General.WatchFilePattern
@@ -942,7 +942,7 @@ Default value: @code{0}@*
 Saved in: @code{General.OptionsFileName}
 
 @item General.NumThreads
-Set the maximum number of threads when Gmsh is compiled with OpenMP support (0: use system default, i.e. OMP_NUM_THREADS)@*
+Maximum number of threads used by Gmsh when compiled with OpenMP support (0: use system default, i.e. OMP_NUM_THREADS)@*
 Default value: @code{1}@*
 Saved in: @code{General.OptionsFileName}
 
diff --git a/doc/texinfo/opt_mesh.texi b/doc/texinfo/opt_mesh.texi
index ed21efb414..70f79b1f2d 100644
--- a/doc/texinfo/opt_mesh.texi
+++ b/doc/texinfo/opt_mesh.texi
@@ -297,17 +297,17 @@ Default value: @code{0}@*
 Saved in: @code{General.OptionsFileName}
 
 @item Mesh.MaxNumThreads1D
-Maximum number of threads for 1D meshing (0: use default)@*
+Maximum number of threads for 1D meshing (0: use General.NumThreads)@*
 Default value: @code{0}@*
 Saved in: @code{General.OptionsFileName}
 
 @item Mesh.MaxNumThreads2D
-Maximum number of threads for 2D meshing (0: use default)@*
+Maximum number of threads for 2D meshing (0: use General.NumThreads)@*
 Default value: @code{0}@*
 Saved in: @code{General.OptionsFileName}
 
 @item Mesh.MaxNumThreads3D
-Maximum number of threads for 3D meshing (0: use default)@*
+Maximum number of threads for 3D meshing (0: use General.NumThreads)@*
 Default value: @code{0}@*
 Saved in: @code{General.OptionsFileName}
 
diff --git a/src/common/Context.h b/src/common/Context.h
index 1d84d1c6e1..4cf002a860 100644
--- a/src/common/Context.h
+++ b/src/common/Context.h
@@ -163,6 +163,8 @@ public:
   int guiColorScheme, guiRefreshRate;
   // print messages on to the terminal?
   int terminal;
+  // number of threads (0 == use system default)
+  int numThreads;
   // detached processes (WIN32)?
   int detachedProcess;
   // number of graphical windows/tiles
diff --git a/src/common/DefaultOptions.h b/src/common/DefaultOptions.h
index e985835684..f31244c24a 100644
--- a/src/common/DefaultOptions.h
+++ b/src/common/DefaultOptions.h
@@ -713,7 +713,7 @@ StringXNumber GeneralOptions_Number[] = {
     "Disable interactive dialog windows in scripts (and use default values "
     "instead)" },
   { F|O, "NumThreads" , opt_general_num_threads , 1. ,
-    "Set the maximum number of threads when Gmsh is compiled with OpenMP support "
+    "Maximum number of threads used by Gmsh when compiled with OpenMP support "
     "(0: use system default, i.e. OMP_NUM_THREADS)"},
 
   { F|S, "OptionsPositionX" , opt_general_option_position0 , 650. ,
@@ -1232,11 +1232,11 @@ StringXNumber MeshOptions_Number[] = {
     "Maximum number of point insertion iterations in 3D Delaunay mesher "
     "(0: unlimited)" },
   { F|O, "MaxNumThreads1D" , opt_mesh_max_num_threads_1d , 0. ,
-    "Maximum number of threads for 1D meshing (0: use default)" },
+    "Maximum number of threads for 1D meshing (0: use General.NumThreads)" },
   { F|O, "MaxNumThreads2D" , opt_mesh_max_num_threads_2d , 0. ,
-    "Maximum number of threads for 2D meshing (0: use default)" },
+    "Maximum number of threads for 2D meshing (0: use General.NumThreads)" },
   { F|O, "MaxNumThreads3D" , opt_mesh_max_num_threads_3d , 0. ,
-    "Maximum number of threads for 3D meshing (0: use default)" },
+    "Maximum number of threads for 3D meshing (0: use General.NumThreads)" },
   { F|O, "MaxRetries" , opt_mesh_max_retries , 10 ,
     "Maximum number of times meshing is retried on curves and surfaces with a "
     "pending mesh"},
diff --git a/src/common/GmshGlobal.cpp b/src/common/GmshGlobal.cpp
index babc2083c7..dc1e54fd36 100644
--- a/src/common/GmshGlobal.cpp
+++ b/src/common/GmshGlobal.cpp
@@ -257,10 +257,11 @@ int GmshFinalize()
 
 static void StartupMessage()
 {
+  int nt = CTX::instance()->numThreads;
+  if(!nt) nt = Msg::GetMaxThreads();
   Msg::Info("Running '%s' [Gmsh %s, %d node%s, max. %d thread%s]",
             Msg::GetCommandLineFull().c_str(), GMSH_VERSION, Msg::GetCommSize(),
-            Msg::GetCommSize() > 1 ? "s" : "", Msg::GetMaxThreads(),
-            Msg::GetMaxThreads() > 1 ? "s" : "");
+            Msg::GetCommSize() > 1 ? "s" : "", nt, nt > 1 ? "s" : "");
   Msg::Info("Started on %s", Msg::GetLaunchDate().c_str());
 }
 
diff --git a/src/common/GmshMessage.cpp b/src/common/GmshMessage.cpp
index e1657a5b4a..3f335123c0 100644
--- a/src/common/GmshMessage.cpp
+++ b/src/common/GmshMessage.cpp
@@ -65,7 +65,6 @@ std::map<std::string, double> Msg::_timers;
 bool Msg::_infoCpu = false;
 bool Msg::_infoMem = false;
 double Msg::_startTime = 0.;
-int Msg::_startMaxThreads = 0;
 int Msg::_warningCount = 0;
 int Msg::_errorCount = 0;
 int Msg::_atLeastOneErrorInRun = 0;
@@ -126,7 +125,6 @@ static void addGmshPathToEnvironmentVar(const std::string &name)
 void Msg::Initialize(int argc, char **argv)
 {
   _startTime = TimeOfDay();
-  _startMaxThreads = GetMaxThreads();
 #if defined(HAVE_MPI)
   int flag;
   MPI_Initialized(&flag);
@@ -216,8 +214,6 @@ void Msg::Finalize()
     MPI_Finalize();
 #endif
   FinalizeOnelab();
-  if(GetMaxThreads() != _startMaxThreads)
-    SetNumThreads(_startMaxThreads);
 }
 
 int Msg::GetCommRank()
@@ -1629,11 +1625,7 @@ int Msg::GetThreadNum(){ return omp_get_thread_num(); }
 #else
 
 int Msg::GetNumThreads(){ return 1; }
-void Msg::SetNumThreads(int num)
-{
-  if(num > 1)
-    Msg::Warning("Setting number of threads to %d requires OpenMP", num);
-}
+void Msg::SetNumThreads(int num){ }
 int Msg::GetMaxThreads(){ return 1; }
 int Msg::GetThreadNum(){ return 0; }
 
diff --git a/src/common/GmshMessage.h b/src/common/GmshMessage.h
index ddc587cf7f..4f20a9a3fc 100644
--- a/src/common/GmshMessage.h
+++ b/src/common/GmshMessage.h
@@ -48,8 +48,6 @@ private:
   static bool _infoMem;
   // starting time (gettimeofday at startup)
   static double _startTime;
-  // max number of threads at startup
-  static int _startMaxThreads;
   // counters
   static int _warningCount, _errorCount, _atLeastOneErrorInRun;
   static std::string _firstWarning, _firstError, _lastError;
@@ -177,7 +175,6 @@ public:
                               const std::string &exe = "");
   static void SetOnelabChanged(int value, const std::string &client = "Gmsh");
   static void ImportPhysicalGroupsInOnelab();
-  static int GetStartMaxThreads(){ return _startMaxThreads; }
 };
 
 // a class to print the progression and estimated remaining time
diff --git a/src/common/Options.cpp b/src/common/Options.cpp
index 0bd57de3b8..c0a8f6814c 100644
--- a/src/common/Options.cpp
+++ b/src/common/Options.cpp
@@ -4139,14 +4139,13 @@ double opt_general_light53(OPT_ARGS_NUM)
 double opt_general_num_threads(OPT_ARGS_NUM)
 {
   if(action & GMSH_SET) {
-    if(val > 0) Msg::SetNumThreads(val);
-    else Msg::SetNumThreads(Msg::GetStartMaxThreads());
+    CTX::instance()->numThreads = (int)val;
   }
 #if defined(HAVE_FLTK)
   if(FlGui::available() && (action & GMSH_GUI))
-    FlGui::instance()->options->general.value[32]->value(Msg::GetMaxThreads());
+    FlGui::instance()->options->general.value[32]->value(CTX::instance()->numThreads);
 #endif
-  return Msg::GetMaxThreads();
+  return CTX::instance()->numThreads;
 }
 
 double opt_geometry_transform(OPT_ARGS_NUM)
diff --git a/src/common/gmsh.cpp b/src/common/gmsh.cpp
index dfbeb1f5f2..566abb3805 100644
--- a/src/common/gmsh.cpp
+++ b/src/common/gmsh.cpp
@@ -5209,9 +5209,12 @@ GMSH_API void gmsh::model::mesh::getPeriodicKeys(
   entityKeysMaster = entityKeys;
   coordMaster = coord;
 
+  int nthreads = CTX::instance()->numThreads;
+  if(!nthreads) nthreads = Msg::GetMaxThreads();
+
   if(functionSpaceType == "IsoParametric" ||
      functionSpaceType == "Lagrange") {
-#pragma omp parallel for
+#pragma omp parallel for num_threads(nthreads)
     for(std::size_t i = 0; i < entityKeys.size(); i++) {
       MVertex v(0., 0., 0., nullptr, entityKeys[i]);
       auto mv = ge->correspondingVertices.find(&v);
diff --git a/src/fltk/optionWindow.cpp b/src/fltk/optionWindow.cpp
index 012f75c13b..b72f1a8b99 100644
--- a/src/fltk/optionWindow.cpp
+++ b/src/fltk/optionWindow.cpp
@@ -1546,7 +1546,7 @@ optionWindow::optionWindow(int deltaFontSize)
       general.butt[10]->callback(general_options_ok_cb);
 
       general.value[32] = new Fl_Value_Input(L + 2 * WB, 2 * WB + 9 * BH, IW,
-                                             BH, "Number of threads");
+                                             BH, "Maximum number of threads");
       general.value[32]->tooltip("General.NumThreads");
       general.value[32]->minimum(0);
       general.value[32]->maximum(16);
diff --git a/src/geo/GModelIO_MSH4.cpp b/src/geo/GModelIO_MSH4.cpp
index 5154467c1f..83b8e2a639 100644
--- a/src/geo/GModelIO_MSH4.cpp
+++ b/src/geo/GModelIO_MSH4.cpp
@@ -2845,7 +2845,9 @@ int GModel::_writePartitionedMSH4(const std::string &baseName, double version,
                                   bool binary, bool saveAll,
                                   bool saveParametric, double scalingFactor)
 {
-#pragma omp parallel for
+  int nthreads = CTX::instance()->numThreads;
+  if(!nthreads) nthreads = Msg::GetMaxThreads();
+#pragma omp parallel for num_threads(nthreads)
   for(std::size_t part = 1; part <= getNumPartitions(); part++) {
     std::ostringstream sstream;
     sstream << baseName << "_" << part << ".msh";
diff --git a/src/geo/GModelVertexArrays.cpp b/src/geo/GModelVertexArrays.cpp
index eedffd3d98..c0d707352b 100644
--- a/src/geo/GModelVertexArrays.cpp
+++ b/src/geo/GModelVertexArrays.cpp
@@ -189,7 +189,9 @@ template <class T>
 static void addElementsInArrays(GEntity *e, std::vector<T *> &elements,
                                 bool edges, bool faces)
 {
-#pragma omp parallel for schedule(dynamic)
+  int nthreads = CTX::instance()->numThreads;
+  if(!nthreads) nthreads = Msg::GetMaxThreads();
+#pragma omp parallel for schedule(dynamic) num_threads(nthreads)
   for(std::size_t i = 0; i < elements.size(); i++) {
     MElement *ele = elements[i];
 
diff --git a/src/mesh/Generator.cpp b/src/mesh/Generator.cpp
index 46ac8eaf3a..85c1dc4a18 100644
--- a/src/mesh/Generator.cpp
+++ b/src/mesh/Generator.cpp
@@ -38,7 +38,6 @@
 #include "meshGFaceBipartiteLabelling.h"
 #include "sizeField.h"
 
-
 #if defined(HAVE_DOMHEX)
 #include "simple3D.h"
 #include "yamakawa.h"
@@ -347,19 +346,20 @@ static void Mesh1D(GModel *m)
   Msg::StatusBar(true, "Meshing 1D...");
   double t1 = Cpu(), w1 = TimeOfDay();
 
-  int prevNumThreads = Msg::GetMaxThreads();
-  if(CTX::instance()->mesh.maxNumThreads1D > 0 &&
-     CTX::instance()->mesh.maxNumThreads1D <= Msg::GetMaxThreads())
-    Msg::SetNumThreads(CTX::instance()->mesh.maxNumThreads1D);
+  int nthreads = CTX::instance()->numThreads;
+  if(CTX::instance()->mesh.maxNumThreads1D > 0)
+    nthreads = CTX::instance()->mesh.maxNumThreads1D;
+  if(!nthreads) nthreads = Msg::GetMaxThreads();
 
   // boundary layers are not yet thread-safe
-  if(m->getFields()->getNumBoundaryLayerFields()) Msg::SetNumThreads(1);
+  if(m->getFields()->getNumBoundaryLayerFields())
+    nthreads = 1;
 
   for(auto it = m->firstEdge(); it != m->lastEdge(); ++it) {
     // Extruded meshes are not yet fully thread-safe (not sure why!)
     if((*it)->meshAttributes.extrude &&
        (*it)->meshAttributes.extrude->mesh.ExtrudeMesh)
-      Msg::SetNumThreads(1);
+      nthreads = 1;
   }
 
   std::vector<GEdge *> temp;
@@ -378,9 +378,8 @@ static void Mesh1D(GModel *m)
     }
 
     int nPending = 0;
-    const size_t sss = temp.size();
-#pragma omp parallel for schedule(dynamic)
-    for(size_t K = 0; K < sss; K++) {
+#pragma omp parallel for schedule(dynamic) num_threads(nthreads)
+    for(size_t K = 0; K < temp.size(); K++) {
       int localPending = 0;
       GEdge *ed = temp[K];
       if(ed->meshStatistics.status == GEdge::PENDING) {
@@ -400,8 +399,6 @@ static void Mesh1D(GModel *m)
 
   Msg::StopProgressMeter();
 
-  Msg::SetNumThreads(prevNumThreads);
-
   double t2 = Cpu(), w2 = TimeOfDay();
   CTX::instance()->meshTimer[0] = w2 - w1;
   Msg::StatusBar(true, "Done meshing 1D (Wall %gs, CPU %gs)",
@@ -483,26 +480,28 @@ static void Mesh2D(GModel *m)
   Msg::StatusBar(true, "Meshing 2D...");
   double t1 = Cpu(), w1 = TimeOfDay();
 
-  int prevNumThreads = Msg::GetMaxThreads();
-  if(CTX::instance()->mesh.maxNumThreads2D > 0 &&
-     CTX::instance()->mesh.maxNumThreads2D <= Msg::GetMaxThreads())
-    Msg::SetNumThreads(CTX::instance()->mesh.maxNumThreads2D);
+  int nthreads = CTX::instance()->numThreads;
+  if(CTX::instance()->mesh.maxNumThreads2D > 0)
+    nthreads = CTX::instance()->mesh.maxNumThreads2D;
+  if(!nthreads) nthreads = Msg::GetMaxThreads();
 
   // boundary layers are not yet thread-safe
-  if(m->getFields()->getNumBoundaryLayerFields()) Msg::SetNumThreads(1);
+  if(m->getFields()->getNumBoundaryLayerFields()) nthreads = 1;
 
   for(auto it = m->firstFace(); it != m->lastFace(); ++it) {
     // Frontal-Delaunay for quads and co are not yet thread-safe
     if((*it)->getMeshingAlgo() == ALGO_2D_FRONTAL_QUAD ||
        (*it)->getMeshingAlgo() == ALGO_2D_PACK_PRLGRMS ||
        (*it)->getMeshingAlgo() == ALGO_2D_PACK_PRLGRMS_CSTR)
-      Msg::SetNumThreads(1);
+      nthreads = 1;
+
     // Periodic meshing is not yet thread-safe
-    if((*it)->getMeshMaster() != *it) Msg::SetNumThreads(1);
+    if((*it)->getMeshMaster() != *it) nthreads = 1;
+
     // Extruded meshes are not yet fully thread-safe (not sure why!)
     if((*it)->meshAttributes.extrude &&
        (*it)->meshAttributes.extrude->mesh.ExtrudeMesh)
-      Msg::SetNumThreads(1);
+      nthreads = 1;
   }
 
   for(auto it = m->firstFace(); it != m->lastFace(); ++it)
@@ -528,7 +527,7 @@ static void Mesh2D(GModel *m)
       int nPending = 0;
       std::vector<GFace *> temp;
       temp.insert(temp.begin(), f.begin(), f.end());
-#pragma omp parallel for schedule(dynamic)
+#pragma omp parallel for schedule(dynamic) num_threads(nthreads)
       for(size_t K = 0; K < temp.size(); K++) {
         int localPending = 0;
         if(temp[K]->meshStatistics.status == GFace::PENDING) {
@@ -545,24 +544,21 @@ static void Mesh2D(GModel *m)
       if(!nPending) break;
       // iter == 2 is for meshing re-parametrized surfaces; after that, we
       // serialize (self-intersections of 1D meshes are not thread safe)!
-      if(nIter > 2) Msg::SetNumThreads(1);
+      if(nIter > 2) nthreads = 1;
       if(nIter++ > CTX::instance()->mesh.maxRetries) break;
     }
 
     Msg::StopProgressMeter();
   }
 
-  Msg::SetNumThreads(prevNumThreads);
-
   if(CTX::instance()->mesh.algo2d == ALGO_2D_QUAD_QUASI_STRUCT) {
     replaceBadQuadDominantMeshes(m);
 
-    /* In the quasi-structured pipeline, the quad-dominant mesh
-     * is subdivided into a full quad mesh */
-    /* TODO: - a faster CAD projection approach (from uv)
-     *       - verify quality during projection */
-    // bool linear = false;
-    // RefineMesh(m, linear, true, false);
+    // In the quasi-structured pipeline, the quad-dominant mesh is subdivided
+    // into a full quad mesh. TODO: 1) a faster CAD projection approach (from
+    // uv) and 2) verify quality during projection
+
+    // bool linear = false; RefineMesh(m, linear, true, false);
     RefineMeshWithBackgroundMeshProjection(m);
 
     OptimizeMesh(m, "QuadQuasiStructured");
@@ -825,18 +821,6 @@ static void Mesh3D(GModel *m)
   Msg::StatusBar(true, "Meshing 3D...");
   double t1 = Cpu(), w1 = TimeOfDay();
 
-  int prevNumThreads = Msg::GetMaxThreads();
-  if(CTX::instance()->mesh.maxNumThreads3D > 0 &&
-     CTX::instance()->mesh.maxNumThreads3D <= Msg::GetMaxThreads())
-    Msg::SetNumThreads(CTX::instance()->mesh.maxNumThreads3D);
-
-  for(auto it = m->firstRegion(); it != m->lastRegion(); ++it) {
-    // Extruded meshes are not yet fully thread-safe (not sure why!)
-    if((*it)->meshAttributes.extrude &&
-       (*it)->meshAttributes.extrude->mesh.ExtrudeMesh)
-      Msg::SetNumThreads(1);
-  }
-
   if(m->getNumRegions()) {
     Msg::StartProgressMeter(1);
     Msg::ProgressMeter(0, false, "Meshing 3D...");
@@ -977,8 +961,6 @@ static void Mesh3D(GModel *m)
     Msg::Error(debugInfo.str().c_str());
   }
 
-  Msg::SetNumThreads(prevNumThreads);
-
   double t2 = Cpu(), w2 = TimeOfDay();
   CTX::instance()->meshTimer[2] = w2 - w1;
 
@@ -1111,7 +1093,7 @@ void OptimizeMesh(GModel *m, const std::string &how, bool force, int niter)
       }
   }
   else if(how == "QuadQuasiStructured") {
-    /* The following methods only act on faces whose status is PENDING */
+    // The following methods only act on faces whose status is PENDING
     for(GFace *gf : m->getFaces())
       if(gf->meshStatistics.status == GFace::DONE) {
         gf->meshStatistics.status = GFace::PENDING;
@@ -1504,12 +1486,12 @@ void GenerateMesh(GModel *m, int ask)
     int old = m->getMeshStatus(false);
     bool doIt = (ask >= 1 && ask <= 3);
     bool exists = backgroundMeshAndGuidingFieldExists(m);
-    bool overwriteGModelMesh = false; /* use current mesh if available */
+    bool overwriteGModelMesh = false; // use current mesh if available
     bool overwriteField = false;
     if(old == 1 && ask == 1 && exists) doIt = true;
     if(old == 1 && ask == 2 && exists) doIt = false;
     if(old == 2 && exists && (ask == 1 || ask == 2)) {
-      /* User has a mesh and wants a new one (all options may have changed) */
+      // User has a mesh and wants a new one (all options may have changed)
       doIt = true;
       overwriteField = true;
       overwriteGModelMesh = true;
@@ -1518,24 +1500,23 @@ void GenerateMesh(GModel *m, int ask)
     if(old == 2 && ask == 2 && exists) doIt = true;
     if(doIt) {
       bool deleteGModelMeshAfter =
-        true; /* mesh saved in background, no longer needed */
+        true; // mesh saved in background, no longer needed
       BuildBackgroundMeshAndGuidingField(m, overwriteGModelMesh,
                                          deleteGModelMeshAfter, overwriteField);
     }
 
     if(CTX::instance()->mesh.algo2d == ALGO_2D_QUAD_QUASI_STRUCT && old == 2 &&
        exists && (ask == 1 || ask == 2)) {
-      /* transferSeamGEdgesVerticesToGFace() called by quadqs remove the 1D
-       * meshes of the seam GEdge, so 2D initial meshing does not work without
-       * first remeshing the seam GEdge. We delete the whole mesh by security */
+      // transferSeamGEdgesVerticesToGFace() called by quadqs remove the 1D
+      // meshes of the seam GEdge, so 2D initial meshing does not work without
+      // first remeshing the seam GEdge. We delete the whole mesh by security
       m->deleteMesh();
     }
 
     if(CTX::instance()->mesh.algo2d == ALGO_2D_QUAD_QUASI_STRUCT) {
-      /* note: the creation of QuadqsContextUpdater modifies many
-       *       meshing parameters
-       *       current parameter values are saved and will be restored
-       *       at the destruction of qqs */
+      // note: the creation of QuadqsContextUpdater modifies many meshing
+      // parameters; current parameter values are saved and will be restored at
+      // the destruction of qqs
       qqs = new QuadqsContextUpdater();
     }
 
@@ -1543,7 +1524,7 @@ void GenerateMesh(GModel *m, int ask)
       std::set<GFace *> faces;
       for(GFace *gf : m->getFaces())
         if(gf->edges().size() == 4) { faces.insert(gf); }
-      double maxDiffRel = 0.34; /* do not deviate more than 34% from size map */
+      double maxDiffRel = 0.34; // do not deviate more than 34% from size map
       MeshSetTransfiniteFacesAutomatic(faces, 2.35, true, maxDiffRel);
     }
   }
diff --git a/src/mesh/meshGRegionHxt.cpp b/src/mesh/meshGRegionHxt.cpp
index ab87c8a36c..a399ee5b8d 100644
--- a/src/mesh/meshGRegionHxt.cpp
+++ b/src/mesh/meshGRegionHxt.cpp
@@ -29,6 +29,15 @@ extern "C" {
 #include "hxt_tetDelaunay.h"
 }
 
+static int getNumThreads()
+{
+  int nthreads = CTX::instance()->numThreads;
+  if(CTX::instance()->mesh.maxNumThreads3D > 0)
+    nthreads = CTX::instance()->mesh.maxNumThreads3D;
+  if(!nthreads) nthreads = Msg::GetMaxThreads();
+  return nthreads;
+}
+
 static HXTStatus messageCallback(HXTMessage *msg)
 {
   if(msg) Msg::Info("%s", msg->string);
@@ -45,7 +54,8 @@ static HXTStatus nodalSizesCallBack(double *pts, uint32_t *volume,
 
   HXT_INFO("Computing %smesh sizes...", useInterpolatedSize ? "interpolated " : "");
 
-#pragma omp parallel for schedule(dynamic)
+  int nthreads = getNumThreads();
+#pragma omp parallel for schedule(dynamic) num_threads(nthreads)
   for(size_t i = 0; i < numPts; i++) {
     if(volume[i] < 0 || volume[i] >= allGR->size()) {
       Msg::Error("Invalid volume tag %d in mesh size calculation", volume[i]);
@@ -253,7 +263,7 @@ static HXTStatus Hxt2Gmsh(std::vector<GRegion *> &regions, HXTMesh *m,
   HXT_CHECK( hxtAlignedFree(&m->triangles.color) );
 
 #if defined(_OPENMP)
-  int nthreads = omp_get_max_threads();
+  int nthreads = getNumThreads();
   if(nthreads > 1) {
     const uint32_t nR = regions.size();
     const uint32_t nV = m->vertices.num;
@@ -265,7 +275,7 @@ static HXTStatus Hxt2Gmsh(std::vector<GRegion *> &regions, HXTMesh *m,
                          sizeof(uint32_t)) );
     uint32_t* vR_all = hp_all + nR * (nthreads + 1); // color per point and per thread
 
-    #pragma omp parallel
+    #pragma omp parallel num_threads(nthreads)
     {
       #pragma omp single
       {
@@ -525,10 +535,12 @@ static HXTStatus _meshGRegionHxt(std::vector<GRegion *> &regions)
   std::vector<MVertex *> c2v;
   Gmsh2Hxt(regions, mesh, v2c, c2v);
 
+  int nthreads = getNumThreads();
+
   HXTTetMeshOptions options = {
-    0, // int defaultThreads;
-    0, // int delaunayThreads;
-    0, // int improveThreads;
+    nthreads, // int defaultThreads;
+    nthreads, // int delaunayThreads;
+    nthreads, // int improveThreads;
     1, // int reproducible;
     (Msg::GetVerbosity() > 5) ? 2 : 1, // int verbosity;
     1, // int stat;
@@ -583,6 +595,8 @@ static HXTStatus _delaunayMeshIn3DHxt(std::vector<MVertex *> &verts,
   mesh->vertices.num = nvert;
   mesh->vertices.size = nvert;
 
+  int nthreads = getNumThreads();
+
   HXTDelaunayOptions delOptions = {
     nullptr, // bbox
     nullptr, // nodalSizes
@@ -592,7 +606,7 @@ static HXTStatus _delaunayMeshIn3DHxt(std::vector<MVertex *> &verts,
     1, // perfectDelaunay
     0, // verbosity
     0, // reproducible
-    0 // delaunayThreads (0 = omp_get_max_threads)
+    nthreads // delaunayThreads (0 = omp_get_max_threads)
   };
 
   // HXT_CHECK(hxtDelaunay(mesh, &delOptions));
diff --git a/src/mesh/meshQuadQuasiStructured.cpp b/src/mesh/meshQuadQuasiStructured.cpp
index dd28933a14..5c3c74d61b 100644
--- a/src/mesh/meshQuadQuasiStructured.cpp
+++ b/src/mesh/meshQuadQuasiStructured.cpp
@@ -44,7 +44,6 @@
 #endif
 
 #if defined(HAVE_QUADMESHINGTOOLS)
-/* QuadMeshingTools includes */
 #include "cppUtils.h"
 #include "qmtMeshUtils.h"
 #include "qmtCrossField.h"
@@ -86,6 +85,15 @@ constexpr bool SHOW_DQR = false;
  * so the visualization is not broken by the integers */
 constexpr double VIEW_INT_SCALING = 1.e-8;
 
+static int getNumThreads()
+{
+  int nthreads = CTX::instance()->numThreads;
+  if(CTX::instance()->mesh.maxNumThreads2D > 0)
+    nthreads = CTX::instance()->mesh.maxNumThreads2D;
+  if(!nthreads) nthreads = Msg::GetMaxThreads();
+  return nthreads;
+}
+
 int buildBackgroundField(
   GModel *gm, const std::vector<MTriangle *> &global_triangles,
   const std::vector<std::array<double, 9> > &global_triangle_directions,
@@ -570,7 +578,8 @@ int BuildBackgroundMeshAndGuidingField(GModel *gm, bool overwriteGModelMesh,
     global_triangles.reserve(ntris);
     global_size_map.reserve(3 * ntris);
 
-#pragma omp parallel for schedule(dynamic)
+    int nthreads = getNumThreads();
+#pragma omp parallel for schedule(dynamic) num_threads(nthreads)
     for(size_t f = 0; f < faces.size(); ++f) {
       GFace *gf = faces[f];
 
@@ -2063,7 +2072,8 @@ int RefineMeshWithBackgroundMeshProjectionSimple(GModel *gm)
   /* old2new use to update mesh elements after */
   unordered_map<MVertex *, MVertex *> old2new;
 
-#pragma omp parallel for schedule(dynamic)
+  int nthreads = getNumThreads();
+#pragma omp parallel for schedule(dynamic) num_threads(nthreads)
   for(size_t e = 0; e < edges.size(); ++e) {
     GEdge *ge = edges[e];
     if(CTX::instance()->mesh.meshOnlyVisible && !ge->getVisibility()) continue;
@@ -2119,7 +2129,7 @@ int RefineMeshWithBackgroundMeshProjectionSimple(GModel *gm)
                  "using CAD projection (slow)");
   }
 
-#pragma omp parallel for schedule(dynamic)
+#pragma omp parallel for schedule(dynamic) num_threads(nthreads)
   for(size_t f = 0; f < faces.size(); ++f) {
     GFace *gf = faces[f];
     if(CTX::instance()->mesh.meshOnlyVisible && !gf->getVisibility()) continue;
@@ -2213,7 +2223,7 @@ int RefineMeshWithBackgroundMeshProjectionSimple(GModel *gm)
   }
 
 /* Update elements */
-#pragma omp parallel for schedule(dynamic)
+#pragma omp parallel for schedule(dynamic) num_threads(nthreads)
   for(size_t e = 0; e < edges.size(); ++e) {
     GEdge *ge = edges[e];
     for(MLine *l : ge->lines) {
@@ -2225,7 +2235,7 @@ int RefineMeshWithBackgroundMeshProjectionSimple(GModel *gm)
     }
   }
 
-#pragma omp parallel for schedule(dynamic)
+#pragma omp parallel for schedule(dynamic) num_threads(nthreads)
   for(size_t f = 0; f < faces.size(); ++f) {
     GFace *gf = faces[f];
     for(size_t i = 0; i < gf->getNumMeshElements(); ++i) {
@@ -2315,7 +2325,8 @@ int RefineMeshWithBackgroundMeshProjection(GModel *gm)
     for(GEdge *ge : fedges) edgeToFaces[ge].insert(gf);
   }
 
-#pragma omp parallel for schedule(dynamic)
+  int nthreads = getNumThreads();
+#pragma omp parallel for schedule(dynamic) num_threads(nthreads)
   for(size_t e = 0; e < edges.size(); ++e) {
     GEdge *ge = edges[e];
     if(CTX::instance()->mesh.meshOnlyVisible && !ge->getVisibility()) continue;
@@ -2356,7 +2367,7 @@ int RefineMeshWithBackgroundMeshProjection(GModel *gm)
     }
   }
 
-#pragma omp parallel for schedule(dynamic)
+#pragma omp parallel for schedule(dynamic) num_threads(nthreads)
   for(size_t f = 0; f < faces.size(); ++f) {
     GFace *gf = faces[f];
     if(CTX::instance()->mesh.meshOnlyVisible && !gf->getVisibility()) continue;
@@ -2399,7 +2410,7 @@ int RefineMeshWithBackgroundMeshProjection(GModel *gm)
 /* Geometric projection on model */
 
 /* - projections on curves */
-#pragma omp parallel for schedule(dynamic)
+#pragma omp parallel for schedule(dynamic) num_threads(nthreads)
   for(size_t e = 0; e < edges.size(); ++e) {
     GEdge *ge = edges[e];
     auto it = toProjectOnCurve.find(ge);
@@ -2434,7 +2445,7 @@ int RefineMeshWithBackgroundMeshProjection(GModel *gm)
   }
 
 /* - projections on faces */
-#pragma omp parallel for schedule(dynamic)
+#pragma omp parallel for schedule(dynamic) num_threads(nthreads)
   for(size_t f = 0; f < faces.size(); ++f) {
     GFace *gf = faces[f];
 
@@ -2581,7 +2592,8 @@ int replaceBadQuadDominantMeshes(GModel *gm)
 {
   std::vector<GFace *> faces = model_faces(gm);
 
-#pragma omp parallel for schedule(dynamic)
+  int nthreads = getNumThreads();
+#pragma omp parallel for schedule(dynamic) num_threads(nthreads)
   for(size_t f = 0; f < faces.size(); ++f) {
     GFace *gf = faces[f];
     if(CTX::instance()->mesh.meshOnlyVisible && !gf->getVisibility()) continue;
@@ -3033,7 +3045,8 @@ int quadMeshingOfSimpleFacesWithPatterns(GModel *gm,
 
   initQuadPatterns();
 
-#pragma omp parallel for schedule(dynamic)
+  int nthreads = getNumThreads();
+#pragma omp parallel for schedule(dynamic) num_threads(nthreads)
   for(size_t f = 0; f < faces.size(); ++f) {
     GFace *gf = faces[f];
     if(gf->meshStatistics.status != GFace::PENDING) continue;
@@ -3088,7 +3101,8 @@ int optimizeTopologyWithDiskQuadrangulationRemeshing(GModel *gm)
 
   std::vector<GFace *> faces = model_faces(gm);
 
-#pragma omp parallel for schedule(dynamic)
+  int nthreads = getNumThreads();
+#pragma omp parallel for schedule(dynamic) num_threads(nthreads)
   for(size_t f = 0; f < faces.size(); ++f) {
     GFace *gf = faces[f];
     if(gf->meshStatistics.status != GFace::PENDING) continue;
@@ -3156,7 +3170,8 @@ int optimizeTopologyWithCavityRemeshing(GModel *gm)
 
   GlobalBackgroundMesh &bmesh = getBackgroundMesh(BMESH_NAME);
 
-#pragma omp parallel for schedule(dynamic)
+  int nthreads = getNumThreads();
+#pragma omp parallel for schedule(dynamic) num_threads(nthreads)
   for(size_t f = 0; f < faces.size(); ++f) {
     GFace *gf = faces[f];
     if(gf->meshStatistics.status != GFace::PENDING) continue;
@@ -3212,7 +3227,8 @@ int optimizeQuadMeshBoundaries(GModel *gm)
 
   std::vector<GFace *> faces = model_faces(gm);
 
-#pragma omp parallel for schedule(dynamic)
+  int nthreads = getNumThreads();
+#pragma omp parallel for schedule(dynamic) num_threads(nthreads)
   for(size_t f = 0; f < faces.size(); ++f) {
     GFace *gf = faces[f];
     if(gf->meshStatistics.status != GFace::PENDING) continue;
-- 
GitLab