diff --git a/Mesh/meshGRegionHxt.cpp b/Mesh/meshGRegionHxt.cpp
index 46798909951335fedd210f034b1197e21461a88d..923798557e0264a49e271b2d81454725db46c42d 100644
--- a/Mesh/meshGRegionHxt.cpp
+++ b/Mesh/meshGRegionHxt.cpp
@@ -11,7 +11,9 @@
 #include "MVertex.h"
 #include "GRegion.h"
 #include "GFace.h"
+#include "MTetrahedron.h"
 #include "MTriangle.h"
+#include "MLine.h"
 #include "GmshMessage.h"
 
 #ifdef HAVE_HXT
@@ -21,37 +23,190 @@ extern "C" {
 #include "hxt_opt.h"
 }
 
-
 // This is a list of regions that are simply connected
 
+// Insert at the front of allFaces the faces (bounding and embedded) of all
+// regions, without duplicates. If m is not NULL, also fill the volume-to-surface
+// tables of m->brep: numVolumes, numSurfacesPerVolume and surfacesPerVolume
+// (face tags, region after region). Pass m == NULL to only get the face list.
 static HXTStatus getAllFacesOfAllRegions (std::vector<GRegion *> &regions, HXTMesh *m, std::vector<GFace *> &allFaces){
   std::set<GFace *, GEntityLessThan> allFacesSet;
-  m->brep.numVolumes = regions.size();
-  HXT_CHECK(
-    hxtAlignedMalloc(&m->brep.numSurfacesPerVolume, m->brep.numVolumes
-		     * sizeof(uint32_t)));
+  if (m){
+    m->brep.numVolumes = regions.size();
+    HXT_CHECK(
+	      hxtAlignedMalloc(&m->brep.numSurfacesPerVolume, m->brep.numVolumes
+			       * sizeof(uint32_t)));
+  }
   uint32_t to_alloc = 0;
   for(unsigned int i = 0; i < regions.size(); i++) {
     std::vector<GFace *> const &f = regions[i]->faces();
     std::vector<GFace *> const &f_e = regions[i]->embeddedFaces();
-    m->brep.numSurfacesPerVolume[i] = f.size() + f_e.size();
-    to_alloc += m->brep.numSurfacesPerVolume[i];
+    if (m){
+      m->brep.numSurfacesPerVolume[i] = f.size() + f_e.size();
+      to_alloc += m->brep.numSurfacesPerVolume[i];
+    }
     allFacesSet.insert(f.begin(), f.end());
     allFacesSet.insert(f_e.begin(), f_e.end());
   }
   allFaces.insert (allFaces.begin(), allFacesSet.begin(), allFacesSet.end());
+
+  if (!m)return HXT_STATUS_OK;
+
   HXT_CHECK(hxtAlignedMalloc(&m->brep.surfacesPerVolume, to_alloc* sizeof(uint32_t)));
   
   uint32_t counter = 0;
   for(unsigned int i = 0; i < regions.size(); i++) {
     std::vector<GFace *> const &f = regions[i]->faces();
     std::vector<GFace *> const &f_e = regions[i]->embeddedFaces();
-    for (size_t j=0;j<f.size();j++)m->brep.surfacesPerVolume[counter++]=f[i]->tag();
-    for (size_t j=0;j<f_e.size();j++)m->brep.surfacesPerVolume[counter++]=f_e[i]->tag();
+    for (size_t j=0;j<f.size();j++)m->brep.surfacesPerVolume[counter++]=f[j]->tag();
+    for (size_t j=0;j<f_e.size();j++)m->brep.surfacesPerVolume[counter++]=f_e[j]->tag();
+  }
+
+  return HXT_STATUS_OK;
+}
+
+// Insert at the front of allEdges the curves (bounding and embedded) of all
+// faces, without duplicates and in GEntityLessThan order. If m is not NULL, also
+// fill the surface-to-curve tables of m->brep: numSurfaces, numCurvesPerSurface
+// and curvesPerSurface (curve tags, face after face).
+static HXTStatus getAllEdgesOfAllFaces (std::vector<GFace *> &faces, HXTMesh *m, std::vector<GEdge *> &allEdges){
+  const size_t nFaces = faces.size();
+  if (m != NULL){
+    m->brep.numSurfaces = nFaces;
+    HXT_CHECK(hxtAlignedMalloc(&m->brep.numCurvesPerSurface,
+                               m->brep.numSurfaces * sizeof(uint32_t)));
+  }
+  uint32_t nCurveRefs = 0; // number of entries needed in curvesPerSurface
+  std::set<GEdge *, GEntityLessThan> uniqueEdges;
+  for(size_t s = 0; s < nFaces; s++) {
+    std::vector<GEdge *> const &bnd = faces[s]->edges();
+    std::vector<GEdge *> const &emb = faces[s]->embeddedEdges();
+    uniqueEdges.insert(bnd.begin(), bnd.end());
+    uniqueEdges.insert(emb.begin(), emb.end());
+    if (m == NULL) continue;
+    m->brep.numCurvesPerSurface[s] = bnd.size() + emb.size();
+    nCurveRefs += m->brep.numCurvesPerSurface[s];
+  }
+  allEdges.insert (allEdges.begin(), uniqueEdges.begin(), uniqueEdges.end());
+  if (m == NULL) return HXT_STATUS_OK;
+
+  HXT_CHECK(hxtAlignedMalloc(&m->brep.curvesPerSurface, nCurveRefs * sizeof(uint32_t)));
+  uint32_t next = 0;
+  for(size_t s = 0; s < nFaces; s++) {
+    std::vector<GEdge *> const &bnd = faces[s]->edges();
+    std::vector<GEdge *> const &emb = faces[s]->embeddedEdges();
+    for (size_t j = 0; j < bnd.size(); j++) m->brep.curvesPerSurface[next++] = bnd[j]->tag();
+    for (size_t j = 0; j < emb.size(); j++) m->brep.curvesPerSurface[next++] = emb[j]->tag();
+  }
+  return HXT_STATUS_OK;
+}
+
+// Copy the mesh stored in the HXT structure m back into the Gmsh model: the
+// lines and triangles of all curves and surfaces bounding the regions are
+// rebuilt from m, and each tetrahedron whose color is a valid index in regions
+// is appended to that region.
+// c2v (HXT vertex index -> Gmsh vertex) is resized to m->vertices.num; for
+// every index without a Gmsh vertex, one is created, stored in c2v and added to
+// the mesh_vertices of its entity, so that all the elements sharing an HXT
+// vertex share one MVertex. v2c is not used. Returns HXT_STATUS_ERROR if the
+// color of a line/triangle matches no curve/surface tag.
+static HXTStatus Hxt2Gmsh(std::vector<GRegion *> &regions, HXTMesh *m,
+                          std::map<MVertex *, int> &v2c,
+                          std::vector<MVertex *> &c2v){
+
+  std::vector<GFace *> allFaces;
+  std::vector<GEdge *> allEdges;
+  HXT_CHECK(getAllFacesOfAllRegions (regions, NULL, allFaces));
+  HXT_CHECK(getAllEdgesOfAllFaces (allFaces, NULL, allEdges));
+  std::map<int,GEdge*> i2e;
+  std::map<int,GFace*> i2f;
+  for (size_t i = 0;i<allFaces.size();i++)i2f[allFaces[i]->tag()] = allFaces[i];
+  for (size_t i = 0;i<allEdges.size();i++)i2e[allEdges[i]->tag()] = allEdges[i];
+
+  // resize() sets the entries of the vertices added by HXT to NULL: they have
+  // no Gmsh vertex yet
+  c2v.resize(m->vertices.num);
+
+  // delete the old 1D and 2D elements (not their vertices)
+  for(size_t j = 0; j < allEdges.size(); j++) {
+    GEdge *ge = allEdges[j];
+    for(size_t i = 0; i < ge->lines.size(); i++) {
+      delete ge->lines[i];
+    }
+    ge->lines.clear();
+  }
+
+  for(size_t j = 0; j < allFaces.size(); j++) {
+    GFace *gf = allFaces[j];
+    for(size_t i = 0; i < gf->triangles.size(); i++) {
+      delete gf->triangles[i];
+    }
+    gf->triangles.clear();
+  }
+
+  // lines: the color is the tag of the curve. A vertex created here is kept in
+  // c2v so that the next element using the same index reuses it.
+  for (size_t i = 0 ; i < m->lines.num ; i++){
+    uint16_t c = m->lines.colors[i];
+    std::map<int,GEdge *>::iterator ge = i2e.find(c);
+    if (ge == i2e.end())return HXT_STATUS_ERROR;
+    MVertex *vv[2];
+    for (int j=0;j<2;j++){
+      uint32_t n = m->lines.node[2*i+j];
+      if (!c2v[n]){
+        // FIXME compute true coordinates
+        double *x = &m->vertices.coord[4*n];
+        c2v[n] = new MEdgeVertex (x[0],x[1],x[2],ge->second,0);
+        ge->second->mesh_vertices.push_back(c2v[n]);
+      }
+      vv[j] = c2v[n];
+    }
+    ge->second->lines.push_back(new MLine(vv[0],vv[1]));
+  }
+
+  // triangles: the color is the tag of the surface. Vertices already created
+  // for the lines are found in c2v and are not duplicated.
+  for (size_t i = 0 ; i < m->triangles.num ; i++){
+    uint16_t c = m->triangles.colors[i];
+    std::map<int,GFace *>::iterator gf = i2f.find(c);
+    if (gf == i2f.end())return HXT_STATUS_ERROR;
+    MVertex *vv[3];
+    for (int j=0;j<3;j++){
+      uint32_t n = m->triangles.node[3*i+j];
+      if (!c2v[n]){
+        // FIXME compute true coordinates
+        double *x = &m->vertices.coord[4*n];
+        c2v[n] = new MFaceVertex (x[0],x[1],x[2],gf->second,0,0);
+        gf->second->mesh_vertices.push_back(c2v[n]);
+      }
+      vv[j] = c2v[n];
+    }
+    gf->second->triangles.push_back(new MTriangle(vv[0],vv[1],vv[2]));
+  }
+
+  // tetrahedra: the color is the index of the region in regions; tetrahedra
+  // with any other color are skipped
+  for (size_t i = 0 ; i < m->tetrahedra.num ; i++){
+    uint32_t *i0 = &m->tetrahedra.node[4*i+0];
+    uint16_t c = m->tetrahedra.colors[i];
+    if (c < regions.size()){
+      MVertex *vv[4];
+      GRegion *gr = regions[c];
+      for (int j=0;j<4;j++){
+        MVertex *v0 = c2v[i0[j]];
+        if (!v0) {
+          double *x = &m->vertices.coord[4*i0[j]];
+          v0 = new MVertex(x[0],x[1],x[2],gr);
+          gr->mesh_vertices.push_back(v0);
+          c2v[i0[j]] = v0;
+        }
+        vv[j] = v0;
+      }
+      gr->tetrahedra.push_back(new MTetrahedron(vv[0],vv[1],vv[2],vv[3]));
+    }
   }
   return HXT_STATUS_OK;
 }
-				    
 
 
 static HXTStatus Gmsh2Hxt(std::vector<GRegion *> &regions, HXTMesh *m,
@@ -60,10 +215,22 @@ static HXTStatus Gmsh2Hxt(std::vector<GRegion *> &regions, HXTMesh *m,
 {
   std::set<MVertex *> all;
   std::vector<GFace *> faces;
+  std::vector<GEdge *> edges;
   
   HXT_CHECK(getAllFacesOfAllRegions (regions, m, faces));
+  HXT_CHECK(getAllEdgesOfAllFaces (faces, m, edges));
   
   uint64_t ntri = 0;
+  uint64_t nedg = 0;
+
+  for(size_t j = 0; j < edges.size(); j++) {
+    GEdge *ge = edges[j];
+    nedg += ge->lines.size();
+    for(size_t i = 0; i < ge->lines.size(); i++) {
+      all.insert(ge->lines[i]->getVertex(0));
+      all.insert(ge->lines[i]->getVertex(1));
+    }
+  }
 
   for(size_t j = 0; j < faces.size(); j++) {
     GFace *gf = faces[j];
@@ -75,6 +242,8 @@ static HXTStatus Gmsh2Hxt(std::vector<GRegion *> &regions, HXTMesh *m,
     }
   }
 
+  //  printf("%d vertices %d triangles\n",all.size(),ntri);
+  
   m->vertices.num = m->vertices.size = all.size();
   HXT_CHECK(
     hxtAlignedMalloc(&m->vertices.coord, 4 * m->vertices.num * sizeof(double)));
@@ -91,13 +260,32 @@ static HXTStatus Gmsh2Hxt(std::vector<GRegion *> &regions, HXTMesh *m,
   }
   all.clear();
 
+  m->lines.num = m->lines.size = nedg;
+  uint64_t index = 0;
+  
+  HXT_CHECK(hxtAlignedMalloc(&m->lines.node,
+                             (m->lines.num) * 2 * sizeof(uint32_t)));
+  HXT_CHECK(hxtAlignedMalloc(&m->lines.colors,
+                             (m->lines.num) * sizeof(uint16_t)));
+
+  for(size_t j = 0; j < edges.size(); j++) {
+    GEdge *ge = edges[j];
+    for(size_t i = 0; i < ge->lines.size(); i++) {
+      m->lines.node[2 * index + 0] = v2c[ge->lines[i]->getVertex(0)];
+      m->lines.node[2 * index + 1] = v2c[ge->lines[i]->getVertex(1)];
+      m->lines.colors[index] = ge->tag();
+      index++;
+    }
+  }
+  
+
   m->triangles.num = m->triangles.size = ntri;
   HXT_CHECK(hxtAlignedMalloc(&m->triangles.node,
                              (m->triangles.num) * 3 * sizeof(uint32_t)));
   HXT_CHECK(hxtAlignedMalloc(&m->triangles.colors,
                              (m->triangles.num) * sizeof(uint16_t)));
 
-  uint64_t index = 0;
+  index = 0;
   for(size_t j = 0; j < faces.size(); j++) {
     GFace *gf = faces[j];
     for(size_t i = 0; i < gf->triangles.size(); i++) {
@@ -108,9 +296,6 @@ static HXTStatus Gmsh2Hxt(std::vector<GRegion *> &regions, HXTMesh *m,
       index++;
     }
   }
-
-  m->lines.num = m->lines.size = 0;
-
   return HXT_STATUS_OK;
 }
 
@@ -132,10 +317,24 @@ static HXTStatus _meshGRegionHxt(std::vector<GRegion *> &regions)
   std::map<MVertex *, int> v2c;
   std::vector<MVertex *> c2v;
   Gmsh2Hxt(regions, mesh, v2c, c2v);
+  
+  HXT_CHECK(hxtTetMesh3d(mesh,
+			 nthreads,
+			 nthreads,
+			 nthreads,
+			 reproducible,
+			 verbosity,
+			 stat,
+			 refine,
+                         optimize,
+			 threshold,
+			 hxt_boundary_recovery));  
+
+  
+  //  HXT_CHECK(hxtMeshWriteGmsh(mesh, "hxt.msh"));
+
+  HXT_CHECK(Hxt2Gmsh(regions, mesh,v2c,c2v));
 
-  HXT_CHECK(hxtTetMesh3d(mesh, nthreads, reproducible, verbosity, stat, refine,
-                         optimize, threshold, hxt_boundary_recovery));
-  HXT_CHECK(hxtMeshWriteGmsh(mesh, "hxt.msh"));
   HXT_CHECK(hxtMeshDelete(&mesh));
   HXT_CHECK(hxtContextDelete(&context));
   return HXT_STATUS_OK;
diff --git a/contrib/hxt/CMakeLists.txt b/contrib/hxt/CMakeLists.txt
index 91c518bcf397de49c06b6230d336f09cfe93ab1e..81ea2b9f68ecc11ed40bc21f933be3fe01b0f28f 100644
--- a/contrib/hxt/CMakeLists.txt
+++ b/contrib/hxt/CMakeLists.txt
@@ -24,17 +24,31 @@ set(SRC
   hxt_mesh3d_main.c
   hxt_mesh_size.c
   hxt_tetOpti.c
-  hxt_tetrahedra.c
   hxt_tetRepair.c
   hxt_tetUtils.c
+  hxt_tetColor.c
   hxt_tetFlag.c
   hxt_tetPostpro.c
   hxt_tet_aspect_ratio.c
+  hxt_tetDelaunay.c
   hxt_vertices.c
   hxt_parametrization.c
   hxt_mean_values.c 
   hxt_boundary_recovery.cxx
 )
 
+# do not use arithmetic contraction in predicates.c
+if ("x${CMAKE_C_COMPILER_ID}" STREQUAL "xMSVC" )
+  set_source_files_properties(predicates.c PROPERTIES COMPILE_FLAGS "/fp:strict")
+endif()
+if (CMAKE_C_COMPILER_ID MATCHES "GNU|Clang")
+  set_source_files_properties(predicates.c PROPERTIES COMPILE_FLAGS "-fno-unsafe-math-optimizations -ffp-contract=off")
+endif()
+if (CMAKE_C_COMPILER_ID STREQUAL "Intel")
+  set_source_files_properties(predicates.c PROPERTIES COMPILE_FLAGS "-fp-model strict")
+endif()
+
+
+
 file(GLOB_RECURSE HDR RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.h)
 append_gmsh_src(contrib/hxt "${SRC};${HDR}")
diff --git a/contrib/hxt/hxt_bbox.c b/contrib/hxt/hxt_bbox.c
index 87b078b403d38917c6a593e4c3a6f9b4e630109d..44c6dc28f6bccaee553b50eb03bde93dea980b88 100644
--- a/contrib/hxt/hxt_bbox.c
+++ b/contrib/hxt/hxt_bbox.c
@@ -82,3 +82,22 @@ HXTStatus hxtBboxAdd(HXTBbox* bbox, double* coord, const uint32_t n){
         return HXT_STATUS_OK;
 }
 
+
+/* Compute the union of two bounding boxes: along each axis bboxResult gets the
+ * smaller of the two minima and the larger of the two maxima. Each bound is
+ * read from both inputs before it is written, so bboxResult may be bbox1 or
+ * bbox2 itself. Always returns HXT_STATUS_OK. */
+HXTStatus hxtBboxMerge(HXTBbox* bbox1, HXTBbox* bbox2, HXTBbox* bboxResult)
+{
+      unsigned axis;
+      for (axis=0; axis<3; axis++)
+      {
+              bboxResult->min[axis] = (bbox1->min[axis]<bbox2->min[axis]) ?
+                                      bbox1->min[axis] : bbox2->min[axis];
+              bboxResult->max[axis] = (bbox1->max[axis]>bbox2->max[axis]) ?
+                                      bbox1->max[axis] : bbox2->max[axis];
+      }
+
+      return HXT_STATUS_OK;
+}
+
diff --git a/contrib/hxt/hxt_bbox.h b/contrib/hxt/hxt_bbox.h
index 6db445d5f09eace24f96323e05a2df9a2d4b33a8..6dca04f97d0614efb1e3fa3386362793e3689158 100644
--- a/contrib/hxt/hxt_bbox.h
+++ b/contrib/hxt/hxt_bbox.h
@@ -35,6 +35,9 @@ HXTStatus hxtBboxAddOne(HXTBbox* bbox, double* coord);
 /* update the bounding box with an array of n vertices at once (far quicker) */
 HXTStatus hxtBboxAdd(HXTBbox* bbox, double* coord, uint32_t n);
 
+/* merge two bounding boxes (bboxResult may point to bbox1 or bbox2) */
+HXTStatus hxtBboxMerge(HXTBbox* bbox1, HXTBbox* bbox2, HXTBbox* bboxResult);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/contrib/hxt/hxt_boundary_recovery.cxx b/contrib/hxt/hxt_boundary_recovery.cxx
index d276b8bc29f8de6414c407795bfb841c59fe885f..a869fa62e755e4844ac9e006017dbd1dfdaf1a38 100644
--- a/contrib/hxt/hxt_boundary_recovery.cxx
+++ b/contrib/hxt/hxt_boundary_recovery.cxx
@@ -1,24 +1,14 @@
-#define __STDC_LIMIT_MACROS // FIXME Gmsh: this is needed so that stdint.h defines
-                            // UINT_MAX & co in C++ code
+extern "C" {
+#include "hxt_mesh.h"
+#include "predicates.h"
+#include "hxt_omp.h"
+}
 
-#include <stdio.h>
-#include <string.h>
 #include <assert.h>
 #include <math.h>
 #include <set>
 #include <vector>
 #include <time.h>
-#if !defined(HAVE_NO_STDINT_H)
-#include <stdint.h>
-#elif defined(HAVE_NO_INTPTR_T)
-typedef unsigned long intptr_t;
-#endif
-extern "C" {
-#include "hxt_mesh.h"
-#include "predicates.h"
-#include "hxt_api.h"
-#include "hxt_boundary_recovery.h"
-}
 
 #define REAL double
 
@@ -105,16 +95,18 @@ static inline int computeTetGenVersion2(uint32_t v1, uint32_t* v2Choices, const
         break;
       }
   }
-#ifdef DEBUG
+#ifndef NDEBUG
   if(i==3)
-    HXT_WARNING("should never happen (file:%s line:%s)\n", __FILE__, __LINE__);
+    HXT_WARNING("should never happen (file:%s line:%d)\n", __FILE__, __LINE__);
 #endif
 
+  // version%4 : corresponding face in adjacent tet
+  // version/4 : which of the 3 rotations of the facet the tetrahedron has...
   return 4*i + iface2;
 }
 
 
-bool tetgenmesh::reconstructmesh(void *p){
+int tetgenmesh::reconstructmesh(void *p){
   HXTMesh *mesh = (HXTMesh*) p;
   in = new tetgenio();
   b = new tetgenbehavior();
@@ -125,10 +117,10 @@ bool tetgenmesh::reconstructmesh(void *p){
   initializepools();
 
   //  printf("we have %u vertices\n", mesh->vertices.num);
-
+  
   {
     point pointloop;
-    REAL x, y, z;
+    REAL x, y, z;    
     // Read the points.
     for (uint32_t i = 0; i < mesh->vertices.num; i++) {
       makepoint(&pointloop, UNUSEDVERTEX);
@@ -158,7 +150,7 @@ bool tetgenmesh::reconstructmesh(void *p){
     z = zmax - zmin;
     longest = sqrt(x * x + y * y + z * z);
     if (longest == 0.0) {
-      return true;
+      return HXT_STATUS_OK;
     }
 
     // Two identical points are distinguished by 'lengthlimit'.
@@ -168,141 +160,91 @@ bool tetgenmesh::reconstructmesh(void *p){
   }
 
   point *idx2verlist;
-
+  
   // Create a map from indices to vertices.
   //  printf("we create a map from indices to vertices\n");
   makeindex2pointmap(idx2verlist);
-  // 'idx2verlist' has length 'in->numberofpoints + 1'.
-  if (in->firstnumber == 1) {
-    idx2verlist[0] = dummypoint; // Let 0th-entry be dummypoint.
-  }
-  {
-    tetrahedron *ver2tetarray;
-    //point *idx2verlist;
-    triface tetloop, checktet, prevchktet;
-    triface hulltet, face1, face2;
-    tetrahedron tptr;
-    point p[4], q[3];
-    REAL ori; //, attrib, volume;
-    int bondflag;
-    int t1ver;
-    int idx, k;
-
-    // Allocate an array that maps each vertex to its adjacent tets.
-    //    printf("Allocate an array that maps each vertex to its adjacent tets\n");
-    ver2tetarray = new tetrahedron[mesh->vertices.num + 1];
-    for (unsigned int i = in->firstnumber; i < mesh->vertices.num + in->firstnumber; i++) {
-      setpointtype(idx2verlist[i], VOLVERTEX); // initial type.
-      ver2tetarray[i] = NULL;
-    }
 
+  
+  {
+    hullsize = 0;
 
     // Create the tetrahedra and connect those that share a common face.
     //    printf("Connect %d tetrahedra\n", mesh->tetrahedra.num);
-
-    const int perm[4] = {1,0,2,3};
     std::vector<triface> ts( mesh->tetrahedra.num );
     for (uint64_t i = 0; i < mesh->tetrahedra.num; i++) {
-      if (mesh->tetrahedra.node[4*i+3] == HXT_GHOST_VERTEX)
-        continue;
+      maketetrahedron(&ts[i]); // ts[i].ver = 11.
 
-      maketetrahedron(&tetloop); // tetloop.ver = 11.
-      for (uint64_t j = 0; j < 4; j++) {
-        p[j] = idx2verlist[mesh->tetrahedra.node[j+4*i]];
+      uint32_t* nodes = mesh->tetrahedra.node + 4*i;
+      point p[4];
+
+      p[0] = idx2verlist[nodes[0]];
+      p[1] = idx2verlist[nodes[1]];
+      p[2] = idx2verlist[nodes[2]];
+      if(nodes[3]==HXT_GHOST_VERTEX) {
+        hullsize++;
+        p[3] = dummypoint;
+      }
+      else {
+        p[3] = idx2verlist[nodes[3]];
+      }
+      setvertices(ts[i], p[0], p[1], p[2], p[3]);
+
+      #ifndef NDEBUG
+      for (int j=0; j<4; j++) {
+        if(mesh->tetrahedra.neigh[4*i+j]>=4*mesh->tetrahedra.num)
+          return HXT_ERROR_MSG(HXT_STATUS_ERROR, "A tetrahedron is missing a neighbor");
       }
-      setvertices(tetloop, p[perm[0]], p[perm[1]], p[perm[2]], p[perm[3]]);
-      ts[i] = tetloop;
+      #endif
     }
 
+    // we can make this in parallel, iterations are totally independent
+    #pragma omp parallel for
     for (uint64_t i = 0; i < mesh->tetrahedra.num; i++) {
-      if (mesh->tetrahedra.node[4*i+3] != HXT_GHOST_VERTEX){
-
-        for (int iface1=0; iface1<4; iface1++){
-          uint64_t neigh = mesh->tetrahedra.neigh[4*i + perm[iface1]];
-          // p[1] and p[0] have been exchanged
-          if(neigh!=HXT_NO_ADJACENT) {
-            uint64_t n = neigh >> 2;
-            int iface2 = perm[neigh&3];
-
-            if (mesh->tetrahedra.node[4*n+3] != HXT_GHOST_VERTEX){
-              triface tf1 = ts[i];
-              triface tf2 = ts[n];
-              tf1.ver = iface1;
-
-              // the face of the neighbor tetrahedra that is the same
-              uint32_t face2[3] = {mesh->tetrahedra.node[4*n+perm[(iface2+1)&3]],
-                                   mesh->tetrahedra.node[4*n+perm[((iface2&2)^3)]],
-                                   mesh->tetrahedra.node[4*n+perm[((iface2+3)&2)]]};
-
-              tf2.ver = computeTetGenVersion2 (mesh->tetrahedra.node[4*i+perm[(iface1+1)&3]], face2, iface2);
-              bond(tf1,tf2);
-            }
-          }
-        }
+      triface tf1 = ts[i];
+
+      for (tf1.ver=0; tf1.ver<4; tf1.ver++){
+        uint64_t neigh = mesh->tetrahedra.neigh[4*i + tf1.ver];
+        uint64_t n = neigh/4;
+        int iface2 = neigh%4;
+        
+        triface tf2 = ts[n];
+
+        // the face of the neighbor tetrahedra that is the same
+        uint32_t face2[3] = {mesh->tetrahedra.node[4*n+((iface2+1)&3)],
+                             mesh->tetrahedra.node[4*n+((iface2&2)^3)],
+                             mesh->tetrahedra.node[4*n+((iface2+3)&2)]};
+
+        tf2.ver = computeTetGenVersion2(mesh->tetrahedra.node[4*i+((tf1.ver+1)&3)], face2, iface2);
+        bond(tf1,tf2);
       }
     }
+  }
 
-    // printf("Create hull tets, create the point-to-tet map, and clean up the temporary spaces used in each tet\n");
-    // Create hull tets, create the point-to-tet map, and clean up the
-    //   temporary spaces used in each tet.
-    hullsize = tetrahedrons->items;
+  {
+    // Create the point-to-tet map, and clean up the temporary spaces used in each tet.
+    triface tetloop;
     tetrahedrons->traversalinit();
     tetloop.tet = tetrahedrontraverse();
     while (tetloop.tet != (tetrahedron *) NULL) {
-      tptr = encode(tetloop);
+      tetrahedron tptr = encode(tetloop);
       for (tetloop.ver = 0; tetloop.ver < 4; tetloop.ver++) {
-        if (tetloop.tet[tetloop.ver] == NULL) {
-          // Create a hull tet.
-          maketetrahedron(&hulltet);
-          p[0] =  org(tetloop);
-          p[1] = dest(tetloop);
-          p[2] = apex(tetloop);
-          setvertices(hulltet, p[1], p[0], p[2], dummypoint);
-          bond(tetloop, hulltet);
-          // Try connecting this to others that share common hull edges.
-          for (int j = 0; j < 3; j++) {
-            fsym(hulltet, face2);
-            while (1) {
-              if (face2.tet == NULL)
-                break;
-
-              esymself(face2);
-
-              if (apex(face2) == dummypoint)
-                break;
-
-              fsymself(face2);
-            }
-            if (face2.tet != NULL) {
-              // Found an adjacent hull tet.
-              assert(face2.tet[face2.ver & 3] == NULL);
-              esym(hulltet, face1);
-              bond(face1, face2);
-            }
-            enextself(hulltet);
-          }
-          //hullsize++;
-        }
-
         // Create the point-to-tet map.
-        setpoint2tet((point) (tetloop.tet[4 + tetloop.ver]), tptr);
-
+        setpoint2tet((point) (tetloop.tet[4 + tetloop.ver]), tptr); 
+  
         // Clean the temporary used space.
         tetloop.tet[8 + tetloop.ver] = NULL;
       }
       tetloop.tet = tetrahedrontraverse();
     }
-
-    hullsize = tetrahedrons->items - hullsize;
-
-    delete [] ver2tetarray;
   }
+
   {
     face newsh;
     face newseg;
     point p[4];
     int idx;
-
+    
     for (uint64_t i=0;i<mesh->triangles.num;i++){
       for (uint64_t j = 0; j < 3; j++) {
         p[j] = idx2verlist[mesh->triangles.node[3*i+j]];
@@ -310,7 +252,7 @@ bool tetgenmesh::reconstructmesh(void *p){
           setpointtype(p[j], FACETVERTEX);
         }
       }
-
+      
       // Create an initial triangulation.
       makeshellface(subfaces, &newsh);
       setshvertices(newsh, p[0], p[1], p[2]);
@@ -328,17 +270,17 @@ bool tetgenmesh::reconstructmesh(void *p){
     } // i
 
     unifysegments();
-
-
+    
+    
     face* shperverlist;
     int* idx2shlist;
     face searchsh, neighsh;
     face segloop, checkseg;
     point checkpt;
-
+    
     // Construct a map from points to subfaces.
     makepoint2submap(subfaces, idx2shlist, shperverlist);
-
+    
     // Process the set of PSC edges.
     // Remeber that all segments have default marker '-1'.
     //    int COUNTER = 0;
@@ -354,7 +296,7 @@ bool tetgenmesh::reconstructmesh(void *p){
           // This is a potential problem in surface mesh.
           continue; // Skip this edge.
         }
-
+        
         // Find a face contains the edge p[0], p[1].
         newseg.sh = NULL;
         searchsh.sh = NULL;
@@ -418,7 +360,7 @@ bool tetgenmesh::reconstructmesh(void *p){
         }
         setshellmark(newseg, mesh->lines.colors[i]);
       } // i
-
+      
       delete [] shperverlist;
       delete [] idx2shlist;
       insegments = subsegs->items;
@@ -427,13 +369,15 @@ bool tetgenmesh::reconstructmesh(void *p){
 
   delete [] idx2verlist;
   clock_t t = clock();
-  recoverboundary(t);
+  recoverboundary(t);  
   //  printf("Carve Holes\n");
   //  carveholes();
   if (subvertstack->objects > 0l) {
     HXT_INFO("Suppressing Steiner points...");
     suppresssteinerpoints();
   }
+
+// TODO: is this useful?
 #if 1
   HXT_INFO("Recover Delaunay");
   recoverdelaunay();
@@ -445,7 +389,7 @@ bool tetgenmesh::reconstructmesh(void *p){
     // Write mesh into to HXT.
     point p[4];
     std::set<int> /*l_faces, */l_edges;
-
+    
     if (points->items > mesh->vertices.num) {
       mesh->vertices.num = points->items;
       if(mesh->vertices.num > mesh->vertices.size) {
@@ -454,7 +398,7 @@ bool tetgenmesh::reconstructmesh(void *p){
                                     4*mesh->vertices.num*sizeof( double )) );
         mesh->vertices.size = mesh->vertices.num;
       }
-
+            
       face parentseg, parentsh, spinsh;
       point pointloop;
       // Create newly added mesh vertices.
@@ -482,7 +426,6 @@ bool tetgenmesh::reconstructmesh(void *p){
             reconstructingTriangularMeshIsRequired = 1;
             sdecode(point2sh(pointloop), parentsh);
             assert(parentsh.sh != NULL);
-            int ftag = shellmark(parentsh);
             mesh->vertices.coord[4*pointmark(pointloop)  ] = pointloop[0];
             mesh->vertices.coord[4*pointmark(pointloop)+1] = pointloop[1];
             mesh->vertices.coord[4*pointmark(pointloop)+2] = pointloop[2];
@@ -505,10 +448,10 @@ bool tetgenmesh::reconstructmesh(void *p){
       if (reconstructingTriangularMeshIsRequired) {
         // restore 2D mesh ...
         HXT_CHECK( hxtAlignedFree(&(mesh->triangles.node)));
-        HXT_CHECK( hxtAlignedFree(&(mesh->triangles.colors)));
+        HXT_CHECK( hxtAlignedFree(&(mesh->triangles.colors)));      
         HXT_INFO("deleting %u triangles",mesh->triangles.num);
         mesh->triangles.num = 0; // firstindex; // in->firstnumber;
-        {
+        {    
           face subloop;
           subloop.shver = 0;
           subfaces->traversalinit();
@@ -549,26 +492,32 @@ bool tetgenmesh::reconstructmesh(void *p){
         }
       }
     }
-
-    int elementnumber = 1; // firstindex; // in->firstnumber;
-    {
+    
+    // TODO: maybe fill a vector with triface and use that to convert in parallel ?
+    int elementnumber = 0; // firstindex; // in->firstnumber;
+    {    
       // number tets
       triface tetloop;
       tetrahedrons->traversalinit();
-      tetloop.tet = tetrahedrontraverse();
+      tetloop.tet = alltetrahedrontraverse();
       while (tetloop.tet != (tetrahedron *) NULL) {
         setelemindex(tetloop.tet, elementnumber);
-        tetloop.tet = tetrahedrontraverse();
+        tetloop.tet = alltetrahedrontraverse();
         elementnumber++;
       }
     }
 
+    if(elementnumber!=tetrahedrons->items)
+      return HXT_ERROR_MSG(HXT_STATUS_ERROR, "This can not happen...");
+    
     {
       // move data to HXT
-      triface tetloop;
+      triface tetloop;    
       tetrahedrons->traversalinit();
-      tetloop.tet = tetrahedrontraverse();
-      mesh->tetrahedra.num  = elementnumber-1;
+      tetloop.tet = alltetrahedrontraverse();
+
+      // TODO: maybe free during recovery to save size...
+      mesh->tetrahedra.num  = tetrahedrons->items;
       if(mesh->tetrahedra.num > mesh->tetrahedra.size) {
         HXT_CHECK( hxtAlignedFree(&mesh->tetrahedra.node) );
         HXT_CHECK( hxtAlignedFree(&mesh->tetrahedra.neigh) );
@@ -586,8 +535,8 @@ bool tetgenmesh::reconstructmesh(void *p){
 
         mesh->tetrahedra.size = mesh->tetrahedra.num;
       }
-
-
+      
+      
       int counter = 0;
       while (tetloop.tet != (tetrahedron *) NULL) {
         tetloop.ver = 11;
@@ -595,52 +544,60 @@ bool tetgenmesh::reconstructmesh(void *p){
         p[1] = dest(tetloop);
         p[2] = apex(tetloop);
         p[3] = oppo(tetloop);
-        triface E, N[4];
-        E = tetloop;
-        for (E.ver = 0; E.ver < 4; E.ver++) {
-          fsym(E, N[E.ver]);
-        }
-        int orderHXT[4] = {1,0,2,3};
+
         mesh->tetrahedra.colors[counter] = 0;
-        for (int k=0;k<4;k++){
-          mesh->tetrahedra.node[4*counter+k] = pointmark(p[orderHXT[k]]);
-          if (mesh->tetrahedra.node[4*counter+k] >= mesh->vertices.num)
-            HXT_WARNING("ERROR : index %u out of range (%u)\n",mesh->tetrahedra.node[4*counter+k],mesh->vertices.num);
-        }
-        for (int i=0;i<4;i++){
-          int ngh =  elemindex(N[orderHXT[i]].tet);
-          if (ngh) {
-            //      mesh->tetrahedra.neigh[4*counter+i] = 4*(elemindex(N[i].tet)-1)+i;
+        mesh->tetrahedra.flag[counter] = 0;
+
+        for (tetloop.ver=0;tetloop.ver<4;tetloop.ver++){
+          int k = tetloop.ver;
+          triface N;
+          fsym(tetloop, N);
+
+          if(p[k]==dummypoint) {
+            if(k!=3)
+              return HXT_ERROR_MSG(HXT_STATUS_ERROR, "Error: the ghost vertex is not the third vertex");
+            mesh->tetrahedra.node[4*counter+k] = HXT_GHOST_VERTEX;
           }
-          else{
-            //      mesh->tetrahedra.neigh[4*counter+i] = HXT_NO_ADJACENT;
+          else {
+            mesh->tetrahedra.node[4*counter+k] = pointmark(p[k]);
+            if (mesh->tetrahedra.node[4*counter+k] >= mesh->vertices.num)
+              return HXT_ERROR_MSG(HXT_STATUS_ERROR, "ERROR : index %u out of range (%u)\n", 
+                                   mesh->tetrahedra.node[4*counter+k], mesh->vertices.num);
           }
+
+          // set the neighbor
+          uint64_t ngh =  elemindex(N.tet);
+          int face = N.ver%4;
+
+          mesh->tetrahedra.neigh[4*counter+k] = 4*ngh+face;
         }
-        //  printf("%d --> %d %d %d %d\n", counter,  pointmark(p[0]),pointmark(p[1]),pointmark(p[2]),pointmark(p[3]));
 
         counter++;
-        tetloop.tet = tetrahedrontraverse();
+        tetloop.tet = alltetrahedrontraverse();
       }
     } // mesh output
   }
 
   delete in;
   delete b;
-  return true;
+  return HXT_STATUS_OK;
 }
 
 extern "C" {
+  // Run the tetgenmesh boundary recovery on mesh. Returns the status given by
+  // reconstructmesh(); reports HXT_STATUS_FAILED if an exception was thrown.
   HXTStatus hxt_boundary_recovery(HXTMesh *mesh)
   {
-    bool ret = false;
+    HXTStatus status;
     try{
-      tetgenmesh *m = new tetgenmesh();
-      ret = m->reconstructmesh((void*)mesh);
-      delete m;
-      return HXT_STATUS_OK ;
+      tetgenmesh m; // automatic storage: also destroyed if an exception is thrown
+      status = (HXTStatus) m.reconstructmesh((void*)mesh);
+      if(status!=HXT_STATUS_OK) HXT_TRACE(status);
     }
     catch (...){
       return HXT_ERROR_MSG(HXT_STATUS_FAILED, "failed to recover constrained lines/triangles") ;
     }
+
+    return status;
   }
 }
diff --git a/contrib/hxt/hxt_linear_system_lu.c b/contrib/hxt/hxt_linear_system_lu.c
index 6b8c9896f5e0367c0fbd85d224d82ab6c6ec675e..ad21fef5def674041642667f06039df3b3d1f105 100644
--- a/contrib/hxt/hxt_linear_system_lu.c
+++ b/contrib/hxt/hxt_linear_system_lu.c
@@ -294,7 +294,7 @@ HXTStatus hxtLinearSystemLUCreate(HXTLinearSystemLU **pSystem, int nElements, in
   }
   free(nodeRowStart);
   free(nodeRowEnd);
-  system->M = malloc(sizeof(double)*totalSize); // FIXME Gmsh instead of _mm_malloc
+  system->M = _mm_malloc(sizeof(double)*totalSize, PADDING*8);
   system->rows = malloc(sizeof(double*)*system->n);
   for (int i = 0; i < totalSize; ++i)
     system->M[i] = 0;
@@ -306,7 +306,7 @@ HXTStatus hxtLinearSystemLUCreate(HXTLinearSystemLU **pSystem, int nElements, in
     totalSize += system->rowEnd[i]-system->rowStart[i]+(paddedStart-start);
     system->rows[i] = system->M + paddedStart;
   }
-  system->x = malloc(sizeof(double)*system->n); // FIXME Gmsh instead of _mm_malloc
+  system->x = _mm_malloc(sizeof(double)*system->n, PADDING*8);
   return HXT_STATUS_OK;
 }
 
@@ -399,7 +399,7 @@ HXTStatus hxtLinearSystemLUAddMatrixEntry(HXTLinearSystemLU *system, int node0,
     HXT_ERROR_MSG(HXT_STATUS_FAILED, "node %i or %i not in the domain", node0, node1);
   int row0 = system->nodeMap[node0]*system->nFields + field0;
   int col1 = system->nodeMap[node1]*system->nFields + field1;
-
+  
   system->rows[row0][col1] += v;
   return HXT_STATUS_OK;
 }
@@ -418,7 +418,7 @@ HXTStatus hxtLinearSystemLUSolve(HXTLinearSystemLU *system, double *rhs, double
     LUPDecompose(system);
     system->flaglu=1;
   }
-
+  
   LUPSolve(system, rhs);
   for (int i = 0; i < system->nNodes; ++i){
     int ii = system->nodeMap[i];
diff --git a/contrib/hxt/hxt_linear_system_petsc.c b/contrib/hxt/hxt_linear_system_petsc.c
index 03f2db4cce6c527605a1509752bd4f4c662e260f..d359fc642be4c9e99a2f392ca03c8dad4403b35c 100644
--- a/contrib/hxt/hxt_linear_system_petsc.c
+++ b/contrib/hxt/hxt_linear_system_petsc.c
@@ -229,30 +229,16 @@ HXTStatus hxtLinearSystemPETScAddRhsEntry(HXTLinearSystemPETSc *system, double *
 
 HXTStatus hxtLinearSystemPETScSolve(HXTLinearSystemPETSc *system, double *rhs, double *solution){
   Vec b;
-  Vec x;
   HXT_PETSC_CHECK(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, system->nDofs, rhs, &b));
-  HXT_PETSC_CHECK(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, system->nDofs, solution, &x));
-  double normTest=0.0;
-  for (int iv = 0; iv < system->nDofs; ++iv){
-      normTest+=solution[iv]*solution[iv];
-  }
-  /* printf("IN PETSC norm vect sol : %g ; nofs : %i\n",normTest,system->nDofs);     */
   if(system->assemblyNeeded) {
     HXT_PETSC_CHECK(MatAssemblyBegin(system->a, MAT_FINAL_ASSEMBLY));
     HXT_PETSC_CHECK(MatAssemblyEnd(system->a, MAT_FINAL_ASSEMBLY));
     system->assemblyNeeded = 0;
   }
   HXT_PETSC_CHECK(KSPSetOperators(system->ksp, system->a, system->a));
-  /* HXT_PETSC_CHECK(KSPSolve(system->ksp, b, system->x)); */
-  KSPSetInitialGuessNonzero(system->ksp,PETSC_TRUE);
-  PetscBool flag[1];
-  KSPGetInitialGuessNonzero(system->ksp,flag);
-  /* printf("IN PETSC initialguessnonzero : %i\n",flag[0]); */
-  HXT_PETSC_CHECK(KSPSolve(system->ksp, b, x));
+  HXT_PETSC_CHECK(KSPSolve(system->ksp, b, system->x));
   HXT_PETSC_CHECK(VecDestroy(&b));
-  HXT_CHECK(hxtLinearSystemPETScMapFromVec(system, x, solution));
-  HXT_PETSC_CHECK(VecDestroy(&x));
-  /* HXT_CHECK(hxtLinearSystemPETScMapFromVec(system, system->x, solution)); */
+  HXT_CHECK(hxtLinearSystemPETScMapFromVec(system, system->x, solution));
   return HXT_STATUS_OK;
 }
 
diff --git a/contrib/hxt/hxt_mesh.c b/contrib/hxt/hxt_mesh.c
index 5c86fbb76af93004c364d7bbdb45b6ce4ce69394..7ea65b5bf5a758df8d058f70ee449e19123c27be 100644
--- a/contrib/hxt/hxt_mesh.c
+++ b/contrib/hxt/hxt_mesh.c
@@ -77,6 +77,18 @@ HXTStatus  hxtMeshCreate ( HXTContext* ctx, HXTMesh** mesh) {
   (*mesh)->lines.num = 0;
   (*mesh)->lines.size = 0;
 
+  // boundary representation
+  (*mesh)->brep.numVolumes = 0;
+  (*mesh)->brep.numSurfacesPerVolume = NULL;
+  (*mesh)->brep.surfacesPerVolume = NULL;
+  (*mesh)->brep.numSurfaces = 0;
+  (*mesh)->brep.numCurvesPerSurface = NULL;
+  (*mesh)->brep.curvesPerSurface = NULL;
+  (*mesh)->brep.numCurves = 0;
+  (*mesh)->brep.endPointsOfCurves = NULL;
+  (*mesh)->brep.numPoints = 0;
+  (*mesh)->brep.points = NULL;
+
   return HXT_STATUS_OK;
 }
 
@@ -127,6 +139,14 @@ HXTStatus hxtMeshDelete ( HXTMesh** mesh) {
   HXT_CHECK( hxtAlignedFree(&(*mesh)->lines.node) );
   HXT_CHECK( hxtAlignedFree(&(*mesh)->lines.colors) );
 
+  // boundary representation
+  HXT_CHECK( hxtAlignedFree(&(*mesh)->brep.numSurfacesPerVolume) );
+  HXT_CHECK( hxtAlignedFree(&(*mesh)->brep.surfacesPerVolume) );
+  HXT_CHECK( hxtAlignedFree(&(*mesh)->brep.numCurvesPerSurface) );
+  HXT_CHECK( hxtAlignedFree(&(*mesh)->brep.curvesPerSurface) );
+  HXT_CHECK( hxtAlignedFree(&(*mesh)->brep.endPointsOfCurves) );
+  HXT_CHECK( hxtAlignedFree(&(*mesh)->brep.points) );
+
 
   HXT_CHECK( hxtFree(mesh) );
   return HXT_STATUS_OK;
@@ -450,7 +470,7 @@ HXTStatus  hxtMeshWriteGmsh  ( HXTMesh* mesh , const char *filename) {
     if(mesh->tetrahedra.node[i*4 + 3]!=UINT32_MAX){
       uint16_t myColor = mesh->tetrahedra.colors ? mesh->tetrahedra.colors[i] : 0;
       // color = UINT16_MAX --> outside the domain
-      if (myColor != UINT16_MAX)
+      //      if (myColor != UINT16_MAX)
         ++index;
     }
   }
@@ -498,7 +518,7 @@ HXTStatus  hxtMeshWriteGmsh  ( HXTMesh* mesh , const char *filename) {
     for (i=0; i<mesh->tetrahedra.num; i++){
       if(mesh->tetrahedra.node[i*4 + 3]!=UINT32_MAX){
         uint16_t myColor = mesh->tetrahedra.colors ? mesh->tetrahedra.colors[i] : 0;
-        if (myColor != UINT16_MAX)
+	//        if (myColor != UINT16_MAX)
           fprintf(file,"%lu %u 2 0 %u %u %u %u %u\n", ++index,TETID,
               myColor,
               mesh->tetrahedra.node[i*4]+1,
diff --git a/contrib/hxt/hxt_mesh.h b/contrib/hxt/hxt_mesh.h
index ac338fb7e3ab028e8bab15860ad1c462d297ad7e..bc66c60a391f80a2ec62ea0b317b8d1f6721b734 100644
--- a/contrib/hxt/hxt_mesh.h
+++ b/contrib/hxt/hxt_mesh.h
@@ -3,8 +3,8 @@
 
 #include "hxt_tools.h" // to have SIMD_ALIGN and stdint.h
 
+
 #define HXT_GHOST_VERTEX UINT32_MAX
-#define HXT_DELETED_COLOR (UINT16_MAX-1)
 
 #define HXT_NO_ADJACENT UINT64_MAX
 
@@ -34,18 +34,18 @@ struct hxtMeshStruct {
   struct {
     uint32_t* node;  // aligned (size = tetrahedra.size*4*sizeof(uint32_t))
     uint64_t* neigh; // aligned (size = tetrahedra.size*4*sizeof(uint64_t))
-    char* neighType;
+    uint8_t* neighType;
     uint16_t* colors;
     uint16_t* flag;
     uint64_t num;    // number of tetrahedra
     uint64_t size;   // reserved number of tetrahedra (size of the vector)
   } tetrahedra;
-
+  
   // hexahedra
   struct {
     uint32_t* node;  // aligned (size = hexahedra.size*8*sizeof(uint32_t))
     uint64_t* neigh; // aligned (size = hexahedra.size*6*sizeof(uint64_t))
-    char* neighType;
+    uint8_t* neighType;
     uint16_t* colors;
     uint16_t* flag;
     uint64_t num;    // number of tetrahedra
@@ -56,18 +56,18 @@ struct hxtMeshStruct {
   struct {
     uint32_t* node;  // aligned (size = prisms.size*6*sizeof(uint32_t))
     uint64_t* neigh; // aligned (size = prisms.size*5*sizeof(uint64_t))
-    char* neighType;
+    uint8_t* neighType;
     uint16_t* colors;
     uint16_t* flag;
     uint64_t num;    // number of tetrahedra
     uint64_t size;   // reserved number of prisms (size of the vector)
   } prisms;
-
+  
   // pyramids
   struct {
     uint32_t* node;  // aligned (size = pyramids.size*5*sizeof(uint32_t))
     uint64_t* neigh; // aligned (size = pyramids.size*5*sizeof(uint64_t))
-    char* neighType;
+    uint8_t* neighType;
     uint16_t* colors;
     uint16_t* flag;
     uint64_t num;    // number of tetrahedra
@@ -77,7 +77,7 @@ struct hxtMeshStruct {
   // triangles // TODO: consider writing a array of structure...
   struct {
     uint32_t* node;
-    uint64_t* neigh;
+    uint64_t* neigh; 
     uint16_t* colors;
     uint64_t num;
     uint64_t size;
@@ -98,17 +98,19 @@ struct hxtMeshStruct {
     uint64_t num;
     uint64_t size;
   } lines;
+
+  // boundary representation
   struct {
-    uint32_t numVolumes;
-    uint32_t *numSurfacesPerVolume;
-    uint32_t *surfacesPerVolume;
-    uint32_t numSurfaces;
-    uint32_t *numCurvesPerSurface;
-    uint32_t *curvesPerSurface;
-    uint32_t numCurves;
-    uint32_t *endPointsOfCurves;
-    uint32_t numPoints;
-    uint32_t *points;
+    uint16_t numVolumes;
+    uint16_t *numSurfacesPerVolume;
+    uint16_t *surfacesPerVolume;
+    uint16_t numSurfaces;
+    uint16_t *numCurvesPerSurface;
+    uint16_t *curvesPerSurface;
+    uint16_t numCurves;
+    uint16_t *endPointsOfCurves;
+    uint16_t numPoints;
+    uint16_t *points;
   } brep;
 };
 
diff --git a/contrib/hxt/hxt_mesh3d.c b/contrib/hxt/hxt_mesh3d.c
index 2d8a8c0f336c8d3a342eb6b2f339ec98ee50d369..b1600ec1224c57f9888e6341063aab3a413e1b40 100644
--- a/contrib/hxt/hxt_mesh3d.c
+++ b/contrib/hxt/hxt_mesh3d.c
@@ -1,5 +1,5 @@
 // #include "hxt_mesh_size.h"
-#include "hxt_tetrahedra.h"
+#include "hxt_tetDelaunay.h"
 // #include "hxt_vertices.h"
 #include "hxt_mesh3d.h"
 #include "predicates.h"
@@ -17,7 +17,8 @@
 
 
 
-HXTStatus hxtComputeMeshSizeFromTrianglesAndLines(HXTMesh* mesh, HXTDelaunayOptions* delOptions) {
+HXTStatus hxtCreateNodalsizeFromTrianglesAndLines(HXTMesh* mesh, HXTDelaunayOptions* delOptions)
+{
 
   HXT_CHECK(hxtAlignedMalloc(&delOptions->nodalSizes,mesh->vertices.num*sizeof(double))); 
   
@@ -63,7 +64,8 @@ HXTStatus hxtComputeMeshSizeFromTrianglesAndLines(HXTMesh* mesh, HXTDelaunayOpti
   return HXT_STATUS_OK;    
 }
 
-HXTStatus hxtComputeMeshSizeFromMesh (HXTMesh* mesh, HXTDelaunayOptions* delOptions) {
+HXTStatus hxtCreateNodalsizeFromMesh(HXTMesh* mesh, HXTDelaunayOptions* delOptions)
+{
 
   HXT_CHECK(hxtAlignedMalloc(&delOptions->nodalSizes,mesh->vertices.num*sizeof(double))); 
   
@@ -92,13 +94,20 @@ HXTStatus hxtComputeMeshSizeFromMesh (HXTMesh* mesh, HXTDelaunayOptions* delOpti
       }
     }
   }
-  return HXT_STATUS_OK;    
+  return HXT_STATUS_OK;
+}
+
+HXTStatus hxtDestroyNodalsize(HXTDelaunayOptions* delOptions)
+{
+  HXT_CHECK( hxtAlignedFree(&delOptions->nodalSizes) );
+  return HXT_STATUS_OK;
 }
 
 
 
 
-HXTStatus hxtEmptyMesh(HXTMesh* mesh, HXTDelaunayOptions* delOptions){
+HXTStatus hxtEmptyMesh(HXTMesh* mesh, HXTDelaunayOptions* delOptions)
+{
 // we assume that the input is a surface mesh
   if (mesh->tetrahedra.num)  
     return HXT_ERROR_MSG(HXT_STATUS_FAILED, "The input mesh should only contain triangles");
@@ -156,78 +165,12 @@ HXTStatus hxtEmptyMesh(HXTMesh* mesh, HXTDelaunayOptions* delOptions){
 
 
 
-
-
-/***************************************************
- *      Coloring the mesh                          *
- ***************************************************/
-HXTStatus hxtColorMesh(HXTMesh* mesh, uint16_t *nbColors) {
-  uint64_t *stack;
-  HXT_CHECK(hxtMalloc(&stack,mesh->tetrahedra.num*sizeof(uint64_t))); 
-  // now that tetrahedra are flaged, we can proceed to colorize the mesh
-  memset(mesh->tetrahedra.colors, 0, mesh->tetrahedra.size*sizeof(uint16_t));
-
-  uint16_t color = 1;
-  uint16_t colorOut = 0;
-
-  while (1){
-    uint64_t stackSize = 0;
-    uint64_t first = UINT64_MAX;
-
-    for (uint64_t i=0; i<mesh->tetrahedra.num; i++) {
-      if(mesh->tetrahedra.colors[i]==0){
-        first = i;
-        break;
-      }
-    }
-
-    if(first==UINT64_MAX)
-      break;
-
-    stack[stackSize++] = first;
-    mesh->tetrahedra.colors[first] = color;
-
-    for (uint64_t i=0; i<stackSize; i++) {
-      uint64_t t = stack[i];
-
-      if (mesh->tetrahedra.node[4*t+3] == HXT_GHOST_VERTEX)
-        colorOut = color;
-
-      for (unsigned j=0; j<4; j++) {
-        if(mesh->tetrahedra.neigh[4*t+j]!=HXT_NO_ADJACENT && isFacetConstrained(mesh, 4*t+j)==0){ // the facet is not a boundary facet
-          uint64_t neigh = mesh->tetrahedra.neigh[4*t+j]/4;
-          if(mesh->tetrahedra.colors[neigh]==0){
-            stack[stackSize++] = neigh;
-            mesh->tetrahedra.colors[neigh] = color;
-          }
-        }
-      }
-    }
-    color++;
-  }
-  *nbColors = color-1; // -1 because we began at one AND the colorout is counted...
-
-  HXT_CHECK( hxtFree(&stack) );
-
-  #pragma omp parallel for
-  for (int i=0;i<mesh->tetrahedra.num;i++){
-    if (mesh->tetrahedra.colors[i] == colorOut){
-      mesh->tetrahedra.colors[i] = UINT16_MAX;
-    }
-    else if(mesh->tetrahedra.colors[i] > colorOut){
-      mesh->tetrahedra.colors[i]--;
-    }
-  }
-
-  return HXT_STATUS_OK;
-}
-
-
 // refine 
 
 
 double hxtTetCircumcenter(double a[3], double b[3], double c[3], double d[3],
-                            double circumcenter[3], double *xi, double *eta, double *zeta){
+                            double circumcenter[3], double *xi, double *eta, double *zeta)
+{
   double xba, yba, zba, xca, yca, zca, xda, yda, zda;
   double balength, calength, dalength;
   double xcrosscd, ycrosscd, zcrosscd;
@@ -326,7 +269,7 @@ HXTStatus hxtRefineTetrahedraOneStep(HXTMesh* mesh, HXTDelaunayOptions* delOptio
     for (uint64_t i=0; i<mesh->tetrahedra.num; i++)
     {
       newVertices[(size_t) 4*i+3] = -1.0;
-      if (mesh->tetrahedra.colors[i] != UINT16_MAX && isTetProcessed(mesh, i)==0){
+      if (mesh->tetrahedra.colors[i] != UINT16_MAX && getProcessedFlag(mesh, i)==0){
         double *a = mesh->vertices.coord + (size_t) 4*mesh->tetrahedra.node[4*i+0];
         double *b = mesh->vertices.coord + (size_t) 4*mesh->tetrahedra.node[4*i+1];
         double *c = mesh->vertices.coord + (size_t) 4*mesh->tetrahedra.node[4*i+2];
@@ -372,7 +315,7 @@ HXTStatus hxtRefineTetrahedraOneStep(HXTMesh* mesh, HXTDelaunayOptions* delOptio
           localAdd++;
         }
 
-        markTetAsProcessed(mesh, i); // we do not need to refine that tetrahedra anymore
+        setProcessedFlag(mesh, i); // we do not need to refine that tetrahedra anymore
       }
     }
 
diff --git a/contrib/hxt/hxt_mesh3d.h b/contrib/hxt/hxt_mesh3d.h
index 8b67844d75ad8faef1ffc73717da4529d1f37824..61d5104a56d41aac6ffa9b909f4fe02cad8cb15d 100644
--- a/contrib/hxt/hxt_mesh3d.h
+++ b/contrib/hxt/hxt_mesh3d.h
@@ -1,19 +1,19 @@
 #ifndef _HXT_MESH_3D_
 #define _HXT_MESH_3D_
 
-#include "hxt_tetrahedra.h"
+#include "hxt_tetDelaunay.h"
 
 /// Creates a structure that allows to look over triangular faces of the 2D mesh
 HXTStatus hxtCreateFaceSearchStructure(HXTMesh* mesh, uint32_t **pfaces);
+
 //// creates a mesh with all points of the surface mesh
 HXTStatus hxtEmptyMesh(HXTMesh* mesh, HXTDelaunayOptions* delOptions);
+
 /// Compute sizes at vertices of the mesh from existing edges of the tetrahera
-HXTStatus hxtComputeMeshSizeFromTrianglesAndLines (HXTMesh* mesh, HXTDelaunayOptions* delOptions);
-HXTStatus hxtComputeMeshSizeFromMesh (HXTMesh* mesh, HXTDelaunayOptions* delOptions);
-/// Gives a unique color to each enclosed volume
-HXTStatus hxtColorMesh(HXTMesh* mesh, uint16_t *nbColors);
-/// Recover the boundary
-HXTStatus hxtRecoverBoundary(HXTMesh* mesh);
+HXTStatus hxtCreateNodalsizeFromTrianglesAndLines(HXTMesh* mesh, HXTDelaunayOptions* delOptions);
+HXTStatus hxtCreateNodalsizeFromMesh(HXTMesh* mesh, HXTDelaunayOptions* delOptions);
+HXTStatus hxtDestroyNodalsize(HXTDelaunayOptions* delOptions);
+
 /// Add points at tets circumcenter in order to fullfill a mesh size constraint 
 HXTStatus hxtRefineTetrahedra(HXTMesh* mesh, HXTDelaunayOptions* delOptions, HXTMeshSize* meshsize);
 
diff --git a/contrib/hxt/hxt_mesh3d_main.c b/contrib/hxt/hxt_mesh3d_main.c
index bbd5e7c6b36f7a79c4a12fa708f4c234280e1971..52011b0012eee8a75098a496129890909e0854d1 100644
--- a/contrib/hxt/hxt_mesh3d_main.c
+++ b/contrib/hxt/hxt_mesh3d_main.c
@@ -1,19 +1,28 @@
 #include "hxt_mesh3d.h"
-#include "hxt_tetrahedra.h"
+#include "hxt_tetDelaunay.h"
 #include "hxt_tetRepair.h"
 #include "hxt_tetUtils.h"
 #include "hxt_tetFlag.h"
+#include "hxt_tetColor.h"
 #include "hxt_tetOpti.h"
 
+
 HXTStatus hxtTetMesh3d(HXTMesh* mesh,
-                      int nthreads,
+                      int defaulThreads,
+                      int DelaunayThreads,
+                      int optimizationThreads,
                       int reproducible,
                       int verbosity,
                       int displayStat,
-                      int refine,  // refine if !=0
-                      int optimize,// optimize quality if !=0
+                      int refine,
+                      int optimize,
                       double qualityThreshold,
                       HXTStatus (*bnd_recovery)(HXTMesh* mesh)) {
+
+  if(defaulThreads>0) {
+    omp_set_num_threads(defaulThreads);
+  }
+
   double t[8]={0};
   t[0] = omp_get_wtime();
 
@@ -21,7 +30,7 @@ HXTStatus hxtTetMesh3d(HXTMesh* mesh,
   hxtBboxInit(&bbox);
   HXT_CHECK( hxtBboxAdd(&bbox, mesh->vertices.coord, mesh->vertices.num) );
 
-  HXTDelaunayOptions delOptions = {&bbox, NULL, 0.0, 0.0, 0, verbosity, reproducible, nthreads};
+  HXTDelaunayOptions delOptions = {&bbox, NULL, 0.0, 0.0, 0, verbosity, reproducible, DelaunayThreads};
   uint32_t numVerticesConstrained = mesh->vertices.num;
   
   HXT_INFO_COND(verbosity>0, "Creating an empty mesh with %u vertices", numVerticesConstrained);
@@ -29,22 +38,38 @@ HXTStatus hxtTetMesh3d(HXTMesh* mesh,
 
   t[1] = omp_get_wtime();
 
-  uint64_t nbMissingFacets, nbMissingEdges=0;
+  uint64_t nbMissingTriangles, nbLinesNotInTriangles, nbMissingLines=0;
   uint16_t nbColors;
-  HXT_CHECK( hxtConstrainTriangles(mesh, &nbMissingFacets) );
-  if(nbMissingFacets==0) // TODO: differentiating missing triangles and missing edges ??
-    HXT_CHECK( hxtConstrainEdgesNotInTriangles(mesh, &nbMissingEdges) );
+  uint64_t* tri2TetMap = NULL;
+  uint64_t* lines2TriMap = NULL;
+  uint64_t* lines2TetMap = NULL;
+
+  HXT_CHECK( hxtAlignedMalloc(&tri2TetMap, mesh->triangles.num*sizeof(uint64_t)) );
+  HXT_CHECK( hxtAlignedMalloc(&lines2TriMap, mesh->lines.num*sizeof(uint64_t)) );
+  
+  HXT_CHECK( hxtGetTri2TetMap(mesh, tri2TetMap, &nbMissingTriangles) );
+  HXT_CHECK( hxtGetLines2TriMap(mesh, lines2TriMap, &nbLinesNotInTriangles) );
+
+  if(nbLinesNotInTriangles!=0) {
+    HXT_CHECK( hxtAlignedMalloc(&lines2TetMap, mesh->lines.num*sizeof(uint64_t)) );
+    if(nbMissingTriangles==0) {
+      HXT_CHECK( hxtGetLines2TetMap(mesh, lines2TetMap, &nbMissingLines) );
+    }
+  }
+
 
   t[2] = omp_get_wtime();
 
-  if (nbMissingFacets != 0 || nbMissingEdges!=0){
+  if (nbMissingTriangles!=0 || nbMissingLines!=0){
     if(bnd_recovery==NULL)
       return HXT_ERROR_MSG(HXT_STATUS_ERROR,
         "there are missing features but no boundary recovery function is given");
-    if(nbMissingFacets)
-      HXT_INFO("Recovering %lu missing facet(s)", nbMissingFacets);
-    else if(nbMissingEdges)
-      HXT_INFO("Recovering %lu missing edge(s)", nbMissingEdges);
+
+    if(nbMissingTriangles)
+      HXT_INFO("Recovering %lu missing facet(s)", nbMissingTriangles);
+    else if(nbMissingLines)
+      HXT_INFO("Recovering %lu missing edge(s)", nbMissingLines);
+
     HXT_CHECK(bnd_recovery(mesh));
 
     if(delOptions.numVerticesInMesh < mesh->vertices.num) {
@@ -54,113 +79,62 @@ HXTStatus hxtTetMesh3d(HXTMesh* mesh,
 
     t[3] = omp_get_wtime();
 
-    memset(mesh->tetrahedra.flag, 0, mesh->tetrahedra.num*sizeof(uint16_t));
-    HXT_CHECK(hxtTetOrientNodes(mesh));
-    HXT_CHECK(hxtTetAdjacencies(mesh));
-    HXT_CHECK(hxtAddGhosts(mesh));
-    // HXT_CHECK( hxtTetVerify(mesh) );
-
-    HXT_CHECK( hxtConstrainTriangles(mesh, &nbMissingFacets) );
-    if(nbMissingFacets!=0)
+    HXT_CHECK( hxtGetTri2TetMap(mesh, tri2TetMap, &nbMissingTriangles) );
+    if(nbMissingTriangles!=0)
       return HXT_ERROR_MSG( HXT_STATUS_ERROR,
-        "%d boundary face%s still missing (after recovery step).",
-        nbMissingFacets, (nbMissingFacets>1)?"s are":" is" );
+        "%lu boundary face%s still missing (after recovery step).",
+        nbMissingTriangles, (nbMissingTriangles>1)?"s are":" is" );
 
-    HXT_CHECK( hxtConstrainEdgesNotInTriangles(mesh, &nbMissingEdges) );
-    if(nbMissingEdges!=0)
+    if(nbLinesNotInTriangles!=0)
+      HXT_CHECK( hxtGetLines2TetMap(mesh, lines2TetMap, &nbMissingLines) );
+
+    if(nbMissingLines!=0)
       return HXT_ERROR_MSG( HXT_STATUS_ERROR,
-        "%d constrained edge%s still missing (after recovery step).",
-        nbMissingEdges, (nbMissingEdges>1)?"s are":" is" );
+        "%lu constrained edge%s still missing (after recovery step).",
+        nbMissingLines, (nbMissingLines>1)?"s are":" is" );
   }
 
-  HXT_CHECK(hxtColorMesh(mesh, &nbColors));
-
-#ifdef DEBUG
-  HXT_CHECK( hxtTetVerify(mesh) );
+  HXT_CHECK( hxtConstrainTriangles(mesh, tri2TetMap) );
+  
+  if(nbLinesNotInTriangles!=0)
+    HXT_CHECK( hxtConstrainLinesNotInTriangles(mesh, lines2TetMap, lines2TriMap) );
 
-  memset(mesh->tetrahedra.flag, 0, mesh->tetrahedra.num*sizeof(uint16_t));
-  HXT_CHECK( hxtConstrainTriangles(mesh, &nbMissingFacets) );
-  if(nbMissingFacets!=0)
-    return HXT_ERROR_MSG( HXT_STATUS_ERROR,
-      "%d boundary face%s still missing (after refine).",
-      nbMissingFacets, (nbMissingFacets>1)?"s are":" is" );
+  HXT_CHECK( hxtColorMesh(mesh, &nbColors) );
+ 
+  HXT_CHECK( hxtMapColorsToBrep(mesh, nbColors, tri2TetMap) );
 
-  HXT_CHECK( hxtConstrainEdgesNotInTriangles(mesh, &nbMissingEdges) );
-  if(nbMissingEdges!=0)
-    return HXT_ERROR_MSG( HXT_STATUS_ERROR,
-      "%d constrained edge%s still missing (after refine).",
-      nbMissingEdges, (nbMissingEdges>1)?"s are":" is" );
-#endif
+  HXT_CHECK( hxtAlignedFree(&tri2TetMap) );
+  HXT_CHECK( hxtAlignedFree(&lines2TetMap) );
+  HXT_CHECK( hxtAlignedFree(&lines2TriMap) );
 
   t[4] = omp_get_wtime();
 
   if(refine){
     // HXT_CHECK(hxtComputeMeshSizeFromMesh(mesh, &delOptions));
-    HXT_CHECK(hxtComputeMeshSizeFromTrianglesAndLines(mesh, &delOptions));
-
-    // // triangulate only one color
-    // if(color>=0) {
-    //   #pragma omp parallel for simd
-    //   for (uint64_t i=0; i<mesh->tetrahedra.num; i++) {
-    //     if(mesh->tetrahedra.colors[i]!=theColor){
-    //       markTetAsProcessed(mesh,i) = 1;
-    //     }
-    //   }
-    // }
-
+    HXT_CHECK(hxtCreateNodalsizeFromTrianglesAndLines(mesh, &delOptions));
     
+    if(nbColors!=mesh->brep.numVolumes) {
+      HXT_CHECK( setFlagsToProcessOnlyVolumesInBrep(mesh) );
+    }
+
     HXTMeshSize *meshSize = NULL;
     // HXT_CHECK(hxtMeshSizeCreate (context,&meshSize));
     // HXT_CHECK(hxtMeshSizeCompute (meshSize, bbox.min, bbox.max, mySize, NULL));
     //    printf("time from empty mesh to first insertion: %f second\n", omp_get_wtime() - time);
     HXT_CHECK(hxtRefineTetrahedra(mesh, &delOptions, meshSize));
     // HXT_CHECK(hxtMeshSizeDelete (&meshSize));
-    HXT_CHECK(hxtAlignedFree(&delOptions.nodalSizes));
+    HXT_CHECK( hxtDestroyNodalsize(&delOptions) );
     // #endif
   }
 
   t[5] = omp_get_wtime();
 
-#ifdef DEBUG
-  HXT_CHECK( hxtTetVerify(mesh) );
-
-  memset(mesh->tetrahedra.flag, 0, mesh->tetrahedra.num*sizeof(uint16_t));
-  HXT_CHECK( hxtConstrainTriangles(mesh, &nbMissingFacets) );
-  if(nbMissingFacets!=0)
-    return HXT_ERROR_MSG( HXT_STATUS_ERROR,
-      "%d boundary face%s still missing (after refine).",
-      nbMissingFacets, (nbMissingFacets>1)?"s are":" is" );
-
-  HXT_CHECK( hxtConstrainEdgesNotInTriangles(mesh, &nbMissingEdges) );
-  if(nbMissingEdges!=0)
-    return HXT_ERROR_MSG( HXT_STATUS_ERROR,
-      "%d constrained edge%s still missing (after refine).",
-      nbMissingEdges, (nbMissingEdges>1)?"s are":" is" );
-#endif
 
   if(optimize)
-    HXT_CHECK( hxtOptimizeTetrahedra(mesh, &bbox, delOptions.minSizeEnd, qualityThreshold, numVerticesConstrained) );
+    HXT_CHECK( hxtOptimizeTetrahedra(mesh, &bbox, optimizationThreads, delOptions.minSizeEnd, qualityThreshold, numVerticesConstrained) );
 
   t[6] = omp_get_wtime();
 
-
-#ifdef DEBUG
-  HXT_CHECK( hxtTetVerify(mesh) );
-
-  memset(mesh->tetrahedra.flag, 0, mesh->tetrahedra.num*sizeof(uint16_t));
-  HXT_CHECK( hxtConstrainTriangles(mesh, &nbMissingFacets) );
-  if(nbMissingFacets!=0)
-    return HXT_ERROR_MSG( HXT_STATUS_ERROR,
-      "%d boundary face%s still missing (after refine).",
-      nbMissingFacets, (nbMissingFacets>1)?"s are":" is" );
-
-  HXT_CHECK( hxtConstrainEdgesNotInTriangles(mesh, &nbMissingEdges) );
-  if(nbMissingEdges!=0)
-    return HXT_ERROR_MSG( HXT_STATUS_ERROR,
-      "%d constrained edge%s still missing (after refine).",
-      nbMissingEdges, (nbMissingEdges>1)?"s are":" is" );
-#endif
-
   
   if(displayStat){
     HXT_INFO("\n\t\tFinal tet. mesh contains %lu tetrahedra"
diff --git a/contrib/hxt/hxt_mesh3d_main.h b/contrib/hxt/hxt_mesh3d_main.h
index af2b9ec27213f0fd795f1801d0c0090f61792dba..320112d674a11a80daf0b000ec640fc446b4836f 100644
--- a/contrib/hxt/hxt_mesh3d_main.h
+++ b/contrib/hxt/hxt_mesh3d_main.h
@@ -4,7 +4,9 @@
 #include "hxt_mesh.h"
 
 HXTStatus hxtTetMesh3d(HXTMesh* mesh,
-                      int nthreads,
+                      int defaulThreads,
+                      int DelaunayThreads,
+                      int optimizationThreads,
                       int reproducible,
                       int verbosity,
                       int displayStat,
diff --git a/contrib/hxt/hxt_omp.h b/contrib/hxt/hxt_omp.h
index ae829dda00f84a24c2dae12609158d29ce8f5c98..ca81aa4c74d2b0a8cee798b0d6a20ba39a03b5f4 100644
--- a/contrib/hxt/hxt_omp.h
+++ b/contrib/hxt/hxt_omp.h
@@ -4,12 +4,13 @@
 #include <omp.h>
 #else
 #include <time.h>
-static inline int omp_get_max_threads()  {return 1;}
-static inline int omp_get_thread_num()  {return 0;}
-static inline int omp_get_num_threads() {return 1;}
-static inline int omp_get_num_procs() {return 1;}
-static inline int omp_get_thread_limit() {return 1;}
-static inline double omp_get_wtime() {
+static inline int omp_get_max_threads(void)  {return 1;}
+static inline int omp_get_thread_num(void)  {return 0;}
+static inline int omp_get_num_threads(void) {return 1;}
+static inline int omp_get_num_procs(void) {return 1;}
+static inline int omp_get_thread_limit(void) {return 1;}
+static inline void omp_set_num_threads(int n) { (void)n; /* no-op without OpenMP */ }
+static inline double omp_get_wtime(void) {
   clock_t t = clock();
   return (double)t/CLOCKS_PER_SEC;
 }
diff --git a/contrib/hxt/hxt_opt.c b/contrib/hxt/hxt_opt.c
index b0f27051610a3cf567f53db42af514d780ce787e..70257871d89d6719b5e1e190ff978d96aa234659 100644
--- a/contrib/hxt/hxt_opt.c
+++ b/contrib/hxt/hxt_opt.c
@@ -196,7 +196,7 @@ HXTStatus hxtAddOption(char shortName,
 #endif
 
 	if(shortName=='\0' && (longName==NULL || longName[0]=='\0') &&
-	   (valueType==HXT_FLAG || valueType==HXT_NO_FLAG) ) {
+	   (valueType==HXT_FLAG || valueType==HXT_NO_FLAG) && valuePtr!=NULL) {
 		return HXT_ERROR_MSG(HXT_STATUS_ERROR, "A flag must have an option name. Therefore, it can not be a trailing option");
 	}
 
@@ -527,7 +527,7 @@ static int getNextTrailingOption(int n) {
 }
 
 
-HXTStatus hxtParseOptions(const int argc, const char* argv[])
+HXTStatus hxtParseOptions(const int argc, char* argv[])
 {
 	int dashdash = 0;
 	int trailing = getNextTrailingOption(0);
@@ -783,7 +783,7 @@ HXTStatus hxtGetOptionHelp(char text[16384],
 	int offsetval = 0;
 	int* offset = &offsetval;
 	if(programName!=NULL) {
-		MY_SPRINTF("%s [OPTION] ...", programName);
+		MY_SPRINTF("Usage:  %s [OPTION]...", programName);
 		int n = getNextTrailingOption(0);
 		while(n!=-1 && optionList[n].valuePtr!=NULL) {
 			int n2 = getNextTrailingOption(n);
diff --git a/contrib/hxt/hxt_opt.h b/contrib/hxt/hxt_opt.h
index c2ad5b8b5c49e15d8d049a1815b5392fc523796a..2fc77be45020316fa54ce04fadd66591348905cd 100644
--- a/contrib/hxt/hxt_opt.h
+++ b/contrib/hxt/hxt_opt.h
@@ -143,7 +143,7 @@ HXTStatus hxtGetOptionHelp(char message[16384],
                            const char* additionalInfo);
 
 // use the HXT_PARSE_COMMAND_LINE macro instead
-HXTStatus hxtParseOptions(const int argc, const char* argv[]);
+HXTStatus hxtParseOptions(const int argc, char* argv[]);
 
 // This macro should be placed in the main, after all options are added !
 #define HXT_PARSE_COMMAND_LINE(argc, argv, programName, programDescription, additionalInfo)   \
diff --git a/contrib/hxt/hxt_option.c b/contrib/hxt/hxt_option.c
index 500ce1d87aa5cf4d4a3092094f7c434d8b44480d..090db3cee521b5dda6f8988d304e0dccd2e0ae66 100644
--- a/contrib/hxt/hxt_option.c
+++ b/contrib/hxt/hxt_option.c
@@ -85,7 +85,7 @@ HXTStatus hxtOptProgDelete(HXTOptProgram* program){
 static int searchLongOption(HXTOptProgram* program,
 			                const char* string)
 {
-	for (int i=0; i<program->opt_length; i++) {
+	for (size_t i=0; i<program->opt_length; i++) {
 		if(program->opts[i]->longs!=NULL &&
 		   strcmp(program->opts[i]->longs, string)==0) {
 			return i;
@@ -102,7 +102,7 @@ static int searchLongOption(HXTOptProgram* program,
 static int searchShortOption(HXTOptProgram* program,
                              char c)
 {
-	for (int i=0; i<program->opt_length; i++) {
+	for (size_t i=0; i<program->opt_length; i++) {
 		if(program->opts[i]->shorts!=NULL &&
 		   strchr(program->opts[i]->shorts, c)!=NULL) {
 			return i;
@@ -217,6 +217,7 @@ static HXTStatus doOption(HXTOpt* opt,
 					return HXT_ERROR_MSG(HXT_STATUS_RANGE_ERROR,
 					       "cannot convert argument \"%s\" of option \"%s\" to %s (value was negative)",
 					       arg, optName, getArgTypeName(opt->argType));
+			/* fallthrough */
 			case ARG_FLOAT:
 			{
 				float f32 = real;
@@ -377,7 +378,7 @@ HXTStatus hxtOptProgParse(HXTOptProgram* program,
 		*optind = i+1;
 	}
 
-	for (int i=0; i<program->opt_length; i++) {
+	for (size_t i=0; i<program->opt_length; i++) {
 		HXTOpt* opt = program->opts[i];
 		if(opt->argRequirement==ARG_NON_NULL_REQUIRED &&
 			 ((opt->argType>-4 && opt->integer==0) ||
@@ -493,7 +494,7 @@ HXTStatus hxtOptProgGetHelp(HXTOptProgram* program, char text[16384])
 	MY_SPRINTF("%.*s\n", char_printed>64?63:char_printed-1,
    "---------------------------------------------------------------");
 
-	for (int i=0; i<program->opt_length; i++) {
+	for (size_t i=0; i<program->opt_length; i++) {
 		printOptionLine(program->opts[i], text, offset);
 	}
 
diff --git a/contrib/hxt/hxt_option.h b/contrib/hxt/hxt_option.h
index 29dab9f9728c0c2d95e9d3ef525b34afe98e47ff..c07e33e1cc5abb180e4e797b8755a517a65aaea9 100644
--- a/contrib/hxt/hxt_option.h
+++ b/contrib/hxt/hxt_option.h
@@ -117,10 +117,11 @@ typedef struct HXTOptStruct{
   const char* const argName;
   const ARG_REQUIREMENT argRequirement;
   const int argType;
-  
-  int64_t     integer;
-  double      real;
-  const char* string;
+//  struct {
+    int64_t     integer;
+    double      real;
+    const char* string;
+//  }
 }HXTOpt;
 
 
diff --git a/contrib/hxt/hxt_sort.c b/contrib/hxt/hxt_sort.c
index 74ba36c4bda40d2f8448b4c125571245dd6d6310..4bece40a22eac4f48fd54a64f238112b5dcc4606 100644
--- a/contrib/hxt/hxt_sort.c
+++ b/contrib/hxt/hxt_sort.c
@@ -1,14 +1,16 @@
 #include "hxt_sort.h"
 
+
 /***************************************
  *     for 1 value                     *
  ***************************************/
 static inline uint64_t group1_get(uint64_t* val, const void* userData){
+  HXT_UNUSED(userData);
   return *val;
 }
 
 HXTStatus group1_sort(uint64_t* val, const uint64_t n, const uint64_t max){
-  PARALLEL_HYBRID64(uint64_t, val, n, max, group1_get, NULL);
+  HXTSORT64_UNIFORM(uint64_t, val, n, max, group1_get, NULL);
   return HXT_STATUS_OK;
 }
 
@@ -16,20 +18,22 @@ HXTStatus group1_sort(uint64_t* val, const uint64_t n, const uint64_t max){
  *     for 2 values                    *
  ***************************************/
 static inline uint64_t group2_get_v0(HXTGroup2* pair, const void* userData){
+  HXT_UNUSED(userData);
   return pair->v[0];
 }
 
 HXTStatus group2_sort_v0(HXTGroup2* pair, const uint64_t n, const uint64_t max){
-  PARALLEL_HYBRID64(HXTGroup2, pair, n, max, group2_get_v0, NULL);
+  HXTSORT64_UNIFORM(HXTGroup2, pair, n, max, group2_get_v0, NULL);
   return HXT_STATUS_OK;
 }
 
 static inline uint64_t group2_get_v1(HXTGroup2* pair, const void* userData){
+  HXT_UNUSED(userData);
   return pair->v[1];
 }
 
 HXTStatus group2_sort_v1(HXTGroup2* pair, const uint64_t n, const uint64_t max){
-  PARALLEL_HYBRID64(HXTGroup2, pair, n, max, group2_get_v1, NULL);
+  HXTSORT64_UNIFORM(HXTGroup2, pair, n, max, group2_get_v1, NULL);
   return HXT_STATUS_OK;
 }
 
@@ -38,28 +42,31 @@ HXTStatus group2_sort_v1(HXTGroup2* pair, const uint64_t n, const uint64_t max){
  *     for 3 values                    *
  ***************************************/
 static inline uint64_t group3_get_v0(HXTGroup3* triplet, const void* userData){
+  HXT_UNUSED(userData);
   return triplet->v[0];
 }
 
 HXTStatus group3_sort_v0(HXTGroup3* triplet, const uint64_t n, const uint64_t max){
-  PARALLEL_HYBRID64(HXTGroup3, triplet, n, max, group3_get_v0, NULL);
+  HXTSORT64_UNIFORM(HXTGroup3, triplet, n, max, group3_get_v0, NULL);
   return HXT_STATUS_OK;
 }
 
 static inline uint64_t group3_get_v1(HXTGroup3* triplet, const void* userData){
+  HXT_UNUSED(userData);
   return triplet->v[1];
 }
 
 HXTStatus group3_sort_v1(HXTGroup3* triplet, const uint64_t n, const uint64_t max){
-  PARALLEL_HYBRID64(HXTGroup3, triplet, n, max, group3_get_v1, NULL);
+  HXTSORT64_UNIFORM(HXTGroup3, triplet, n, max, group3_get_v1, NULL);
   return HXT_STATUS_OK;
 }
 
 static inline uint64_t group3_get_v2(HXTGroup3* triplet, const void* userData){
+  HXT_UNUSED(userData);
   return triplet->v[2];
 }
 
 HXTStatus group3_sort_v2(HXTGroup3* triplet, const uint64_t n, const uint64_t max){
-  PARALLEL_HYBRID64(HXTGroup3, triplet, n, max, group3_get_v2, NULL);
+  HXTSORT64_UNIFORM(HXTGroup3, triplet, n, max, group3_get_v2, NULL);
   return HXT_STATUS_OK;
 }
\ No newline at end of file
diff --git a/contrib/hxt/hxt_sort.h b/contrib/hxt/hxt_sort.h
index ae409d33474cbf9729e52d5be5291a114228b7db..6e11f76c583e8ea2c61a37ec107e6a1acb656622 100644
--- a/contrib/hxt/hxt_sort.h
+++ b/contrib/hxt/hxt_sort.h
@@ -22,6 +22,10 @@ Author: Célestin Marot (celestin.marot@uclouvain.be)                        */
 
 #include "hxt_tools.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 // sorting function already defined
 typedef struct{
   uint64_t v[2];
@@ -82,7 +86,7 @@ static inline unsigned u64_log2(uint64_t v) {
 
 #ifndef HXT_SORT_SEQUENTIAL_LIMIT
 // below this number, launching OpenMP threads is unnecessary
-#define HXT_SORT_SEQUENTIAL_LIMIT 131072
+#define HXT_SORT_SEQUENTIAL_LIMIT 32768
 #endif
 
 
@@ -92,7 +96,7 @@ do{
   if(_copyNb<64){                                                                        \
     INSERTION_SORT32(HXTSORT_TYPE, ARRAY, _copyNb, GET_KEY, USER_DATA);                  \
   }                                                                                      \
-  else if(_copyNb < HXT_SORT_SEQUENTIAL_LIMIT){                                          \
+  else if(_copyNb < HXT_SORT_SEQUENTIAL_LIMIT || omp_get_max_threads()<=1){              \
     LSB32(HXTSORT_TYPE, ARRAY, _copyNb, MAX, GET_KEY, USER_DATA);                        \
   }                                                                                      \
   else {                                                                                 \
@@ -108,7 +112,7 @@ do{
   if(_copyNb < 64){                                                                      \
     INSERTION_SORT64(HXTSORT_TYPE, ARRAY, _copyNb, GET_KEY, USER_DATA);                  \
   }                                                                                      \
-  else if(_copyNb < HXT_SORT_SEQUENTIAL_LIMIT) {                                         \
+  else if(_copyNb < HXT_SORT_SEQUENTIAL_LIMIT || omp_get_max_threads()<=1) {             \
     LSB64(HXTSORT_TYPE, ARRAY, _copyNb, _copyMax, GET_KEY, USER_DATA);                   \
   }                                                                                      \
   else {                                                                                 \
@@ -123,7 +127,7 @@ do{
   if(_copyNb < 64) {                                                                     \
     INSERTION_SORT32(HXTSORT_TYPE, ARRAY, _copyNb, GET_KEY, USER_DATA);                  \
   }                                                                                      \
-  else if(_copyNb < HXT_SORT_SEQUENTIAL_LIMIT) {                                         \
+  else if(_copyNb < HXT_SORT_SEQUENTIAL_LIMIT || omp_get_max_threads()<=1) {             \
     LSB32(HXTSORT_TYPE, ARRAY, _copyNb, MAX, GET_KEY, USER_DATA);                        \
   }                                                                                      \
   else {                                                                                 \
@@ -139,7 +143,7 @@ do{
   if(_copyNb < 64) {                                                                     \
     INSERTION_SORT64(HXTSORT_TYPE, ARRAY, _copyNb, GET_KEY, USER_DATA);                  \
   }                                                                                      \
-  else if(_copyNb < HXT_SORT_SEQUENTIAL_LIMIT) {                                         \
+  else if(_copyNb < HXT_SORT_SEQUENTIAL_LIMIT || omp_get_max_threads()<=1) {             \
     LSB64(HXTSORT_TYPE, ARRAY, _copyNb, _copyMax, GET_KEY, USER_DATA);                   \
   }                                                                                      \
   else if(_copyMax < UINT64_MAX/2048/SIMD_ALIGN) {                                       \
@@ -285,11 +289,12 @@ do{
   uint ## BASE ## _t (*_getKey)(HXTSORT_TYPE*, const void*)=GET_KEY; /*verify prototype*/\
   if(0) {_getKey(_copyAr1, _userData);} /* this is just to suppress warning... */        \
                                                                                          \
-  int nthreads = omp_get_max_threads();                                                  \
+  int nthreads = _copyN/8192 + 1;                                                        \
+  nthreads = nthreads>omp_get_max_threads()?omp_get_max_threads():nthreads;              \
   uint64_t* h_all, *h_tot;                                                               \
   h_all = (uint64_t*) HXTSORT_MEMALIGN(2048*(nthreads+1)*sizeof(uint64_t));              \
                                                                                          \
-  _HXTSORT_PRAGMA(omp parallel)                                                          \
+  _HXTSORT_PRAGMA(omp parallel num_threads(nthreads))                                    \
   {                                                                                      \
     _HXTSORT_PRAGMA(omp single)                                                          \
     {                                                                                    \
@@ -388,11 +393,12 @@ do{
   uint32_t (*_getKey)(HXTSORT_TYPE*, const void*) = GET_KEY; /* verify prototype */      \
   if(0) {_getKey(_copyAr1, _userData);} /* this is just to suppress warning... */        \
                                                                                          \
-  int nthreads = omp_get_max_threads();                                                  \
+  int nthreads = _copyN/8192 + 1;                                                        \
+  nthreads = nthreads>omp_get_max_threads()?omp_get_max_threads():nthreads;              \
   uint64_t* h_all, *h_tot;                                                               \
   HXT_CHECK( hxtAlignedMalloc(&h_all, (2048*(nthreads+1)+1)*sizeof(uint64_t)) );         \
                                                                                          \
-  _HXTSORT_PRAGMA(omp parallel)                                                          \
+  _HXTSORT_PRAGMA(omp parallel num_threads(nthreads))                                    \
   {                                                                                      \
     _HXTSORT_PRAGMA(omp single)                                                          \
     {                                                                                    \
@@ -511,11 +517,12 @@ do{
   uint64_t (*_getKey)(HXTSORT_TYPE*, const void*) = GET_KEY; /* verify prototype */      \
   if(0) {_getKey(_copyAr1, _userData);} /* this is just to suppress warning... */        \
                                                                                          \
-  int nthreads = omp_get_max_threads();                                                  \
+  int nthreads = _copyN/8192 + 1;                                                        \
+  nthreads = nthreads>omp_get_max_threads()?omp_get_max_threads():nthreads;              \
   uint64_t* h_all, *h_tot;                                                               \
   HXT_CHECK( hxtAlignedMalloc(&h_all, (2048*(nthreads+1)+1)*sizeof(uint64_t)) );         \
                                                                                          \
-  _HXTSORT_PRAGMA(omp parallel)                                                          \
+  _HXTSORT_PRAGMA(omp parallel num_threads(nthreads))                                    \
   {                                                                                      \
     _HXTSORT_PRAGMA(omp single)                                                          \
     {                                                                                    \
@@ -623,11 +630,12 @@ do{
   uint64_t (*_getKey)(HXTSORT_TYPE*, const void*) = GET_KEY; /* verify prototype */      \
   if(0) {_getKey(_copyAr1, _userData);} /* this is just to suppress warning... */        \
                                                                                          \
-  int nthreads = omp_get_max_threads();                                                  \
+  int nthreads = _copyN/8192 + 1;                                                        \
+  nthreads = nthreads>omp_get_max_threads()?omp_get_max_threads():nthreads;              \
   uint64_t* h_all, *h_tot;                                                               \
   HXT_CHECK( hxtAlignedMalloc(&h_all, (2048*(nthreads+1)+1)*sizeof(uint64_t)) );         \
                                                                                          \
-  _HXTSORT_PRAGMA(omp parallel)                                                          \
+  _HXTSORT_PRAGMA(omp parallel num_threads(nthreads))                                    \
   {                                                                                      \
     _HXTSORT_PRAGMA(omp single)                                                          \
     {                                                                                    \
@@ -710,5 +718,8 @@ do{
   HXT_CHECK( hxtAlignedFree(&_copyAr2) );                                                \
 }while(0)
 
+#ifdef __cplusplus
+}
+#endif
 
 #endif
diff --git a/contrib/hxt/hxt_tetColor.c b/contrib/hxt/hxt_tetColor.c
new file mode 100644
index 0000000000000000000000000000000000000000..ec9570377070543a05e3a2389e8a0327d5038f7c
--- /dev/null
+++ b/contrib/hxt/hxt_tetColor.c
@@ -0,0 +1,470 @@
+#include "hxt_tetColor.h"
+#include "hxt_tetFlag.h"
+#include "hxt_sort.h"
+
+
+/***************************************************
+ *      Coloring the mesh                          *
+ ***************************************************/
+HXTStatus hxtColorMesh(HXTMesh* mesh, uint16_t *nbColors) {
+  // Flood-fill coloring: tetrahedra connected through unconstrained facets share a color.
+  // On return, *nbColors is the number of colors without counting the one that contains ghost
+  // tetrahedra; that one is set to UINT16_MAX and the others are renumbered from 0.
+  uint64_t *stack; // FIFO of tetrahedra to visit; each tetrahedron is pushed at most once
+  HXT_CHECK(hxtMalloc(&stack,mesh->tetrahedra.num*sizeof(uint64_t)));
+  // now that tetrahedra are flagged, we can proceed to colorize the mesh (0 = not colored yet)
+  memset(mesh->tetrahedra.colors, 0, mesh->tetrahedra.size*sizeof(uint16_t));
+
+  uint16_t color = 1; // colors are 1-based as long as 0 is the "not colored" marker
+  uint16_t colorOut = 0; // color of the component in which a ghost tetrahedron was found
+  uint64_t first = 0; // every tetrahedron before `first` is already colored
+
+  while (1){
+    uint64_t stackSize = 0;
+    // look for the next uncolored tetrahedron; a color is never reset to 0, so the scan
+    // resumes where it stopped instead of restarting from the beginning for every color
+    while (first<mesh->tetrahedra.num && mesh->tetrahedra.colors[first]!=0)
+      first++;
+
+    if(first>=mesh->tetrahedra.num)
+      break;
+
+    stack[stackSize++] = first;
+    mesh->tetrahedra.colors[first] = color;
+
+    for (uint64_t i=0; i<stackSize; i++) { // stackSize grows while the component is explored
+      uint64_t t = stack[i];
+
+      if (mesh->tetrahedra.node[4*t+3] == HXT_GHOST_VERTEX)
+        colorOut = color;
+
+      for (unsigned j=0; j<4; j++) {
+        if(mesh->tetrahedra.neigh[4*t+j]!=HXT_NO_ADJACENT && getFacetConstraint(mesh, t, j)==0){ // the facet is not a boundary facet
+          uint64_t neigh = mesh->tetrahedra.neigh[4*t+j]/4; // neigh stores 4*tet+facet
+          if(mesh->tetrahedra.colors[neigh]==0){
+            stack[stackSize++] = neigh;
+            mesh->tetrahedra.colors[neigh] = color;
+          }
+        }
+      }
+    }
+    color++;
+  }
+  *nbColors = color-2; // -2 because we began at one AND because colorOut is not counted...
+
+  HXT_CHECK( hxtFree(&stack) );
+
+  #pragma omp parallel for
+  for (uint64_t i=0;i<mesh->tetrahedra.num;i++){
+    if (mesh->tetrahedra.colors[i] == colorOut){
+      mesh->tetrahedra.colors[i] = UINT16_MAX;
+    }
+    else if(mesh->tetrahedra.colors[i] > colorOut){
+      mesh->tetrahedra.colors[i]-=2; // 1-based -> 0-based, and skip over the removed colorOut
+    }
+    else {
+      mesh->tetrahedra.colors[i]--; // 1-based -> 0-based
+    }
+  }
+
+  return HXT_STATUS_OK;
+}
+
+
+
+HXTStatus hxtFillVolumeBrep(HXTMesh* mesh, uint64_t* tri2TetMap,
+                            uint16_t numVolumes, uint16_t** numSurfacesPerVolume_ptr, uint16_t** surfacesPerVolume_ptr)
+{
+  // For each tet color in [0, numVolumes), list the triangle colors bounding it. Both output arrays are allocated here.
+  int nbTriangleColor = 0; // number of distinct triangle colors
+  uint16_t colorTriMax = 0; // largest triangle color in use
+  char triangleColor[65536] = {0}; // one marker per possible 16-bit triangle color
+  #pragma omp parallel for
+  for (uint64_t i=0; i<mesh->triangles.num; i++) {
+    #pragma omp atomic write
+    triangleColor[mesh->triangles.colors[i]] = 1;
+  }
+  // count the colors in use, find the largest one, and clear the markers for the next phase
+  #pragma omp parallel for reduction(+: nbTriangleColor) reduction(max: colorTriMax)
+  for (unsigned i=0; i<65536; i++) {
+    if(triangleColor[i]==1) {
+      nbTriangleColor++;
+      triangleColor[i] = 0;
+      if(i>colorTriMax)
+        colorTriMax = i;
+    }
+  }
+
+  uint16_t* numSurfacesPerVolume;
+  uint16_t* surfacesPerVolume;
+  HXT_CHECK( hxtAlignedMalloc(&numSurfacesPerVolume, sizeof(uint16_t)*numVolumes) );
+
+  // a surface can only appear in two volumes. (this allocates too much, but usually not by a lot)
+  HXT_CHECK( hxtAlignedMalloc(&surfacesPerVolume, sizeof(uint16_t)*nbTriangleColor*2) );
+
+  const int maxThreads = omp_get_max_threads();
+  int *numColors; // per thread: first a count, then (after the scan below) a write offset
+  HXT_CHECK( hxtAlignedMalloc(&numColors, maxThreads*sizeof(int)) );
+
+  int currentSurfaceIndex = 0; // where the list of the current volume starts in surfacesPerVolume
+  int totalCount = 0; // number of surfaces found for the volume treated last
+  
+  #pragma omp parallel
+  {
+    // for every tet-color
+    for (uint16_t color=0; color<numVolumes; color++) {
+      #pragma omp for
+      for (uint64_t tri=0; tri<mesh->triangles.num; tri++) {
+        // the tetrahedra on both sides of the triangle
+        uint64_t tet1 = tri2TetMap[tri];
+        uint64_t tet2 = mesh->tetrahedra.neigh[tet1];
+        tet1/=4;
+        tet2/=4;
+
+        // we do a xor because the surface is not a bounding if the same volume is on both its sides
+        if((mesh->tetrahedra.colors[tet1]==color)^(mesh->tetrahedra.colors[tet2]==color)) {
+          #pragma omp atomic write // this atomic should do nothing (it usually is already atomic)
+          triangleColor[mesh->triangles.colors[tri]] = 1;
+        }
+      }
+
+      int threadID = omp_get_thread_num();
+      int localCount = 0;
+      // the counter is wider than 16 bits on purpose: with uint16_t, i<=colorTriMax never becomes false when colorTriMax==65535
+      #pragma omp for schedule(static)
+      for (uint32_t i=0; i<=colorTriMax; i++) {
+        if(triangleColor[i]==1) {
+          localCount++;
+        }
+      }
+
+      numColors[threadID] = localCount;
+
+      #pragma omp barrier
+      #pragma omp single
+      {
+        int nthreads = omp_get_num_threads();
+        currentSurfaceIndex+=totalCount; // skip the list written for the previous volume
+        totalCount = 0;
+        for (int thread=0; thread<nthreads; thread++) { // exclusive scan of the per-thread counts
+          int tsum = totalCount + numColors[thread];
+          numColors[thread] = totalCount;
+          totalCount = tsum;
+        }
+
+        numSurfacesPerVolume[color] = totalCount;
+      }
+      // same bounds and same static schedule as the counting loop above: each thread is expected to get the same colors again
+      #pragma omp for schedule(static)
+      for (uint32_t i=0; i<=colorTriMax; i++) {
+        if(triangleColor[i]==1) {
+          surfacesPerVolume[currentSurfaceIndex + numColors[threadID]] = (uint16_t) i;
+          numColors[threadID]++;
+          triangleColor[i] = 0; // reset the marker for the next tet color
+        }
+      }
+    }
+  }
+
+  *numSurfacesPerVolume_ptr = numSurfacesPerVolume;
+  *surfacesPerVolume_ptr = surfacesPerVolume;
+  
+  HXT_CHECK( hxtAlignedFree(&numColors) );
+
+  return HXT_STATUS_OK;
+}
+
+
+static int compareVolumes(uint16_t numSurfaces1,
+                          uint16_t numSurfaces2,
+                          uint16_t* surfaces1,
+                          uint16_t* surfaces2)
+{
+  // Three-way comparison of two surface lists, regardless of the order of their entries:
+  // negative, zero or positive result. The number of surfaces is compared first, then the
+  // largest surface, then the sorted lists entry by entry.
+  // NOTE(review): a failing HXT_CHECK presumably returns a status from this int comparator — confirm callers cope
+  if(numSurfaces1!=numSurfaces2)
+    return numSurfaces1<numSurfaces2 ? -1 : 1;
+
+  const uint16_t count = numSurfaces1;
+  int result = 0;
+  uint64_t* sortedA;
+  uint64_t* sortedB;
+  HXT_CHECK( hxtAlignedMalloc(&sortedA, count*sizeof(uint64_t)) );
+  HXT_CHECK( hxtAlignedMalloc(&sortedB, count*sizeof(uint64_t)) );
+
+  // copy the 16-bit surfaces into the 64-bit arrays handed to group1_sort, tracking both maxima
+  uint64_t largestA = 0;
+  uint64_t largestB = 0;
+  for (uint16_t k=0; k<count; k++) {
+    sortedA[k] = surfaces1[k];
+    sortedB[k] = surfaces2[k];
+    if(sortedA[k]>largestA)
+      largestA = sortedA[k];
+    if(sortedB[k]>largestB)
+      largestB = sortedB[k];
+  }
+
+  if(largestA!=largestB) {
+    // different maxima: the lists cannot be equal, no need to sort them
+    result = largestA - largestB;
+  }
+  else {
+    HXT_CHECK( group1_sort(sortedA, count, largestA) );
+    HXT_CHECK( group1_sort(sortedB, count, largestA) );
+
+    // the first differing entry of the sorted lists decides
+    for (uint16_t k=0; k<count && result==0; k++)
+      result = sortedA[k]-sortedB[k];
+  }
+
+  HXT_CHECK( hxtAlignedFree(&sortedA) );
+  HXT_CHECK( hxtAlignedFree(&sortedB) );
+
+  return result;
+}
+
+
+static inline HXTStatus swapPairsIfNeeded(uint16_t numVolumes,
+                                         uint16_t* numSurfacesPerVolume,
+                                         uint16_t* surfacesPerVolume,
+                                         HXTGroup2* pairs)
+{
+  // pairs is sorted by hash (v[1]). Neighbors with equal hashes are ordered with compareVolumes.
+  // numSurfacesPerVolume is used as an inclusive scan: entry vol is the end offset of volume vol
+  // in surfacesPerVolume, so volume 0 starts at offset 0.
+  int alreadySwapped = 0;
+  for (int i=1; i<numVolumes; i++) {
+    if(pairs[i-1].v[1]!=pairs[i].v[1]){
+      alreadySwapped = 0; // a new run of equal hashes starts: the previous swap no longer matters
+      continue;
+    }
+    int vol1 = pairs[i-1].v[0];
+    int vol2 = pairs[i].v[0];
+    uint16_t start1 = vol1==0 ? 0 : numSurfacesPerVolume[vol1-1]; // never read index -1
+    uint16_t end1 = numSurfacesPerVolume[vol1];
+    uint16_t start2 = vol2==0 ? 0 : numSurfacesPerVolume[vol2-1];
+    uint16_t end2 = numSurfacesPerVolume[vol2];
+    int comp = compareVolumes(end1-start1, end2-start2,
+                              &surfacesPerVolume[start1], &surfacesPerVolume[start2]);
+    if(comp == 0)
+      return HXT_ERROR_MSG(HXT_STATUS_ERROR, "duplicated volume definition in the BREP");
+    if(comp > 0) {
+      if(alreadySwapped) // a single swap cannot order a run of three or more equal hashes
+        return HXT_ERROR_MSG(HXT_STATUS_ERROR, "The minimum surface of volume %d and %d appears more than twice...", vol1, vol2);
+      HXTGroup2 tmp = pairs[i-1];
+      pairs[i-1] = pairs[i];
+      pairs[i] = tmp;
+    }
+    alreadySwapped = comp > 0;
+  }
+  return HXT_STATUS_OK;
+}
+
+
+static HXTStatus getVolumesHashes(uint16_t numVolumes,
+                                  uint16_t* numSurfacesPerVolume,
+                                  uint16_t* surfacesPerVolume,
+                                  HXTGroup2* pairs)
+{
+  // For each volume, pairs[vol] = {volume index, hash}. The hash holds the smallest surface in its
+  // high 32 bits and, in its low 32 bits, the XOR of one bit (bit s&31) per surface s. pairs is then sorted by hash.
+  uint16_t maxMin = 0; // largest of the per-volume smallest surfaces
+
+  #pragma omp parallel for reduction(max:maxMin)
+  for (int vol=0; vol<numVolumes; vol++) {
+    // numSurfacesPerVolume is read as an inclusive scan: entry vol is the end offset of volume vol
+    const uint16_t begin = vol==0?0:numSurfacesPerVolume[vol-1];
+    const uint16_t end = numSurfacesPerVolume[vol];
+
+    uint64_t lowBits = 0;
+    uint16_t smallest = UINT16_MAX;
+    for (uint16_t k=begin; k<end; k++) {
+      const uint16_t s = surfacesPerVolume[k];
+      lowBits ^= (UINT64_C(1)<<(s & 31));
+      if(s < smallest)
+        smallest = s;
+    }
+
+    if(smallest > maxMin)
+      maxMin = smallest;
+    pairs[vol].v[0] = vol;
+    pairs[vol].v[1] = lowBits | ((uint64_t) smallest << 32);
+  }
+
+  // NOTE(review): the bound given to the sort ignores the low 32 bits of the hashes — confirm this is fine for group2_sort_v1
+  HXT_CHECK( group2_sort_v1(pairs, numVolumes, (uint64_t) maxMin << 32) );
+
+  // two volumes can end up with the same hash: they must still get a unique order
+  HXT_CHECK( swapPairsIfNeeded(numVolumes, numSurfacesPerVolume, surfacesPerVolume, pairs) );
+  return HXT_STATUS_OK;
+}
+
+
+static HXTStatus matchVolumes(HXTMesh* mesh, uint16_t* numSurfacesPerVolume, uint16_t* surfacesPerVolume, uint16_t nbColors)
+{
+  // Renumber the tet colors so that a volume bounded by the same surfaces as volume v of mesh->brep
+  // gets color v; colors without a counterpart in the brep get the values theirNumVolumes and up.
+  HXTGroup2* ourPairs;
+  const uint16_t ourNumVolumes = nbColors;
+  const uint16_t theirNumVolumes = mesh->brep.numVolumes;
+  HXT_CHECK( hxtAlignedMalloc(&ourPairs, 2*ourNumVolumes*sizeof(HXTGroup2) ) );
+  HXTGroup2* theirPairs = ourPairs + ourNumVolumes; // second half of the same allocation
+
+  // we make a scan so that we can do things in parallel...
+  int theirSum = 0;
+  for (uint16_t vol=0; vol<theirNumVolumes; vol++) {
+    theirSum+=mesh->brep.numSurfacesPerVolume[vol];
+    mesh->brep.numSurfacesPerVolume[vol] = theirSum;
+  }
+
+  int ourSum = 0;
+  for (uint16_t vol=0; vol<ourNumVolumes; vol++) {
+    ourSum+=numSurfacesPerVolume[vol];
+    numSurfacesPerVolume[vol] = ourSum;
+  }
+
+  HXT_CHECK( getVolumesHashes(ourNumVolumes,
+                              numSurfacesPerVolume,
+                              surfacesPerVolume, ourPairs) );
+  HXT_CHECK( getVolumesHashes(theirNumVolumes,
+                              mesh->brep.numSurfacesPerVolume,
+                              mesh->brep.surfacesPerVolume, theirPairs) );
+
+  // now that we sorted every volumes, see if they match
+  // ourPairs contains all volumes, while theirPairs can skip some volumes...
+  int ourIndex = 0;
+  int volNotCorresponding = theirNumVolumes; // next color to give to a volume that is not in the brep
+  for (int theirIndex=0; theirIndex<theirNumVolumes; theirIndex++) {
+    while (1) {
+      if(ourIndex>=ourNumVolumes)
+        return HXT_ERROR_MSG(HXT_STATUS_ERROR, "Volumes do not match the BREP");
+      
+      if(ourPairs[ourIndex].v[1]==theirPairs[theirIndex].v[1]){
+        if(ourIndex<ourNumVolumes-1 && ourPairs[ourIndex+1].v[1]==ourPairs[ourIndex].v[1]) {
+          // we have to check further because there was a collision in the hashes
+          uint16_t ourVol = ourPairs[ourIndex].v[0];
+          uint16_t theirVol = theirPairs[theirIndex].v[0];
+
+          uint16_t ourStart = ourVol==0? 0:numSurfacesPerVolume[ourVol-1];
+          uint16_t ourEnd = numSurfacesPerVolume[ourVol];
+          uint16_t theirStart = theirVol==0? 0:mesh->brep.numSurfacesPerVolume[theirVol-1];
+          uint16_t theirEnd = mesh->brep.numSurfacesPerVolume[theirVol];
+
+          int cmp = compareVolumes(ourEnd - ourStart, theirEnd - theirStart, 
+                                   &surfacesPerVolume[ourStart], &mesh->brep.surfacesPerVolume[theirStart]);
+
+          if(cmp==0) {
+            ourPairs[ourIndex++].v[1] = theirPairs[theirIndex].v[0]; // from now on v[1] is the final color
+            break;
+          }
+        }
+        else {
+          ourPairs[ourIndex++].v[1] = theirPairs[theirIndex].v[0];
+          break;
+        }
+      }
+
+      ourPairs[ourIndex++].v[1] = volNotCorresponding++;
+    }
+  }
+
+  while(ourIndex<ourNumVolumes) {
+    ourPairs[ourIndex++].v[1] = volNotCorresponding++;
+  }
+
+  #pragma omp parallel for
+  for (int i=0; i<ourNumVolumes; i++) {
+    uint16_t ourVol = ourPairs[i].v[0];
+    uint16_t theirVol = ourPairs[i].v[1];
+
+    theirPairs[ourVol].v[1] = theirVol; // theirPairs is reused as a table: our color -> final color
+  }
+
+  #pragma omp parallel for
+  for (uint64_t i=0; i<mesh->tetrahedra.num; i++) {
+    uint16_t color = mesh->tetrahedra.colors[i];
+    if(color!=UINT16_MAX) {
+      mesh->tetrahedra.colors[i] = theirPairs[color].v[1];
+    }
+  }
+
+  // turn the scan back into per-volume counts, in place. Go from the last volume to the first
+  // (and sequentially): entry vol-1 must still hold its scanned value when entry vol is restored
+  for (int vol=theirNumVolumes-1; vol>0; vol--) {
+    mesh->brep.numSurfacesPerVolume[vol] -= mesh->brep.numSurfacesPerVolume[vol-1];
+  }
+
+  HXT_CHECK( hxtAlignedFree(&ourPairs) );
+
+  return HXT_STATUS_OK;
+}
+
+
+
+HXTStatus hxtMapColorsToBrep(HXTMesh* mesh, uint16_t nbColors, uint64_t* tri2TetMap)
+{
+  // Without a brep (numVolumes==0), build one from the nbColors colored volumes.
+  // Otherwise, renumber the tet colors so that they match the volumes of the given brep.
+  if(mesh->brep.numVolumes==0) {
+  #ifndef NDEBUG
+    if(mesh->brep.numSurfacesPerVolume!=NULL) {
+      HXT_WARNING("mesh->brep.numSurfacesPerVolume is not null but numVolumes=0\nAttempting to free it");
+      HXT_CHECK( hxtAlignedFree(&mesh->brep.numSurfacesPerVolume) ); // brep arrays come from hxtAlignedMalloc
+    }
+    if(mesh->brep.surfacesPerVolume!=NULL) {
+      HXT_WARNING("mesh->brep.surfacesPerVolume is not null but numVolumes=0\nAttempting to free it");
+      HXT_CHECK( hxtAlignedFree(&mesh->brep.surfacesPerVolume) );
+    }
+  #endif
+    mesh->brep.numVolumes = nbColors;
+    
+    HXT_CHECK( hxtFillVolumeBrep(mesh, tri2TetMap,
+                              nbColors,
+                              &mesh->brep.numSurfacesPerVolume,
+                              &mesh->brep.surfacesPerVolume) );
+  }
+  else {
+    if(mesh->brep.numVolumes>nbColors)
+      return HXT_ERROR_MSG(HXT_STATUS_ERROR, "brep contains more volumes than there really are !");
+
+    if(mesh->brep.numVolumes<nbColors)
+      HXT_INFO("%u out of %u volumes will be refined", mesh->brep.numVolumes, nbColors);
+    
+    // match our brep with the brep given...
+    uint16_t* numSurfacesPerVolume;
+    uint16_t* surfacesPerVolume;
+    HXT_CHECK( hxtFillVolumeBrep(mesh, tri2TetMap,
+                              nbColors,
+                              &numSurfacesPerVolume,
+                              &surfacesPerVolume) );
+
+    HXT_CHECK( matchVolumes(mesh, numSurfacesPerVolume, surfacesPerVolume, nbColors) );
+
+    HXT_CHECK( hxtAlignedFree( &numSurfacesPerVolume ) ); // the temporary brep is no longer needed
+    HXT_CHECK( hxtAlignedFree( &surfacesPerVolume ) );
+  }
+
+
+  return HXT_STATUS_OK;
+}
+
+
+HXTStatus setFlagsToProcessOnlyVolumesInBrep(HXTMesh* mesh)
+{
+  // a tetrahedron whose color is not one of the brep volumes gets the processed flag
+  #pragma omp parallel for
+  for (uint64_t tet=0; tet<mesh->tetrahedra.num; tet++) {
+    if(mesh->tetrahedra.colors[tet]<mesh->brep.numVolumes) continue;
+    setProcessedFlag(mesh, tet);
+  }
+
+  return HXT_STATUS_OK;
+}
+
+
+
+// TODO: compute a bounding box following some surface mesh colors !
+// we should get this bbox from the triangles (and not the tetrahedra) to be quick
+// we should thus have a function that receives a volume color and a list of surface colors !
\ No newline at end of file
diff --git a/contrib/hxt/hxt_tetColor.h b/contrib/hxt/hxt_tetColor.h
new file mode 100644
index 0000000000000000000000000000000000000000..8a841956d8c5a0c0285745d0ada3dd46df90f42b
--- /dev/null
+++ b/contrib/hxt/hxt_tetColor.h
@@ -0,0 +1,32 @@
+#ifndef _HXT_TETCOLOR_
+#define _HXT_TETCOLOR_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "hxt_mesh.h"
+
+/// Gives a unique color to each enclosed volume
+HXTStatus hxtColorMesh(HXTMesh* mesh, uint16_t *nbVolumes);
+
+/* call hxtFillVolumeBrep if mesh->brep.numVolumes = 0
+ * or change the colors so that they match the volumes described by
+ * mesh->brep.numSurfacesPerVolume
+ * mesh->brep.surfacesPerVolume
+ */
+HXTStatus hxtMapColorsToBrep(HXTMesh* mesh, uint16_t nbColors, uint64_t* tri2TetMap);
+
+
+// allocate and fill *numSurfacesPerVolume and *surfacesPerVolume for the numVolumes tet colors (the caller has to free both arrays)
+HXTStatus hxtFillVolumeBrep(HXTMesh* mesh, uint64_t* tri2TetMap,
+                            uint16_t numVolumes, uint16_t** numSurfacesPerVolume, uint16_t** surfacesPerVolume);
+
+/* set the processed flag (see hxt_tetFlag.h) of every tetrahedron whose color is not a volume of the brep (color >= mesh->brep.numVolumes) */
+HXTStatus setFlagsToProcessOnlyVolumesInBrep(HXTMesh* mesh);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/contrib/hxt/hxt_tetDelaunay.c b/contrib/hxt/hxt_tetDelaunay.c
new file mode 100644
index 0000000000000000000000000000000000000000..c8cb4a459fc3df3302174d3edf459fa75bfafb57
--- /dev/null
+++ b/contrib/hxt/hxt_tetDelaunay.c
@@ -0,0 +1,1907 @@
+#include "hxt_tetDelaunay.h"
+#include "predicates.h"
+#include "hxt_tetRepair.h"
+#include "hxt_tetUtils.h"
+#include "hxt_tetFlag.h"
+#include "hxt_sort.h"
+
+/**
+* \file hxt_tetDelaunay.c see header hxt_tetDelaunay.h.
+* \author Célestin Marot
+*/
+
+/* compile-time parameters */
+#define SMALLEST_ROUND 2048
+#define DELETED_BUFFER_SIZE 8182
+// #define HXT_DELAUNAY_LOW_MEMORY /* doesn't use any buffer (a lot slower, except if you are at the limit of filling the RAM) */
+
+/* useful macros */
+#define ABS(x) ((x) >= 0 ? (x) : -(x))
+#define MAX(x,y) ((x)>(y) ? (x) : (y))
+#define MIN(x,y) ((x)<(y) ? (x) : (y))
+/* status check for OpenMP regions, where returning is not possible: a negative status flushes stdout/stderr and exits */
+#define HXT_OMP_CHECK(status) do{ HXTStatus _tmp_ = (status); \
+    if(_tmp_<0){ \
+      if(_tmp_>HXT_STATUS_INTERNAL) \
+        HXT_TRACE_MSG(_tmp_, "cannot break OpenMP region -> exiting"); \
+      fflush(stdout); fflush(stderr); \
+      exit(_tmp_); \
+    } \
+  }while(0)
+
+
+typedef struct{
+  uint32_t hxtDeclareAligned node[3]; // three node indices (presumably the vertices of a boundary facet — TODO confirm)
+  uint16_t flag;
+  uint64_t neigh; // the tet on the other side of the boundary
+} cavityBnd_t;
+
+typedef struct { // NOTE(review): presumably the working data of one thread during insertion — confirm
+#ifndef HXT_DELAUNAY_LOW_MEMORY
+  uint64_t hxtDeclareAligned Map[1024]; // buffer that only exists when HXT_DELAUNAY_LOW_MEMORY is not defined
+#endif
+  // array of cavityBnd_t: num entries in use out of size allocated
+  struct {
+    cavityBnd_t* bnd;
+    uint64_t num;
+    uint64_t size;
+  } ball;
+  // array of tetrahedron indices: num entries in use out of size allocated
+  struct {
+    uint64_t* tetID;
+    uint64_t num;
+    uint64_t size;
+  } deleted;
+  // NOTE(review): meaning of the partition bounds is not visible here — document where they are set
+  struct {
+    uint64_t startDist;
+    uint64_t endDist;
+    uint32_t first;
+  } partition;
+} TetLocal;
+
+
+/***********************************
+ * create the initial tetrahedron 
+ * surrounded by 4 ghost tetrahedra
+ ***********************************/
+static inline HXTStatus hxtTetrahedraInit(HXTMesh* mesh, hxtNodeInfo* nodeInfo, uint32_t nToInsert, int verbosity){
+  if(nToInsert < 4){
+    return HXT_ERROR_MSG(HXT_STATUS_ERROR, "cannot mesh less than four vertices");
+  }
+  if(mesh->tetrahedra.size < 5){ // 5 tetrahedra are written below
+    uint32_t maxSizeEstim = MAX(omp_get_max_threads()*DELETED_BUFFER_SIZE+8UL*nToInsert, 10UL*nToInsert); // NOTE(review): stored in 32 bits, can wrap for very large nToInsert
+    HXT_CHECK( hxtTetrahedraReserve(mesh, maxSizeEstim) );
+    HXT_INFO_COND(verbosity>1, "Initialization reserved %lu Tet.", mesh->tetrahedra.size);
+  }
+
+  HXTVertex* vertices = (HXTVertex*) mesh->vertices.coord;
+
+  // find non-coplanar vertices
+  double orientation = 0.0;
+  // all four loops stop as soon as orientation is non-zero, i.e. at the first non-coplanar quadruple i<j<k<l
+  uint32_t i=0, j=1, k=2, l=3;
+  for (i=0; orientation==0.0 && i<nToInsert-3; i++)
+  {
+    for (j=i+1; orientation==0.0 && j<nToInsert-2; j++)
+    {
+      for (k=j+1; orientation==0.0 && k<nToInsert-1; k++)
+      {
+        for (l=k+1; orientation==0.0 && l<nToInsert; l++)
+        {
+          orientation = orient3d(vertices[nodeInfo[i].node].coord,
+                                 vertices[nodeInfo[j].node].coord,
+                                 vertices[nodeInfo[k].node].coord,
+                                 vertices[nodeInfo[l].node].coord);
+        }
+      }
+    }
+  }
+  l--; k--; j--; i--; // every index was incremented once more before its loop ended: step back to the quadruple tested last
+
+
+  if(orientation==0.0){
+    return HXT_ERROR_MSG(HXT_STATUS_FAILED, "all vertices are coplanar");
+  }
+
+  // swap 0<->i  1<->j 2<->k 3<->l (since i<j<k<l, an earlier swap never moves a node that a later swap needs)
+  {
+    hxtNodeInfo tmp = nodeInfo[i];
+    nodeInfo[i] = nodeInfo[0];
+    nodeInfo[0] = tmp;
+    nodeInfo[0].status = HXT_STATUS_TRUE; // presumably marks the node as inserted — TODO confirm
+    i = 0;
+
+    tmp = nodeInfo[j];
+    nodeInfo[j] = nodeInfo[1];
+    nodeInfo[1] = tmp;
+    nodeInfo[1].status = HXT_STATUS_TRUE;
+    j = 1;
+
+    tmp = nodeInfo[k];
+    nodeInfo[k] = nodeInfo[2];
+    nodeInfo[2] = tmp;
+    nodeInfo[2].status = HXT_STATUS_TRUE;
+    k = 2;
+
+    tmp = nodeInfo[l];
+    nodeInfo[l] = nodeInfo[3];
+    nodeInfo[3] = tmp;
+    nodeInfo[3].status = HXT_STATUS_TRUE;
+    l = 3;
+  }
+
+  // exchanging two vertices reverses the orientation of the tetrahedron built below
+  if(orientation > 0.0){
+    uint32_t tmp = i;
+    i = j;
+    j = tmp;
+  }
+  // tetrahedron t uses entries 4t..4t+3: tet 0 has four real nodes, tets 1 to 4 end with HXT_GHOST_VERTEX; neigh[a]==b <=> neigh[b]==a
+  mesh->tetrahedra.neigh[ 0] = 19;    mesh->tetrahedra.node[ 0] = nodeInfo[l].node;
+  mesh->tetrahedra.neigh[ 1] = 15;    mesh->tetrahedra.node[ 1] = nodeInfo[k].node;
+  mesh->tetrahedra.neigh[ 2] = 11;    mesh->tetrahedra.node[ 2] = nodeInfo[j].node;
+  mesh->tetrahedra.neigh[ 3] =  7;    mesh->tetrahedra.node[ 3] = nodeInfo[i].node;
+
+  mesh->tetrahedra.neigh[ 4] = 18;    mesh->tetrahedra.node[ 4] = nodeInfo[l].node;
+  mesh->tetrahedra.neigh[ 5] = 10;    mesh->tetrahedra.node[ 5] = nodeInfo[j].node;
+  mesh->tetrahedra.neigh[ 6] = 13;    mesh->tetrahedra.node[ 6] = nodeInfo[k].node;
+  mesh->tetrahedra.neigh[ 7] =  3;    mesh->tetrahedra.node[ 7] = HXT_GHOST_VERTEX;
+
+  mesh->tetrahedra.neigh[8 ] = 17;    mesh->tetrahedra.node[ 8] = nodeInfo[l].node;
+  mesh->tetrahedra.neigh[9 ] = 14;    mesh->tetrahedra.node[ 9] = nodeInfo[k].node;
+  mesh->tetrahedra.neigh[10] =  5;    mesh->tetrahedra.node[10] = nodeInfo[i].node;
+  mesh->tetrahedra.neigh[11] =  2;    mesh->tetrahedra.node[11] = HXT_GHOST_VERTEX;
+
+  mesh->tetrahedra.neigh[12] = 16;    mesh->tetrahedra.node[12] = nodeInfo[l].node;
+  mesh->tetrahedra.neigh[13] =  6;    mesh->tetrahedra.node[13] = nodeInfo[i].node;
+  mesh->tetrahedra.neigh[14] =  9;    mesh->tetrahedra.node[14] = nodeInfo[j].node;
+  mesh->tetrahedra.neigh[15] =  1;    mesh->tetrahedra.node[15] = HXT_GHOST_VERTEX;
+
+  mesh->tetrahedra.neigh[16] = 12;    mesh->tetrahedra.node[16] = nodeInfo[k].node;
+  mesh->tetrahedra.neigh[17] =  8;    mesh->tetrahedra.node[17] = nodeInfo[j].node;
+  mesh->tetrahedra.neigh[18] =  4;    mesh->tetrahedra.node[18] = nodeInfo[i].node;
+  mesh->tetrahedra.neigh[19] =  0;    mesh->tetrahedra.node[19] = HXT_GHOST_VERTEX;
+
+  mesh->tetrahedra.num = 5;
+
+  for (uint64_t tet=0; tet<5; tet++){
+    mesh->tetrahedra.colors[tet] = UINT16_MAX; // all five start with the color UINT16_MAX and no flag
+    mesh->tetrahedra.flag[tet] = 0;
+  }
+
+  return HXT_STATUS_OK;
+}
+
+/***********************************
+ * fill the passes array which tells
+ * the size of each BRIO round.
+ * return the number of BRIO passes
+ ***********************************/
+static unsigned computePasses(uint32_t passes[12], uint32_t nInserted, uint32_t nToInsert)
+{
+  // the entries are first produced from the largest one (nToInsert) down to a terminating 0...
+  unsigned npasses=0;
+  passes[0] = nToInsert;
+  for (unsigned r=0; r<10; r++) {
+    const uint32_t current = passes[r];
+    if(current < SMALLEST_ROUND || current/8 < nInserted){
+      passes[r+1] = 0; // terminating entry
+      npasses = r+1;
+      break;
+    }
+    passes[r+1] = current/7.5;
+  }
+  // ...then entries 0..npasses are reversed in place, so that the array is in increasing order
+  for(unsigned lo=0, hi=npasses; lo<hi; lo++, hi--){
+    const uint32_t tmp = passes[lo];
+    passes[lo] = passes[hi];
+    passes[hi] = tmp;
+  }
+  return npasses;
+}
+
+/******************************************
+ * Set up an empty TetLocal structure: both the
+ * cavity boundary buffer (ball) and the deleted
+ * tetrahedra buffer start empty and get their
+ * initial storage allocated here.
+ ******************************************/
+static inline HXTStatus localInit(TetLocal* local){
+    // buffer of deleted tetrahedra
+    local->deleted.tetID = NULL;
+    local->deleted.num = 0;
+    local->deleted.size = DELETED_BUFFER_SIZE;
+
+    // buffer of cavity boundary faces
+    local->ball.bnd = NULL;
+    local->ball.num = 0;
+    local->ball.size = 1020; // accounting for the offset in aligned malloc, to avoid additional memory page
+
+    HXT_CHECK( hxtAlignedMalloc(&local->ball.bnd, local->ball.size*sizeof(cavityBnd_t)) );
+    HXT_CHECK( hxtAlignedMalloc(&local->deleted.tetID, local->deleted.size*sizeof(uint64_t)) );
+
+    return HXT_STATUS_OK;
+}
+
+/***********************************************
+           re-allocation functions
+ ***********************************************/
+/* Collective re-allocation point. The function contains an OpenMP barrier and a
+ * single construct, so it is meant to be reached by every thread of the team.
+ *   mesh   : mesh whose tetrahedra arrays are doubled when tetrahedra.num > tetrahedra.size
+ *   toCopy : value to snapshot once all threads are stopped at the barrier
+ *   copy   : destination of the snapshot; nothing is copied when copy==toCopy
+ *            (for instance when both are NULL, as done by reserveNewTet)
+ * Returns the status of hxtTetrahedraDoubleSize as seen by the calling thread.
+ */
+static HXTStatus synchronizeReallocation(HXTMesh* mesh, volatile int* toCopy, volatile int* copy){
+  // threads can't be doing something else while the realloc portion happens
+  #pragma omp barrier
+  
+  // this enables us to have the same value of toCopy for everyone, as we are sure nothing happens to those variables here
+  if(toCopy!=copy){
+    *copy = *toCopy;
+  }
+
+  HXTStatus status = HXT_STATUS_OK;
+  // the reallocation is done by one thread only (omp single), the others wait at its end
+  #pragma omp single
+  {
+    if(mesh->tetrahedra.num > mesh->tetrahedra.size){
+      status = hxtTetrahedraDoubleSize(mesh);
+    }
+  } // implicit barrier here
+
+  // NOTE(review): `status` is a local variable assigned inside the single construct,
+  // so presumably only the thread that performed the reallocation can see a failure - confirm
+  if(status!=HXT_STATUS_OK)
+    HXT_TRACE(status);
+
+  return status;
+}
+
+
+// Make sure the tetrahedra arrays can hold tetrahedra.num elements.
+// Use a pragma atomic capture to get tetrahedra.num and update it at the same time before calling this function !
+static inline HXTStatus reserveNewTet(HXTMesh* mesh){
+  // enough room already: nothing to synchronize
+  if(mesh->tetrahedra.num <= mesh->tetrahedra.size)
+    return HXT_STATUS_OK;
+
+  HXT_CHECK( synchronizeReallocation(mesh, NULL, NULL) );
+  return HXT_STATUS_OK;
+}
+
+/* Guarantee room for `num` more entries in local->deleted.tetID;
+ * when the buffer is too small it grows to twice the required count. */
+static inline HXTStatus reserveNewDeleted(TetLocal* local, uint64_t num){
+  const uint64_t required = num + local->deleted.num;
+  if(required <= local->deleted.size)
+    return HXT_STATUS_OK;
+
+  const uint64_t grown = 2*required;
+  HXT_CHECK( hxtAlignedRealloc(&local->deleted.tetID, grown*sizeof(uint64_t)) );
+  local->deleted.size = grown;
+
+  return HXT_STATUS_OK;
+}
+
+/* Guarantee room for `num` more entries in local->ball.bnd;
+ * when the buffer is too small it grows to twice the required count. */
+static inline HXTStatus reserveNewBnd(TetLocal* local, uint64_t num){
+  const uint64_t required = num + local->ball.num;
+  if(required <= local->ball.size)
+    return HXT_STATUS_OK;
+
+  const uint64_t grown = 2*required;
+  HXT_CHECK( hxtAlignedRealloc(&local->ball.bnd, grown*sizeof(cavityBnd_t)) );
+  local->ball.size = grown;
+
+  return HXT_STATUS_OK;
+}
+/***********************************************/
+
+/************************************
+ * Tell whether a tetrahedron lies entirely
+ * in the calling thread's partition.
+ * Returns HXT_STATUS_OK if it does,
+ * HXT_STATUS_INTERNAL otherwise.
+ ***********************************/
+static inline HXTStatus checkTetrahedron(HXTVertex* vertices, TetLocal* local, const uint32_t* nodes){
+  /* Actually, one vertex (not more) could be in another partition without creating a conflict.
+   However, all threads would have to have a verticesID array => a lot of memory space wasted.
+   Instead, we only allow the ghost vertex to be in another partition, it is handled differently in
+   the computeAdjacenciesFast function */
+
+  // an end distance of UINT64_MAX means that we are working with one thread only
+  if(local->partition.endDist==UINT64_MAX)
+    return HXT_STATUS_OK;
+
+  const uint64_t first = local->partition.startDist;
+  const uint64_t extent = local->partition.endDist - first;
+
+  // gather the hilbert distances; a ghost vertex simply reuses the distance of node 2
+  uint64_t dist[4];
+  for (unsigned k=0; k<3; k++)
+    dist[k] = vertices[nodes[k]].padding.hilbertDist;
+  dist[3] = nodes[3]==HXT_GHOST_VERTEX ? dist[2] : vertices[nodes[3]].padding.hilbertDist;
+
+  for (unsigned k=0; k<4; k++) {
+    /* a hilbert index of UINT64_MAX means that only certain volumes are meshed
+     * and that this vertex was outside of the bounding box: we should never go in
+     * a tetrahedron that is outside the meshed volume */
+    if(dist[k]==UINT64_MAX)
+      return HXT_STATUS_INTERNAL;
+
+    // range test relying on unsigned wrap around (defined by the standard)
+    if(dist[k] - first >= extent)
+      return HXT_STATUS_INTERNAL;
+  }
+
+  return HXT_STATUS_OK;
+}
+
+
+/* Return HXT_STATUS_INTERNAL when the distance between p1 and p2 is below
+ * 0.8 times nodalSize (compared on squared values), HXT_STATUS_OK otherwise. */
+static inline HXTStatus pointIsTooClose(const double* __restrict__ p1, const double* __restrict__ p2, double nodalSize){
+  const double dx = p1[0]-p2[0];
+  const double dy = p1[1]-p2[1];
+  const double dz = p1[2]-p2[2];
+
+  const double squaredDist = dx*dx + dy*dy + dz*dz;
+  const double squaredLimit = 0.8*0.8*nodalSize*nodalSize;
+
+  return squaredDist < squaredLimit ? HXT_STATUS_INTERNAL : HXT_STATUS_OK;
+}
+
+/* Proximity filter on the whole cavity boundary.
+ *   local      : holds the cavity boundary faces (local->ball.bnd[0 .. ball.num-1])
+ *   mesh       : mesh providing the vertex coordinates (4 doubles per vertex)
+ *   nodalSizes : target size at each vertex; DBL_MAX means "no size", in which case
+ *                the size of vta is used instead
+ *   vta        : index of the vertex we try to insert
+ * Returns HXT_STATUS_INTERNAL (through pointIsTooClose) as soon as one vertex of the
+ * boundary is too close to vta with respect to the mean of both nodal sizes,
+ * HXT_STATUS_OK otherwise. */
+static inline HXTStatus filterCavity (TetLocal* local, HXTMesh *mesh, const double *nodalSizes, const uint32_t vta)
+{
+  double *vtaCoord = mesh->vertices.coord + 4*vta;
+  double vtaNodalSize = nodalSizes[vta];
+
+  for (uint64_t i = 0 ; i< local->ball.num ; i++) {
+    for (unsigned j=0;j<3;j++) {
+      uint32_t nodej = local->ball.bnd[i].node[j];
+
+      // A boundary face has only 3 nodes (j<3), so the ghost vertex can sit in one of them.
+      // HXT_GHOST_VERTEX is a sentinel, not a valid index into coord/nodalSizes: skip it.
+      if (nodej == HXT_GHOST_VERTEX)
+        continue;
+
+      double *Xj = mesh->vertices.coord + 4*nodej;
+      double otherNodalSize = nodalSizes[nodej];
+      if(otherNodalSize==DBL_MAX){
+        otherNodalSize = vtaNodalSize;
+      }
+      HXT_CHECK( pointIsTooClose(vtaCoord, Xj, 0.5*( vtaNodalSize + otherNodalSize)) );
+    }
+  }
+  return  HXT_STATUS_OK;
+}
+
+/* Proximity filter on a single tetrahedron: returns HXT_STATUS_INTERNAL (through
+ * pointIsTooClose) when vta is too close to one of the vertices of curTet with
+ * respect to the mean of both nodal sizes, HXT_STATUS_OK otherwise.
+ * A nodal size of DBL_MAX is replaced by the nodal size of vta. */
+static inline HXTStatus filterTet(HXTMesh* mesh, const double *nodalSizes, const uint64_t curTet, const uint32_t vta){
+  HXTVertex* vertices = (HXTVertex*) mesh->vertices.coord;
+  const uint32_t* tetNodes = mesh->tetrahedra.node + 4*curTet;
+
+  double *vtaCoord = vertices[vta].coord;
+  const double vtaSize = nodalSizes[vta];
+
+  for (unsigned corner=0; corner<4; corner++) {
+    const uint32_t other = tetNodes[corner];
+
+    // the ghost vertex, always stored last, has no position to compare with
+    if (corner==3 && other==HXT_GHOST_VERTEX)
+      continue;
+
+    double otherSize = nodalSizes[other];
+    if(otherSize==DBL_MAX)
+      otherSize = vtaSize;
+
+    HXT_CHECK( pointIsTooClose(vtaCoord, vertices[other].coord, 0.5*( vtaSize + otherSize)) );
+  }
+  return HXT_STATUS_OK;
+}
+
+
+/* roll back a failed insertion attempt: clear the deleted flag of every tetrahedron
+   pushed since prevDeleted and shrink the deleted list back to that size */
+static inline void restoreDeleted(HXTMesh* mesh, TetLocal* local, const uint64_t prevDeleted){
+  uint64_t pos = prevDeleted;
+  while (pos < local->deleted.num) {
+    unsetDeletedFlag(mesh, local->deleted.tetID[pos]);
+    pos++;
+  }
+
+  local->deleted.num = prevDeleted;
+}
+
+
+/***********************************
+ * insphere predicate & perturbation
+ ***********************************/
+// Tie-breaker used when the insphere predicate returns exactly zero.
+// see Perturbations and Vertex Removal in a 3D Delaunay Triangulation, O. Devillers & M. Teillaud
+// The five points are ordered by increasing index (the `indices` array is sorted in place)
+// and the sign is taken from an orient3d test, corrected by the parity of the permutation.
+static double symbolicPerturbation (uint32_t indices[5] ,  const double* __restrict__ i,
+                                                           const double* __restrict__ j,
+                                                           const double* __restrict__ k,
+                                                           const double* __restrict__ l,
+                                                           const double* __restrict__ m){
+  double const* sorted[5] = {i,j,k,l,m};
+
+  // Bubble sort on the indices, moving the points along with them.
+  // Only the parity of the number of exchanges matters afterwards.
+  int oddSwaps = 0;
+  int exchanged = 1;
+  for (int last = 4; exchanged; last--) {
+    exchanged = 0;
+    for (int pos = 0; pos < last; pos++) {
+      if (indices[pos] <= indices[pos+1])
+        continue;
+
+      const double *tmpPt = sorted[pos];
+      sorted[pos] = sorted[pos+1];
+      sorted[pos+1] = tmpPt;
+
+      const uint32_t tmpIdx = indices[pos];
+      indices[pos] = indices[pos+1];
+      indices[pos+1] = tmpIdx;
+
+      exchanged = 1;
+      oddSwaps ^= 1;
+    }
+  }
+
+  // first candidate: orientation of the four points with the largest indices
+  const double oriA = orient3d(sorted[1], sorted[2], sorted[3], sorted[4]);
+  if (oriA != 0.0)
+    return oddSwaps ? -oriA : oriA; // an odd permutation flips the sign
+
+  // second candidate: replace the second point by the first one
+  const double oriB = -orient3d(sorted[0], sorted[2], sorted[3], sorted[4]);
+  if (oriB == 0.0) HXT_WARNING("Symbolic perturbation failed (2 superposed vertices ?)");
+
+  return oddSwaps ? -oriB : oriB; // an odd permutation flips the sign
+}
+
+
+/* insphere predicate for vertex vta against one tetrahedron, with ghost vertex
+   handling and symbolic perturbation when the predicate is exactly zero.
+   Note that curTet is the index of the first node of the tetrahedron in
+   mesh->tetrahedra.node (it is added to the node array without being multiplied by 4) */
+double tetInsphere(HXTMesh* mesh, const uint64_t curTet, const uint32_t vta){
+  HXTVertex* vertices = (HXTVertex*) mesh->vertices.coord;
+  uint32_t* tetNodes = mesh->tetrahedra.node + curTet;
+
+  const double* __restrict__ a = vertices[tetNodes[0]].coord;
+  const double* __restrict__ b = vertices[tetNodes[1]].coord;
+  const double* __restrict__ c = vertices[tetNodes[2]].coord;
+  const double* __restrict__ e = vertices[vta].coord;
+
+  if(tetNodes[3]!=HXT_GHOST_VERTEX){
+    // regular tetrahedron: plain insphere test
+    double* const __restrict__ d = vertices[tetNodes[3]].coord;
+
+    double inside = insphere(a,b,c,d,e);
+    if (inside == 0.0) {
+      uint32_t nn[5] = {tetNodes[0],tetNodes[1],tetNodes[2],tetNodes[3],vta};
+      // HXT_INFO("symbolic perturbation");
+      inside = symbolicPerturbation (nn, a,b,c,d,e);
+    }
+    return inside;
+  }
+
+  // ghost tetrahedron: the side of the triangle (a,b,c) on which vta lies decides
+  const double side = orient3d(a,b,c,e);
+  if(side!=0.0)
+    return side;
+
+  // we never go here, except when points are aligned on the boundary:
+  // use the vertex facing the triangle in the tetrahedron adjacent through facet 3
+  // HXT_INFO("insphere using opposite vertex");
+  const uint32_t oppositeNode = mesh->tetrahedra.node[mesh->tetrahedra.neigh[curTet+3]];
+  double* const __restrict__ oppositeVertex = vertices[oppositeNode].coord;
+
+  double det = insphere(a,b,c,oppositeVertex,e);
+  if (det == 0.0) {
+    uint32_t nn[5] = {tetNodes[0],tetNodes[1],tetNodes[2],oppositeNode,vta};
+    // HXT_INFO("symbolic perturbation on boundary");
+    det = symbolicPerturbation (nn, a,b,c,oppositeVertex,e);
+  }
+
+  // the result is negated since it was computed with the tetrahedron on the other side of the facet
+  return -det;
+}
+
+
+/***********************************
+ * walk to cavity
+ ***********************************/
+/* Walk through the mesh, from tetrahedron *curTet towards vertex vta.
+ *   mesh   : the mesh in which we walk
+ *   local  : thread-local data, used by checkTetrahedron to stay inside the thread's partition
+ *   curTet : in: the tetrahedron where the walk starts; out: the tetrahedron where it stopped
+ *            (only written when HXT_STATUS_OK is returned)
+ *   vta    : index of the vertex to locate
+ * Returns
+ *   HXT_STATUS_OK       when the walk reached a ghost tetrahedron or stopped in a tetrahedron
+ *                       it could not leave
+ *   HXT_STATUS_TRYAGAIN when the walk was blocked by a tetrahedron rejected by checkTetrahedron,
+ *                       or when the final orientation tests are not conclusive
+ *   an error status     when vta is outside the triangulation and there is no adjacent tetrahedron
+ *                       (plus, when NDEBUG is not defined, when the walk leaves the domain or loops)
+ */
+static HXTStatus walking2Cavity(HXTMesh* mesh, TetLocal* local, uint64_t* __restrict__ curTet, const uint32_t vta){
+  uint64_t nextTet = *curTet;
+  uint32_t seed = 1; // constant seed: the sequence of values drawn below is the same at each call
+  HXTVertex* vertices = (HXTVertex*) mesh->vertices.coord;
+
+  /* if nextTet is a ghost triangle, go to the neighbor that is not a ghost triangle */
+  if(mesh->tetrahedra.node[4*nextTet+3]==HXT_GHOST_VERTEX)
+    nextTet = mesh->tetrahedra.neigh[4*nextTet+3]/4;
+
+  double* const vtaCoord = vertices[vta].coord;
+  unsigned enteringFace = 4; // 4 = no entering facet yet (valid facets are 0..3)
+
+#ifndef NDEBUG
+  uint64_t TotalCount = 0; // number of steps, to detect an endless walk
+#endif
+  
+
+  while(1){
+    const uint32_t* __restrict__ curNode = mesh->tetrahedra.node + 4*nextTet;
+    const uint64_t* __restrict__ curNeigh = mesh->tetrahedra.neigh + 4*nextTet;
+
+  #ifndef NDEBUG
+    if(curNode[3]==HXT_GHOST_VERTEX){
+      return HXT_ERROR_MSG(HXT_STATUS_FAILED, "walked outside of the domain");
+    }
+  #endif
+
+    unsigned neigh = 4;   // facet through which we leave nextTet (4 = none found)
+    unsigned outside = 0; // set when a facet to cross leads to a tetrahedron rejected by checkTetrahedron
+    uint32_t randomU = hxtReproducibleLCG(&seed); // offset of the first facet tested
+    for (unsigned i=0; i<4; i++)
+    {
+      uint32_t index = (i+randomU)%4;
+      if (index!=enteringFace) { // never go back through the facet we came from
+        // we walk where the volume is minimum
+        const double* __restrict__ a = vertices[curNode[getNode0FromFacet(index)]].coord;
+        const double* __restrict__ b = vertices[curNode[getNode1FromFacet(index)]].coord;
+        const double* __restrict__ c = vertices[curNode[getNode2FromFacet(index)]].coord;
+
+        // negative orientation: this facet is a candidate to walk through
+        if (orient3d(a,b,c, vtaCoord) < 0.0){
+          if(curNeigh[index]==HXT_NO_ADJACENT) { // the point is outside the triangulation
+            return HXT_ERROR_MSG(HXT_STATUS_ERROR,
+                                "vertex {%f %f %f} outside the triangulation and no ghost tetrahedra",
+                                 vtaCoord[0], vtaCoord[1], vtaCoord[2]);
+          }
+
+          // neighbor values encode 4*tetrahedron + facet
+          uint64_t tet = curNeigh[index]/4;
+          const uint32_t* __restrict__ neighNodes = mesh->tetrahedra.node + tet*4;
+          if(checkTetrahedron(vertices, local, neighNodes)==HXT_STATUS_OK){
+            if(neighNodes[3]==HXT_GHOST_VERTEX){
+              // we reached a ghost tetrahedron: the walk stops there
+              *curTet = tet;
+              return HXT_STATUS_OK;
+            }
+            neigh=index;
+            break;
+          }
+          outside = 1; // this neighbor is rejected, try the remaining facets
+        }
+      }
+    }
+
+    if(neigh==4){
+      // no facet to walk through
+      const double* __restrict__ a = vertices[curNode[0]].coord;
+      const double* __restrict__ b = vertices[curNode[1]].coord;
+      const double* __restrict__ c = vertices[curNode[2]].coord;
+      const double* __restrict__ d = vertices[curNode[3]].coord;
+      // give up if we were blocked by a rejected neighbor, or if more than 2 of the 4
+      // orientation tests below are non-negative
+      if(outside ||
+         (orient3d(a,b,c,vtaCoord)>=0.0) +
+         (orient3d(a,b,vtaCoord,d)>=0.0) +
+         (orient3d(a,vtaCoord,c,d)>=0.0) +
+         (orient3d(vtaCoord,b,c,d)>=0.0)>2){
+        return HXT_STATUS_TRYAGAIN;
+      }
+      *curTet = nextTet;
+      return HXT_STATUS_OK;
+    }
+
+    //    printf("nextTet %u %g %u %u\n",nextTet,Min, count, neigh);
+    // step into the neighbor and remember through which of its facets we entered
+    nextTet = curNeigh[neigh]/4;
+    enteringFace = curNeigh[neigh]&3;
+
+  #ifndef NDEBUG
+    if(TotalCount>mesh->tetrahedra.num){
+      return HXT_ERROR_MSG(HXT_STATUS_FAILED, "infinite walk to find the cavity");
+    }
+    // printf("%lu\n",TotalCount);
+    TotalCount++;
+  #endif
+  }
+}
+
+
+/***********************************
+ * digging cavity
+ ***********************************/
+
+/* append one face of the cavity boundary to local->ball
+   (the caller must have reserved the room beforehand) */
+static inline void bndPush( TetLocal* local, uint16_t flag,
+              const uint32_t node1, const uint32_t node2,
+              const uint32_t node3, const uint64_t neigh){
+  cavityBnd_t* slot = &local->ball.bnd[local->ball.num];
+
+  slot->node[0] = node1;
+  slot->node[1] = node2;
+  slot->node[2] = node3;
+  slot->flag = flag;
+  slot->neigh = neigh;
+
+  local->ball.num++;
+}
+
+/* add tetrahedron `neigh` to the cavity (list of deleted tetrahedra),
+   unless checkTetrahedron rejects it, in which case its status is returned */
+static inline HXTStatus deletedPush(HXTMesh* mesh, TetLocal* local, const uint64_t neigh){
+  HXTVertex* vertices = (HXTVertex*) mesh->vertices.coord;
+  const uint32_t* tetNodes = mesh->tetrahedra.node + neigh*4;
+
+  // the tetrahedron must be owned by this thread
+  HXT_CHECK( checkTetrahedron(vertices, local, tetNodes) );
+
+  local->deleted.tetID[local->deleted.num] = neigh;
+  local->deleted.num++;
+  setDeletedFlag(mesh, neigh);
+
+  return HXT_STATUS_OK;
+}
+
+/* check if the cavity is star shaped with respect to vta.
+   This isn't useful for pure Delaunay, but it is when the cavity is constrained with colors.
+   Returns HXT_STATUS_OK if every non-ghost boundary face sees vta; otherwise returns
+   HXT_STATUS_INTERNAL and stores the index of the first blind face in *blindFaceIndex */
+static HXTStatus isStarShaped(TetLocal* local, HXTMesh* mesh, const uint32_t vta, uint64_t* blindFaceIndex)
+{
+  HXTVertex* vertices = (HXTVertex*) mesh->vertices.coord;
+  double *apex = vertices[vta].coord;
+
+  for (uint64_t f=0; f<local->ball.num; f++) {
+    const cavityBnd_t* face = &local->ball.bnd[f];
+
+    // faces containing the ghost vertex are not tested
+    if(face->node[2]==HXT_GHOST_VERTEX)
+      continue;
+
+    double* p0 = vertices[face->node[0]].coord;
+    double* p1 = vertices[face->node[1]].coord;
+    double* p2 = vertices[face->node[2]].coord;
+
+    if(orient3d(apex, p0, p1, p2)>=0.0){
+      *blindFaceIndex = f;
+      return HXT_STATUS_INTERNAL;
+    }
+  }
+
+  return HXT_STATUS_OK;
+}
+
+
+/* Take the tetrahedron `tetToUndelete` out of the cavity.
+ *   - it is removed from local->deleted (the last entry takes its place) and its deleted flag is cleared
+ *   - the boundary faces of the cavity that were faces of this tetrahedron are removed from local->ball
+ *   - its other faces, shared with tetrahedra that are still in the cavity, become boundary faces
+ * Returns an error status if the tetrahedron is not in the deleted array.
+ */
+static HXTStatus undeleteTetrahedron(TetLocal* local, HXTMesh* mesh, uint64_t tetToUndelete) {
+  // the tetrahedra should not be deleted anymore: look for it, starting from the most recent entry.
+  // The search is bounded in every build, so that a missing tetrahedron is reported
+  // instead of reading before the beginning of the array.
+  uint64_t pos = local->deleted.num;
+  while(pos>0 && local->deleted.tetID[pos-1]!=tetToUndelete)
+    pos--;
+
+  if(pos==0)
+    return HXT_ERROR_MSG(HXT_STATUS_ERROR, "could not find the tetrahedra in the deleted array");
+
+  local->deleted.num--;
+  local->deleted.tetID[pos-1] = local->deleted.tetID[local->deleted.num];
+  unsetDeletedFlag(mesh, tetToUndelete);
+
+  uint64_t bndFaces[4] = {HXT_NO_ADJACENT, HXT_NO_ADJACENT, HXT_NO_ADJACENT, HXT_NO_ADJACENT};
+  int nbndFace = 0;
+
+  // we should update the boundary (that's the difficult part...)
+  // first remove all the boundary faces that come from the tetrahedron we just removed from the cavity
+  // (the loop goes backward, so the entry moved into the freed slot has already been examined)
+  for (uint64_t i=local->ball.num; nbndFace<4 && i>0; i--) {
+    if(mesh->tetrahedra.neigh[local->ball.bnd[i-1].neigh]/4==tetToUndelete) {
+      bndFaces[nbndFace++] = local->ball.bnd[i-1].neigh;
+      local->ball.num--;
+      local->ball.bnd[i-1] = local->ball.bnd[local->ball.num];
+    }
+  }
+
+  // we must replace them by all the other faces of the tetrahedron we just removed
+  const uint64_t* __restrict__ curNeigh = mesh->tetrahedra.neigh + tetToUndelete*4;
+  const uint32_t* __restrict__ curNode = mesh->tetrahedra.node + tetToUndelete*4;
+
+#ifdef DEBUG
+  int nbndFace2 = (getDeletedFlag(mesh, curNeigh[0]/4)==0) + (getDeletedFlag(mesh, curNeigh[1]/4)==0) + (getDeletedFlag(mesh, curNeigh[2]/4)==0) + (getDeletedFlag(mesh, curNeigh[3]/4)==0);
+  if(nbndFace!=nbndFace2)
+    return HXT_ERROR_MSG(HXT_STATUS_ERROR, "found %d non-deleted tet adjacent to the tet we unremove but there should be %d %lu %lu %lu %lu", nbndFace, nbndFace2, bndFaces[0], bndFaces[1], bndFaces[2], bndFaces[3]);
+#endif
+
+  // up to 4 faces are pushed below (when no boundary face was removed above),
+  // so room for 4 entries is needed
+  HXT_CHECK( reserveNewBnd(local, 4) );
+
+  // Each face that was not on the boundary is pushed with its constraint flags moved to the
+  // positions used for a boundary face (facet 0, edges 0 to 2). The new boundary face is seen
+  // from the other side, its neighbor being the facet of the tetrahedron we just restored.
+  if(curNeigh[0]!=bndFaces[0] && curNeigh[0]!=bndFaces[1] && curNeigh[0]!=bndFaces[2] && curNeigh[0]!=bndFaces[3])
+    bndPush(local, (getFacetConstraint(mesh, tetToUndelete, 0)   ) |
+                   (getEdgeConstraint(mesh, tetToUndelete, 1)>>1) | // constraint on edge 1 (facet 0 2) goes on edge 0
+                   (getEdgeConstraint(mesh, tetToUndelete, 0)<<1) | // constraint on edge 0 (facet 0 1) goes on edge 1
+                   (getEdgeConstraint(mesh, tetToUndelete, 2)   ),  // constraint on edge 2 (facet 0 3) goes on edge 2
+                   curNode[2], curNode[1], curNode[3], 4*tetToUndelete+0);
+
+  if(curNeigh[1]!=bndFaces[0] && curNeigh[1]!=bndFaces[1] && curNeigh[1]!=bndFaces[2] && curNeigh[1]!=bndFaces[3])
+    bndPush(local,  (getFacetConstraint(mesh, tetToUndelete, 1)>>1) |// constraint on facet 1 goes on facet 0
+                    (getEdgeConstraint(mesh, tetToUndelete, 0)   ) | // constraint on edge 0 (facet 1 0) goes on edge 0
+                    (getEdgeConstraint(mesh, tetToUndelete, 3)>>2) | // constraint on edge 3 (facet 1 2) goes on edge 1
+                    (getEdgeConstraint(mesh, tetToUndelete, 4)>>2),  // constraint on edge 4 (facet 1 3) goes on edge 2
+                    curNode[0], curNode[2], curNode[3], 4*tetToUndelete+1);
+
+  if(curNeigh[2]!=bndFaces[0] && curNeigh[2]!=bndFaces[1] && curNeigh[2]!=bndFaces[2] && curNeigh[2]!=bndFaces[3])
+    bndPush(local,  (getFacetConstraint(mesh, tetToUndelete, 2)>>2) |// constraint on facet 2 goes on facet 0
+                    (getEdgeConstraint(mesh, tetToUndelete, 3)>>3) | // constraint on edge 3 (facet 2 1) goes on edge 0
+                    (getEdgeConstraint(mesh, tetToUndelete, 1)   ) | // constraint on edge 1 (facet 2 0) goes on edge 1
+                    (getEdgeConstraint(mesh, tetToUndelete, 5)>>3),  // constraint on edge 5 (facet 2 3) goes on edge 2
+                     curNode[1], curNode[0], curNode[3], 4*tetToUndelete+2);
+
+  if(curNeigh[3]!=bndFaces[0] && curNeigh[3]!=bndFaces[1] && curNeigh[3]!=bndFaces[2] && curNeigh[3]!=bndFaces[3])
+    bndPush(local, (getFacetConstraint(mesh, tetToUndelete, 3)>>3) |// constraint on facet 3 goes on facet 0
+                   (getEdgeConstraint(mesh, tetToUndelete, 2)>>2) | // constraint on edge 2 (facet 3 0) goes on edge 0
+                   (getEdgeConstraint(mesh, tetToUndelete, 4)>>3) | // constraint on edge 4 (facet 3 1) goes on edge 1
+                   (getEdgeConstraint(mesh, tetToUndelete, 5)>>3),  // constraint on edge 5 (facet 3 2) goes on edge 2
+                   curNode[0], curNode[1], curNode[2], 4*tetToUndelete+3);
+
+  return HXT_STATUS_OK;
+}
+
+
+/* Make the cavity star shaped with respect to vta: as long as a boundary face does not
+ * see the point, the cavity tetrahedron adjacent to that face is put back in the mesh. */
+static HXTStatus reshapeCavityIfNeeded(TetLocal* local, HXTMesh* mesh, const uint32_t vta) {
+  uint64_t blindFace = 0;
+
+  for(;;) {
+    if(isStarShaped(local, mesh, vta, &blindFace)!=HXT_STATUS_INTERNAL)
+      break;
+
+    // the boundary face stores the facet of the tetrahedron outside the cavity;
+    // the neighbor through that facet is the cavity tetrahedron to restore
+    const uint64_t outerFacet = local->ball.bnd[blindFace].neigh;
+    const uint64_t innerTet = mesh->tetrahedra.neigh[outerFacet]/4;
+    // printf("deleting %lu  cavity:%lu  ball:%lu\n", innerTet, local->deleted.num-prevDeleted, local->ball.num );
+    HXT_CHECK( undeleteTetrahedron(local, mesh, innerTet) );
+  }
+
+  return HXT_STATUS_OK;
+}
+
+
+/* Make sure that no constrained edge ends up strictly inside the cavity.
+ *   local       : thread-local data; the cavity is local->deleted.tetID[prevDeleted .. deleted.num-1]
+ *   mesh        : the mesh
+ *   vta         : index of the vertex being inserted
+ *   color       : color shared by all the tetrahedra of the cavity, restored at the end
+ *   prevDeleted : index of the first tetrahedron of the cavity in local->deleted.tetID
+ *
+ * For every constrained edge of a cavity tetrahedron, we turn around the edge. If all the
+ * tetrahedra around it are in the cavity, the edge would disappear: the tetrahedron whose
+ * opposite edge midpoint is the farthest from vta is then taken out of the cavity.
+ * During the process, mesh->tetrahedra.colors of the cavity tetrahedra is used as a bit field
+ * (one bit per edge) to remember which edges were already processed.
+ */
+static HXTStatus respectEdgeConstraint(TetLocal* local, HXTMesh* mesh, const uint32_t vta, const uint16_t color, const uint64_t prevDeleted) {
+  // HXT_WARNING("a constrained edge was inside the cavity, recovering it");
+
+  // all the tetrahedron have the same color 'color', we will use that color to flag them
+  for (uint64_t i=prevDeleted; i<local->deleted.num; i++) {
+    uint64_t delTet = local->deleted.tetID[i];
+    mesh->tetrahedra.colors[delTet] = 0;
+  }
+
+  for (uint64_t i=prevDeleted; i<local->deleted.num; i++) {
+    uint64_t delTet = local->deleted.tetID[i];
+    int exist = 1; // becomes 0 when delTet itself is taken out of the cavity
+    for (int edge=0; exist && edge<6; edge++) {
+      if(getEdgeConstraint(mesh, delTet, edge) && (mesh->tetrahedra.colors[delTet] & (1U<<edge))==0) {
+        unsigned in_facet;
+        unsigned out_facet;
+
+        getFacetsFromEdge(edge, &in_facet, &out_facet);
+
+        int edgeIsSafe = 0;
+        uint64_t curTet = delTet;
+
+        // first turn: mark the edge as processed in every cavity tetrahedron around it,
+        // and check if at least one tetrahedron around the edge is outside the cavity
+        do
+        {
+          uint32_t newV = mesh->tetrahedra.node[4*curTet + in_facet];
+
+          // go into the neighbor through out_facet
+          uint64_t neigh = mesh->tetrahedra.neigh[4*curTet + out_facet];
+          curTet = neigh/4;
+          in_facet = neigh%4;
+
+          // the next facet to go through is the one opposite to newV (3 if not found in 0..2)
+          uint32_t* nodes = mesh->tetrahedra.node + 4*curTet;
+          for (out_facet=0; out_facet<3; out_facet++)
+            if(nodes[out_facet]==newV)
+              break;
+
+          if(getDeletedFlag(mesh, curTet)!=0) {
+            // mark that the edge has been treated
+            #ifdef DEBUG
+              if((mesh->tetrahedra.colors[curTet] & (1U<<getEdgeFromFacets(in_facet, out_facet)))!=0)
+                return HXT_ERROR_MSG(HXT_STATUS_ERROR, "the flag says that the tet has already been processed for this edge...");
+            #endif
+            mesh->tetrahedra.colors[curTet] |= (1U<<getEdgeFromFacets(in_facet, out_facet));
+          }
+          else {
+            edgeIsSafe=1;
+          }
+
+        } while (curTet!=delTet);
+
+        if(!edgeIsSafe) { // we must find a tetrahedron on the opposite side of vta and delete it.
+          getFacetsFromEdge(edge, &in_facet, &out_facet);
+          curTet = delTet;
+
+          uint64_t tetContainingVta = local->deleted.tetID[prevDeleted];
+          uint64_t tetToUndelete = HXT_NO_ADJACENT;
+          double distMax = 0.0;
+          double* vtaCoord = mesh->vertices.coord + 4*vta;
+
+        #ifdef DEBUG
+          double* a = mesh->vertices.coord + 4*mesh->tetrahedra.node[4*tetContainingVta];
+          double* b = mesh->vertices.coord + 4*mesh->tetrahedra.node[4*tetContainingVta+1];
+          double* c = mesh->vertices.coord + 4*mesh->tetrahedra.node[4*tetContainingVta+2];
+          double* d = mesh->vertices.coord + 4*mesh->tetrahedra.node[4*tetContainingVta+3];
+
+          if(orient3d(vtaCoord,b,c,d)>0.0 || orient3d(a,vtaCoord,c,d)>0.0 || orient3d(a,b,vtaCoord,d)>0.0 || orient3d(a,b,c,vtaCoord)>0.0) {
+            return HXT_ERROR_MSG(HXT_STATUS_ERROR, "an edge part of a ghost tetrahedron is constrained");
+          }
+        #endif
+
+          // second turn: look for the tetrahedron to take out of the cavity
+          do
+          {
+            uint32_t newV = mesh->tetrahedra.node[4*curTet + in_facet];
+
+            // go into the neighbor through out_facet
+            uint64_t neigh = mesh->tetrahedra.neigh[4*curTet + out_facet];
+            curTet = neigh/4;
+            in_facet = neigh%4;
+
+            uint32_t* nodes = mesh->tetrahedra.node + 4*curTet;
+            for (out_facet=0; out_facet<3; out_facet++)
+              if(nodes[out_facet]==newV)
+                break;
+
+            // the two vertices of curTet that are not on the constrained edge
+            // (each vertex takes 4 doubles in the coord array)
+            double* coord1 = mesh->vertices.coord + 4*newV;
+            double* coord2 = mesh->vertices.coord + 4*nodes[in_facet];
+
+            if(curTet!=tetContainingVta) {
+              // squared distance between vta and the midpoint of those two vertices
+              double dist = 0.0;
+              for (int l=0; l<3; l++) {
+                double meanCoord = (coord1[l]+coord2[l])*0.5;
+                double diff = meanCoord-vtaCoord[l];
+                dist += diff*diff;
+              }
+
+              // keep the farthest candidate
+              if(dist>distMax) {
+                distMax = dist;
+                tetToUndelete = curTet;
+              }
+            }
+          } while (curTet!=delTet);
+
+          // no candidate: HXT_NO_ADJACENT must not be used as a tetrahedron index
+          if(tetToUndelete==HXT_NO_ADJACENT)
+            return HXT_ERROR_MSG(HXT_STATUS_ERROR, "could not find a tetrahedron to undelete around a constrained edge");
+
+          if(tetToUndelete==delTet)
+            exist = 0;
+
+          // printf("undeleting tetrahedron %lu\n", tetToUndelete);
+          mesh->tetrahedra.colors[tetToUndelete] = color;
+          HXT_CHECK( undeleteTetrahedron(local, mesh, tetToUndelete) );
+        }
+      }
+    }
+  }
+
+  // give their color back to the tetrahedra that are still in the cavity
+  for (uint64_t i=prevDeleted; i<local->deleted.num; i++) {
+    uint64_t delTet = local->deleted.tetID[i];
+    mesh->tetrahedra.colors[delTet] = color;
+  }
+
+  return HXT_STATUS_OK;
+}
+
+
+/* this function does a Breadth-first search of the tetrahedra in the cavity
+ * it adds those to local->deleted
+ * it also maintains the local->ball.bnd array with all the information concerning the boundary of the cavity
+ *
+ *   mesh           : the mesh
+ *   local          : thread-local buffers; local->ball is reset, local->deleted is appended to
+ *   firstTet       : the tetrahedron from which the search starts, always added to the cavity
+ *   vta            : index of the vertex being inserted
+ *   edgeConstraint : incremented once for each cavity tetrahedron that has a constrained edge
+ *
+ * Returns HXT_STATUS_OK, or the status of a failed reservation or of deletedPush
+ * (which fails when checkTetrahedron rejects a tetrahedron).
+ */
+static inline HXTStatus diggingACavity(HXTMesh* mesh, TetLocal* local, uint64_t firstTet, const uint32_t vta, int* edgeConstraint){
+  // add tetrahedra to cavity
+  local->deleted.tetID[local->deleted.num++] = firstTet;
+  setDeletedFlag(mesh, firstTet);
+  local->ball.num = 0;
+
+  
+
+  // local->deleted is used as the queue of the search: it grows while we iterate over it
+  for(uint64_t start=local->deleted.num-1; start < local->deleted.num; start++){
+    uint64_t curTet = local->deleted.tetID[start];
+    const uint64_t* __restrict__ curNeigh = mesh->tetrahedra.neigh + 4*curTet;
+    const uint32_t* __restrict__ curNode = mesh->tetrahedra.node + 4*curTet;
+
+    *edgeConstraint += isAnyEdgeConstrained(mesh, curTet)!=0;
+
+    /* here we allocate enough space for the boundary (local->ball.bnd) and the cavity (local->deleted):
+       each of the 4 facets gives at most one new entry in one of the two arrays */
+    HXT_CHECK( reserveNewDeleted(local, 4) );
+    HXT_CHECK( reserveNewBnd(local, 4) );
+
+    // we unrolled the loop for speed (also because indices are not trivial, we would need a 4X4 array)
+
+    /* and here we push stuff to local->bnd or local->deleted, always keeping ghost tet at last place */
+    // For each facet: if the neighbor exists and is not yet in the cavity, then either the facet is
+    // constrained or tetInsphere is non-negative => the facet is on the boundary of the cavity,
+    // or the neighbor is added to the cavity.
+    // tetInsphere takes the index of the first node of the tetrahedron, hence the neigh*4
+    uint64_t neigh = curNeigh[0]/4;
+    if(curNeigh[0]!=HXT_NO_ADJACENT && getDeletedFlag(mesh, neigh)==0){
+      if(getFacetConstraint(mesh, curTet, 0) || 
+        tetInsphere(mesh, neigh*4, vta)>=0.0){
+        bndPush(local, mesh->tetrahedra.flag[curTet] & UINT16_C(0x107),
+                       /* corresponds to :
+                       getFacetConstraint(mesh, curTet, 0) | 
+                       getEdgeConstraint(mesh, curTet, 0) |
+                       getEdgeConstraint(mesh, curTet, 1) |
+                       getEdgeConstraint(mesh, curTet, 2) */
+                       curNode[1], curNode[2], curNode[3], curNeigh[0]);
+      }
+      else{
+        HXT_CHECK( deletedPush(mesh, local, neigh) );
+      }
+    }
+
+    neigh = curNeigh[1]/4;
+    if(curNeigh[1]!=HXT_NO_ADJACENT && getDeletedFlag(mesh, neigh)==0){
+      if(getFacetConstraint(mesh, curTet, 1) || 
+        tetInsphere(mesh, neigh*4, vta)>=0.0){
+        bndPush(local, (getFacetConstraint(mesh, curTet, 1)>>1) |// constraint on facet 1 goes on facet 0
+                       (getEdgeConstraint(mesh, curTet, 3)>>3) | // constraint on edge 3 (facet 1 2) goes on edge 0
+                       (getEdgeConstraint(mesh, curTet, 0)<<1) | // constraint on edge 0 (facet 1 0) goes on edge 1
+                       (getEdgeConstraint(mesh, curTet, 4)>>2),  // constraint on edge 4 (facet 1 3) goes on edge 2
+                       curNode[2], curNode[0], curNode[3], curNeigh[1]);
+      }
+      else{
+        HXT_CHECK( deletedPush(mesh, local, neigh) );
+      }
+    }
+
+    neigh = curNeigh[2]/4;
+    if(curNeigh[2]!=HXT_NO_ADJACENT && getDeletedFlag(mesh, neigh)==0){
+      if(getFacetConstraint(mesh, curTet, 2)|| 
+        tetInsphere(mesh, neigh*4, vta)>=0.0){
+        bndPush(local, (getFacetConstraint(mesh, curTet, 2)>>2) |// constraint on facet 2 goes on facet 0
+                       (getEdgeConstraint(mesh, curTet, 1)>>1) | // constraint on edge 1 (facet 2 0) goes on edge 0
+                       (getEdgeConstraint(mesh, curTet, 3)>>2) | // constraint on edge 3 (facet 2 1) goes on edge 1
+                       (getEdgeConstraint(mesh, curTet, 5)>>3),  // constraint on edge 5 (facet 2 3) goes on edge 2
+                       curNode[0], curNode[1], curNode[3], curNeigh[2]);
+      }
+      else{
+        HXT_CHECK( deletedPush(mesh, local, neigh) );
+      }
+    }
+
+    neigh = curNeigh[3]/4;
+    if(curNeigh[3]!=HXT_NO_ADJACENT && getDeletedFlag(mesh, neigh)==0){
+      if(getFacetConstraint(mesh, curTet, 3) || 
+        tetInsphere(mesh, neigh*4, vta)>=0.0){
+        
+        bndPush(local, (getFacetConstraint(mesh, curTet, 3)>>3) |// constraint on facet 3 goes on facet 0
+                       (getEdgeConstraint(mesh, curTet, 4)>>4) | // constraint on edge 4 (facet 3 1) goes on edge 0
+                       (getEdgeConstraint(mesh, curTet, 2)>>1) | // constraint on edge 2 (facet 3 0) goes on edge 1
+                       (getEdgeConstraint(mesh, curTet, 5)>>3),  // constraint on edge 5 (facet 3 2) goes on edge 2
+                       // there are 2 valid order for nodes: 1,0,2,3 and 0,2,1,3
+                       curNode[1], curNode[0], curNode[2], curNeigh[3]);
+      }
+      else{
+        HXT_CHECK( deletedPush(mesh, local, neigh) );
+      }
+    }
+  }
+
+  return HXT_STATUS_OK;
+}
+
+
+/**************************************************************
+ * compute adjacencies with a matrix O(1) insertion and search
+ *
+ *   mesh       : the mesh whose tetrahedra.neigh array is filled
+ *   local      : thread-local data; uses local->ball.bnd and the local->Map matrix
+ *   verticesID : scratch array indexed by vertex, aligned on SIMD_ALIGN
+ *   blength    : number of boundary faces to process
+ *
+ * The nodes stored in local->ball.bnd are overwritten by small local
+ * identifiers, so the boundary array does not contain mesh vertices anymore
+ * when the function returns.
+ **************************************************************/
+#ifndef HXT_DELAUNAY_LOW_MEMORY
+static inline HXTStatus computeAdjacenciesFast(HXTMesh* mesh, TetLocal* local, uint32_t* __restrict__ verticesID, const uint64_t blength){
+  cavityBnd_t* __restrict__ bnd = local->ball.bnd;
+
+#ifndef NDEBUG
+  int ghost_is_there = 0; // only used by the assertion below
+#endif
+
+HXT_ASSERT(((size_t) bnd)%SIMD_ALIGN==0);
+HXT_ASSERT(((size_t) verticesID)%SIMD_ALIGN==0);
+
+  // step 1: mark every vertex of the boundary as "not numbered yet"
+  #pragma omp simd aligned(verticesID,bnd:SIMD_ALIGN)
+  for (uint32_t i=0; i<blength; i++){
+    verticesID[bnd[i].node[0]] = UINT32_MAX;
+    verticesID[bnd[i].node[1]] = UINT32_MAX;
+    if(bnd[i].node[2]!=HXT_GHOST_VERTEX){
+      verticesID[bnd[i].node[2]] = UINT32_MAX;
+    }
+  }
+
+  // step 2: give a local identifier to each vertex, in order of appearance, and replace the
+  // nodes of the boundary faces by those identifiers. Identifiers start at 1: 0 is kept for the ghost vertex
+  uint32_t npts = 1;
+  for (uint32_t i=0; i<blength; i++)
+  {
+    if(verticesID[bnd[i].node[0]]>npts){
+      verticesID[bnd[i].node[0]] = npts++;
+    }
+    bnd[i].node[0] = verticesID[bnd[i].node[0]];
+    if(verticesID[bnd[i].node[1]]>npts){
+      verticesID[bnd[i].node[1]] = npts++;
+    }
+    bnd[i].node[1] = verticesID[bnd[i].node[1]];
+
+    if(bnd[i].node[2]==HXT_GHOST_VERTEX){
+      bnd[i].node[2] = 0;
+#ifndef NDEBUG
+      ghost_is_there = 1;
+#endif
+    }
+    else{
+      if(verticesID[bnd[i].node[2]]>npts){
+        verticesID[bnd[i].node[2]] = npts++;
+      }
+      bnd[i].node[2] = verticesID[bnd[i].node[2]];
+    }
+
+  }
+
+  // sanity check: the number of faces must be 2*(number of vertices) - 4, the number of
+  // vertices being npts-1, plus one if the ghost vertex is there
+  HXT_ASSERT_MSG((npts-3+ghost_is_there)*2==blength, "Failed to compute adjacencies (f) %u (%u ghost) vertices and %u cavity boundaries", npts-1+ghost_is_there, ghost_is_there, blength); // symbol undefined
+
+  // step 3: for each oriented edge (from, to) of each face, store in Map[from*32 + to] the neighbor
+  // slot associated with that edge.
+  // NOTE(review): the row stride of 32 assumes that local identifiers are below 32, and bnd[i].neigh
+  // is used as the index of the first of 4 neighbor slots, presumably those of the new tetrahedron
+  // built on face i - confirm against the caller
+  #pragma omp simd aligned(verticesID:SIMD_ALIGN)
+  for (uint32_t i=0; i<blength; i++)
+  {
+    local->Map[bnd[i].node[0]*32 + bnd[i].node[1]] = bnd[i].neigh + 3;
+    local->Map[bnd[i].node[1]*32 + bnd[i].node[2]] = bnd[i].neigh + 1;
+    local->Map[bnd[i].node[2]*32 + bnd[i].node[0]] = bnd[i].neigh + 2;
+  }
+
+  // step 4: the neighbor through an edge is the slot stored for the same edge with the opposite orientation
+  #pragma omp simd aligned(verticesID:SIMD_ALIGN)
+  for (uint32_t i=0; i<blength; i++)
+  {
+    mesh->tetrahedra.neigh[bnd[i].neigh + 1] = local->Map[bnd[i].node[2]*32 + bnd[i].node[1]];
+    mesh->tetrahedra.neigh[bnd[i].neigh + 2] = local->Map[bnd[i].node[0]*32 + bnd[i].node[2]];
+    mesh->tetrahedra.neigh[bnd[i].neigh + 3] = local->Map[bnd[i].node[1]*32 + bnd[i].node[0]];
+  }
+
+  return HXT_STATUS_OK;
+}
+#endif
+
+
+/**************************************************************
+ * compute adjacencies with a linear search in the list of
+ * edges that are still waiting for their twin
+ **************************************************************/
+static inline HXTStatus computeAdjacenciesSlow(HXTMesh* mesh, TetLocal* local, const uint64_t start, const uint64_t blength){
+
+  // The memory of the cavity boundary is recycled as scratch space:
+  //  - pending[k], k<numPending, is the key of an edge without a twin yet
+  //  - pending[faceOffset+k] is the face that pushed that key
+  // (the original sizing argument: N+2 points and 2N triangles on the surface
+  //  of the cavity give 3N edges, i.e. 6N uint64_t for keys and faces)
+  uint64_t* pending = (uint64_t*) local->ball.bnd;
+  const uint64_t faceOffset = blength*3/2; // 3N
+  uint64_t numPending = 0;
+
+  // face e+1 of a new tetrahedron is associated with the directed edge
+  // going from Node[tail[e]] to Node[head[e]]
+  const unsigned tail[3] = {2,3,1};
+  const unsigned head[3] = {3,1,2};
+
+  for (uint64_t t=0; t<blength; t++)
+  {
+    const uint64_t tet = local->deleted.tetID[start + t];
+    const uint32_t* __restrict__ Node = mesh->tetrahedra.node + 4*tet;
+
+    for (unsigned e=0; e<3; e++)
+    {
+      const uint64_t face = 4*tet + e + 1;
+      const uint64_t from = Node[tail[e]];
+      const uint64_t to = Node[head[e]];
+
+      // the two endpoints packed in one 64-bit word, orientation included
+      const uint64_t key = (from<<32) + to;
+
+      // linear search: this is the only nested loop, the one that costs it all
+      uint64_t pos;
+      for (pos=0; pos<numPending; pos++)
+      {
+        __assume_aligned(pending, SIMD_ALIGN);
+        if(pending[pos]==key)
+          break;
+      }
+
+      if(pos<numPending){
+        // the twin was waiting for us: link both faces...
+        const uint64_t twin = pending[faceOffset + pos];
+        mesh->tetrahedra.neigh[face] = twin;
+        mesh->tetrahedra.neigh[twin] = face;
+
+        // ...and fill the hole with the last pending entry
+        numPending--;
+        if(pos<numPending){
+          pending[pos] = pending[numPending];
+          pending[faceOffset + pos] = pending[faceOffset + numPending];
+        }
+      }
+      else{
+        // not found: push the reversed edge, which is the key the
+        // neighboring face will be searching for
+        pending[numPending] = (to<<32) + from;
+        pending[faceOffset + numPending] = face;
+        numPending++;
+      }
+    }
+  }
+  HXT_ASSERT_MSG(numPending==0, "Failed to compute adjacencies (s)"); // every edge must have found its twin
+  return HXT_STATUS_OK;
+}
+
+
+/****************************************
+ * filling back the cavity (DelaunayBall)
+ *
+ * One new tetrahedron is built on each of the local->ball.num boundary
+ * triangles, with vta as its first vertex. The new tetrahedra reuse the
+ * last local->ball.num identifiers of local->deleted.tetID, which are then
+ * removed from that list.
+ *
+ *  mesh        the mesh being modified
+ *  local       per-thread cavity data (deleted list and boundary triangles)
+ *  verticesID  scratch array forwarded to computeAdjacenciesFast
+ *  curTet      output: set to one of the newly created tetrahedra
+ *  vta         the vertex being inserted
+ *  color       color given to every new tetrahedron
+ ****************************************/
+static inline HXTStatus fillingACavity(HXTMesh* mesh, TetLocal* local, uint32_t* __restrict__ verticesID, uint64_t* __restrict__ curTet, const uint32_t vta, const uint16_t color){
+  const uint64_t clength = local->deleted.num;
+  const uint64_t blength = local->ball.num;
+
+  // first entry of the deleted list that is recycled
+  const uint64_t start = clength - blength;
+
+  // #pragma vector aligned
+  #pragma omp simd
+  for (uint64_t i=0; i<blength; i++)
+  {
+
+    __assume_aligned(local->deleted.tetID, SIMD_ALIGN);
+    __assume_aligned(local->ball.bnd, SIMD_ALIGN);
+    __assume_aligned(mesh->tetrahedra.colors, SIMD_ALIGN);
+    __assume_aligned(mesh->tetrahedra.flag, SIMD_ALIGN);
+    __assume_aligned(mesh->tetrahedra.node, SIMD_ALIGN);
+    __assume_aligned(mesh->tetrahedra.neigh, SIMD_ALIGN);
+
+    const uint64_t newTet = local->deleted.tetID[i + start];
+    uint32_t* __restrict__ Node = mesh->tetrahedra.node + 4*newTet;
+    mesh->tetrahedra.colors[newTet] = color;
+
+    // the flags come straight from the boundary triangle; there is no need
+    // to zero them first since this store overwrites the whole entry
+    mesh->tetrahedra.flag[newTet] = local->ball.bnd[i].flag;
+
+    /* we need to always put the ghost vertex at the fourth slot*/
+    Node[0] = vta;
+    Node[1] = local->ball.bnd[i].node[0];
+    Node[2] = local->ball.bnd[i].node[1];
+    Node[3] = local->ball.bnd[i].node[2];
+
+    // face 0 (opposite to vta) is glued to the tetrahedron outside the cavity
+    const uint64_t neigh = local->ball.bnd[i].neigh;
+    mesh->tetrahedra.neigh[4*newTet] = neigh;
+
+    // update neighbor's neighbor
+    mesh->tetrahedra.neigh[neigh] = 4*newTet;
+
+    // we recycle neigh to contain newTet (used in computeAdjacencies)
+    local->ball.bnd[i].neigh = 4*newTet;
+  }
+
+  // adjacencies between the new tetrahedra themselves; the status is only
+  // checked in debug builds
+#ifndef HXT_DELAUNAY_LOW_MEMORY
+  if(blength<=58){ // N+2<=31 => N<=29 => 2N<=58
+  #ifndef NDEBUG
+    HXT_CHECK( computeAdjacenciesFast(mesh, local, verticesID, blength) );
+  #else
+    computeAdjacenciesFast(mesh, local, verticesID, blength);
+  #endif
+  }
+  else
+#endif
+  {
+  #ifndef NDEBUG
+    HXT_CHECK(computeAdjacenciesSlow(mesh, local, start, blength) );
+  #else
+    computeAdjacenciesSlow(mesh, local, start, blength);
+  #endif
+  }
+
+  // the recycled identifiers are no longer deleted tetrahedra
+  *curTet = local->deleted.tetID[start];
+  local->deleted.num = start;
+
+  return HXT_STATUS_OK;
+}
+
+
+/*************************************************************
+ * insert a single point
+ *
+ * returns HXT_STATUS_TRUE when vta was inserted,
+ *         HXT_STATUS_FALSE when it was rejected by the size filters
+ *         (only when nodalSizes!=NULL),
+ *         HXT_STATUS_TRYAGAIN when digging the cavity reported
+ *         HXT_STATUS_INTERNAL and the cavity was rolled back,
+ *         or the error propagated by HXT_CHECK.
+ ************************************************************/
+static inline HXTStatus insertion(HXTMesh* mesh,
+                                  uint32_t* verticesID,
+                                  TetLocal* local,
+                                  const double* nodalSizes,
+                                  uint64_t* curTet,
+                                  const uint32_t vta,
+                                  int perfectlyDelaunay){
+  // size of the deleted list before we touch anything, to be able to roll back
+  const uint64_t deletedBefore = local->deleted.num;
+  const int sizeFiltering = nodalSizes!=NULL;
+
+  // 1st: walk to a tetrahedron from which the cavity can be dug
+  HXT_CHECK( walking2Cavity(mesh, local, curTet, vta) );
+
+  if(sizeFiltering && filterTet(mesh, nodalSizes, *curTet, vta))
+    return HXT_STATUS_FALSE;
+
+  // 2nd: dig the cavity
+  const uint16_t color = mesh->tetrahedra.colors[*curTet];
+  int edgeConstraint = 0;
+  const HXTStatus digStatus = diggingACavity(mesh, local, *curTet, vta, &edgeConstraint);
+
+  if(digStatus==HXT_STATUS_INTERNAL){
+    restoreDeleted(mesh, local, deletedBefore);
+    return HXT_STATUS_TRYAGAIN;
+  }
+  HXT_CHECK(digStatus);
+
+  if(edgeConstraint) {
+    HXT_CHECK( respectEdgeConstraint(local, mesh, vta, color, deletedBefore) );
+  }
+
+  // (disabled alternative: give up instead of reshaping)
+  // uint64_t face = 0;
+  // if(!perfectlyDelaunay && isStarShaped(local, mesh, vta, &face)==HXT_STATUS_INTERNAL) {
+  //   restoreDeleted(mesh, local, deletedBefore);
+  //   return HXT_STATUS_FALSE;
+  // }
+
+  // 3rd: reshape the cavity if it is not star shaped
+  if(!perfectlyDelaunay) {
+    HXT_CHECK( reshapeCavityIfNeeded(local, mesh, vta) );
+  }
+
+  if(sizeFiltering && filterCavity(local, mesh, nodalSizes, vta)) {
+    restoreDeleted(mesh, local, deletedBefore);
+    return HXT_STATUS_FALSE;
+  }
+
+  // 4th: the ball needs more tetrahedra than the cavity freed, so we
+  // append brand new (deleted) tetrahedra at the end of the mesh
+  if(local->deleted.num < local->ball.num){
+    const uint64_t extra = MAX(DELETED_BUFFER_SIZE,local->ball.num)-local->deleted.num;
+
+    // book a contiguous range of identifiers; other threads may do the same
+    uint64_t firstNew;
+
+    #pragma omp atomic capture
+    { firstNew = mesh->tetrahedra.num; mesh->tetrahedra.num+=extra;}
+
+    reserveNewTet(mesh);
+    reserveNewDeleted(local, extra);
+
+    #pragma omp simd
+    for (uint64_t i=0; i<extra; i++){
+      const uint64_t tet = firstNew + i;
+      local->deleted.tetID[local->deleted.num+i] = tet;
+      mesh->tetrahedra.flag[tet] = 0;
+      setDeletedFlag(mesh, tet);
+    }
+
+    local->deleted.num += extra;
+  }
+
+  // 5th: fill the cavity with the ball of vta
+  HXT_CHECK( fillingACavity(mesh, local, verticesID, curTet, vta, color) );
+
+  return HXT_STATUS_TRUE;
+}
+
+
+/*************************************************************
+ * Delaunay triangulation of a set of points
+ *
+ *  mesh          the mesh in which vertices are inserted
+ *  options       fully initialized options (see DelaunayOptionsInit);
+ *                options->numVerticesInMesh is updated along the way
+ *  nodeInfo      array of nToInsert entries; .node is the vertex to insert
+ *                and .status must be HXT_STATUS_TRYAGAIN for the vertices
+ *                that have to be inserted
+ *  nToInsert     number of entries in nodeInfo
+ *  noReordering  - if non-zero, vertices are never moved in memory: only
+ *                  nodeInfo is shuffled and sorted;
+ *                - if zero, the vertices to insert are the last nToInsert
+ *                  vertices of the mesh, they are reordered in memory and
+ *                  the ones that could not be inserted are removed at the end
+ *
+ * The vertices are split into passes; each pass is repeated (with fewer
+ * threads if needed) until none of its vertices has the status
+ * HXT_STATUS_TRYAGAIN anymore.
+ ************************************************************/
+static HXTStatus parallelDelaunay3D(HXTMesh* mesh,
+                                    HXTDelaunayOptions* options,
+                                    hxtNodeInfo* nodeInfo,
+                                    const uint32_t nToInsert,
+                                    int noReordering)
+{
+  uint32_t totalNumSkipped = 0;
+  uint32_t seed = 1;
+
+  // third, divide indices in different passes
+  const int maxThreads = options->delaunayThreads;
+  const int perfectlyDelaunay = mesh->tetrahedra.num<=5;
+
+  uint32_t passes[12];
+  unsigned npasses = computePasses(passes, options->numVerticesInMesh, nToInsert);
+
+  // that ugly cast because people want an array of double into the mesh structure
+  HXTVertex* vertices = (HXTVertex*) mesh->vertices.coord;
+
+
+  /******************************************************
+          shuffle (and optimize cache locality)
+  ******************************************************/
+  if(noReordering){
+    // shuffle nodeInfo
+    HXT_CHECK( hxtNodeInfoShuffle(nodeInfo, nToInsert) );
+  }
+  else {
+    HXT_INFO_COND(options->verbosity>1, "Reordering vertices from %u to %u", mesh->vertices.num - nToInsert, mesh->vertices.num);
+    HXTVertex* verticesToInsert = vertices + mesh->vertices.num - nToInsert;
+
+    if(options->nodalSizes==NULL){
+      // shuffle the vertices to insert, then sort each pass except the first according to the hilbert curve...
+      HXT_CHECK( hxtVerticesShuffle(verticesToInsert, nToInsert) );
+    }
+    else{
+      // the sizes have to follow the vertices: we shuffle nodeInfo and
+      // permute vertices and sizes together afterwards
+      HXT_CHECK( hxtNodeInfoShuffle(nodeInfo, nToInsert) );
+    }
+
+    uint32_t nbits = hxtAdvancedHilbertBits(options->bbox, options->minSizeStart, options->minSizeEnd,
+                                            options->numVerticesInMesh,
+                                            options->numVerticesInMesh + nToInsert,
+                                            options->numVerticesInMesh + nToInsert/4,
+                                            nToInsert/2,
+                                            maxThreads);
+
+    HXT_CHECK( hxtVerticesHilbertDist(options->bbox, verticesToInsert, nToInsert, &nbits, NULL) );
+
+    if(options->nodalSizes==NULL){
+      // the loop starts at 1 (first pass left unsorted) when the mesh holds
+      // fewer than SMALLEST_ROUND vertices, at 0 otherwise
+      for (unsigned i=options->numVerticesInMesh < SMALLEST_ROUND; i<npasses; i++) {
+        HXT_CHECK( hxtVerticesSort(verticesToInsert+passes[i], passes[i+1]-passes[i], nbits) );
+      }
+    }
+    else{
+      #pragma omp parallel for
+      for (uint32_t i=0; i<nToInsert; i++) {
+        nodeInfo[i].hilbertDist = verticesToInsert[i].padding.hilbertDist;
+      }
+
+      for (unsigned i=options->numVerticesInMesh < SMALLEST_ROUND; i<npasses; i++) {
+        HXT_CHECK( hxtNodeInfoSort(nodeInfo+passes[i], passes[i+1]-passes[i], nbits) );
+      }
+
+      const uint32_t nodalMin = mesh->vertices.num - nToInsert;
+      double* sizesToInsert = options->nodalSizes + nodalMin;
+
+      // apply the permutation described by nodeInfo to the vertices and to
+      // their sizes, through temporary copies
+      size_t vertSize = nToInsert*sizeof(HXTVertex);
+      size_t sizeSize = nToInsert*sizeof(double);
+      HXTVertex* vertCopy;
+      double* sizeCopy;
+      HXT_CHECK( hxtAlignedMalloc(&vertCopy, vertSize) );
+      HXT_CHECK( hxtAlignedMalloc(&sizeCopy, sizeSize) );
+
+      #pragma omp parallel for
+      for (uint32_t i=0; i<nToInsert; i++) {
+        vertCopy[i] = verticesToInsert[nodeInfo[i].node-nodalMin];
+        sizeCopy[i] = sizesToInsert[nodeInfo[i].node-nodalMin];
+        nodeInfo[i].node = nodalMin + i;
+      }
+
+      memcpy(verticesToInsert, vertCopy, vertSize);
+      memcpy(sizesToInsert, sizeCopy, sizeSize);
+
+      HXT_CHECK( hxtAlignedFree(&vertCopy) );
+      HXT_CHECK( hxtAlignedFree(&sizeCopy) );
+    }
+  }
+
+  /******************************************************
+        Initializations and allocations
+  ******************************************************/
+  if(mesh->tetrahedra.num<5){
+    HXT_INFO_COND(options->verbosity>0,
+                  "Initialization of tet. mesh");
+    HXT_CHECK( hxtTetrahedraInit(mesh, nodeInfo, nToInsert, options->verbosity) );
+    options->numVerticesInMesh = 4; // not counting the ghost vertex
+    passes[0] = 4;
+  }
+
+
+  uint32_t*  verticesID;
+#ifdef HXT_DELAUNAY_LOW_MEMORY
+  verticesID = NULL; // we do not need it
+#else
+  HXT_CHECK( hxtAlignedMalloc(&verticesID, mesh->vertices.num*sizeof(uint32_t)) );
+#endif
+
+  // one TetLocal per thread
+  TetLocal* Locals;
+  HXT_CHECK( hxtMalloc(&Locals, maxThreads*sizeof(TetLocal)) );
+  // HXT_CHECK( hxtMalloc())
+
+  for (int i=0; i<maxThreads; i++)
+    localInit(&Locals[i]);
+
+
+  HXT_INFO_COND(options->verbosity>0,
+                "Delaunay of %10u vertices on %3d threads\t- mesh.nvert: %-10u",
+                passes[npasses] - passes[0], maxThreads, options->numVerticesInMesh);
+
+  for (uint32_t p=0; p<npasses; p++)
+  {
+
+    double percent = 200;
+    int nthreads = 1;
+    {
+      // start with the power of two number of threads suggested by the
+      // length of the pass, capped by maxThreads
+      uint32_t tmp = (passes[p+1]-passes[p])/SMALLEST_ROUND;
+      while(tmp>0 && nthreads<maxThreads){
+        tmp = tmp/2;
+        nthreads*=2;
+      }
+    }
+    nthreads = MIN(nthreads, maxThreads);
+
+    // const uint32_t initialPassLength = passes[p+1] - passes[p];
+
+    // passes[p] is advanced at the end of each round by the number of
+    // vertices that are done: we loop until the pass is empty
+    for(uint32_t n=0; passes[p+1]-passes[p]; n++)
+    {
+      const uint32_t passStart = passes[p];
+      const uint32_t passEnd = passes[p+1];
+      const uint32_t passLength = passEnd - passStart;
+
+      /******************************************************
+                      choosing number of threads
+      ******************************************************/
+      // percent is the success rate of the previous round (200 before the first)
+      if(percent<140/nthreads || passLength<SMALLEST_ROUND){
+        nthreads=1;
+      }
+      else if(percent<20){
+        nthreads=(nthreads+1)/2;
+      }
+      else if(passLength < (uint32_t) nthreads*SMALLEST_ROUND)
+        nthreads=(nthreads+1)/2;
+
+
+      /******************************************************
+                      Sorting vertices
+      ******************************************************/
+      double hxtDeclareAligned bboxShift[4]={0.5,0.5,0.5,0};
+
+      if(percent<100 && nthreads>1)
+      {
+        // some insertions failed in parallel: change the partitions
+        bboxShift[0] = (double) hxtReproducibleLCG(&seed)/RAND_MAX;
+        bboxShift[1] = (double) hxtReproducibleLCG(&seed)/RAND_MAX;
+        bboxShift[2] = (double) hxtReproducibleLCG(&seed)/RAND_MAX;
+        bboxShift[3] = (double) hxtReproducibleLCG(&seed)/RAND_MAX; // this is not a bbox deformation, it's an index shift
+      }
+
+      uint32_t nbits;
+
+      if(p==0 && maxThreads<=1) {
+        nbits = hxtAdvancedHilbertBits(options->bbox, options->minSizeStart, options->minSizeEnd,
+                                       options->numVerticesInMesh,
+                                       options->numVerticesInMesh + nToInsert,
+                                       options->numVerticesInMesh,
+                                       nToInsert,
+                                       1);
+
+        HXT_CHECK( hxtVerticesHilbertDist(options->bbox, vertices, mesh->vertices.num, &nbits, bboxShift) );
+      }
+      else {
+        nbits = hxtAdvancedHilbertBits(options->bbox, options->minSizeStart, options->minSizeEnd,
+                                       options->numVerticesInMesh - passStart,
+                                       options->numVerticesInMesh - passStart + nToInsert,
+                                       options->numVerticesInMesh,
+                                       passLength,
+                                       nthreads);
+        if(noReordering){
+          HXT_CHECK( hxtVerticesHilbertDist(options->bbox, vertices, mesh->vertices.num, &nbits, bboxShift) );
+        }
+        else{
+          HXT_CHECK( hxtVerticesHilbertDist(options->bbox, vertices, mesh->vertices.num - nToInsert + passEnd, &nbits, bboxShift) );
+        }
+      }
+
+
+
+      #pragma omp parallel for simd aligned(nodeInfo:SIMD_ALIGN)
+      for (uint32_t i=passStart; i<passEnd; i++) {
+        nodeInfo[i].hilbertDist = vertices[nodeInfo[i].node].padding.hilbertDist;
+      }
+
+      if(p!=0 || n!=0 || nthreads>1 || options->numVerticesInMesh >= SMALLEST_ROUND){
+        HXT_CHECK( hxtNodeInfoSort(nodeInfo + passStart, passLength, nbits) );
+      }
+
+      const uint32_t step = passLength/nthreads;
+
+      // The cast has to apply to the product: casting bboxShift[3] alone
+      // truncates a fraction in [0,1) to 0, which disables the index shift.
+      // MIN() handles the case where the fraction is exactly 1.
+      uint32_t indexShift = MIN(step-1,(uint32_t) (bboxShift[3]*step));
+
+      int threadFinished = 0;
+
+      #pragma omp parallel num_threads(nthreads)
+      {
+      #ifdef _MSC_VER
+        #pragma omp single
+        nthreads = omp_get_num_threads();
+      #endif
+
+        uint64_t curTet = 0; // we always begin with the first tet. (index 0)
+        const int threadID = omp_get_thread_num();
+
+        uint32_t localStart;
+        uint32_t localN;
+        int foundTet = 0;
+
+        if(nthreads>1){
+          // if(threadID<nthreads){
+
+          /******************************************************
+                          Making partitions
+          ******************************************************/
+          localStart = step*threadID + indexShift;
+          uint64_t dist = nodeInfo[passStart + localStart].hilbertDist;
+
+          // vertices with the same hilbertDist must stay in the same partition
+          uint32_t up = 1;
+          while(localStart+up<passLength && dist==nodeInfo[passStart + localStart + up].hilbertDist)
+            up++;
+
+          localStart = localStart+up==passLength?0:localStart+up;
+          if(localStart > 0)
+            Locals[threadID].partition.startDist = (nodeInfo[passStart + localStart].hilbertDist
+                                                  + nodeInfo[passStart + localStart - 1].hilbertDist + 1)/2;
+          else
+            Locals[threadID].partition.startDist = nodeInfo[passStart + passLength-1].hilbertDist + (nodeInfo[passStart + localStart].hilbertDist - nodeInfo[passStart + passLength - 1].hilbertDist)/2;
+          Locals[threadID].partition.first = localStart;
+          // }
+
+          #pragma omp barrier
+
+          // a partition ends where the partition of the next thread begins
+          // (the indices wrap around the pass)
+          // if(threadID<nthreads){
+          uint32_t localEnd = Locals[(threadID+1)%nthreads].partition.first;
+          localN = (localEnd + passLength - localStart)%passLength;
+
+          Locals[threadID].partition.endDist = Locals[(threadID+1)%nthreads].partition.startDist;
+
+          // printf("%d) first dist: %lu, last dist: %lu startDist: %lu endDist: %lu\n", threadID, nodeInfo[passStart + localStart].hilbertDist, nodeInfo[(passStart + localStart + localN-1)%passLength].hilbertDist, Locals[threadID].partition.startDist, Locals[threadID].partition.endDist);
+
+
+          /******************************************************
+                          find starting tetrahedron
+          ******************************************************/
+
+          for (uint64_t i=0; i<mesh->tetrahedra.num; i++)
+          {
+            curTet = i;
+            if(getDeletedFlag(mesh, i)==0 &&
+               checkTetrahedron(vertices, &Locals[threadID], mesh->tetrahedra.node + curTet*4 )==HXT_STATUS_OK)
+            {
+              foundTet = 1;
+              break;
+            }
+          }
+
+          if(options->reproducible){
+            Locals[threadID].partition.startDist = 0;
+            Locals[threadID].partition.endDist = UINT64_MAX;
+
+            // walk in total liberty toward the first point
+            HXTStatus status = walking2Cavity(mesh, &Locals[threadID], &curTet, nodeInfo[passStart + (localStart+localN/2)%passLength].node);
+
+            if(status!=HXT_STATUS_OK && status!=HXT_STATUS_TRYAGAIN){
+              HXT_OMP_CHECK( status );
+            }
+
+            Locals[threadID].partition.startDist = nodeInfo[passStart + localStart].hilbertDist;
+            Locals[threadID].partition.endDist = nodeInfo[passStart + localEnd].hilbertDist;
+
+            if(checkTetrahedron(vertices, &Locals[threadID], mesh->tetrahedra.node + curTet*4 )!=HXT_STATUS_OK){
+              foundTet = 0;
+              // check the neighbors
+              for (unsigned i=0; i<4; i++) {
+                uint64_t tet = mesh->tetrahedra.neigh[4*curTet+i]/4;
+                if(checkTetrahedron(vertices, &Locals[threadID], mesh->tetrahedra.node + tet*4 )==HXT_STATUS_OK){
+                  foundTet = 1;
+                  curTet = tet;
+                  break;
+                }
+              }
+            }
+          }
+
+          // }
+
+          #pragma omp barrier
+        }
+        else
+          // if(threadID==0)
+        {
+
+          /******************************************************
+             single-thread partition and starting tetrahedron
+          ******************************************************/
+          localStart = 0;
+          localN = passLength;
+          Locals[0].partition.startDist = 0;
+          Locals[0].partition.endDist = UINT64_MAX;
+
+          for (uint64_t i=0; i<mesh->tetrahedra.num; i++)
+          {
+            if(getDeletedFlag(mesh, i)==0){
+              curTet = i;
+              foundTet = 1;
+              break;
+            }
+          }
+        }
+
+        if (foundTet == 0) {
+          HXT_INFO_COND(options->verbosity>1,
+                        "thread %d did not find any tetrahedron to begin with", threadID);
+        }
+
+        // filtering vertices on the Moore curve
+        if(options->nodalSizes!=NULL)
+        {
+          // p1 is the last vertex that was kept
+          double* p1 = NULL;
+          double p1Size = 0;
+
+          for (uint32_t i=0; i<localN; i++)
+          {
+            uint32_t passIndex = (localStart+i)%passLength;
+            uint32_t lastNode = nodeInfo[passStart + passIndex].node;
+            if(nodeInfo[passStart + passIndex].status==HXT_STATUS_TRYAGAIN){
+              double* p2 = vertices[lastNode].coord;
+              double p2Size = options->nodalSizes[lastNode];
+              if(p1!=NULL && pointIsTooClose(p1, p2, 0.5*(p1Size+p2Size))!=HXT_STATUS_OK){
+                nodeInfo[passStart + passIndex].status=HXT_STATUS_FALSE;
+              }
+              else{
+                p1 = p2;
+                p1Size = p2Size;
+              }
+            }
+          }
+        }
+
+
+        // if(threadID<nthreads){
+        if(foundTet!=0){
+
+          /******************************************************
+                          vertices insertion
+          ******************************************************/
+          for (uint32_t i=0; i<localN; i++)
+          {
+            uint32_t passIndex = (localStart+i)%passLength;
+            uint32_t vta = nodeInfo[passStart + passIndex].node;
+            if(nodeInfo[passStart + passIndex].status==HXT_STATUS_TRYAGAIN){
+              HXTStatus status = insertion(mesh, verticesID, &Locals[threadID], options->nodalSizes, &curTet, vta, perfectlyDelaunay);
+
+              switch(status){
+                case HXT_STATUS_TRYAGAIN:
+                  // with a single thread, trying again would not change
+                  // anything: the vertex is skipped
+                  if(nthreads==1){
+                    double* vtaCoord = vertices[vta].coord;
+                    HXT_WARNING("skipping supposedly duplicate vertex (%f %f %f)", vtaCoord[0], vtaCoord[1], vtaCoord[2]);
+                    nodeInfo[passStart + passIndex].status = HXT_STATUS_FALSE;
+                    break;
+                  }
+                  /* fall through */
+                case HXT_STATUS_FALSE:
+                case HXT_STATUS_TRUE:
+                  nodeInfo[passStart + passIndex].status = status;
+                  break;
+                default: // error other than HXT_STATUS_TRYAGAIN cause the program to return
+                  nodeInfo[passStart + passIndex].status = HXT_STATUS_TRYAGAIN;
+                  HXT_OMP_CHECK( status );
+                  break;
+              }
+            }
+            else{
+              nodeInfo[passStart + passIndex].status = HXT_STATUS_FALSE;
+            }
+          }
+        }
+        // }
+
+        #pragma omp atomic update
+        threadFinished++;
+
+        int val = 0;
+        do{
+          // threads are waiting here for a reallocation
+          HXT_OMP_CHECK( synchronizeReallocation(mesh, &threadFinished, &val) );
+        }while(val<nthreads);
+        // }while(val<maxThreads);
+      }
+
+      /******************************************************
+      vertices that have to be tried again are put at the end
+      ******************************************************/
+      // everything above i+shift is HXT_STATUS_TRYAGAIN
+      uint32_t shift = 0;
+      unsigned numSkipped = 0;
+      for (uint32_t i=passEnd; i>passStart;)
+      {
+        i--;
+        if(nodeInfo[i].status!=HXT_STATUS_TRYAGAIN){
+          if(nodeInfo[i].status==HXT_STATUS_FALSE)
+            numSkipped++;
+          shift++;
+        }
+        else if(shift!=0) {
+          hxtNodeInfo tmp = nodeInfo[i];
+          nodeInfo[i] = nodeInfo[i+shift];
+          nodeInfo[i+shift] = tmp;
+        }
+      }
+
+      // shift vertices are done, numSkipped of them were not inserted
+      options->numVerticesInMesh += shift - numSkipped;
+
+      percent = (shift-numSkipped)*100.0/MAX(1,passLength-numSkipped);
+      totalNumSkipped += numSkipped;
+
+      HXT_INFO_COND(options->verbosity>1,
+                    "%3d thrd |%10u/%-10u-> %*.1f%-*c\t- mesh.nvert: %-10u",
+                    nthreads, shift-numSkipped, passLength-numSkipped, MIN(8,n/2)+5, percent, 8-MIN(8,n/2),'%', options->numVerticesInMesh);
+
+      passes[p] += shift;
+    }
+  }
+
+  /******************************************************
+                  Cleaning
+  ******************************************************/
+  // the tetrahedra left in the deleted lists lose their adjacencies
+  // before being removed from the mesh
+  #pragma omp parallel num_threads(maxThreads)
+  {
+    const int threadID = omp_get_thread_num();
+    for (uint64_t i=0; i<Locals[threadID].deleted.num; i++) {
+      for (int j=0; j<4; j++) {
+        mesh->tetrahedra.neigh[4*Locals[threadID].deleted.tetID[i]+j] = HXT_NO_ADJACENT;
+      }
+    }
+  }
+  HXT_CHECK( hxtRemoveDeleted(mesh) );
+
+  for (int i=0; i<maxThreads; i++){
+    HXT_CHECK( hxtAlignedFree(&Locals[i].deleted.tetID) );
+    HXT_CHECK( hxtAlignedFree(&Locals[i].ball.bnd) );
+  }
+
+  HXT_CHECK( hxtAlignedFree(&verticesID) );
+  HXT_CHECK( hxtFree(&Locals) );
+
+  /***************************************************************
+    if reordering allowed, remove vertices we could not insert
+  ***************************************************************/
+  if(!noReordering && totalNumSkipped!=0){
+    /* remove deleted vertices and change tetrahedra.node accordingly */
+
+    uint32_t* numInserted;
+    HXT_CHECK( hxtAlignedMalloc(&numInserted, omp_get_max_threads()*sizeof(uint32_t)) );
+
+    uint32_t firstShifted = mesh->vertices.num - nToInsert;
+    uint32_t n = nToInsert;
+
+    // when a vertex was skipped, nodeInfo[i].status = HXT_STATUS_FALSE
+    #pragma omp parallel
+    {
+      // 1st: mark vertices with their corresponding status
+      #pragma omp for schedule(static)
+      for (uint32_t i=0; i<nToInsert; i++) {
+        uint32_t index = nodeInfo[i].node;
+        HXTStatus status = nodeInfo[i].status;
+        vertices[index].padding.status = status;
+      }// implicit barrier here
+
+      // the leading vertices that were all inserted do not move; this
+      // terminates because at least one vertex was skipped
+      #pragma omp single
+      {
+        uint32_t i = 0;
+        while (vertices[firstShifted+i].padding.status==HXT_STATUS_TRUE) i++;
+
+        firstShifted += i+1;
+        n -= i+1;
+      }// implicit barrier here
+
+      uint32_t start = 0;
+      int threadID = omp_get_thread_num();
+      numInserted[threadID] = 0;
+
+      // 2nd: count the inserted vertices in the chunk of each thread...
+      #pragma omp for schedule(static)
+      for (uint32_t i=0; i<n; i++) {
+        if(vertices[firstShifted+i].padding.status==HXT_STATUS_TRUE)
+          numInserted[threadID]++;
+      }// implicit barrier here
+
+      // ...the new index of the first vertex of a chunk follows from the
+      // counts of the previous threads (same static schedule in both loops)
+      for (int i=0; i<threadID; i++) {
+        start+=numInserted[i];
+      }
+      start += firstShifted-1;
+
+      // 3rd: compute where each vertices will be
+      #pragma omp for schedule(static)
+      for (uint32_t i=0; i<n; i++) {
+        uint32_t oldStart = start;
+
+        if(vertices[firstShifted+i].padding.status==HXT_STATUS_TRUE)
+          start++;
+
+        // index and status are at the same location (it's a union) we cannot put this above the "if" !
+        vertices[firstShifted+i].padding.index = oldStart;
+      }
+
+      // 4th: update tetrahedra.node accordingly
+      #pragma omp for
+      for (uint64_t i=0; i<4*mesh->tetrahedra.num; i++) {
+        uint32_t index = mesh->tetrahedra.node[i];
+        if(index>=firstShifted && index!=HXT_GHOST_VERTEX)
+          mesh->tetrahedra.node[i] = vertices[index].padding.index;
+      }
+    }
+
+    HXT_CHECK( hxtAlignedFree(&numInserted) );
+
+    // 5th: put vertices at the right indices
+    for (uint32_t i=firstShifted; i<mesh->vertices.num; i++) {
+      if(options->nodalSizes!=NULL){
+        options->nodalSizes[vertices[i].padding.index] = options->nodalSizes[i];
+      }
+      vertices[vertices[i].padding.index] = vertices[i];
+    }
+
+    if(options->verbosity>1)
+      HXT_INFO("%u vertices removed (vertices not inserted in the mesh are removed when using hxtDelaunay)\n", totalNumSkipped);
+
+    mesh->vertices.num = mesh->vertices.num - totalNumSkipped;
+  }
+
+  HXT_INFO_COND(options->verbosity>0, "Delaunay done !%10u skipped", totalNumSkipped);
+  // the counters are cast so that the arguments match the %lu conversions
+  // whatever the width of the fields in the mesh structure
+  HXT_INFO_COND(options->verbosity>1, "mem. allocated:%5.2fGB - mesh.ntet: %-12lu - mesh.nvert: %-10lu",
+    ((50 + 2*(mesh->tetrahedra.flag!=NULL)) * mesh->tetrahedra.size +
+     (32 + 8*(options->nodalSizes!=NULL)) * mesh->vertices.size)/(1024.*1024.*1024.),
+    (unsigned long) mesh->tetrahedra.num, (unsigned long) mesh->vertices.num);
+
+  if(options->reproducible && maxThreads!=1){
+    // the message has no conversion specifier, so it takes no extra argument
+    HXT_INFO_COND(options->verbosity>1, "Reordering tetrahedra (reproducible==true)\n");
+    HXT_CHECK( hxtTetReorder(mesh) );
+  }
+
+  return HXT_STATUS_OK;
+}
+
+
+/*****************************************
+ * complete the HXTDelaunayOptions struct
+ * when there are missing fields.
+ *
+ *  mesh         the mesh (must not be NULL)
+ *  userOptions  options given by the caller, or NULL to use defaults
+ *  options      output: the options that will actually be used
+ *  bbox         storage used for options->bbox when none is provided
+ ****************************************/
+static HXTStatus DelaunayOptionsInit(HXTMesh* mesh,
+                                HXTDelaunayOptions* userOptions,
+                                HXTDelaunayOptions* options,
+                                HXTBbox* bbox){
+HXT_ASSERT(mesh!=NULL);
+
+  if(userOptions==NULL){
+    // no options at all: use default parameters...
+    options->bbox = NULL;
+    options->nodalSizes = NULL;
+    options->minSizeStart = 0.0;
+    options->minSizeEnd = 0.0;
+    options->verbosity = 1;
+    options->delaunayThreads = 0;
+    options->reproducible = 0;
+
+    // ...and count the vertices that are referenced by a tetrahedron:
+    // every vertex gets a 0, those found in a tetrahedron get a 1,
+    // and the sum of those marks is the number we are looking for
+    HXTVertex* vertices = (HXTVertex*) mesh->vertices.coord;
+
+    #pragma omp parallel for
+    for (uint32_t v=0; v<mesh->vertices.num; v++) {
+      vertices[v].padding.index = 0;
+    }
+
+    #pragma omp parallel for
+    for (uint64_t t=0; t<mesh->tetrahedra.num; t++) {
+      const uint32_t* nodes = mesh->tetrahedra.node + 4*t;
+      vertices[nodes[0]].padding.index = 1;
+      vertices[nodes[1]].padding.index = 1;
+      vertices[nodes[2]].padding.index = 1;
+      if(nodes[3]!=HXT_GHOST_VERTEX)
+        vertices[nodes[3]].padding.index = 1;
+    }
+
+    uint32_t numMarked = 0;
+    #pragma omp parallel for reduction(+:numMarked)
+    for (uint32_t v=0; v<mesh->vertices.num; v++) {
+        numMarked += vertices[v].padding.index;
+    }
+
+    options->numVerticesInMesh = numMarked;
+  }
+  else{
+    // copy what the user gave us, clamping the sizes so that
+    // 0 <= minSizeStart <= minSizeEnd
+    options->bbox = userOptions->bbox;
+    options->nodalSizes = userOptions->nodalSizes;
+    options->verbosity = userOptions->verbosity;
+    options->minSizeStart = MAX(0.0, userOptions->minSizeStart);
+    options->minSizeEnd = MAX(options->minSizeStart, userOptions->minSizeEnd);
+    options->numVerticesInMesh = userOptions->numVerticesInMesh;
+    options->delaunayThreads = userOptions->delaunayThreads;
+    options->reproducible = userOptions->reproducible;
+  }
+
+HXT_ASSERT(options->numVerticesInMesh <= mesh->vertices.num);
+
+  // without a bounding box, compute the one of all the vertices of the mesh
+  if(options->bbox==NULL){
+    options->bbox = bbox;
+    hxtBboxInit(bbox);
+    HXT_CHECK( hxtBboxAdd(bbox, mesh->vertices.coord, mesh->vertices.num) );
+  }
+
+  // number of threads: negative means one per processor, zero means the
+  // OpenMP default, and in any case never more than the OpenMP thread limit
+  if(options->delaunayThreads<0)
+    options->delaunayThreads = omp_get_num_procs();
+  else if(options->delaunayThreads==0)
+    options->delaunayThreads = omp_get_max_threads();
+
+  const int threadLimit = omp_get_thread_limit();
+  if(options->delaunayThreads>threadLimit)
+    options->delaunayThreads = threadLimit;
+
+  // for the predicates to work
+  const HXTBbox* box = options->bbox;
+  exactinit(box->max[0]-box->min[0],
+            box->max[1]-box->min[1],
+            box->max[2]-box->min[2]);
+
+  return HXT_STATUS_OK;
+}
+
+
+/*****************************************
+ * parallel Delaunay
+ * see header for a complete description
+ *
+ * Inserts the vertices whose index is greater than or equal to
+ * options.numVerticesInMesh; these vertices may be reordered in memory.
+ ****************************************/
+HXTStatus hxtDelaunay(HXTMesh* mesh, HXTDelaunayOptions* userOptions){
+  HXTDelaunayOptions options;
+  HXTBbox bbox;
+  HXT_CHECK( DelaunayOptionsInit(mesh, userOptions, &options, &bbox) );
+
+  const uint32_t nToInsert = mesh->vertices.num - options.numVerticesInMesh;
+
+  if(options.reproducible && nToInsert<2048) // not worth launching threads and having to reorder tets after...
+    options.delaunayThreads = 1;
+
+  hxtNodeInfo* nodeInfo;
+  HXT_CHECK( hxtAlignedMalloc(&nodeInfo, nToInsert*sizeof(hxtNodeInfo)) );
+
+  // we fill nodeInfo with the indices of each vertices to insert...
+  #pragma omp parallel for simd
+  for (uint32_t i=0; i<nToInsert; i++) {
+    nodeInfo[i].node = options.numVerticesInMesh + i;
+    nodeInfo[i].status = HXT_STATUS_TRYAGAIN; // necessary for when foundTet = 0;
+  }
+
+  // nodeInfo is released whatever the outcome of the triangulation, so that
+  // it does not leak when parallelDelaunay3D fails; the error of the
+  // triangulation is reported first
+  const HXTStatus status = parallelDelaunay3D(mesh, &options, nodeInfo, nToInsert, 0);
+  const HXTStatus freeStatus = hxtAlignedFree(&nodeInfo);
+
+  HXT_CHECK( status );
+  HXT_CHECK( freeStatus );
+
+  return HXT_STATUS_OK;
+}
+
+
+/************************************************
+ * parallel Delaunay without moving the vertices
+ * see header for a complete description
+ *
+ * nodeInfo (must not be NULL) lists the nToInsert
+ * vertices to insert; it is handed over as is to
+ * the triangulation, with reordering disabled.
+ ***********************************************/
+HXTStatus hxtDelaunaySteadyVertices(HXTMesh* mesh, HXTDelaunayOptions* userOptions, hxtNodeInfo* nodeInfo, uint64_t nToInsert){
+HXT_ASSERT(nodeInfo!=NULL);
+
+  HXTBbox bbox;
+  HXTDelaunayOptions options;
+  HXT_CHECK( DelaunayOptionsInit(mesh, userOptions, &options, &bbox) );
+
+  // for a small reproducible run, launching threads and having to
+  // reorder the tetrahedra afterwards is not worth it
+  if(nToInsert<2048 && options.reproducible){
+    options.delaunayThreads = 1;
+  }
+
+HXT_ASSERT(options.numVerticesInMesh+nToInsert <= mesh->vertices.num);
+
+  HXT_CHECK( parallelDelaunay3D(mesh, &options, nodeInfo, nToInsert, 1) );
+
+  return HXT_STATUS_OK;
+}
+
diff --git a/contrib/hxt/hxt_tetDelaunay.h b/contrib/hxt/hxt_tetDelaunay.h
new file mode 100644
index 0000000000000000000000000000000000000000..34020fdaeb25913787d9240f4ea3b99f296eec5b
--- /dev/null
+++ b/contrib/hxt/hxt_tetDelaunay.h
@@ -0,0 +1,113 @@
+#ifndef _HXT_TETDELAUNAY_
+#define _HXT_TETDELAUNAY_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "hxt_mesh.h"
+#include "hxt_vertices.h"
+
+/**
+* \file hxt_tetDelaunay.h Delaunay tetrahedrization
+* \author Célestin Marot
+*/
+
+/**
+ * \struct HXTDelaunayOptions
+ * 
+ * Options for the Delaunay functions hxtDelaunay() and hxtDelaunaySteadyVertices()
+ * 
+ *
+ */
+typedef struct {
+  HXTBbox* bbox;              /**< The bounding box for all vertices.
+                               *  - if bbox==NULL, the bbox is recomputed internally;
+                               *  - if bbox!=NULL, bbox must contain all vertices */
+
+  double* nodalSizes;         /**<
+                               *  - if nodalSizes==NULL, the mesh size is not restricted;
+                               *  - if nodalSizes!=NULL, nodalSizes contains the minimum
+                               *  mesh size at each vertex.\n
+                               *  If the insertion of a vertex creates an edge smaller than
+                               * the average nodal size of its endpoints, the vertex is
+                               * not inserted
+                               *  \warning a segmentation fault will occur if a vertex
+                               * doesn't have a corresponding mesh size */
+
+  double minSizeStart;         /**< estimate of the minimum mesh size at the moment of the call.
+                                * 0 if the mesh is empty or if the distribution is uniform
+                                * (the mesh size is then guessed from the number of points) */
+  double minSizeEnd;           /**< estimate of the minimum mesh size when all points are inserted in the Delaunay.
+                                * 0 if the distribution is uniform
+                                * (the mesh size is then guessed from the number of points) */
+
+  uint32_t numVerticesInMesh; /**< The number of vertices already in the mesh (hxtDelaunay() inserts the vertices from this index on) */
+
+  int verbosity;              /**<
+                               *  - if verbosity<=0: don't print information.
+                               *  - if verbosity==1: print basic information on each pass
+                               *  - if verbosity>=2: print everything */
+
+  int reproducible;           /**< If reproducible!=0, the Delaunay uses a reproducible tetrahedra order
+                               * in order to be totally deterministic.
+                               * \warning this takes time !
+                               * It requires a total reordering of tetrahedra at the end to get a reproducible order\n
+                               * except if `delaunayThreads==1 || (delaunayThreads==0 && omp_get_max_threads()==1)`\n
+                               * in which case it is reproducible anyway */
+
+  int delaunayThreads;        /**< number of threads for the Delaunay insertion
+                               *  - if delaunayThreads==0, it will use omp_get_max_threads()
+                               *  - if delaunayThreads<0, it will use omp_get_num_procs() */
+} HXTDelaunayOptions;
+
+
+/**
+ * \brief Delaunay of a set of vertices that does not modify their order
+ * \details This performs the insertion of the vertices whose indices are
+ * given in nodeInfo (in the \ref hxtNodeInfo.node structure member)\n
+ * This function does not change the order of vertices in the mesh.\n
+ * \ref hxtNodeInfo.status will be modified by the function to tell
+ * if the vertex was successfully inserted or not.
+ *  - nodeInfo[i].status==HXT_STATUS_TRUE  if the vertex was successfully inserted.
+ *  - nodeInfo[i].status==HXT_STATUS_FALSE  if the vertex was not inserted.
+ *  - nodeInfo[i].status==HXT_STATUS_TRYAGAIN  if an error occurred before the vertex could be inserted
+ *
+ * \warning
+ *  - the order of nodeInfo will change
+ *  - hxtNodeInfo[i].hilbertDist will change
+ *  - mesh->tetrahedra.* will change
+ *  - mesh->vertices.coord[4*i+3] will change
+ *
+ * \param mesh: a valid Delaunay mesh
+ * \param options: options to give to the Delaunay algorithm \ref HXTDelaunayOptions
+ * \param[in, out] nodeInfo: the indices of the vertices to insert in the tetrahedral mesh (must not be NULL).
+ * \param nToInsert: the number of elements in nodeInfo, hence the number of vertices to insert.
+ */
+HXTStatus hxtDelaunaySteadyVertices(HXTMesh* mesh, HXTDelaunayOptions* options, hxtNodeInfo* nodeInfo, uint64_t nToInsert);
+
+
+/**
+ * \brief Delaunay of a set of vertices
+ * \details This performs the insertion of the vertices
+ * from numVerticesInMesh to mesh->vertices.num\n
+ *
+ * \warning
+ *  - the order of mesh->vertices will change
+ *  - hxtNodeInfo[i].hilbertDist will change
+ *  - mesh->tetrahedra.* will change
+ *  - mesh->vertices.coord[4*i+3] will change
+ *  - vertices that could not be inserted are deleted from mesh->vertices !
+ *
+ * \param mesh: a valid Delaunay mesh
+ * \param options: options to give to the Delaunay algorithm \ref HXTDelaunayOptions
+ */
+HXTStatus hxtDelaunay(HXTMesh* mesh, HXTDelaunayOptions* options);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
diff --git a/contrib/hxt/hxt_tetFlag.c b/contrib/hxt/hxt_tetFlag.c
index bf068fd7677023574aabaa6f9c8183725d5456df..60a34249dbfe3b7c3667f6dd318e921fc1740440 100644
--- a/contrib/hxt/hxt_tetFlag.c
+++ b/contrib/hxt/hxt_tetFlag.c
@@ -8,10 +8,10 @@ static inline void sort3ints(uint32_t i[3]){
   }
 
   if(i[1]>i[2]){
-    uint32_t tmp = i[1]; i[1] = i[2]; i[2] = tmp;
+    uint32_t tmp1 = i[1]; i[1] = i[2]; i[2] = tmp1;
 
     if(i[0]>i[1]){
-      uint32_t tmp = i[0]; i[0] = i[1]; i[1] = tmp;
+      uint32_t tmp2 = i[0]; i[0] = i[1]; i[1] = tmp2;
     }
   }
 }
@@ -34,7 +34,6 @@ static inline uint64_t hash64(uint64_t x) {
     return x;
 }
 
-
 // just a function such that:
 // if transitiveHashedCmp(a,b)==true
 // then transitiveHashedCmp(b,a)==false
@@ -46,32 +45,44 @@ static inline uint64_t transitiveHashedCmp(uint64_t a, uint64_t b) {
   return hash64(a) < hash64(b);
 }
 
-
-static HXTStatus hxtEdgesNotInTriangles(HXTMesh* mesh, uint32_t* lines, uint64_t* numLines, uint64_t* threadCount, const uint64_t n) {
+/**************************************************************************************
+ *   Lines --> Triangles   MAPPING  (for each line, get 3*tri+e)
+ *************************************************************************************/
+HXTStatus hxtGetLines2TriMap(HXTMesh* mesh, uint64_t* lines2TriMap, uint64_t* missing)
+{
   HXTGroup2* edgeKey = NULL;
+  uint64_t* numEdges = NULL;
+
+  const int maxThreads = omp_get_max_threads();
+  const uint64_t n = mesh->vertices.num;
+  const uint64_t numEdgesTotal = mesh->triangles.num*3+mesh->lines.num;
 
-  uint64_t numEdgesTotal = mesh->triangles.num*3+mesh->lines.num;
+  HXT_CHECK( hxtMalloc(&numEdges, maxThreads*sizeof(uint64_t)) );
   HXT_CHECK( hxtAlignedMalloc(&edgeKey, numEdgesTotal*sizeof(HXTGroup2)) );
 
   #pragma omp parallel
   {
     #pragma omp for nowait
     for (uint64_t i=0; i<mesh->lines.num; i++) {
-      if(mesh->lines.node[2*i]<mesh->lines.node[2*i+1]) {
-        edgeKey[i].v[0] = mesh->lines.node[2*i]*n + mesh->lines.node[2*i+1];
-        edgeKey[i].v[1] = 0;
+      uint32_t v0 = mesh->lines.node[2*i];
+      uint32_t v1 = mesh->lines.node[2*i+1];
+
+      if(v0<v1) {
+        edgeKey[i].v[0] = v0*n + v1;
+        edgeKey[i].v[1] = 2*i;
       }
-      else if(mesh->lines.node[2*i]<mesh->lines.node[2*i+1]){
-        edgeKey[i].v[0] = mesh->lines.node[2*i+1]*n + mesh->lines.node[2*i];
-        edgeKey[i].v[1] = 0;
+      else if(v0>v1){ // reversed line: put the smaller vertex first so the key matches the sorted triangle edges
+        edgeKey[i].v[0] = v1*n + v0;
+        edgeKey[i].v[1] = 2*i;
       }
       else {
-        edgeKey[i].v[0] = mesh->lines.node[2*i]*n + mesh->lines.node[2*i]; // the line begins and ends at the same point...
+        edgeKey[i].v[0] = v0*n + v0; // the line begins and ends at the same point...
         edgeKey[i].v[1] = 1;
+        lines2TriMap[i] = HXT_NO_ADJACENT;
       }
     }
 
-    #pragma omp for nowait
+    #pragma omp for
     for (uint64_t i=0; i<mesh->triangles.num; i++) {
       uint32_t v[3] = {mesh->triangles.node[3*i+0],
                        mesh->triangles.node[3*i+1],
@@ -80,107 +91,85 @@ static HXTStatus hxtEdgesNotInTriangles(HXTMesh* mesh, uint32_t* lines, uint64_t
       sort3ints(v);
 
       edgeKey[mesh->lines.num+3*i].v[0] = v[0]*n + v[1];
-      edgeKey[mesh->lines.num+3*i].v[1] = 1;
+      edgeKey[mesh->lines.num+3*i].v[1] = 2*(3*i)+1;
       edgeKey[mesh->lines.num+3*i+1].v[0] = v[0]*n + v[2];
-      edgeKey[mesh->lines.num+3*i+1].v[1] = 1;
+      edgeKey[mesh->lines.num+3*i+1].v[1] = 2*(3*i+1)+1;
       edgeKey[mesh->lines.num+3*i+2].v[0] = v[1]*n + v[2];
-      edgeKey[mesh->lines.num+3*i+2].v[1] = 1;
+      edgeKey[mesh->lines.num+3*i+2].v[1] = 2*(3*i+2)+1;
     }
   }
 
-  group2_sort_v0(edgeKey, numEdgesTotal, n*(n-1)-1);
-
-  uint64_t total = 0;
+  HXT_CHECK( group2_sort_v0(edgeKey, numEdgesTotal, n*(n-1)-1) );
 
-  // TODO: filter all edge (the sort is stable... maybe put triangles before lines :p)
   #pragma omp parallel
   {
     int threadID = omp_get_thread_num();
     uint64_t localNum = 0;
 
-    #pragma omp for schedule(static)
+    #pragma omp for
     for (uint64_t i=0; i<numEdgesTotal; i++) {
-      if(edgeKey[i].v[1]==0) {
-        if(i==numEdgesTotal-1 || edgeKey[i].v[0] != edgeKey[i+1].v[0])
+      if(edgeKey[i].v[1]%2==0) {
+        if(i!=numEdgesTotal-1 && edgeKey[i].v[0]==edgeKey[i+1].v[0]) {
+        #ifndef NDEBUG
+          if(edgeKey[i+1].v[1]%2==0) {
+            HXT_ERROR_MSG(HXT_STATUS_ERROR, "Duplicated line in mesh->lines (%lu & %lu)\n"
+                                           "\tThis case is not handled in Release mode, FIX IT !!",
+                                           edgeKey[i].v[1]/2, edgeKey[i+1].v[1]/2);
+            exit(EXIT_FAILURE);
+          }
+          else
+        #endif
+          {
+            lines2TriMap[edgeKey[i].v[1]/2] = edgeKey[i+1].v[1]/2;
+          }
+        }
+        else /* the edge is not in a triangle */ {
           localNum++;
-        else
-          edgeKey[i].v[1]=1;
+          lines2TriMap[edgeKey[i].v[1]/2] = HXT_NO_ADJACENT;
+        }
       }
     }
 
-    threadCount[threadID] = localNum;
+    numEdges[threadID] = localNum;
 
     #pragma omp barrier
     #pragma omp single
     {
       int nthreads = omp_get_num_threads();
+      *missing = 0;
       for (int i=0; i<nthreads; i++) {
-        uint32_t tsum = total + threadCount[i];
-        threadCount[i] = total;
-        total = tsum;
-      }
-    }
-
-    localNum = threadCount[threadID];
-    if(total) {
-      #pragma omp for schedule(static)
-      for (uint64_t i=0; i<numEdgesTotal; i++) {
-        if(edgeKey[i].v[1]==0) {
-          lines[2*localNum] = edgeKey[i].v[0]%n;
-          lines[2*localNum+1] = edgeKey[i].v[0]/n;
-          localNum++;
-        }
+        *missing += numEdges[i];
       }
     }
   }
 
-  *numLines = total;
-
+  HXT_CHECK( hxtFree(&numEdges) );
   HXT_CHECK( hxtAlignedFree(&edgeKey) );
 
   return HXT_STATUS_OK;
 }
 
 
-/********************************************************************************
- *  report the number of missing edges or set tet.flag for constrained edges    *
- ********************************************************************************/
-// every tetrahedra must have a neighbor HXT_NO_ADJACENT is NOT permitted !!!
-HXTStatus hxtConstrainEdgesNotInTriangles(HXTMesh* mesh, uint64_t* missing) {
-  const int nodeArray[4][4] = {{-1, 2, 3, 1},
-                               { 3,-1, 0, 2},
-                               { 1, 3,-1, 0},
-                               { 2, 0, 1,-1}};
-
-  const int facetToNumber[4][4] = {{-1, 0, 1, 2},
-                                   { 0,-1, 3, 4},
-                                   { 1, 3,-1, 5},
-                                   { 2, 4, 5,-1}};
-
-
-  const int numberToFacetMin[] = { 0, 0, 0, 1, 1, 2};
-  const int numberToFacetMax[] = { 1, 2, 3, 2, 3, 3};
+/**************************************************************************************
+ *   Lines --> Tetrahedra   MAPPING  (for each line, get 6*tet+e)
+ *************************************************************************************/
+HXTStatus hxtGetLines2TetMap(HXTMesh* mesh, uint64_t* lines2TetMap, uint64_t* missing)
+{
+HXT_ASSERT( lines2TetMap!=NULL );
+HXT_ASSERT( mesh!=NULL );
 
+  const int maxThreads = omp_get_max_threads();
   const uint64_t n = mesh->vertices.num;
+  HXTStatus status = HXT_STATUS_OK;
+  uint64_t numEdgesTotal;
+
+  HXTGroup2* edgeKey = NULL;
   uint64_t* numEdges;
-  uint32_t* lines;
-  uint64_t numLines;
-  int maxThreads = omp_get_max_threads();
+  unsigned char* edgeFlag;
   HXT_CHECK( hxtMalloc(&numEdges, maxThreads*sizeof(uint64_t)) );
-  HXT_CHECK( hxtAlignedMalloc(&lines, mesh->lines.num*2*sizeof(uint32_t)) );
-  
-  // we don't wont to constrain edge that are already in a triangle
-  HXT_CHECK( hxtEdgesNotInTriangles(mesh, lines, &numLines, numEdges, n) );
+  HXT_CHECK( hxtAlignedMalloc(&edgeFlag, mesh->tetrahedra.num*sizeof(char)) );
+  memset(edgeFlag, 0, mesh->tetrahedra.num*sizeof(char));
 
-  if(numLines==0) {
-    HXT_CHECK( hxtAlignedFree(&lines) );
-    HXT_CHECK( hxtFree(&numEdges) );
-    return HXT_STATUS_OK;
-  }
-
-  HXTGroup2* edgeKey = NULL;
-  uint64_t numEdgesTotal = 0;
-  HXTStatus status = HXT_STATUS_OK;
 
   #pragma omp parallel
   {
@@ -188,46 +177,50 @@ HXTStatus hxtConstrainEdgesNotInTriangles(HXTMesh* mesh, uint64_t* missing) {
     uint64_t localNum = 0;
 
     #pragma omp for schedule(static)
-    for (uint64_t i=0; i<mesh->tetrahedra.num; i++) { // for each tetrahedra
-      for (int j=0; j<4; j++) {
-        for (int k=j+1; k<4; k++) {
-          int in_facet = j;
-          int out_facet = k;
+    for (uint64_t tet=0; tet<mesh->tetrahedra.num; tet++) { // for each tetrahedra
+      for (int edge=0; edge<6; edge++) {
+
+        unsigned in_facet, out_facet;
+        getFacetsFromEdge(edge, &in_facet, &out_facet);
+
+        uint32_t p0, p1;
+        {
+          unsigned n0, n1;
+          getNodesFromEdge(edge, &n0, &n1);
+          p0 = mesh->tetrahedra.node[4*tet + n0];
+          p1 = mesh->tetrahedra.node[4*tet + n1];
+        }
 
-          uint32_t p0 = mesh->tetrahedra.node[4*i + nodeArray[j][k]];
-          uint32_t p1 = mesh->tetrahedra.node[4*i + nodeArray[k][j]];
+        if(p0==HXT_GHOST_VERTEX || p1==HXT_GHOST_VERTEX)
+          continue;
 
-          if(p0==HXT_GHOST_VERTEX || p1==HXT_GHOST_VERTEX)
-            continue;
+        int truth = 1;
 
-          int truth = 1;
+        uint64_t curTet = tet;
+        do
+        {
+          uint32_t newV = mesh->tetrahedra.node[4*curTet + in_facet];
 
-          uint64_t curTet = i;
-          do
-          {
-            uint32_t newV = mesh->tetrahedra.node[4*curTet + in_facet];
+          // go into the neighbor through out_facet
+          uint64_t neigh = mesh->tetrahedra.neigh[4*curTet + out_facet];
+          curTet = neigh/4;
+          in_facet = neigh%4;
 
-            // go into the neighbor through out_facet
-            uint64_t neigh = mesh->tetrahedra.neigh[4*curTet + out_facet];
-            curTet = neigh/4;
-            in_facet = neigh%4;
+          if(transitiveHashedCmp(curTet, tet)) {
+            truth=0;
+            break;
+          }
 
-            if(transitiveHashedCmp(curTet, i)) {
-              truth=0;
+          uint32_t* nodes = mesh->tetrahedra.node + 4*curTet;
+          for (out_facet=0; out_facet<3; out_facet++)
+            if(nodes[out_facet]==newV)
               break;
-            }
-
-            uint32_t* nodes = mesh->tetrahedra.node + 4*curTet;
-            for (out_facet=0; out_facet<3; out_facet++)
-              if(nodes[out_facet]==newV)
-                break;
 
-          } while (curTet!=i);
+        } while (curTet!=tet);
 
-          if(truth){
-            constrainEdge(mesh, i, j, k);
-            localNum++;
-          }
+        if(truth){
+          edgeFlag[tet] |= 1U<<edge;
+          localNum++;
         }
       }
     }
@@ -238,17 +231,18 @@ HXTStatus hxtConstrainEdgesNotInTriangles(HXTMesh* mesh, uint64_t* missing) {
     #pragma omp single
     {
       int nthreads = omp_get_num_threads();
-      numEdgesTotal = numLines;
+      numEdgesTotal = mesh->lines.num;
       for (int i=0; i<nthreads; i++) {
-        // printf("%lu\n", numEdges[i]);
         uint32_t tsum = numEdgesTotal + numEdges[i];
         numEdges[i] = numEdgesTotal;
         numEdgesTotal = tsum;
       }
 
 #ifndef NDEBUG
-      if(numEdgesTotal>2*mesh->tetrahedra.num+numLines){
-        HXT_ERROR_MSG(HXT_STATUS_ERROR, "you should never go here..");
+      if(numEdgesTotal>2*mesh->tetrahedra.num+mesh->lines.num){
+        HXT_ERROR_MSG(HXT_STATUS_ERROR,
+                      "There is less than 2 tetrahedra per edge in average,"
+                      "which means the mesh is totally broken !");
         exit(EXIT_FAILURE);
       }
 #endif
@@ -259,70 +253,91 @@ HXTStatus hxtConstrainEdgesNotInTriangles(HXTMesh* mesh, uint64_t* missing) {
     if(status==HXT_STATUS_OK) {
       // copy the edges from mesh->lines in the edgeKey struct array
       #pragma omp for
-      for (uint64_t i=0; i<numLines; i++) {
-        uint32_t p0 = lines[2*i+0];
-        uint32_t p1 = lines[2*i+1];
+      for (uint64_t l=0; l<mesh->lines.num; l++) {
+        uint32_t p0 = mesh->lines.node[2*l+0];
+        uint32_t p1 = mesh->lines.node[2*l+1];
 
         if(p0<p1) {
-          edgeKey[i].v[0] = p0*n*2 + p1*2 + 0;
+          edgeKey[l].v[0] = p0*n + p1;
+          edgeKey[l].v[1] = 2*l;
+        }
+        else if(p0>p1){
+          edgeKey[l].v[0] = p1*n + p0;
+          edgeKey[l].v[1] = 2*l;
         }
         else {
-          edgeKey[i].v[0] = p1*n*2 + p0*2 + 0;
+          edgeKey[l].v[0] = p0*n + p0; // the line begins and ends at the same point...
+          edgeKey[l].v[1] = 1;
+          lines2TetMap[l] = HXT_NO_ADJACENT;
         }
 
-        edgeKey[i].v[1] = HXT_NO_ADJACENT; // this lines does not come from any tetrahedra
+        
       }
 
       localNum = numEdges[threadID];
       #pragma omp for schedule(static)
-      for (uint64_t i=0; i<mesh->tetrahedra.num; i++) {
-        for (int j=0; j<4; j++) {
-          for (int k=j+1; k<4; k++) {
-            if(isEdgeConstrained(mesh, i, j, k)){
-              uint32_t p0 = mesh->tetrahedra.node[4*i + nodeArray[j][k]];
-              uint32_t p1 = mesh->tetrahedra.node[4*i + nodeArray[k][j]];
-
-              if(p0==HXT_GHOST_VERTEX || p1==HXT_GHOST_VERTEX)
-              {
-                HXT_ERROR_MSG(HXT_STATUS_ERROR, "There were contrained edges before this function was called.");
-                exit(EXIT_FAILURE);
-              }
-
-              if(p0<p1){
-                edgeKey[localNum].v[0] = p0*n*2 + p1*2 + 1;
-              }
-              else {
-                edgeKey[localNum].v[0] = p1*n*2 + p0*2 + 1;
-              }
-              edgeKey[localNum].v[1] = 6*i+facetToNumber[j][k];
-
-              localNum++;
+      for (uint64_t tet=0; tet<mesh->tetrahedra.num; tet++) {
+        for (unsigned edge=0; edge<6; edge++) {
+          if(edgeFlag[tet] & (1U<<edge)){
+            uint32_t p0, p1;
+            {
+              unsigned n0, n1;
+              getNodesFromEdge(edge, &n0, &n1);
+              p0 = mesh->tetrahedra.node[4*tet + n0];
+              p1 = mesh->tetrahedra.node[4*tet + n1];
+            }
 
-              unconstrainEdge(mesh, i, j, k);
+            if(p0<p1){
+              edgeKey[localNum].v[0] = p0*n + p1;
             }
+            else {
+              edgeKey[localNum].v[0] = p1*n + p0;
+            }
+            edgeKey[localNum].v[1] = 2*(6*tet+edge)+1;
+
+            localNum++;
           }
         }
       }
     }
   }
 
-  HXT_CHECK( hxtAlignedFree(&lines) );
-
-  HXT_CHECK(status);
+  HXT_CHECK( hxtAlignedFree(&edgeFlag) );
+  HXT_CHECK( status );
 
-  group2_sort_v0(edgeKey, numEdgesTotal, (n-1)*n*2-1);
+  HXT_CHECK( group2_sort_v0(edgeKey, numEdgesTotal, n*(n-1)-1) );
 
   #pragma omp parallel
   {
     const int threadID = omp_get_thread_num();
-    numEdges[threadID] = 0;
+    uint64_t localNum = 0;
 
     #pragma omp for
     for (uint64_t i=0; i<numEdgesTotal; i++) {
-      if(edgeKey[i].v[0]%2==0 && (i==numEdgesTotal-1 || edgeKey[i].v[0]/2!=edgeKey[i+1].v[0]/2))
-        numEdges[threadID]++; // the edge is missing          
+      if(edgeKey[i].v[1]%2==0) {
+        if(i!=numEdgesTotal-1 && edgeKey[i].v[0]==edgeKey[i+1].v[0]) {
+        #ifndef NDEBUG
+          if(edgeKey[i+1].v[1]%2==0) {
+            HXT_ERROR_MSG(HXT_STATUS_ERROR, "Duplicated line in mesh->lines (%lu & %lu)\n"
+                                           "\tThis case is not handled in Release mode, FIX IT !!",
+                                           edgeKey[i].v[1]/2, edgeKey[i+1].v[1]/2);
+            exit(EXIT_FAILURE);
+          }
+          else
+        #endif
+          {
+            lines2TetMap[edgeKey[i].v[1]/2] = edgeKey[i+1].v[1]/2;
+          }
+        }
+        else {
+          lines2TetMap[edgeKey[i].v[1]/2] = HXT_NO_ADJACENT;
+          localNum++;
+        }
+      }   
     }
 
+    numEdges[threadID] = localNum;
+
     #pragma omp barrier
     #pragma omp single
     {
@@ -335,85 +350,32 @@ HXTStatus hxtConstrainEdgesNotInTriangles(HXTMesh* mesh, uint64_t* missing) {
   }
 
   HXT_CHECK( hxtFree(&numEdges) );
-
-  if(*missing){
-    HXT_CHECK( hxtAlignedFree(&edgeKey) );
-    return HXT_STATUS_OK;
-  }
-
-  char* edgeFlag;
-  HXT_CHECK( hxtAlignedMalloc(&edgeFlag, 6*mesh->tetrahedra.num*sizeof(char)) );
-  memset(edgeFlag, 0, 6*mesh->tetrahedra.num*sizeof(char));
-
-  #pragma omp parallel for
-  for (uint64_t i=1; i<numEdgesTotal; i++) {
-    if(edgeKey[i-1].v[0]%2==0) {
-
-      // turn around the edge to set edgeFlag of all tetrahedra to 1...
-      uint64_t firstTet = edgeKey[i].v[1]/6;
-      uint64_t curTet = firstTet;
-      int edgeNumber = edgeKey[i].v[1]%6;
-      int in_facet = numberToFacetMin[edgeNumber];
-      int out_facet = numberToFacetMax[edgeNumber];
-
-      do
-      {
-      	edgeFlag[6*curTet + facetToNumber[in_facet][out_facet]] = 1;
-
-      	uint32_t newV = mesh->tetrahedra.node[4*curTet + in_facet];
-
-        // go into the neighbor through out_facet
-        uint64_t neigh = mesh->tetrahedra.neigh[4*curTet + out_facet];
-        curTet = neigh/4;
-        in_facet = neigh%4;
-        uint32_t* nodes = mesh->tetrahedra.node + 4*curTet;
-        for (out_facet=0; out_facet<3; out_facet++)
-          if(nodes[out_facet]==newV)
-            break;
-
-      } while (curTet!=firstTet);
-    }
-  }
-
   HXT_CHECK( hxtAlignedFree(&edgeKey) );
-
-  #pragma omp parallel for
-  for (uint64_t i=0; i<mesh->tetrahedra.num; i++) {
-    for (int j=0; j<6; j++) {
-      if(edgeFlag[6*i+j])
-        constrainEdge(mesh, i, numberToFacetMin[j], numberToFacetMax[j]);
-    }
-  }
-
-
-  HXT_CHECK( hxtAlignedFree(&edgeFlag) );
-  
-
   return HXT_STATUS_OK;
 }
 
 
-/********************************************************************************
- *  report the number of missing triangles or set flag for constrained triangle *
- ********************************************************************************/
-// every tetrahedra must have a neighbor HXT_NO_ADJACENT is NOT permitted !!!
-HXTStatus hxtConstrainTriangles(HXTMesh* mesh, uint64_t* missing) {
-  HXTGroup2 *triKey = NULL;
-  HXTGroup3* pairKey = NULL;
+/**************************************************************************************
+ *   Triangles --> Tetrahedra   MAPPING
+ *************************************************************************************/
+HXTStatus hxtGetTri2TetMap(HXTMesh* mesh, uint64_t* tri2TetMap, uint64_t* missing)
+{
+  HXT_ASSERT(tri2TetMap!=NULL);
 
-  if(mesh->triangles.num==0) {
-    *missing = 0;
+  if(mesh->triangles.num==0)
     return HXT_STATUS_OK;
-  }
-
-  uint64_t *numTriangles;
-  int maxThreads = omp_get_max_threads();
-  HXT_CHECK( hxtMalloc(&numTriangles, maxThreads*sizeof(uint64_t)) );
 
   const uint64_t n = mesh->vertices.num;
+  const int maxThreads = omp_get_max_threads();
+  HXTStatus status = HXT_STATUS_OK;
   uint64_t numTrianglesTotal;
 
-  HXTStatus status = HXT_STATUS_OK;
+
+  HXTGroup2 *triKey = NULL;
+  HXTGroup3* pairKey = NULL;
+  uint64_t *numTriangles;
+  HXT_CHECK( hxtMalloc(&numTriangles, maxThreads*sizeof(uint64_t)) );
+  
 
 #ifndef NDEBUG
   uint64_t nGhosts = 0;
@@ -424,7 +386,7 @@ HXTStatus hxtConstrainTriangles(HXTMesh* mesh, uint64_t* missing) {
       nGhosts++;
   }
 
-  if(n <= 2097152){
+  if(n <= 2642246){
     HXT_CHECK( hxtAlignedMalloc(&triKey, (2*mesh->tetrahedra.num-3*nGhosts/2+mesh->triangles.num)*sizeof(HXTGroup2)) );
   }
   else{
@@ -451,6 +413,7 @@ HXTStatus hxtConstrainTriangles(HXTMesh* mesh, uint64_t* missing) {
     }
 
     numTriangles[threadID] = localNum;
+
     #pragma omp barrier
     #pragma omp single
     {
@@ -462,33 +425,33 @@ HXTStatus hxtConstrainTriangles(HXTMesh* mesh, uint64_t* missing) {
         numTrianglesTotal = tsum;
       }
 
-#ifndef NDEBUG
+    #ifndef NDEBUG
       if(numTrianglesTotal!=2*mesh->tetrahedra.num-3*nGhosts/2+mesh->triangles.num){
         HXT_ERROR_MSG(HXT_STATUS_ERROR, "you should never go here... (%lu!=2*%lu+3*%lu/2",numTrianglesTotal-mesh->triangles.num,
                                                                                                 mesh->tetrahedra.num, nGhosts);
         exit(EXIT_FAILURE);
       }
-#else
-      if(n <= 2097152){
+    #else
+      if(n <= 2642246){
         status = hxtAlignedMalloc(&triKey, numTrianglesTotal*sizeof(HXTGroup2));
       }
       else{
         status = hxtAlignedMalloc(&pairKey, numTrianglesTotal*sizeof(HXTGroup3));
       }
-#endif
+    #endif
     }
 
     if(status==HXT_STATUS_OK) {
       // copy the triangles from mesh->triangles in the triKey struct array
-      if(n <= 2097152){
+      if(n <= 2642246){
         #pragma omp for
         for (uint64_t i=0; i<mesh->triangles.num; i++) {
           uint32_t v[3] = {mesh->triangles.node[3*i+0],
                            mesh->triangles.node[3*i+1],
                            mesh->triangles.node[3*i+2]};
           sort3ints(v);
-          triKey[i].v[1] = HXT_NO_ADJACENT; // this triangle does not come from any tetrahedra
-          triKey[i].v[0] = v[0]*(n-1)*n*2 + v[1]*n*2 + v[2]*2 + 0; // the lowest bit of triangles from mesh->triangles is unset
+          triKey[i].v[1] = 2*i;
+          triKey[i].v[0] = v[0]*(n-1)*n + v[1]*n + v[2];
         }
       }
       else{
@@ -498,15 +461,15 @@ HXTStatus hxtConstrainTriangles(HXTMesh* mesh, uint64_t* missing) {
                            mesh->triangles.node[3*i+1],
                            mesh->triangles.node[3*i+2]};
           sort3ints(v);
-          pairKey[i].v[2] = HXT_NO_ADJACENT; // this triangle does not come from any tetrahedra
+          pairKey[i].v[2] = 2*i;
           pairKey[i].v[1] = v[0]*(n-1) + v[1];
-          pairKey[i].v[0] = v[2]*2+0;
+          pairKey[i].v[0] = v[2];
         }
       }
 
       // add the triangle from the tetrahedral mesh to the triKey struct array
       localNum = numTriangles[threadID];
-      if(n <= 2097152){
+      if(n <= 2642246){
         #pragma omp for schedule(static)
         for (uint64_t i=0; i<mesh->tetrahedra.num; i++) {
           if(mesh->tetrahedra.node[4*i+3]!=HXT_GHOST_VERTEX){
@@ -518,9 +481,9 @@ HXTStatus hxtConstrainTriangles(HXTMesh* mesh, uint64_t* missing) {
                                  mesh->tetrahedra.node[4*i+((j+2)&3)],
                                  mesh->tetrahedra.node[4*i+((j+3)&3)]};
                 sort3ints(v);
-                triKey[localNum].v[1] = 4*i+j;
-                triKey[localNum].v[0] = v[0]*(n-1)*n*2 + v[1]*n*2 + v[2]*2 + 1; // max: 2(n-3)(n-1)n + 2(n-2)n + 2(n-1) + 1
-                                                                                //    = 2(n-2)(n-1)n-1
+                triKey[localNum].v[1] = 2*(4*i+j)+1;
+                triKey[localNum].v[0] = v[0]*(n-1)*n + v[1]*n + v[2]; // max: (n-3)(n-1)n + (n-2)n + (n-1)
+                                                                      //    = (n-2)(n-1)n - 1
                 localNum++;
               }
             }
@@ -539,9 +502,9 @@ HXTStatus hxtConstrainTriangles(HXTMesh* mesh, uint64_t* missing) {
                                  mesh->tetrahedra.node[4*i+((j+2)&3)],
                                  mesh->tetrahedra.node[4*i+((j+3)&3)]};
                 sort3ints(v);
-                pairKey[localNum].v[2] = 4*i+j;
+                pairKey[localNum].v[2] = 2*(4*i+j)+1;
                 pairKey[localNum].v[1] = v[0]*(n-1) + v[1]; // max: (n-3)(n-1) + (n-2) = (n-2)(n-1) - 1
-                pairKey[localNum].v[0] = v[2]*2+1;          // max: (n-1)2+1 = 2n-1
+                pairKey[localNum].v[0] = v[2];              // max: n-1
 
                 localNum++;
               }
@@ -554,92 +517,195 @@ HXTStatus hxtConstrainTriangles(HXTMesh* mesh, uint64_t* missing) {
 
   HXT_CHECK(status);
 
-  if(n <= 2097152){
+  if(n <= 2642246){
     // sort triKey
-    group2_sort_v0(triKey, numTrianglesTotal, 2*(n-2)*(n-1)*n-1);
+    HXT_CHECK( group2_sort_v0(triKey, numTrianglesTotal, (n-2)*(n-1)*n-1) );
   }
   else{
-    group3_sort_v0(pairKey, numTrianglesTotal, 2*n-1);
-    group3_sort_v1(pairKey, numTrianglesTotal, (n-2)*(n-1)-1);
+    HXT_CHECK( group3_sort_v0(pairKey, numTrianglesTotal, n-1) );
+    HXT_CHECK( group3_sort_v1(pairKey, numTrianglesTotal, (n-2)*(n-1)-1) );
   }
 
+
   #pragma omp parallel
   {
     const int threadID = omp_get_thread_num();
-    numTriangles[threadID] = 0;
+    uint64_t localNum = 0;
 
-    if(n <= 2097152){
+    if(n <= 2642246){
       #pragma omp for
       for (uint64_t i=0; i<numTrianglesTotal; i++) {
-        if(triKey[i].v[0]%2==0 && (i==numTrianglesTotal-1 || triKey[i].v[0]/2!=triKey[i+1].v[0]/2))
-            numTriangles[threadID]++; // the triangle is missing
+        if(triKey[i].v[1]%2==0) {
+          if(i!=numTrianglesTotal-1 && triKey[i].v[0]==triKey[i+1].v[0]) {
+          #ifndef NDEBUG
+            if(triKey[i+1].v[1]%2==0) {
+              HXT_ERROR_MSG(HXT_STATUS_ERROR, "Duplicated triangle in mesh->triangles (%lu & %lu)\n"
+                                             "\tThis case is not handled in Release mode, FIX IT !!",
+                                             triKey[i].v[1]/2, triKey[i+1].v[1]/2);
+              exit(EXIT_FAILURE);
+            }
+            else
+          #endif
+            {
+              tri2TetMap[triKey[i].v[1]/2] = triKey[i+1].v[1]/2;
+            }
+          }
+          else /* the triangle is missing */ {
+            localNum++;
+            tri2TetMap[triKey[i].v[1]/2] = HXT_NO_ADJACENT;
+          }
+        }
       }
     }
     else{
       #pragma omp for
       for (uint64_t i=0; i<numTrianglesTotal; i++) {
-        if(pairKey[i].v[0]%2==0 && (i==numTrianglesTotal-1 || pairKey[i].v[0]/2!=pairKey[i+1].v[0]/2 || pairKey[i].v[1]!=pairKey[i+1].v[1]))
-            numTriangles[threadID]++;
+        if(pairKey[i].v[2]%2==0) {
+          if(i!=numTrianglesTotal-1 && pairKey[i].v[0]==pairKey[i+1].v[0] && pairKey[i].v[1]==pairKey[i+1].v[1]) {
+          #ifndef NDEBUG
+            if(pairKey[i+1].v[2]%2==0) {
+              HXT_ERROR_MSG(HXT_STATUS_ERROR, "Duplicated triangle in mesh->triangles (%lu & %lu)\n"
+                                             "\tThis case is not handled in Release mode, FIX IT !!",
+                                             pairKey[i].v[2]/2, pairKey[i+1].v[2]/2);
+              exit(EXIT_FAILURE);
+            }
+            else
+          #endif
+            {
+              tri2TetMap[pairKey[i].v[2]/2] = pairKey[i+1].v[2]/2;
+            }
+          }
+          else /* the triangle is missing */ {
+            localNum++;
+            tri2TetMap[pairKey[i].v[2]/2] = HXT_NO_ADJACENT;
+          }
+        }
       }
     }
 
+    numTriangles[threadID] = localNum;
+
     #pragma omp barrier
     #pragma omp single
     {
       int nthreads = omp_get_num_threads();
       *missing = 0;
       for (int i=0; i<nthreads; i++) {
-        *missing+=numTriangles[i];
+        *missing += numTriangles[i];
       }
     }
   }
 
   HXT_CHECK( hxtFree(&numTriangles) );
+  HXT_CHECK( hxtAlignedFree(&triKey) );
+  HXT_CHECK( hxtAlignedFree(&pairKey) );
 
-  if(*missing){
-    HXT_CHECK( hxtAlignedFree(&triKey) );
-    HXT_CHECK( hxtAlignedFree(&pairKey) );
-    return HXT_STATUS_OK;
+  return HXT_STATUS_OK;
+}
+
+
+/**************************************************************************************
+ *   Constrain facets of tetrahedron if it is in tri2TetMap
+ *************************************************************************************/
+HXTStatus hxtConstrainTriangles(HXTMesh* mesh, uint64_t* tri2TetMap)
+{
+  HXT_ASSERT(tri2TetMap!=NULL);
+  HXT_ASSERT(mesh!=NULL);
+#ifdef DEBUG
+  for (uint64_t i=0; i<mesh->triangles.num; i++) {
+    if(tri2TetMap[i]==HXT_NO_ADJACENT)
+      return HXT_ERROR_MSG(HXT_STATUS_ERROR, "There are missing mappings in tri2TetMap");
   }
+#endif
 
   char* faceFlag;
   HXT_CHECK( hxtAlignedMalloc(&faceFlag, 4*mesh->tetrahedra.num*sizeof(char)) );
   memset(faceFlag, 0, 4*mesh->tetrahedra.num*sizeof(char));
 
-  if(n <= 2097152){
-    #pragma omp parallel for
-    for (uint64_t i=1; i<numTrianglesTotal; i++) {
-      if(triKey[i-1].v[0]%2==0 ) {
-        faceFlag[triKey[i].v[1]] = 1;
-        if(mesh->tetrahedra.neigh[triKey[i].v[1]]!=HXT_NO_ADJACENT)
-          faceFlag[mesh->tetrahedra.neigh[triKey[i].v[1]]] = 1;
-      }
-    }
+  // fill faceFlag
+  #pragma omp parallel for
+  for (uint64_t i=0; i<mesh->triangles.num; i++) {
+    faceFlag[tri2TetMap[i]] = 1;
+    faceFlag[mesh->tetrahedra.neigh[tri2TetMap[i]]] = 1;
   }
-  else{
-    #pragma omp parallel for
-    for (uint64_t i=1; i<numTrianglesTotal; i++) {     
-      if(pairKey[i-1].v[0]%2==0) {
-        faceFlag[pairKey[i].v[2]] = 1;
-        if(mesh->tetrahedra.neigh[pairKey[i].v[2]]!=HXT_NO_ADJACENT)
-          faceFlag[mesh->tetrahedra.neigh[pairKey[i].v[2]]] = 1;
+
+  // constrain corresponding flag, tetrahedron by tetrahedron to avoid race conditions
+  #pragma omp parallel for
+  for (uint64_t i=0; i<mesh->tetrahedra.num; i++) {
+    for (uint64_t j=0; j<4; j++) {
+      if(faceFlag[4*i+j]) {
+        setFacetConstraint(mesh, i, j);
       }
     }
   }
 
-  HXT_CHECK( hxtAlignedFree(&triKey) );
-  HXT_CHECK( hxtAlignedFree(&pairKey) );
+  HXT_CHECK( hxtAlignedFree(&faceFlag) );
+
+  return HXT_STATUS_OK;
+}
+
+
+
+/**************************************************************************************
+ *   Constrain edge of tetrahedron if it is in lines2TetMap but not in lines2TriMap
+ *************************************************************************************/
+HXTStatus hxtConstrainLinesNotInTriangles(HXTMesh* mesh, uint64_t* lines2TetMap, uint64_t* lines2TriMap)
+{
+  HXT_ASSERT(lines2TetMap!=NULL);
+  HXT_ASSERT(lines2TriMap!=NULL);
+  HXT_ASSERT(mesh!=NULL);
+
+#ifdef DEBUG
+  for (uint64_t i=0; i<mesh->lines.num; i++) {
+    if(lines2TetMap[i]==HXT_NO_ADJACENT && mesh->lines.node[2*i]!=mesh->lines.node[2*i+1])
+      return HXT_ERROR_MSG(HXT_STATUS_ERROR, "There are missing mappings in lines2TetMap");
+  }
+#endif
+
+  char* edgeFlag;
+  HXT_CHECK( hxtAlignedMalloc(&edgeFlag, 6*mesh->tetrahedra.num*sizeof(char)) );
+  memset(edgeFlag, 0, 6*mesh->tetrahedra.num*sizeof(char));
+
+  #pragma omp parallel for
+  for (uint64_t i=0; i<mesh->lines.num; i++) {
+    if(lines2TriMap[i]==HXT_NO_ADJACENT && lines2TetMap[i]!=HXT_NO_ADJACENT) {
+      // turn around the edge to set edgeFlag of all tetrahedra to 1...
+      uint64_t firstTet = lines2TetMap[i]/6;
+      uint64_t curTet = firstTet;
+      int edge = lines2TetMap[i]%6;
+
+      unsigned in_facet, out_facet;
+      getFacetsFromEdge(edge, &in_facet, &out_facet);
+
+      do
+      {
+        edgeFlag[6*curTet + getEdgeFromFacets(in_facet, out_facet)] = 1;
 
+        uint32_t newV = mesh->tetrahedra.node[4*curTet + in_facet];
+
+        // go into the neighbor through out_facet
+        uint64_t neigh = mesh->tetrahedra.neigh[4*curTet + out_facet];
+        curTet = neigh/4;
+        in_facet = neigh%4;
+        uint32_t* nodes = mesh->tetrahedra.node + 4*curTet;
+        for (out_facet=0; out_facet<3; out_facet++)
+          if(nodes[out_facet]==newV)
+            break;
+
+      } while (curTet!=firstTet);
+    }
+  }
 
   #pragma omp parallel for
   for (uint64_t i=0; i<mesh->tetrahedra.num; i++) {
-    for (int j=0; j<4; j++) {
-      if(faceFlag[4*i+j])
-        constrainFacet(mesh, 4*i+j);
+    for (int j=0; j<6; j++) {
+      if(edgeFlag[6*i+j])
+        setEdgeConstraint(mesh, i, j);
     }
   }
 
-  HXT_CHECK( hxtAlignedFree(&faceFlag) );
+
+  HXT_CHECK( hxtAlignedFree(&edgeFlag) );
 
   return HXT_STATUS_OK;
 }
\ No newline at end of file
diff --git a/contrib/hxt/hxt_tetFlag.h b/contrib/hxt/hxt_tetFlag.h
index 829df847ae5f629e9c6a531311fcc377f12b4fbe..a8aaa7ba823f53dfc177d59d7fdcc8dd9a66d40c 100644
--- a/contrib/hxt/hxt_tetFlag.h
+++ b/contrib/hxt/hxt_tetFlag.h
@@ -7,98 +7,217 @@ extern "C" {
 
 #include "hxt_mesh.h"
 
-// Verify if all facets are present in a tetrahedrization, missing is the number of missing facets
-// it then set tetrahedra.flag to account for the contrains on the facets
-HXTStatus hxtConstrainTriangles(HXTMesh* mesh, uint64_t* missing);
-HXTStatus hxtConstrainEdgesNotInTriangles(HXTMesh* mesh, uint64_t* missing);
-
-
-/* flag is a 16-bit number
+/* Get a mapping between triangles and tetrahedra
+ * tri2TetMap must be an array of size: `mesh->triangles.num`
+ *
+ * if *nbMissing = 0 at the end of function:
+ *    `tri2TetMap[i] = 4*tet+face | HXT_NO_ADJACENT` , where `4*tet+face`
+ *    is a facet of tetrahedron `tet` that corresponds
+ *    to the triangle `i` in mesh->triangles.
+ *    If there is no such tetrahedron (happens only if nbMissing!=0),
+ *    it is set to HXT_NO_ADJACENT.
+ *   
+ *    The tetrahedron on the other side is easily obtainable by doing
+ *    `mesh->tetrahedra.neigh[tri2TetMap[i]]`
+ */
+HXTStatus hxtGetTri2TetMap(HXTMesh* mesh, uint64_t* tri2TetMap, uint64_t* nbMissing);
+
+/* Same as above with lines and triangles: `lines2TriMap = 3*tri+edge | HXT_NO_ADJACENT`
+  (edge is 0,1 or 2, and it corresponds to the nodes 0-1, 1-2 and 2-0 of the triangles)
+  Lines that begin and end at the same point are not reported in nbMissing, but the mapping is
+  set to HXT_NO_ADJACENT nevertheless */
+HXTStatus hxtGetLines2TriMap(HXTMesh* mesh, uint64_t* lines2TriMap, uint64_t* nbMissing);
+
+/* Same as above with lines and tets: `lines2TetMap = 6*tet+edge | HXT_NO_ADJACENT`
+ (edge is a number between 0 & 5, explained below in "ANATOMY OF A TET.")
+   Lines that begin and end at the same point are not reported in nbMissing, but the mapping is
+  set to HXT_NO_ADJACENT nevertheless */
+HXTStatus hxtGetLines2TetMap(HXTMesh* mesh, uint64_t* lines2TetMap, uint64_t* nbMissing);
+
+
+/* Constrain facets of tetrahedra (set the right tetrahedra.flag)
+ * that correspond to a triangle.
+ * tri2TetMap is the array that comes from hxtGetTri2TetMap() */
+HXTStatus hxtConstrainTriangles(HXTMesh* mesh, uint64_t* tri2TetMap);
+
+/* Constrain edges of tetrahedra (set the right tetrahedra.flag)
+ * that correspond to a line that isn't in any triangle.
+ * lines2TetMap is the array that comes from hxtGetLines2TetMap()
+ * lines2TriMap is the array that comes from hxtGetLines2TriMap() */
+HXTStatus hxtConstrainLinesNotInTriangles(HXTMesh* mesh, uint64_t* lines2TetMap, uint64_t* lines2TriMap);
+
+
+/******************************
+ *  ANATOMY OF A TETRAHEDRON  *
+ ******************************
+
+      node
+       0
+       |\`-_
+       | \  `-_                   facet
+       |  \    `-_                 1
+       |   \      `2_              |
+       |    0        `-_          /
+       |     \   facet  `-_   <--'
+       |      \    2       `-_
+       1       \              `-_
+       | facet  node_____4_______=> node
+       |   3    /1              _-'  3
+       |       /             _-'
+       |      /  facet    _-'
+       |     /    0    _-'
+       |    3       _5'
+       |   /     _-'
+       |  /   _-'
+       | / _-'
+       |/-'
+      node
+       2
+
+  facet `i` is the facet that does not contain node `i`
+  edge 0 contains nodes 0 & 1
+  edge 1 contains nodes 0 & 2
+  edge 2 contains nodes 0 & 3
+  edge 3 contains nodes 1 & 2
+  edge 4 contains nodes 1 & 3
+  edge 5 contains nodes 2 & 3
+
+  - - - - - - - - - - - - - - - - - - - - - - - - - - - 
+
+ ********************************
+ *  mesh->tetrahedra.flag[tet]  *
+ ********************************
+ *  flag is a 16-bit number
  *
- *  0  facet 0 is contrained
- *  1  edge between facet 0 and facet 1 is contrained
- *  2  edge betwwen facet 0 and facet 2 is contrained
- *  3  edge betwwen facet 0 and facet 3 is contrained
- *  4  facet 1 is contrained
- *  5  
- *  6  edge betwwen facet 1 and facet 2 is contrained
- *  7  edge betwwen facet 1 and facet 3 is contrained
- *  8  facet 2 is contrained
- *  9 
- *  10 
- *  11 edge between facet 2 and facet 3 is contrained
- *  12 facet 3 is contrained
- *  13
- *  14 the tetrahedron has already been processed (a vertex was already inserted inside it and if failed)
- *  15 the tetrahedron is deleted
+ *  0  edge between facet 0 and facet 1 is constrained
+ *  1  edge between facet 0 and facet 2 is constrained
+ *  2  edge between facet 0 and facet 3 is constrained
+ *  3  edge between facet 1 and facet 2 is constrained
+ *  4  edge between facet 1 and facet 3 is constrained
+ *  5  edge between facet 2 and facet 3 is constrained
+ *  6  the tetrahedron is deleted
+ *  7  the tetrahedron has already been processed (a vertex was already inserted inside it and it failed)
+ *  8  facet 0 is constrained
+ *  9  facet 1 is constrained
+ *  10 facet 2 is constrained
+ *  11 facet 3 is constrained
+ *  12 -unused-
+ *  13 -unused-
+ *  14 -unused-
+ *  15 -unused-
  */
 
 
+/***************************
+ * combined operations
+ ***************************/
+
 static inline uint16_t isAnyEdgeConstrained(HXTMesh* mesh, uint64_t tet) {
-  return mesh->tetrahedra.flag[tet] & UINT16_C(0x8CE);
+  return mesh->tetrahedra.flag[tet] & UINT16_C(0x3F);
 }
 
-static inline uint16_t isAnyEdgeOfFacetConstrained(HXTMesh* mesh, uint64_t facet) {
-  static uint16_t mask[4] = {0xE, 0xC2, 0x844, 0x888};
-  return mesh->tetrahedra.flag[facet/4] & mask[facet%4];
+static inline uint16_t isAnyFacetConstrained(HXTMesh* mesh, uint64_t tet) {
+  return mesh->tetrahedra.flag[tet] & UINT16_C(0xF00);
 }
 
-// static inline uint16_t isAnyThingConstrained(HXTMesh* mesh, uint64_t tet) {
-//   return mesh->tetrahedra.flag[tet] & 0x19DF;
-// }
+static inline uint16_t isAnyThingConstrained(HXTMesh* mesh, uint64_t tet) {
+  return mesh->tetrahedra.flag[tet] & UINT16_C(0xF3F);
+}
 
-static inline uint16_t isAnyFacetConstrained(HXTMesh* mesh, uint64_t tet) {
-  return mesh->tetrahedra.flag[tet] & UINT16_C(0x1111);
+
+/***************************
+ * edges operations
+ ***************************/
+static inline int getEdgeFromFacets(unsigned facet1, unsigned facet2) {
+  static const int facets2EdgeNum[4][4] = {{-1, 0, 1, 2},
+                                           { 0,-1, 3, 4},
+                                           { 1, 3,-1, 5},
+                                           { 2, 4, 5,-1}};
+  return facets2EdgeNum[facet1][facet2];
+}
+
+static inline void getFacetsFromEdge(int edgeNum, unsigned* facetMin, unsigned* facetMax) {
+  static const unsigned edgeNum2FacetMin[6] = { 0, 0, 0, 1, 1, 2};
+  static const unsigned edgeNum2FacetMax[6] = { 1, 2, 3, 2, 3, 3};
+  *facetMin = edgeNum2FacetMin[edgeNum];
+  *facetMax = edgeNum2FacetMax[edgeNum];
 }
 
-static inline uint16_t isEdgeConstrained(HXTMesh* mesh, uint64_t tet, unsigned facetmin, unsigned facetmax) {
-  return mesh->tetrahedra.flag[tet] & (1U<<(facetmin*4+facetmax));
+static inline void getNodesFromEdge(int edgeNum, unsigned* nodeMin, unsigned* nodeMax) {
+  getFacetsFromEdge(5-edgeNum, nodeMin, nodeMax);
 }
 
-static inline uint16_t isEdgeConstrainedSafe(HXTMesh* mesh, uint64_t tet, unsigned facet1, unsigned facet2) {
-  return mesh->tetrahedra.flag[tet] & (1U<<(facet1<facet2?facet1*4+facet2:facet2*4+facet1));
+static inline uint16_t getEdgeConstraint(HXTMesh* mesh, uint64_t tet, int edgeNum) {
+  return mesh->tetrahedra.flag[tet] & (1U<<edgeNum);
 }
 
-static inline void constrainEdge(HXTMesh* mesh, uint64_t tet, unsigned facetmin, unsigned facetmax) {
-  mesh->tetrahedra.flag[tet] |= (1U<<(facetmin*4+facetmax));
+static inline void setEdgeConstraint(HXTMesh* mesh, uint64_t tet, int edgeNum) {
+  mesh->tetrahedra.flag[tet] |= (1U<<edgeNum);
 }
 
-static inline void unconstrainEdge(HXTMesh* mesh, uint64_t tet, unsigned facetmin, unsigned facetmax) {
-  mesh->tetrahedra.flag[tet] &= ~(1U<<(facetmin*4+facetmax));
+static inline void unsetEdgeConstraint(HXTMesh* mesh, uint64_t tet, int edgeNum) {
+  mesh->tetrahedra.flag[tet] &= ~(1U<<edgeNum);
 }
 
-// here facet = 4*tet + facett
-static inline uint16_t isFacetConstrained(HXTMesh* mesh, uint64_t facet) {
-  return mesh->tetrahedra.flag[facet/4] & (1U<<(facet%4*4));
+
+/***************************
+ * facets operations
+ ***************************/
+static inline unsigned getNode0FromFacet(unsigned facet) {
+  return (facet+1)&3;
+}
+
+static inline unsigned getNode1FromFacet(unsigned facet) {
+  return (facet+3)&2;
 }
 
-static inline void constrainFacet(HXTMesh* mesh, uint64_t facet) {
-  mesh->tetrahedra.flag[facet/4] |= (1U<<(facet%4*4));
+static inline unsigned getNode2FromFacet(unsigned facet) {
+  return (facet&2)^3;
 }
 
-static inline uint16_t isTetDeleted(HXTMesh* mesh, uint64_t tet) {
-  return mesh->tetrahedra.flag[tet] & (1U<<15);
+static inline uint16_t getFacetConstraint(HXTMesh* mesh, uint64_t tet, unsigned facet) {
+  return mesh->tetrahedra.flag[tet] & (1U<<(facet+8));
 }
 
-static inline void markTetAsDeleted(HXTMesh* mesh, uint64_t tet) {
-  mesh->tetrahedra.flag[tet] |= (1U<<15);
+static inline void setFacetConstraint(HXTMesh* mesh, uint64_t tet, unsigned facet) {
+  mesh->tetrahedra.flag[tet] |= (1U<<(facet+8));
 }
 
-static inline void unmarkTetAsDeleted(HXTMesh* mesh, uint64_t tet) {
-  mesh->tetrahedra.flag[tet] &= ~(1U<<15);
+static inline void unsetFacetConstraint(HXTMesh* mesh, uint64_t tet, unsigned facet) {
+  mesh->tetrahedra.flag[tet] &= ~(1U<<(facet+8));
+}
+
+
+/***************************
+ * deleted flag operations
+ ***************************/
+static inline uint16_t getDeletedFlag(HXTMesh* mesh, uint64_t tet) {
+  return mesh->tetrahedra.flag[tet] & UINT16_C(0x40);
 }
 
-static inline uint16_t isTetProcessed(HXTMesh* mesh, uint64_t tet) {
-  return mesh->tetrahedra.flag[tet] & (1U<<14);
+static inline void setDeletedFlag(HXTMesh* mesh, uint64_t tet) {
+  mesh->tetrahedra.flag[tet] |= UINT16_C(0x40);
+}
+
+static inline void unsetDeletedFlag(HXTMesh* mesh, uint64_t tet) {
+  mesh->tetrahedra.flag[tet] &= ~UINT16_C(0x40);
+}
+
+
+/***************************
+ * processed flag operations
+ ***************************/
+static inline uint16_t getProcessedFlag(HXTMesh* mesh, uint64_t tet) {
+  return mesh->tetrahedra.flag[tet] & UINT16_C(0x80);
 }
 
-static inline void markTetAsProcessed(HXTMesh* mesh, uint64_t tet) {
-  mesh->tetrahedra.flag[tet] |= (1U<<14);
+static inline void setProcessedFlag(HXTMesh* mesh, uint64_t tet) {
+  mesh->tetrahedra.flag[tet] |= UINT16_C(0x80);
 }
 
 
-static inline void unmarkTetAsProcessed(HXTMesh* mesh, uint64_t tet) {
-  mesh->tetrahedra.flag[tet] &= ~(1U<<14);
+static inline void unsetProcessedFlag(HXTMesh* mesh, uint64_t tet) {
+  mesh->tetrahedra.flag[tet] &= ~UINT16_C(0x80);
 }
 
 
diff --git a/contrib/hxt/hxt_tetOpti.c b/contrib/hxt/hxt_tetOpti.c
index d5714451ead9d40c3536137152f3920538a761c2..9a914d232fdc268ba28e6cb86b88eb520ee2a748 100644
--- a/contrib/hxt/hxt_tetOpti.c
+++ b/contrib/hxt/hxt_tetOpti.c
@@ -25,13 +25,13 @@ An oriented edge {up,down} is described by its 'in' and 'out' facets:
        v_up
        |\`-_
        | \  `-_ 
-       |  \    `-_                out_facet
+       |  \    `-_                in_facet
        |   \      `-_              |
        |    \   up   `-_          /
        |     \  facet   `-_   <--'
 our    |      \            `-_
-edge   | in    \              `-_
------> | facet  \v_in___________`>v_out
+edge   | out   \              `-_
+-----> | facet  \v_out__________`>v_in
        |        /               _-'
        |       /             _-'
        |      /  down     _-'
@@ -43,10 +43,10 @@ edge   | in    \              `-_
        |/-'
       v_down
         
-    _-'
+    _->
    /
    \    we scan tetrahedra in a counterclockwise order when viewed from up
-    `-__->
+    `-__-'
 
 
  We keep the numbering of facets and the orientation of tetrahedra as before so:
@@ -67,7 +67,7 @@ in_f=0 out_f=1 up_f=2 down_f=3    in_f=0 out_f=2 up_f=3 down_f=1     in_f=0 out_
        |  \    `-_                      |  \    `-_                      |  \    `-_           
        |   \      `-_                   |   \      `-_                   |   \      `-_        
        |    \        `-_                |    \        `-_                |    \        `-_     
-       |     \v_1_______`>v_0           |     \v_2_______`>v_0           |     \v_3_______`>v_0
+       |     \v_0_______`>v_1           |     \v_0_______`>v_2           |     \v_0_______`>v_3
        |     /         _-'              |     /         _-'              |     /         _-'   
        |    /       _-'                 |    /       _-'                 |    /       _-'      
        |   /     _-'                    |   /     _-'                    |   /     _-'         
@@ -84,7 +84,7 @@ in_f=1 out_f=2 up_f=0 down_f=3    in_f=1 out_f=0 up_f=3 down_f=2     in_f=1 out_
        |  \    `-_                      |  \    `-_                      |  \    `-_           
        |   \      `-_                   |   \      `-_                   |   \      `-_        
        |    \        `-_                |    \        `-_                |    \        `-_     
-       |     \v_2_______`>v_1           |     \v_0_______`>v_1           |     \v_3_______`>v_1
+       |     \v_1_______`>v_2           |     \v_1_______`>v_0           |     \v_1_______`>v_3
        |     /         _-'              |     /         _-'              |     /         _-'   
        |    /       _-'                 |    /       _-'                 |    /       _-'      
        |   /     _-'                    |   /     _-'                    |   /     _-'         
@@ -101,7 +101,7 @@ in_f=2 out_f=3 up_f=0 down_f=1    in_f=2 out_f=0 up_f=1 down_f=3     in_f=2 out_
        |  \    `-_                      |  \    `-_                      |  \    `-_           
        |   \      `-_                   |   \      `-_                   |   \      `-_        
        |    \        `-_                |    \        `-_                |    \        `-_     
-       |     \v_3_______`>v_2           |     \v_0_______`>v_2           |     \v_1_______`>v_2
+       |     \v_3_______`>v_2           |     \v_2_______`>v_0           |     \v_2_______`>v_1
        |     /         _-'              |     /         _-'              |     /         _-'   
        |    /       _-'                 |    /       _-'                 |    /       _-'      
        |   /     _-'                    |   /     _-'                    |   /     _-'         
@@ -118,7 +118,7 @@ in_f=3 out_f=1 up_f=0 down_f=2    in_f=3 out_f=0 up_f=2 down_f=1     in_f=3 out_
        |  \    `-_                      |  \    `-_                      |  \    `-_           
        |   \      `-_                   |   \      `-_                   |   \      `-_        
        |    \        `-_                |    \        `-_                |    \        `-_     
-       |     \v_1_______`>v_3           |     \v_0_______`>v_3           |     \v_2_______`>v_3
+       |     \v_2_______`>v_1           |     \v_3_______`>v_0           |     \v_3_______`>v_2
        |     /         _-'              |     /         _-'              |     /         _-'   
        |    /       _-'                 |    /       _-'                 |    /       _-'      
        |   /     _-'                    |   /     _-'                    |   /     _-'         
@@ -147,12 +147,12 @@ const int _UP_FACET[4][4] = {{NV, 2, 3, 1},
  */
 
 typedef struct {
-  const unsigned char (*triangles)[3];              /* triangles array                                     */
-  const uint64_t *triangle_in_triangul;   /* in which triangulation is the triangles (bit array) */
-  const unsigned char (*triangulations)[5];         /* triangulation array                                 */
-  const signed char (*triangul_neigh)[20];        /* array to find adjacencies back                      */
-  const int num_triangles;                /* number of different triangles                       */
-  const int num_triangulations ;          /* number of different triangulations                  */
+  const unsigned char (*triangles)[3];      /* triangles array                                     */
+  const uint64_t *triangle_in_triangul;     /* in which triangulation is the triangles (bit array) */
+  const unsigned char (*triangulations)[5]; /* triangulation array                                 */
+  const signed char (*triangul_neigh)[20];  /* array to find adjacencies back                      */
+  const unsigned num_triangles;             /* number of different triangles                       */
+  const unsigned num_triangulations ;       /* number of different triangulations                  */
   // const int num_triangles_per_triangulation; /* simply the number of nodes +2...              */
 } SwapPattern ;
 
@@ -297,7 +297,7 @@ const signed char triangul_neigh7[][20] = {
 
 
 SwapPattern patterns[8] = {
-  {},{},{},
+  {0},{0},{0},
   {
     // pattern with 3 points around edge  | 3 tetra in, 2 tetra out
     .triangles = triangles3,
@@ -459,7 +459,7 @@ static inline HXTStatus createNewDeleted(HXTMesh* mesh, ThreadShared* shared, Th
   for (uint64_t i=0; i<needed; i++){
     local->deleted.tetID[local->deleted.num+i] = ntet+i;
     shared->quality2.values[ntet+i] = DBL_MAX;
-    // markTetAsDeleted(mesh, ntet+i);
+    // setDeletedFlag(mesh, ntet+i);
     // printf("adding tet %lu to deleted[%lu]\n", ntet+i, local->deleted.num+i);
   }
 
@@ -481,7 +481,7 @@ static HXTStatus threadShared_update(HXTMesh* mesh, ThreadShared* shared) {
     badTetsCount[threadID] = 0;
 
     #pragma omp for schedule(static)
-    for (int i=0; i<mesh->tetrahedra.num; i++) {
+    for (uint64_t i=0; i<mesh->tetrahedra.num; i++) {
       if(mesh->tetrahedra.colors[i]!=UINT16_MAX && shared->quality2.values[i]<shared->quality2.threshold)
         badTetsCount[threadID]++;
     }
@@ -685,152 +685,152 @@ static HXTStatus threadLocals_destroy(ThreadLocal** local, int nthreads) {
 }
 
 
-static HXTStatus flip2_3(HXTMesh* mesh, ThreadShared* shared, ThreadLocal* local,
-                         const uint64_t tet_0, uint16_t color, unsigned out_facet_0)
-{
-  if(isFacetConstrained(mesh, 4*tet_0 + out_facet_0) || mesh->tetrahedra.neigh[4*tet_0 + out_facet_0]==HXT_NO_ADJACENT)
-    return HXT_STATUS_INTERNAL;
+// static HXTStatus flip2_3(HXTMesh* mesh, ThreadShared* shared, ThreadLocal* local,
+//                          const uint64_t tet_0, uint16_t color, unsigned out_facet_0)
+// {
+//   if(getFacetConstraint(mesh, tet_0, out_facet_0) || mesh->tetrahedra.neigh[4*tet_0 + out_facet_0]==HXT_NO_ADJACENT)
+//     return HXT_STATUS_INTERNAL;
 
-  uint64_t neigh = mesh->tetrahedra.neigh[4*tet_0 + out_facet_0];
+//   uint64_t neigh = mesh->tetrahedra.neigh[4*tet_0 + out_facet_0];
   
 
-  uint64_t tet_1 = neigh/4;
-
-  HXT_ASSERT(tet_1<mesh->tetrahedra.num);
-  HXT_ASSERT(tet_1!=tet_0);
-  HXT_ASSERT(mesh->tetrahedra.neigh[neigh]==4*tet_0 + out_facet_0);
-  unsigned in_facet_1 = neigh%4;
-
-  { // check for conflict with other threads'partition
-    const uint64_t startDist = local->partition.startDist;
-    const uint64_t rel = local->partition.endDist - startDist;
-    HXTVertex* vertices = (HXTVertex*) mesh->vertices.coord;
-    if(vertexOutOfPartition(vertices, mesh->tetrahedra.node[4*tet_1 + 0], rel, startDist) +
-       vertexOutOfPartition(vertices, mesh->tetrahedra.node[4*tet_1 + 1], rel, startDist) +
-       vertexOutOfPartition(vertices, mesh->tetrahedra.node[4*tet_1 + 2], rel, startDist) +
-       vertexOutOfPartition(vertices, mesh->tetrahedra.node[4*tet_1 + 3], rel, startDist) > 1)
-       return HXT_STATUS_CONFLICT;
-   }
-
-  double worst_qual = shared->quality2.values[tet_0];
-
-  if(shared->quality2.values[tet_1]<worst_qual)
-    worst_qual = shared->quality2.values[tet_1];
-
-  local->cavity.v_up = mesh->tetrahedra.node[4*tet_0 + out_facet_0];
-  local->cavity.v_down = mesh->tetrahedra.node[4*tet_1 + in_facet_1];
-
-  // choose a reference facet in the tet_0
-  unsigned in_facet_0 = (out_facet_0+1)%4;
-
-  // adding vertices of the annulus:
-  local->cavity.annulus[0] = mesh->tetrahedra.node[4*tet_0 + UP_VERTEX(in_facet_0, out_facet_0)];
-  local->cavity.annulus[1] = mesh->tetrahedra.node[4*tet_0 + DOWN_VERTEX(in_facet_0, out_facet_0)];
-  local->cavity.annulus[2] = mesh->tetrahedra.node[4*tet_0 + OUT_VERTEX(in_facet_0, out_facet_0)];
-
-  local->cavity.neigh_up[0] = mesh->tetrahedra.neigh[4*tet_0 + in_facet_0];
-  local->cavity.neigh_up[1] = mesh->tetrahedra.neigh[4*tet_0 + DOWN_FACET(in_facet_0, out_facet_0)];
-  local->cavity.neigh_up[2] = mesh->tetrahedra.neigh[4*tet_0 + UP_FACET(in_facet_0, out_facet_0)];
-
-  uint32_t v = local->cavity.annulus[2];
-
-  // find one of the vertex in the tet_1
-  uint32_t* nodes = mesh->tetrahedra.node + 4*tet_1;
-
-  unsigned out_facet_1;
-  for (out_facet_1=0; out_facet_1<4; out_facet_1++)
-      if(nodes[out_facet_1]==v)
-        break;
-
-  // HXT_ASSERT(out_facet_1!=4);
-  // HXT_ASSERT(out_facet_1!=in_facet_1);
-  // HXT_ASSERT((isEdgeConstrainedSafe(mesh, tet_0, in_facet_0, out_facet_0)!=0)==(isEdgeConstrainedSafe(mesh, tet_1, in_facet_1, out_facet_1)!=0));
-  // HXT_ASSERT((isEdgeConstrainedSafe(mesh, tet_0, in_facet_0, out_facet_0)!=0)==(isEdgeConstrainedSafe(mesh, tet_1, in_facet_1, out_facet_1)!=0));
-  // HXT_ASSERT((isEdgeConstrainedSafe(mesh, tet_0, DOWN_FACET(in_facet_0, out_facet_0), out_facet_0)!=0)
-  //          ==(isEdgeConstrainedSafe(mesh, tet_1, DOWN_FACET(in_facet_1, out_facet_1), in_facet_1)!=0));
-  // HXT_ASSERT((isEdgeConstrainedSafe(mesh, tet_0, UP_FACET(in_facet_0, out_facet_0), out_facet_0)!=0)
-  //          ==(isEdgeConstrainedSafe(mesh, tet_1, UP_FACET(in_facet_1, out_facet_1), in_facet_1)!=0));
-  // HXT_ASSERT((isFacetConstrained(mesh, 4*tet_0 + out_facet_0)!=0)==(isFacetConstrained(mesh, 4*tet_1 + in_facet_1)!=0));
-  // if(mesh->tetrahedra.neigh[neigh]!=4*tet_0+out_facet_0)
-  //   return HXT_ERROR_MSG(HXT_STATUS_ERROR, "mesh->tetrahedra.neigh[%lu]==%lu instead of %lu",neigh,mesh->tetrahedra.neigh[neigh],4*tet_0+out_facet_0);
-
-  local->cavity.neigh_down[0] = mesh->tetrahedra.neigh[4*tet_1 + out_facet_1];
-  local->cavity.neigh_down[1] = mesh->tetrahedra.neigh[4*tet_1 + DOWN_FACET(in_facet_1, out_facet_1)];
-  local->cavity.neigh_down[2] = mesh->tetrahedra.neigh[4*tet_1 + UP_FACET(in_facet_1, out_facet_1)];
-
-  local->cavity.flag[0] = ((isFacetConstrained(mesh, 4*tet_0 + in_facet_0)!=0)<<12) +
-                          ((isFacetConstrained(mesh, 4*tet_1 + out_facet_1)!=0)<<8) +
-                          ((isEdgeConstrainedSafe(mesh, tet_1, out_facet_1, DOWN_FACET(in_facet_1, out_facet_1))!=0)<<2) +
-                          ((isEdgeConstrainedSafe(mesh, tet_0, in_facet_0, DOWN_FACET(in_facet_0, out_facet_0))!=0)<<3) +
-                          ((isEdgeConstrainedSafe(mesh, tet_1, out_facet_1, UP_FACET(in_facet_1, out_facet_1))!=0)<<6) +
-                          ((isEdgeConstrainedSafe(mesh, tet_0, in_facet_0, UP_FACET(in_facet_0, out_facet_0))!=0)<<7) +
-                          ((isEdgeConstrainedSafe(mesh, tet_0, in_facet_0, out_facet_0)!=0)<<11);
-
-  local->cavity.flag[1] = ((isFacetConstrained(mesh, 4*tet_0 + DOWN_FACET(in_facet_0, out_facet_0))!=0)<<12) +
-                          ((isFacetConstrained(mesh, 4*tet_1 + DOWN_FACET(in_facet_1, out_facet_1))!=0)<<8) +
-                          ((isEdgeConstrainedSafe(mesh, tet_1, DOWN_FACET(in_facet_1, out_facet_1), UP_FACET(in_facet_1, out_facet_1))!=0)<<2) +
-                          ((isEdgeConstrainedSafe(mesh, tet_0, DOWN_FACET(in_facet_0, out_facet_0), UP_FACET(in_facet_0, out_facet_0))!=0)<<3) +
-                          ((isEdgeConstrainedSafe(mesh, tet_1, DOWN_FACET(in_facet_1, out_facet_1), out_facet_1)!=0)<<6) +
-                          ((isEdgeConstrainedSafe(mesh, tet_0, DOWN_FACET(in_facet_0, out_facet_0), in_facet_0)!=0)<<7) +
-                          ((isEdgeConstrainedSafe(mesh, tet_0, DOWN_FACET(in_facet_0, out_facet_0), out_facet_0)!=0)<<11);
-
-  local->cavity.flag[2] = ((isFacetConstrained(mesh, 4*tet_0 + UP_FACET(in_facet_0, out_facet_0))!=0)<<12) +
-                          ((isFacetConstrained(mesh, 4*tet_1 + UP_FACET(in_facet_1, out_facet_1))!=0)<<8) +
-                          ((isEdgeConstrainedSafe(mesh, tet_1, UP_FACET(in_facet_1, out_facet_1), out_facet_1)!=0)<<2) +
-                          ((isEdgeConstrainedSafe(mesh, tet_0, UP_FACET(in_facet_0, out_facet_0), in_facet_0)!=0)<<3) +
-                          ((isEdgeConstrainedSafe(mesh, tet_1, UP_FACET(in_facet_1, out_facet_1), DOWN_FACET(in_facet_1, out_facet_1))!=0)<<6) +
-                          ((isEdgeConstrainedSafe(mesh, tet_0, UP_FACET(in_facet_0, out_facet_0), DOWN_FACET(in_facet_0, out_facet_0))!=0)<<7) +
-                          ((isEdgeConstrainedSafe(mesh, tet_0, UP_FACET(in_facet_0, out_facet_0), out_facet_0)!=0)<<11);
-
-  // now we have everything... we just need to test if the quality of tetrahedra would be good
-  double qual[3];
-  for (int i=0; i<3; i++) {
-    qual[i] = tetQuality2(mesh, local->cavity.annulus[i], local->cavity.annulus[(i+1)%3], local->cavity.v_up, local->cavity.v_down);
-    if(qual[i]<worst_qual){
-      return HXT_STATUS_INTERNAL;
-    }
-  }
-
-  if(local->deleted.num<1)
-    HXT_CHECK( createNewDeleted(mesh, shared, local) );
-
-  local->deleted.num--;
-  uint64_t newTet[3] = {tet_0, tet_1, local->deleted.tetID[local->deleted.num]};
-
-  for (unsigned i=0; i<3; i++) {
-    uint64_t curTet = newTet[i];
-    mesh->tetrahedra.node[4*curTet + 0] = local->cavity.annulus[i];
-    mesh->tetrahedra.node[4*curTet + 1] = local->cavity.annulus[(i+1)%3];
-    mesh->tetrahedra.node[4*curTet + 2] = local->cavity.v_up;
-    mesh->tetrahedra.node[4*curTet + 3] = local->cavity.v_down;
-
-    mesh->tetrahedra.neigh[4*curTet + 0] = 4*newTet[(i+1)%3] + 1;
-    // mesh->tetrahedra.neigh[4*curTet + 1] = 4*newTet[(i+4)%3] + 0;
-    mesh->tetrahedra.neigh[4*newTet[(i+1)%3] + 1] = 4*curTet + 0;
-
-    mesh->tetrahedra.neigh[4*curTet + 2] = local->cavity.neigh_down[i];
-    if( local->cavity.neigh_down[i]!=HXT_NO_ADJACENT)
-      mesh->tetrahedra.neigh[local->cavity.neigh_down[i]] = 4*curTet + 2;
-
-    mesh->tetrahedra.neigh[4*curTet + 3] = local->cavity.neigh_up[i];
-    if( local->cavity.neigh_up[i]!=HXT_NO_ADJACENT)
-      mesh->tetrahedra.neigh[local->cavity.neigh_up[i]] = 4*curTet + 3;
-
-    mesh->tetrahedra.colors[curTet] = color;
-
-    // TODO: verify flags are well done
-    mesh->tetrahedra.flag[curTet] = local->cavity.flag[i];
-    shared->quality2.values[curTet] = qual[i];
-  }
-
-  return HXT_STATUS_OK;
-  // return HXT_STATUS_INTERNAL;
-}
+//   uint64_t tet_1 = neigh/4;
+
+//   HXT_ASSERT(tet_1<mesh->tetrahedra.num);
+//   HXT_ASSERT(tet_1!=tet_0);
+//   HXT_ASSERT(mesh->tetrahedra.neigh[neigh]==4*tet_0 + out_facet_0);
+//   unsigned in_facet_1 = neigh%4;
+
+//   { // check for conflict with other threads'partition
+//     const uint64_t startDist = local->partition.startDist;
+//     const uint64_t rel = local->partition.endDist - startDist;
+//     HXTVertex* vertices = (HXTVertex*) mesh->vertices.coord;
+//     if(vertexOutOfPartition(vertices, mesh->tetrahedra.node[4*tet_1 + 0], rel, startDist) +
+//        vertexOutOfPartition(vertices, mesh->tetrahedra.node[4*tet_1 + 1], rel, startDist) +
+//        vertexOutOfPartition(vertices, mesh->tetrahedra.node[4*tet_1 + 2], rel, startDist) +
+//        vertexOutOfPartition(vertices, mesh->tetrahedra.node[4*tet_1 + 3], rel, startDist) > 1)
+//        return HXT_STATUS_CONFLICT;
+//    }
+
+//   double worst_qual = shared->quality2.values[tet_0];
+
+//   if(shared->quality2.values[tet_1]<worst_qual)
+//     worst_qual = shared->quality2.values[tet_1];
+
+//   local->cavity.v_up = mesh->tetrahedra.node[4*tet_0 + out_facet_0];
+//   local->cavity.v_down = mesh->tetrahedra.node[4*tet_1 + in_facet_1];
+
+//   // choose a reference facet in the tet_0
+//   unsigned in_facet_0 = (out_facet_0+1)%4;
+
+//   // adding vertices of the annulus:
+//   local->cavity.annulus[0] = mesh->tetrahedra.node[4*tet_0 + UP_VERTEX(in_facet_0, out_facet_0)];
+//   local->cavity.annulus[1] = mesh->tetrahedra.node[4*tet_0 + DOWN_VERTEX(in_facet_0, out_facet_0)];
+//   local->cavity.annulus[2] = mesh->tetrahedra.node[4*tet_0 + OUT_VERTEX(in_facet_0, out_facet_0)];
+
+//   local->cavity.neigh_up[0] = mesh->tetrahedra.neigh[4*tet_0 + in_facet_0];
+//   local->cavity.neigh_up[1] = mesh->tetrahedra.neigh[4*tet_0 + DOWN_FACET(in_facet_0, out_facet_0)];
+//   local->cavity.neigh_up[2] = mesh->tetrahedra.neigh[4*tet_0 + UP_FACET(in_facet_0, out_facet_0)];
+
+//   uint32_t v = local->cavity.annulus[2];
+
+//   // find one of the vertex in the tet_1
+//   uint32_t* nodes = mesh->tetrahedra.node + 4*tet_1;
+
+//   unsigned out_facet_1;
+//   for (out_facet_1=0; out_facet_1<4; out_facet_1++)
+//       if(nodes[out_facet_1]==v)
+//         break;
+
+//   // HXT_ASSERT(out_facet_1!=4);
+//   // HXT_ASSERT(out_facet_1!=in_facet_1);
+//   // HXT_ASSERT((isEdgeConstrainedSafe(mesh, tet_0, in_facet_0, out_facet_0)!=0)==(isEdgeConstrainedSafe(mesh, tet_1, in_facet_1, out_facet_1)!=0));
+//   // HXT_ASSERT((isEdgeConstrainedSafe(mesh, tet_0, in_facet_0, out_facet_0)!=0)==(isEdgeConstrainedSafe(mesh, tet_1, in_facet_1, out_facet_1)!=0));
+//   // HXT_ASSERT((isEdgeConstrainedSafe(mesh, tet_0, DOWN_FACET(in_facet_0, out_facet_0), out_facet_0)!=0)
+//   //          ==(isEdgeConstrainedSafe(mesh, tet_1, DOWN_FACET(in_facet_1, out_facet_1), in_facet_1)!=0));
+//   // HXT_ASSERT((isEdgeConstrainedSafe(mesh, tet_0, UP_FACET(in_facet_0, out_facet_0), out_facet_0)!=0)
+//   //          ==(isEdgeConstrainedSafe(mesh, tet_1, UP_FACET(in_facet_1, out_facet_1), in_facet_1)!=0));
+//   // HXT_ASSERT((getFacetConstraint(mesh, tet_0, out_facet_0)!=0)==(getFacetConstraint(mesh, tet_1, in_facet_1)!=0));
+//   // if(mesh->tetrahedra.neigh[neigh]!=4*tet_0+out_facet_0)
+//   //   return HXT_ERROR_MSG(HXT_STATUS_ERROR, "mesh->tetrahedra.neigh[%lu]==%lu instead of %lu",neigh,mesh->tetrahedra.neigh[neigh],4*tet_0+out_facet_0);
+
+//   local->cavity.neigh_down[0] = mesh->tetrahedra.neigh[4*tet_1 + out_facet_1];
+//   local->cavity.neigh_down[1] = mesh->tetrahedra.neigh[4*tet_1 + DOWN_FACET(in_facet_1, out_facet_1)];
+//   local->cavity.neigh_down[2] = mesh->tetrahedra.neigh[4*tet_1 + UP_FACET(in_facet_1, out_facet_1)];
+
+//   local->cavity.flag[0] = ((getFacetConstraint(mesh, tet_0, in_facet_0)!=0)<<12) +
+//                           ((getFacetConstraint(mesh, tet_1, out_facet_1)!=0)<<8) +
+//                           ((isEdgeConstrainedSafe(mesh, tet_1, out_facet_1, DOWN_FACET(in_facet_1, out_facet_1))!=0)<<2) +
+//                           ((isEdgeConstrainedSafe(mesh, tet_0, in_facet_0, DOWN_FACET(in_facet_0, out_facet_0))!=0)<<3) +
+//                           ((isEdgeConstrainedSafe(mesh, tet_1, out_facet_1, UP_FACET(in_facet_1, out_facet_1))!=0)<<6) +
+//                           ((isEdgeConstrainedSafe(mesh, tet_0, in_facet_0, UP_FACET(in_facet_0, out_facet_0))!=0)<<7) +
+//                           ((isEdgeConstrainedSafe(mesh, tet_0, in_facet_0, out_facet_0)!=0)<<11);
+
+//   local->cavity.flag[1] = ((getFacetConstraint(mesh, tet_0, DOWN_FACET(in_facet_0, out_facet_0))!=0)<<12) +
+//                           ((getFacetConstraint(mesh, tet_1, DOWN_FACET(in_facet_1, out_facet_1))!=0)<<8) +
+//                           ((isEdgeConstrainedSafe(mesh, tet_1, DOWN_FACET(in_facet_1, out_facet_1), UP_FACET(in_facet_1, out_facet_1))!=0)<<2) +
+//                           ((isEdgeConstrainedSafe(mesh, tet_0, DOWN_FACET(in_facet_0, out_facet_0), UP_FACET(in_facet_0, out_facet_0))!=0)<<3) +
+//                           ((isEdgeConstrainedSafe(mesh, tet_1, DOWN_FACET(in_facet_1, out_facet_1), out_facet_1)!=0)<<6) +
+//                           ((isEdgeConstrainedSafe(mesh, tet_0, DOWN_FACET(in_facet_0, out_facet_0), in_facet_0)!=0)<<7) +
+//                           ((isEdgeConstrainedSafe(mesh, tet_0, DOWN_FACET(in_facet_0, out_facet_0), out_facet_0)!=0)<<11);
+
+//   local->cavity.flag[2] = ((getFacetConstraint(mesh, tet_0, UP_FACET(in_facet_0, out_facet_0))!=0)<<12) +
+//                           ((getFacetConstraint(mesh, tet_1, UP_FACET(in_facet_1, out_facet_1))!=0)<<8) +
+//                           ((isEdgeConstrainedSafe(mesh, tet_1, UP_FACET(in_facet_1, out_facet_1), out_facet_1)!=0)<<2) +
+//                           ((isEdgeConstrainedSafe(mesh, tet_0, UP_FACET(in_facet_0, out_facet_0), in_facet_0)!=0)<<3) +
+//                           ((isEdgeConstrainedSafe(mesh, tet_1, UP_FACET(in_facet_1, out_facet_1), DOWN_FACET(in_facet_1, out_facet_1))!=0)<<6) +
+//                           ((isEdgeConstrainedSafe(mesh, tet_0, UP_FACET(in_facet_0, out_facet_0), DOWN_FACET(in_facet_0, out_facet_0))!=0)<<7) +
+//                           ((isEdgeConstrainedSafe(mesh, tet_0, UP_FACET(in_facet_0, out_facet_0), out_facet_0)!=0)<<11);
+
+//   // now we have everything... we just need to test if the quality of tetrahedra would be good
+//   double qual[3];
+//   for (int i=0; i<3; i++) {
+//     qual[i] = tetQuality2(mesh, local->cavity.annulus[i], local->cavity.annulus[(i+1)%3], local->cavity.v_up, local->cavity.v_down);
+//     if(qual[i]<worst_qual){
+//       return HXT_STATUS_INTERNAL;
+//     }
+//   }
+
+//   if(local->deleted.num<1)
+//     HXT_CHECK( createNewDeleted(mesh, shared, local) );
+
+//   local->deleted.num--;
+//   uint64_t newTet[3] = {tet_0, tet_1, local->deleted.tetID[local->deleted.num]};
+
+//   for (unsigned i=0; i<3; i++) {
+//     uint64_t curTet = newTet[i];
+//     mesh->tetrahedra.node[4*curTet + 0] = local->cavity.annulus[i];
+//     mesh->tetrahedra.node[4*curTet + 1] = local->cavity.annulus[(i+1)%3];
+//     mesh->tetrahedra.node[4*curTet + 2] = local->cavity.v_up;
+//     mesh->tetrahedra.node[4*curTet + 3] = local->cavity.v_down;
+
+//     mesh->tetrahedra.neigh[4*curTet + 0] = 4*newTet[(i+1)%3] + 1;
+//     // mesh->tetrahedra.neigh[4*curTet + 1] = 4*newTet[(i+4)%3] + 0;
+//     mesh->tetrahedra.neigh[4*newTet[(i+1)%3] + 1] = 4*curTet + 0;
+
+//     mesh->tetrahedra.neigh[4*curTet + 2] = local->cavity.neigh_down[i];
+//     if( local->cavity.neigh_down[i]!=HXT_NO_ADJACENT)
+//       mesh->tetrahedra.neigh[local->cavity.neigh_down[i]] = 4*curTet + 2;
+
+//     mesh->tetrahedra.neigh[4*curTet + 3] = local->cavity.neigh_up[i];
+//     if( local->cavity.neigh_up[i]!=HXT_NO_ADJACENT)
+//       mesh->tetrahedra.neigh[local->cavity.neigh_up[i]] = 4*curTet + 3;
+
+//     mesh->tetrahedra.colors[curTet] = color;
+
+//     // TODO: verify flags are well done
+//     mesh->tetrahedra.flag[curTet] = local->cavity.flag[i];
+//     shared->quality2.values[curTet] = qual[i];
+//   }
+
+//   return HXT_STATUS_OK;
+//   // return HXT_STATUS_INTERNAL;
+// }
 
 
 static inline HXTStatus buildEdgeCavity(HXTMesh* mesh, ThreadLocal* local,
-                                        const uint64_t badTet, uint16_t color,
-                                        int in_facet, int out_facet)
+                                        const uint64_t badTet,
+                                        unsigned in_facet, unsigned out_facet)
 {
   const uint64_t startDist = local->partition.startDist;
   const uint64_t rel = local->partition.endDist - startDist;
@@ -840,7 +840,7 @@ static inline HXTStatus buildEdgeCavity(HXTMesh* mesh, ThreadLocal* local,
   uint64_t curTet = badTet;
   local->cavity.num = 0;
 
-  if(isEdgeConstrainedSafe(mesh, badTet, in_facet, out_facet))
+  if(getEdgeConstraint(mesh, badTet, getEdgeFromFacets(in_facet, out_facet)))
     return HXT_STATUS_INTERNAL;
 
   local->cavity.v_up = mesh->tetrahedra.node[4*badTet + UP_VERTEX(in_facet, out_facet)];
@@ -854,23 +854,34 @@ static inline HXTStatus buildEdgeCavity(HXTMesh* mesh, ThreadLocal* local,
     // add the current tetrahedra
     local->deleted.tetID[local->deleted.num + local->cavity.num] = curTet;
 
-    // add the neighbor up and down
-    local->cavity.neigh_up[local->cavity.num] = mesh->tetrahedra.neigh[4*curTet + UP_FACET(in_facet, out_facet)];
-    local->cavity.neigh_down[local->cavity.num] = mesh->tetrahedra.neigh[4*curTet + DOWN_FACET(in_facet, out_facet)];
-
-    // TODO: just store one flag for up and down. the one of the default tetrahedron
-    local->cavity.flag[local->cavity.num] = (isFacetConstrained(mesh, 4*curTet + UP_FACET(in_facet, out_facet))!=0) +
-                                            ((isEdgeConstrainedSafe(mesh, curTet, UP_FACET(in_facet, out_facet), out_facet)!=0)<<1) +
-                                            ((isEdgeConstrainedSafe(mesh, curTet, UP_FACET(in_facet, out_facet), DOWN_FACET(in_facet, out_facet))!=0)<<2) +
-                                            ((isEdgeConstrainedSafe(mesh, curTet, UP_FACET(in_facet, out_facet), in_facet)!=0)<<3) +
-                                            ((isFacetConstrained(mesh, 4*curTet + DOWN_FACET(in_facet, out_facet))!=0)<<4) +
-                                            ((isEdgeConstrainedSafe(mesh, curTet, DOWN_FACET(in_facet, out_facet), out_facet)!=0)<<5) +
-                                            ((isEdgeConstrainedSafe(mesh, curTet, DOWN_FACET(in_facet, out_facet), in_facet)!=0)<<6) +
-                                            ((isEdgeConstrainedSafe(mesh, curTet, UP_FACET(in_facet, out_facet), DOWN_FACET(in_facet, out_facet))!=0)<<7);
-
+    {
+      unsigned up_facet = UP_FACET(in_facet, out_facet);
+      unsigned down_facet = DOWN_FACET(in_facet, out_facet);
+
+      // add the neighbor up and down
+      local->cavity.neigh_up[local->cavity.num] = mesh->tetrahedra.neigh[4*curTet + up_facet];
+      local->cavity.neigh_down[local->cavity.num] = mesh->tetrahedra.neigh[4*curTet + down_facet];
+
+      int upDownEdge = getEdgeFromFacets(up_facet, down_facet);
+      int upOutEdge = getEdgeFromFacets(up_facet, out_facet);
+      int upInEdge = getEdgeFromFacets(up_facet, in_facet);
+      int downOutEdge = getEdgeFromFacets(down_facet, out_facet);
+      int downInEdge = getEdgeFromFacets(down_facet, in_facet);
+
+
+      // TODO: just store one flag for up and down. the one of the default tetrahedron
+      local->cavity.flag[local->cavity.num] = (getFacetConstraint(mesh, curTet, up_facet)!=0) +
+                                              ((getEdgeConstraint(mesh, curTet, upOutEdge)!=0)<<1) +
+                                              ((getEdgeConstraint(mesh, curTet, upDownEdge)!=0)<<2) +
+                                              ((getEdgeConstraint(mesh, curTet, upInEdge)!=0)<<3) +
+                                              ((getFacetConstraint(mesh, curTet, down_facet)!=0)<<4) +
+                                              ((getEdgeConstraint(mesh, curTet, downOutEdge)!=0)<<5) +
+                                              ((getEdgeConstraint(mesh, curTet, downInEdge)!=0)<<6) +
+                                              ((getEdgeConstraint(mesh, curTet, upDownEdge)!=0)<<7);
+    }
     // add the annulus vertex
-    uint32_t oldV = mesh->tetrahedra.node[4*curTet + IN_VERTEX(in_facet, out_facet)];
-    uint32_t newV = mesh->tetrahedra.node[4*curTet + OUT_VERTEX(in_facet, out_facet)];
+    uint32_t oldV = mesh->tetrahedra.node[4*curTet + out_facet];
+    uint32_t newV = mesh->tetrahedra.node[4*curTet + in_facet];
 
     local->cavity.annulus[local->cavity.num] = oldV;
     local->cavity.num++;
@@ -878,7 +889,7 @@ static inline HXTStatus buildEdgeCavity(HXTMesh* mesh, ThreadLocal* local,
     // go into the neighbor through out_facet
     uint64_t neigh = mesh->tetrahedra.neigh[4*curTet + out_facet];
     if(neigh == HXT_NO_ADJACENT
-      || (isFacetConstrained(mesh, neigh)!=0)
+      || (getFacetConstraint(mesh, neigh/4, neigh%4)!=0)
       || local->cavity.num>=HXT_MAX_CAVITY_SIZE) {
       return HXT_STATUS_INTERNAL;
     }
@@ -905,9 +916,9 @@ static inline HXTStatus buildEdgeCavity(HXTMesh* mesh, ThreadLocal* local,
 
 static HXTStatus edgeSwap(HXTMesh *mesh, ThreadShared* shared, ThreadLocal* local,
                              const uint64_t badTet, const uint16_t color,
-                             int in_facet, int out_facet)
+                             unsigned in_facet, unsigned out_facet)
 {
-  HXT_CHECK( buildEdgeCavity(mesh, local, badTet, color, in_facet, out_facet) );
+  HXT_CHECK( buildEdgeCavity(mesh, local, badTet, in_facet, out_facet) );
 
   // find worst quality2 tet of the cavity
   double worst = DBL_MAX;
@@ -919,7 +930,7 @@ static HXTStatus edgeSwap(HXTMesh *mesh, ThreadShared* shared, ThreadLocal* loca
   }
 
   const SwapPattern* patt = &patterns[local->cavity.num];
-  const int num_triangle_per_triangul = local->cavity.num-2;
+  const unsigned num_triangle_per_triangul = local->cavity.num-2;
   uint32_t* annulus = local->cavity.annulus;
 
   // calculate qualities of all possible tetrahedra
@@ -927,7 +938,7 @@ static HXTStatus edgeSwap(HXTMesh *mesh, ThreadShared* shared, ThreadLocal* loca
   double hxtDeclareAligned qual_up[35];
   double hxtDeclareAligned qual_down[35];
   uint64_t mask = 0;
-  for (int i=0; i<patt->num_triangles; i++) {
+  for (unsigned i=0; i<patt->num_triangles; i++) {
     uint32_t p0 = annulus[patt->triangles[i][0]];
     uint32_t p1 = annulus[patt->triangles[i][1]];
     uint32_t p2 = annulus[patt->triangles[i][2]];
@@ -944,12 +955,12 @@ static HXTStatus edgeSwap(HXTMesh *mesh, ThreadShared* shared, ThreadLocal* loca
   // find the best triangulation
   int best_triangulation = -1;
   double best_worst = 0;
-  for (int i=0; i<patt->num_triangulations; i++) {
+  for (unsigned i=0; i<patt->num_triangulations; i++) {
     if((mask & (UINT64_C(1)<<i))==0) {
       double cur_worst = DBL_MAX;
       // this mean that no triangle in the triangulation
       //   is worst than the current worst tetrahedron
-      for (int j=0; j<num_triangle_per_triangul; j++) {
+      for (unsigned j=0; j<num_triangle_per_triangul; j++) {
         double q_u = qual_up[patt->triangulations[i][j]];
         double q_d = qual_down[patt->triangulations[i][j]];
         if(q_u<best_worst || q_d<best_worst){
@@ -978,7 +989,7 @@ static HXTStatus edgeSwap(HXTMesh *mesh, ThreadShared* shared, ThreadLocal* loca
   // mark new deleted tet as deleted
   for (uint64_t i=0; i<local->cavity.num; i++) {
     shared->quality2.values[local->deleted.tetID[local->deleted.num+i]] = DBL_MAX; // deleted tets have good quality2
-    // markTetAsDeleted(mesh, local->deleted.tetID[local->deleted.num+i]);
+    // setDeletedFlag(mesh, local->deleted.tetID[local->deleted.num+i]);
   }
   local->deleted.num += local->cavity.num;
 
@@ -990,7 +1001,7 @@ static HXTStatus edgeSwap(HXTMesh *mesh, ThreadShared* shared, ThreadLocal* loca
   uint64_t start = local->deleted.num - 2*num_triangle_per_triangul;
 
   // make the swap
-  for (int i=0; i<num_triangle_per_triangul; i++) {
+  for (unsigned i=0; i<num_triangle_per_triangul; i++) {
     uint32_t tri = patt->triangulations[best_triangulation][i];
     uint32_t p0 = annulus[patt->triangles[tri][0]];
     uint32_t p1 = annulus[patt->triangles[tri][1]];
@@ -1025,7 +1036,10 @@ static HXTStatus edgeSwap(HXTMesh *mesh, ThreadShared* shared, ThreadLocal* loca
         neigh[0] = local->cavity.neigh_up[-n0-1];
 
         //  (down=2, in=3, out=1, up=0)
-        mesh->tetrahedra.flag[newTet_up] |= local->cavity.flag[-n0-1] & 0xF;
+        mesh->tetrahedra.flag[newTet_up] |= (local->cavity.flag[-n0-1]&1)<<8 |// face (bit 0) is the up_facet => 0  (bit 8)
+                                            (local->cavity.flag[-n0-1]&2)>>1 |// first edge (bit 1) was between up_facet and out_facet => 0-1  (bit 0)
+                                            (local->cavity.flag[-n0-1]&4)>>1 |// second edge (bit 2) was between up_facet and down_facet => 0-2 (bit 1)
+                                            (local->cavity.flag[-n0-1]&8)>>1; // third edge (bit 3) was between up_facet and in_facet => 0-3    (bit 2)
 
         if(neigh[0]!=HXT_NO_ADJACENT)
           mesh->tetrahedra.neigh[neigh[0]] = 4*newTet_up + 0;
@@ -1037,10 +1051,10 @@ static HXTStatus edgeSwap(HXTMesh *mesh, ThreadShared* shared, ThreadLocal* loca
         neigh[1] = local->cavity.neigh_up[-n1-1];
 
         //  (down=2, in=0, out=3, up=1)
-        mesh->tetrahedra.flag[newTet_up] |= (local->cavity.flag[-n1-1]&5)<<4 |  // face (bit 0) is the up_facet => 1*4  (bit 4)
-                                            (local->cavity.flag[-n1-1]&2)<<6 |  // first edge (bit 1) was between up_facet and out_facet => 1-3   (bit 7)
-                                            // (local->cavity.flag[-n1-1]&4)<<4 |  // second edge (bit 2) was between up_facet and down_facet => 1-2 (bit 6)
-                                            (local->cavity.flag[-n1-1]&8)>>2;   // third edge (bit 3) was between up_facet and in_facet => 0-1    (bit 1)
+        mesh->tetrahedra.flag[newTet_up] |= (local->cavity.flag[-n1-1]&1)<<9 |// face (bit 0) is the up_facet => 1  (bit 9)
+                                            (local->cavity.flag[-n1-1]&2)<<3 |// first edge (bit 1) was between up_facet and out_facet => 1-3   (bit 4)
+                                            (local->cavity.flag[-n1-1]&4)<<1 |// second edge (bit 2) was between up_facet and down_facet => 1-2 (bit 3)
+                                            (local->cavity.flag[-n1-1]&8)>>3; // third edge (bit 3) was between up_facet and in_facet => 0-1    (bit 0)
 
         if(neigh[1]!=HXT_NO_ADJACENT)
           mesh->tetrahedra.neigh[neigh[1]] = 4*newTet_up + 1;
@@ -1053,10 +1067,10 @@ static HXTStatus edgeSwap(HXTMesh *mesh, ThreadShared* shared, ThreadLocal* loca
         neigh[3] = local->cavity.neigh_up[-n2-1];
 
         //  (down=2, in=1, out=0, up=3)
-        mesh->tetrahedra.flag[newTet_up] |= (local->cavity.flag[-n2-1]&1)<<12 |  // face (bit 0) is the up_facet => 3*4  (bit 12)
-                                            (local->cavity.flag[-n2-1]&2)<<2 |  // first edge (bit 1) was between up_facet and out_facet => 0-3   (bit 3)
-                                            (local->cavity.flag[-n2-1]&4)<<9 |  // second edge (bit 2) was between up_facet and down_facet => 2-3 (bit 11)
-                                            (local->cavity.flag[-n2-1]&8)<<4;   // third edge (bit 3) was between up_facet and in_facet => 1-3    (bit 7)
+        mesh->tetrahedra.flag[newTet_up] |= (local->cavity.flag[-n2-1]&1)<<11 |// face (bit 0) is the up_facet => 3  (bit 11)
+                                            (local->cavity.flag[-n2-1]&2)<<1  |// first edge (bit 1) was between up_facet and out_facet => 0-3   (bit 2)
+                                            (local->cavity.flag[-n2-1]&4)<<3  |// second edge (bit 2) was between up_facet and down_facet => 2-3 (bit 5)
+                                            (local->cavity.flag[-n2-1]&8)<<1;  // third edge (bit 3) was between up_facet and in_facet => 1-3    (bit 4)
 
         if(neigh[3]!=HXT_NO_ADJACENT)
           mesh->tetrahedra.neigh[neigh[3]] = 4*newTet_up + 3;
@@ -1084,7 +1098,10 @@ static HXTStatus edgeSwap(HXTMesh *mesh, ThreadShared* shared, ThreadLocal* loca
         neigh[0] = local->cavity.neigh_down[-n0-1];
 
         //  (down=0, in=2, out=1, up=3)
-        mesh->tetrahedra.flag[newTet_down] |= (local->cavity.flag[-n0-1] & 0xF0)>>4;
+        mesh->tetrahedra.flag[newTet_down] |= (local->cavity.flag[-n0-1]&16)<<4 |// face (bit 4) is the down_facet => 0  (bit 8)
+                                              (local->cavity.flag[-n0-1]&32)>>5 |// first edge (bit 5) was between down_facet and out_facet => 0-1   (bit 0)
+                                              (local->cavity.flag[-n0-1]&64)>>5 |// second edge (bit 6) was between down_facet and in_facet => 0-2   (bit 1)
+                                              (local->cavity.flag[-n0-1]&128)>>5;// third edge (bit 7) was between down_facet and up_facet => 0-3    (bit 2)
 
         if(neigh[0]!=HXT_NO_ADJACENT)
           mesh->tetrahedra.neigh[neigh[0]] = 4*newTet_down + 0;
@@ -1096,10 +1113,10 @@ static HXTStatus edgeSwap(HXTMesh *mesh, ThreadShared* shared, ThreadLocal* loca
         neigh[1] = local->cavity.neigh_down[-n1-1];
 
         //  (down=1, in=0, out=2, up=3)
-        mesh->tetrahedra.flag[newTet_down] |= (local->cavity.flag[-n1-1]&0x90) |  // face (bit 4) is the down_facet => 1*4  (bit 4)
-                                              (local->cavity.flag[-n1-1]&32)<<1 |  // first edge (bit 5) was between down_facet and out_facet => 1-2   (bit 6)
-                                              (local->cavity.flag[-n1-1]&64)>>5;  // second edge (bit 6) was between down_facet and in_facet => 0-1   (bit 1)
-                                              //(local->cavity.flag[-n1-1]&128);   // third edge (bit 7) was between down_facet and up_facet => 1-3    (bit 7)
+        mesh->tetrahedra.flag[newTet_down] |= (local->cavity.flag[-n1-1]&16)<<5 |// face (bit 4) is the down_facet => 1  (bit 9)
+                                              (local->cavity.flag[-n1-1]&32)>>2 |// first edge (bit 5) was between down_facet and out_facet => 1-2   (bit 3)
+                                              (local->cavity.flag[-n1-1]&64)>>6 |// second edge (bit 6) was between down_facet and in_facet => 0-1   (bit 0)
+                                              (local->cavity.flag[-n1-1]&128)>>3;// third edge (bit 7) was between down_facet and up_facet => 1-3    (bit 4)
 
         if(neigh[1]!=HXT_NO_ADJACENT)
           mesh->tetrahedra.neigh[neigh[1]] = 4*newTet_down + 1;
@@ -1111,10 +1128,10 @@ static HXTStatus edgeSwap(HXTMesh *mesh, ThreadShared* shared, ThreadLocal* loca
         neigh[2] = local->cavity.neigh_down[-n2-1];
 
         //  (down=2, in=1, out=0, up=3)
-        mesh->tetrahedra.flag[newTet_down] |= (local->cavity.flag[-n2-1]&0x90)<<4 |  // face (bit 4) is the down_facet => 2*4  (bit 8)
-                                              (local->cavity.flag[-n2-1]&32)>>3 |  // first edge (bit 5) was between down_facet and out_facet => 0-2   (bit 2)
-                                              (local->cavity.flag[-n2-1]&64);  // second edge (bit 6) was between down_facet and in_facet => 1-2   (bit 6)
-                                              //(local->cavity.flag[-n2-1]&128)<<4;   // third edge (bit 7) was between down_facet and up_facet => 2-3    (bit 11)
+        mesh->tetrahedra.flag[newTet_down] |= (local->cavity.flag[-n2-1]&16)<<6 |// face (bit 4) is the down_facet => 2*4  (bit 10)
+                                              (local->cavity.flag[-n2-1]&32)>>4 |// first edge (bit 5) was between down_facet and out_facet => 0-2   (bit 1)
+                                              (local->cavity.flag[-n2-1]&64)>>3 |// second edge (bit 6) was between down_facet and in_facet => 1-2   (bit 3)
+                                              (local->cavity.flag[-n2-1]&128)>>2;// third edge (bit 7) was between down_facet and up_facet => 2-3    (bit 5)
 
         if(neigh[2]!=HXT_NO_ADJACENT)
           mesh->tetrahedra.neigh[neigh[2]] = 4*newTet_down + 2;
@@ -1130,11 +1147,11 @@ static HXTStatus edgeSwap(HXTMesh *mesh, ThreadShared* shared, ThreadLocal* loca
 
 
 static inline HXTStatus buildVertexCavity(HXTMesh* mesh, ThreadLocal* local,
-                                          uint64_t curFace,
+                                          uint64_t startFace,
                                           const uint32_t numVerticesConstrained) {
   const uint64_t startDist = local->partition.startDist;
   const uint64_t rel = local->partition.endDist - startDist;
-  const uint32_t vertex = mesh->tetrahedra.node[curFace];
+  const uint32_t vertex = mesh->tetrahedra.node[startFace];
   HXTVertex* vertices = (HXTVertex*) mesh->vertices.coord;
 
   // the vertex we are moving should be in the partition or we don't even try...
@@ -1142,8 +1159,8 @@ static inline HXTStatus buildVertexCavity(HXTMesh* mesh, ThreadLocal* local,
     return HXT_STATUS_INTERNAL;
 
   HXT_CHECK( reserveNewDeleted(local, 4) );
-  local->deleted.tetID[local->deleted.num++] = curFace;
-  markTetAsDeleted(mesh, curFace/4);
+  local->deleted.tetID[local->deleted.num++] = startFace;
+  setDeletedFlag(mesh, startFace/4);
 
   for (uint64_t start=local->deleted.num-1; start<local->deleted.num; start++) {
     HXT_CHECK( reserveNewDeleted(local, 3) );
@@ -1162,14 +1179,14 @@ static inline HXTStatus buildVertexCavity(HXTMesh* mesh, ThreadLocal* local,
       if(neigh==HXT_NO_ADJACENT)
         return HXT_STATUS_INTERNAL;
 
-      if(isTetDeleted(mesh, neighTet))
+      if(getDeletedFlag(mesh, neighTet))
         continue;
 
-      if(isFacetConstrained(mesh, 4*curTet+f))
+      if(getFacetConstraint(mesh, curTet, f))
         return HXT_STATUS_INTERNAL;
 
-      for(int k=0; k<4; k++){
-        if(k!=f && isEdgeConstrainedSafe(mesh, curTet, f, k))
+      for(unsigned k=0; k<4; k++){
+        if(k!=f && getEdgeConstraint(mesh, curTet, getEdgeFromFacets(f, k)))
           return HXT_STATUS_INTERNAL;
       }
 
@@ -1190,7 +1207,7 @@ static inline HXTStatus buildVertexCavity(HXTMesh* mesh, ThreadLocal* local,
       else
         local->deleted.tetID[local->deleted.num++] = 4*neighTet + (neighF+3)%4;
 
-      markTetAsDeleted(mesh, neighTet);
+      setDeletedFlag(mesh, neighTet);
     }
   }
 
@@ -1324,7 +1341,7 @@ static HXTStatus smoothing(HXTMesh *mesh,
   }
 
   for (uint64_t i=prevNumDeleted; i<local->deleted.num; i++) {
-    unmarkTetAsDeleted(mesh, local->deleted.tetID[i]/4);
+    unsetDeletedFlag(mesh, local->deleted.tetID[i]/4);
   }
 
   if(status==HXT_STATUS_INTERNAL || status==HXT_STATUS_CONFLICT){
@@ -1370,17 +1387,22 @@ static HXTStatus smoothing(HXTMesh *mesh,
 
 HXTStatus hxtOptimizeTetrahedra(HXTMesh *mesh,
                                 HXTBbox* bbox,
+                                int maxThreads,
                                 double minSize,
                                 double qualityThreshold,
                                 uint32_t numVerticesConstrained){
   ThreadLocal* locals = NULL;
   ThreadShared* shared = NULL;
   volatile HXTStatus globalStatus = HXT_STATUS_OK;
-  const int maxThreads = omp_get_max_threads();
   uint32_t seed = 1;
   uint32_t nbits = 0;
   int changePartitions = 1;
 
+  if(maxThreads<0)
+    maxThreads = omp_get_num_procs();
+  else if(maxThreads==0)
+    maxThreads = omp_get_max_threads();
+
   HXT_CHECK( threadShared_create(mesh, qualityThreshold, &shared) );
   HXT_CHECK( threadLocals_create(&locals, maxThreads) );
 
@@ -1433,16 +1455,16 @@ HXTStatus hxtOptimizeTetrahedra(HXTMesh *mesh,
         /*** sort the neighbor by their qualities **/
 
         double hxtDeclareAligned qual[4];
-        for (int i=0; i<4; i++) {
-          if(neighs[i]==HXT_NO_ADJACENT ||
-            mesh->tetrahedra.colors[neighs[i]/4]!=color) {
-            qual[i]=DBL_MAX;
+        for (int j=0; j<4; j++) {
+          if(neighs[j]==HXT_NO_ADJACENT ||
+            mesh->tetrahedra.colors[neighs[j]/4]!=color) {
+            qual[j]=DBL_MAX;
           }
           else
-            qual[i] = shared->quality2.values[neighs[i]/4];
+            qual[j] = shared->quality2.values[neighs[j]/4];
         }
 
-        int hxtDeclareAligned order[4] = {0,1,2,3};
+        unsigned hxtDeclareAligned order[4] = {0,1,2,3};
 
         // sort first pair
         if(qual[order[1]] < qual[order[0]])
@@ -1561,9 +1583,9 @@ HXTStatus hxtOptimizeTetrahedra(HXTMesh *mesh,
   } while(nConflict!=0 || nSwaps!=0);
 
   for (int threadID=0; threadID<maxThreads; threadID++) {
-    for (int i=0; i<locals[threadID].deleted.num; i++) {
+    for (uint64_t i=0; i<locals[threadID].deleted.num; i++) {
       uint64_t delTet = locals[threadID].deleted.tetID[i];
-      markTetAsDeleted(mesh, delTet);
+      setDeletedFlag(mesh, delTet);
       for (int j=0; j<4; j++)
         mesh->tetrahedra.neigh[4*delTet+j] = HXT_NO_ADJACENT;
     }
diff --git a/contrib/hxt/hxt_tetOpti.h b/contrib/hxt/hxt_tetOpti.h
index 0afd8121a090de670173669eb38c0bcc866e97f7..32350cda57ab91e57f2cf365267f285223c15dfb 100644
--- a/contrib/hxt/hxt_tetOpti.h
+++ b/contrib/hxt/hxt_tetOpti.h
@@ -2,5 +2,6 @@
 #define HXT_MESH_H_
 #include "hxt_api.h"
 #include "hxt_mesh.h"
-HXTStatus hxtOptimizeTetrahedra(HXTMesh *mesh, HXTBbox* bbox, double minSize, double qualityThreshold, uint32_t numVerticesConstrained);
+#include "hxt_bbox.h"
+HXTStatus hxtOptimizeTetrahedra(HXTMesh *mesh, HXTBbox* bbox, int maxThreads, double minSize, double qualityThreshold, uint32_t numVerticesConstrained);
 #endif
diff --git a/contrib/hxt/hxt_tetPostpro.c b/contrib/hxt/hxt_tetPostpro.c
index ad2537193c66f77cf309ed8a5622b5cd6698acb6..427c4f683d826a29c2f619bc805df84882e85abe 100644
--- a/contrib/hxt/hxt_tetPostpro.c
+++ b/contrib/hxt/hxt_tetPostpro.c
@@ -1,5 +1,4 @@
 #include "predicates.h"
-#include "hxt_tetrahedra.h"
 #include "hxt_tetPostpro.h"
 #include "hxt_tetFlag.h"
 #include "hxt_tetUtils.h"
@@ -21,7 +20,7 @@ HXTStatus hxtTetPlaneIntersection(HXTMesh* mesh, double* p0, double* p1, double*
   #pragma omp parallel for
   for (uint64_t i=0; i<mesh->tetrahedra.num; i++) {
     if(mesh->tetrahedra.node[4*i+3] == HXT_GHOST_VERTEX) {
-      markTetAsDeleted(mesh, i);
+      setDeletedFlag(mesh, i);
     }
     else {
       double firstOrient = mesh->vertices.coord[4*mesh->tetrahedra.node[4*i]+3];
@@ -34,7 +33,7 @@ HXTStatus hxtTetPlaneIntersection(HXTMesh* mesh, double* p0, double* p1, double*
       }
 
       if(firstOrient!=0.0)
-        markTetAsDeleted(mesh, i);
+        setDeletedFlag(mesh, i);
     }
   }
 
@@ -52,12 +51,12 @@ HXTStatus hxtTetPlaneOrient(HXTMesh* mesh, double* p0, double* p1, double* p2){
   #pragma omp parallel for
   for (uint64_t i=0; i<mesh->tetrahedra.num; i++) {
     if(mesh->tetrahedra.node[4*i+3] == HXT_GHOST_VERTEX) {
-      markTetAsDeleted(mesh, i);
+      setDeletedFlag(mesh, i);
     }
     else {
       for (int j=0; j<4; j++) {
         if(mesh->vertices.coord[4*mesh->tetrahedra.node[4*i+j]+3]<0.0){
-          markTetAsDeleted(mesh, i);
+          setDeletedFlag(mesh, i);
           break;
         }
       }
diff --git a/contrib/hxt/hxt_tetRepair.c b/contrib/hxt/hxt_tetRepair.c
index 4ba131f23e3404e3aa60d9347b4b3d2a7daa8189..7ccea71216be8d4f9659fc936d61151b48459014 100644
--- a/contrib/hxt/hxt_tetRepair.c
+++ b/contrib/hxt/hxt_tetRepair.c
@@ -1,9 +1,9 @@
-#include "hxt_mesh.h"
+#include "hxt_tetRepair.h"
+#include "hxt_tetUtils.h"
+#include "hxt_tetFlag.h"
 #include "hxt_vertices.h"
 #include "predicates.h"
 #include "hxt_sort.h"
-#include "hxt_tetRepair.h"
-#include "hxt_tetFlag.h"
 
 /**
 * \file hxt_tetRepair.c see header hxt_tetRepair.h
@@ -105,12 +105,12 @@ HXTStatus hxtTetAdjacencies(HXTMesh* mesh){
 
     // now that triangles are sorted, when two are the same, they are neighbors
     #pragma omp parallel for
-    for (uint64_t i=0; i<nTet*4-1; i++) {
-      if(triplet[i].v[0]==triplet[i+1].v[0] &&
-         triplet[i].v[1]==triplet[i+1].v[1])
+    for (uint64_t i=1; i<nTet*4; i++) {
+      if(triplet[i-1].v[0]==triplet[i].v[0] &&
+         triplet[i-1].v[1]==triplet[i].v[1])
       {
-        mesh->tetrahedra.neigh[triplet[i].v[2]] = triplet[i+1].v[2];
-        mesh->tetrahedra.neigh[triplet[i+1].v[2]] = triplet[i].v[2];
+        mesh->tetrahedra.neigh[triplet[i-1].v[2]] = triplet[i].v[2];
+        mesh->tetrahedra.neigh[triplet[i].v[2]] = triplet[i-1].v[2];
         // i++; // can be done but break SIMD
       }
     }
@@ -121,11 +121,11 @@ HXTStatus hxtTetAdjacencies(HXTMesh* mesh){
     HXT_CHECK( group2_sort_v0(pair, nTet*4, n*n*n-1) );
 
     #pragma omp parallel for
-    for (uint64_t i=0; i<nTet*4-1; i++) {
-      if(pair[i].v[0]==pair[i+1].v[0])
+    for (uint64_t i=1; i<nTet*4; i++) {
+      if(pair[i-1].v[0]==pair[i].v[0])
       {
-        mesh->tetrahedra.neigh[pair[i].v[1]] = pair[i+1].v[1];
-        mesh->tetrahedra.neigh[pair[i+1].v[1]] = pair[i].v[1];
+        mesh->tetrahedra.neigh[pair[i-1].v[1]] = pair[i].v[1];
+        mesh->tetrahedra.neigh[pair[i].v[1]] = pair[i-1].v[1];
         // i++; // can be done but break SIMD
       }
     }
@@ -268,7 +268,7 @@ HXTStatus hxtTetOrientNodes(HXTMesh* mesh)
   for (uint64_t i=0; i<nTet; i++) {
     uint32_t* node = mesh->tetrahedra.node + 4*i;
 
-    if(orient3d(coord+4*node[0], coord+4*node[1], coord+4*node[2], coord+4*node[3])<0){
+    if(orient3d(coord+4*node[0], coord+4*node[1], coord+4*node[2], coord+4*node[3])>0){
       uint32_t tmp = node[0];
       node[0] = node[1];
       node[1] = tmp;
@@ -295,7 +295,7 @@ HXTStatus hxtTetVerify(HXTMesh* mesh)
     uint32_t* Node = mesh->tetrahedra.node + i*4;
     uint64_t* Neigh = mesh->tetrahedra.neigh + i*4;
 
-    if(isTetDeleted(mesh, i)){
+    if(getDeletedFlag(mesh, i)){
       // HXT_WARNING("deleted tetrahedra remain in the mesh");
       continue;
     }
@@ -316,7 +316,7 @@ HXTStatus hxtTetVerify(HXTMesh* mesh)
     //   HXT_WARNING("ghost tet. %lu remains in the array (did you clean the mesh?)",i*4);
     // }
     // else
-    if(Node[3]!=HXT_GHOST_VERTEX && orient3d(a,b,c,vertices[Node[3]].coord)<=0.0){
+    if(Node[3]!=HXT_GHOST_VERTEX && orient3d(a,b,c,vertices[Node[3]].coord)>=0.0){
       HXT_ERROR_MSG(HXT_STATUS_ERROR, "orientation of tet %lu is wrong",i);
       errorOccured=1;
       continue;
@@ -325,33 +325,34 @@ HXTStatus hxtTetVerify(HXTMesh* mesh)
     // check the neighbors
     for (unsigned j=0; j<4; j++)
     {
-      uint64_t neigh = Neigh[j];
-
-      if(neigh==HXT_NO_ADJACENT){
+      if(Neigh[j]==HXT_NO_ADJACENT){
         continue;
       }
 
-      if(neigh>=mesh->tetrahedra.num*4) {
+      uint64_t neigh = Neigh[j]/4;
+      unsigned face = Neigh[j]%4;
+
+      if(neigh>=mesh->tetrahedra.num) {
         HXT_ERROR_MSG(HXT_STATUS_ERROR, "%uth neighbor of tet %lu does not exist", j, i);
         errorOccured=1;
         continue;
       }
 
       // uint64_t* NeighNeigh = mesh->tetrahedra.neigh + neigh;
-      uint32_t* NeighNode = mesh->tetrahedra.node + neigh/4*4;
+      uint32_t* NeighNode = mesh->tetrahedra.node + neigh*4;
       
-      if(mesh->tetrahedra.neigh[neigh]!=i*4+j){
-        HXT_ERROR_MSG(HXT_STATUS_ERROR, "tet %lu (%lu/4) is not the neighbor of its %uth neighbor %lu (%lu/4)", i, i*4,j, neigh/4, neigh);
+      if(mesh->tetrahedra.neigh[4*neigh+face]!=i*4+j){
+        HXT_ERROR_MSG(HXT_STATUS_ERROR, "tet %lu (%lu/4) is not the neighbor of its %uth neighbor %lu (%lu/4)", i, i*4,j, neigh, 4*neigh+face);
         errorOccured=1;
         continue;
       }
 
-      uint32_t V[3] = { Node[((j+1)&3)], Node[((j&2)^3)], Node[((j+3)&2)]};
+      uint32_t V[3] = { Node[((j+1)&3)], Node[((j+3)&2)], Node[((j&2)^3)]};
       unsigned l;
 
       for (l=0; l<3; l++)
       {
-        if(NeighNode[(((neigh&3)+1)&3)]==V[l] && NeighNode[(((neigh&3)&2)^3)]==V[(l+1)%3] && NeighNode[(((neigh&3)+3)&2)]==V[(l+2)%3])
+        if(NeighNode[((face+1)&3)]==V[l] && NeighNode[((face+3)&2)]==V[(l+1)%3] && NeighNode[((face&2)^3)]==V[(l+2)%3])
            break;
       }
 
@@ -363,7 +364,7 @@ HXTStatus hxtTetVerify(HXTMesh* mesh)
 
       for (l=0; l<3; l++)
       {
-        if(NeighNode[(((neigh&3)+1)&3)]==V[l] && NeighNode[(((neigh&3)&2)^3)]==V[(l+2)%3] && NeighNode[(((neigh&3)+3)&2)]==V[(l+1)%3])
+        if(NeighNode[((face+1)&3)]==V[l] && NeighNode[((face&2)^3)]==V[(l+1)%3] && NeighNode[((face+3)&2)]==V[(l+2)%3])
            break;
       }
 
@@ -374,19 +375,19 @@ HXTStatus hxtTetVerify(HXTMesh* mesh)
       }
 
 
-      if((isFacetConstrained(mesh, i*4+j)!=0) ^ (isFacetConstrained(mesh, neigh)!=0)) {
+      if((getFacetConstraint(mesh, i,j)!=0) ^ (getFacetConstraint(mesh, neigh, face)!=0)) {
         HXT_ERROR_MSG(HXT_STATUS_ERROR, "constraint is not consistent on both side of facet 4*%lu+%u",i,j);
         errorOccured=1;
         continue;
       }
 
       // only for delaunay triangulation...
-      // if(Node[3]!=HXT_GHOST_VERTEX && NeighNode[neigh&3]!=HXT_GHOST_VERTEX && insphere(vertices[Node[0]].coord,
+      // if(Node[3]!=HXT_GHOST_VERTEX && NeighNode[face]!=HXT_GHOST_VERTEX && insphere(vertices[Node[0]].coord,
       //             vertices[Node[1]].coord,
       //             vertices[Node[2]].coord,
       //             vertices[Node[3]].coord,
-      //             vertices[NeighNode[neigh&3]].coord)>0.0){
-      //   HXT_ERROR_MSG(HXT_STATUS_ERROR, "neighbor %u of tet %lu has it's non-common node in the sphere (insphere(%u %u %u %u %u)>0)",j,i*4, Node[0], Node[1], Node[2], Node[3], NeighNode[neigh&3]);
+      //             vertices[NeighNode[face]].coord)<0.0){
+      //   HXT_ERROR_MSG(HXT_STATUS_ERROR, "neighbor %u of tet %lu has it's non-common node in the sphere (insphere(%u %u %u %u %u)>0)",j,i*4, Node[0], Node[1], Node[2], Node[3], NeighNode[face]);
       //   errorOccured=1;
       //   continue;
       // }
@@ -397,3 +398,166 @@ HXTStatus hxtTetVerify(HXTMesh* mesh)
     return HXT_STATUS_ERROR;
   return HXT_STATUS_OK;
 }
+
+
+/***************************************
+ * remove ghost tetrahedra from the mesh
+ * see header for more information
+ *
+ * A tetrahedron is a ghost when its 4th node is HXT_GHOST_VERTEX.
+ * Ghosts get the deleted flag, then hxtRemoveDeleted() drops every
+ * flagged tetrahedron (ghost or not) from the mesh.
+ ***************************************/
+HXTStatus hxtRemoveGhosts(HXTMesh* mesh){
+  #pragma omp parallel for
+  for (uint64_t i=0; i<mesh->tetrahedra.num; i++) {
+    if(mesh->tetrahedra.node[4*i+3]==HXT_GHOST_VERTEX){
+      setDeletedFlag(mesh, i);
+    }
+  }
+
+  HXT_CHECK( hxtRemoveDeleted(mesh) );
+
+  return HXT_STATUS_OK;
+}
+
+
+
+/**********************************
+ * add ghost tetrahedra to the mesh
+ * see header for more information
+ *
+ * Every facet whose neighbor is HXT_NO_ADJACENT receives one new
+ * tetrahedron made of the 3 facet nodes + HXT_GHOST_VERTEX. Ghosts are
+ * then linked to each other through the hull edges they share.
+ *
+ * Returns the failing status (after releasing the temporary buffers)
+ * if an allocation or the edge sort fails, HXT_STATUS_OK otherwise.
+ **********************************/
+HXTStatus hxtAddGhosts(HXTMesh* mesh){
+  int maxThreads = omp_get_max_threads();
+
+  uint64_t* hullCount;
+  HXTGroup2* edges = NULL; // stays NULL until the allocation in the single region succeeds
+  uint64_t totalHullCount = 0;
+  HXT_CHECK( hxtMalloc(&hullCount, maxThreads*sizeof(uint64_t)) );
+
+  HXTStatus status = HXT_STATUS_OK;
+
+  #pragma omp parallel
+  {
+    int threadID = omp_get_thread_num();
+    hullCount[threadID] = 0;
+
+    // count the number of convex hull faces
+    #pragma omp for schedule(static)
+    for (uint64_t i=0; i<mesh->tetrahedra.num; i++) {
+      for (unsigned j=0; j<4; j++) {
+        if(mesh->tetrahedra.neigh[4*i+j]==HXT_NO_ADJACENT)
+          hullCount[threadID]++;
+      }
+    }
+
+    // exclusive scan + allocation
+    #pragma omp barrier
+    #pragma omp single
+    {
+      int nthreads = omp_get_num_threads();
+
+      // after the scan, hullCount[t] is the index of the first ghost of thread t
+      for (int i=0; i<nthreads; i++) {
+        uint64_t tsum = hullCount[i] + totalHullCount;
+        hullCount[i] = totalHullCount;
+        totalHullCount = tsum;
+      }
+
+      status = hxtTetrahedraReserve(mesh, totalHullCount+mesh->tetrahedra.num);
+      if(status!=HXT_STATUS_OK)
+        HXT_TRACE(status);
+      else {
+        status = hxtAlignedMalloc(&edges, 3*totalHullCount*sizeof(HXTGroup2));
+        if(status!=HXT_STATUS_OK)
+          HXT_TRACE(status);
+      }
+    }
+
+
+    if(status== HXT_STATUS_OK){
+      // create the Ghost tet.
+      // NOTE: the slots reserved by the scan are only right if this loop hands
+      // each thread the same iterations as the counting loop (both are static)
+      #pragma omp for schedule(static)
+      for (uint64_t i=0; i<mesh->tetrahedra.num; i++) {
+        for (unsigned j=0; j<4; j++) {
+          if(mesh->tetrahedra.neigh[4*i+j]==HXT_NO_ADJACENT){
+            uint64_t newGhost = hullCount[threadID] + mesh->tetrahedra.num;
+
+            mesh->tetrahedra.neigh[4*i+j] = 4*newGhost+3;
+            mesh->tetrahedra.neigh[4*newGhost+3] = 4*i+j;
+
+            mesh->tetrahedra.colors[newGhost] = UINT16_MAX;
+            mesh->tetrahedra.flag[newGhost] = 0;
+            if(getFacetConstraint(mesh, i, j))
+              setFacetConstraint(mesh, newGhost, 3);
+
+            uint32_t v0, v1, v2;
+
+            v0 = mesh->tetrahedra.node[4*i+((j+1)&3)];
+            v1 = mesh->tetrahedra.node[4*i+((j+3)&2)];
+            v2 = mesh->tetrahedra.node[4*i+((j&2)^3)];
+
+            mesh->tetrahedra.node[4*newGhost+0] = v0;
+            mesh->tetrahedra.node[4*newGhost+1] = v1;
+            mesh->tetrahedra.node[4*newGhost+2] = v2;
+            mesh->tetrahedra.node[4*newGhost+3] = HXT_GHOST_VERTEX;
+
+            uint64_t index = 3*hullCount[threadID];
+
+            // key = min*numVertices+max identifies the edge; value = the ghost facet (4*tet+face) to link across it
+            edges[index+0].v[0] = (v0<v1)?(uint64_t) v0*mesh->vertices.num+v1 : (uint64_t) v1*mesh->vertices.num+v0;
+            edges[index+0].v[1] = 4*newGhost+2;
+            edges[index+1].v[0] = (v0<v2)?(uint64_t) v0*mesh->vertices.num+v2 : (uint64_t) v2*mesh->vertices.num+v0;
+            edges[index+1].v[1] = 4*newGhost+1;
+            edges[index+2].v[0] = (v1<v2)?(uint64_t) v1*mesh->vertices.num+v2 : (uint64_t) v2*mesh->vertices.num+v1;
+            edges[index+2].v[1] = 4*newGhost+0;
+
+            hullCount[threadID]++;
+          }
+        }
+      }
+    }
+  }
+
+  if(status!=HXT_STATUS_OK){
+    // hullCount is always allocated here; edges was not obtained on this path
+    hxtFree(&hullCount);
+    return status;
+  }
+
+  mesh->tetrahedra.num+=totalHullCount;
+
+  // now we have to find the adjacencies between ghosts
+  const uint64_t max = (uint64_t) mesh->vertices.num*mesh->vertices.num;
+  const uint64_t n = totalHullCount*3;
+  status = group2_sort_v0(edges, n, max);
+  if(status!=HXT_STATUS_OK){
+    HXT_TRACE(status);
+    hxtAlignedFree(&edges);
+    hxtFree(&hullCount);
+    return status;
+  }
+
+  // connect adjacencies
+  // (sorted keys come in pairs when each hull edge is shared by exactly 2 hull faces)
+  #pragma omp parallel for
+  for (uint64_t i=0; i<n; i+=2) {
+    mesh->tetrahedra.neigh[edges[i].v[1]] = edges[i+1].v[1];
+    mesh->tetrahedra.neigh[edges[i+1].v[1]] = edges[i].v[1];
+  }
+
+  // release the temporary buffers
+  HXT_CHECK( hxtAlignedFree(&edges) );
+  HXT_CHECK( hxtFree(&hullCount) );
+
+  return HXT_STATUS_OK;
+}
\ No newline at end of file
diff --git a/contrib/hxt/hxt_tetRepair.h b/contrib/hxt/hxt_tetRepair.h
index 8f87bfbc4f2b02f0652443b3b4b4ac8533069e7e..0ae279a9955bdaefeac508f9fdf729135a184c40 100644
--- a/contrib/hxt/hxt_tetRepair.h
+++ b/contrib/hxt/hxt_tetRepair.h
@@ -25,6 +25,18 @@ HXTStatus hxtTetVerify(HXTMesh* mesh);
 /** reorder tetrahedra in a reproducible manner */
 HXTStatus hxtTetReorder(HXTMesh* mesh);
 
+/** Removes ghost tetrahedra */
+HXTStatus hxtRemoveGhosts(HXTMesh* mesh);
+
+/** Adds ghost tetrahedra next to every tet. facet whose neighbor is HXT_NO_ADJACENT.\n
+ * THIS FUNCTION ASSUMES 2 THINGS:
+ *  - there are no ghost tetrahedra in the mesh yet
+ *  - facets with only 1 tetrahedron (neigh[face]==HXT_NO_ADJACENT) are all on the convex hull
+ *
+ * This function will not work properly in any other case...
+ */
+HXTStatus hxtAddGhosts(HXTMesh* mesh);
+
 
 #ifdef __cplusplus
 }
diff --git a/contrib/hxt/hxt_tetUtils.c b/contrib/hxt/hxt_tetUtils.c
index ef618ab7892761f06b17cacea0bb5599514bc4e9..39bd940407a03a30398ccaf9353a9014fda1821d 100644
--- a/contrib/hxt/hxt_tetUtils.c
+++ b/contrib/hxt/hxt_tetUtils.c
@@ -12,7 +12,7 @@ HXTStatus hxtRemoveDeleted(HXTMesh* mesh)
 {
   #pragma omp parallel for
   for (uint64_t i=0; i<mesh->tetrahedra.num; i++) {
-    if(isTetDeleted(mesh, i)){
+    if(getDeletedFlag(mesh, i)){
       for (unsigned j=0; j<4; j++) {
         uint64_t neigh = mesh->tetrahedra.neigh[4*i+j]; // neighbor of the deleted tet
         if(neigh!=HXT_NO_ADJACENT) // the deleted tet had a neighbor pointing to him...
@@ -25,8 +25,8 @@ HXTStatus hxtRemoveDeleted(HXTMesh* mesh)
   uint64_t left = 0;
 
   while(1) {
-    while(left < right && isTetDeleted(mesh, right)) right--;
-    while(left < right && isTetDeleted(mesh, left)==0) left++;
+    while(left < right && getDeletedFlag(mesh, right)) right--;
+    while(left < right && getDeletedFlag(mesh, left)==0) left++;
 
     if(left >= right)
       break;
@@ -47,148 +47,9 @@ HXTStatus hxtRemoveDeleted(HXTMesh* mesh)
     left++; right--;
   }
 
-  if(left==right && isTetDeleted(mesh, left)==0) left++;
+  if(left==right && getDeletedFlag(mesh, left)==0) left++;
 
   mesh->tetrahedra.num = left;
 
-  return HXT_STATUS_OK;
-}
-
-
-/***************************************
- * remove ghost tetrahedra from the mesh
- * see header for more information
- ***************************************/
-HXTStatus hxtRemoveGhosts(HXTMesh* mesh){
-  #pragma omp parallel for
-  for (uint64_t i=0; i<mesh->tetrahedra.num; i++) {
-    if(mesh->tetrahedra.node[4*i+3]==HXT_GHOST_VERTEX){
-      markTetAsDeleted(mesh, i);
-    }
-  }
-
-  HXT_CHECK( hxtRemoveDeleted(mesh) );
-
-  return HXT_STATUS_OK;
-}
-
-
-
-/**********************************
- * add ghost tetrahedra to the mesh
- * see header for more information
- **********************************/
-HXTStatus hxtAddGhosts(HXTMesh* mesh){
-  int maxThreads = omp_get_max_threads();
-
-  uint64_t* hullCount;
-  HXTGroup2* edges;
-  uint64_t totalHullCount = 0;
-  HXT_CHECK( hxtMalloc(&hullCount, maxThreads*sizeof(uint64_t)) );
-
-  HXTStatus status = HXT_STATUS_OK;
-
-  #pragma omp parallel
-  {
-    int threadID = omp_get_thread_num();
-    hullCount[threadID] = 0;
-
-    // count the number of convex hull faces
-    #pragma omp for schedule(static)
-    for (uint64_t i=0; i<mesh->tetrahedra.num; i++) {
-      for (unsigned j=0; j<4; j++) {
-        if(mesh->tetrahedra.neigh[4*i+j]==HXT_NO_ADJACENT)
-          hullCount[threadID]++;
-      }
-    }
-
-    // exclusive scan + allocation
-    #pragma omp barrier
-    #pragma omp single
-    {
-      int nthreads = omp_get_num_threads();
-
-      for (int i=0; i<nthreads; i++) {
-        uint64_t tsum = hullCount[i] + totalHullCount;
-        hullCount[i] = totalHullCount;
-        totalHullCount = tsum;
-      }
-
-      status = hxtTetrahedraReserve(mesh, totalHullCount+mesh->tetrahedra.num);
-      if(status!=HXT_STATUS_OK)
-        HXT_TRACE(status);
-      else {
-        status = hxtAlignedMalloc(&edges, 3*totalHullCount*sizeof(HXTGroup2));
-        if(status!=HXT_STATUS_OK)
-          HXT_TRACE(status);
-      }
-    }
-
-
-    if(status== HXT_STATUS_OK){
-      // create the Ghost tet.
-      #pragma omp for schedule(static)
-      for (uint64_t i=0; i<mesh->tetrahedra.num; i++) {
-        for (unsigned j=0; j<4; j++) {
-          if(mesh->tetrahedra.neigh[4*i+j]==HXT_NO_ADJACENT){
-            uint64_t newGhost = hullCount[threadID] + mesh->tetrahedra.num;
-
-            mesh->tetrahedra.neigh[4*i+j] = 4*newGhost+3;
-            mesh->tetrahedra.neigh[4*newGhost+3] = 4*i+j;
-
-            mesh->tetrahedra.colors[newGhost] = UINT16_MAX;
-            mesh->tetrahedra.flag[newGhost] = 0;
-            if(isFacetConstrained(mesh, 4*i+j))
-              constrainFacet(mesh, 4*newGhost+3);
-
-            uint32_t v0, v1, v2;
-
-            v0 = mesh->tetrahedra.node[4*i+((j+1)&3)];
-            v1 = mesh->tetrahedra.node[4*i+((j+3)&2)];
-            v2 = mesh->tetrahedra.node[4*i+((j&2)^3)];
-
-            mesh->tetrahedra.node[4*newGhost+0] = v0;
-            mesh->tetrahedra.node[4*newGhost+1] = v1;
-            mesh->tetrahedra.node[4*newGhost+2] = v2;
-            mesh->tetrahedra.node[4*newGhost+3] = HXT_GHOST_VERTEX;
-
-            uint64_t index = 3*hullCount[threadID];
-
-            edges[index+0].v[0] = (v0<v1)?(uint64_t) v0*mesh->vertices.num+v1 : (uint64_t) v1*mesh->vertices.num+v0;
-            edges[index+0].v[1] = 4*newGhost+2;
-            edges[index+1].v[0] = (v0<v2)?(uint64_t) v0*mesh->vertices.num+v2 : (uint64_t) v2*mesh->vertices.num+v0;
-            edges[index+1].v[1] = 4*newGhost+1;
-            edges[index+2].v[0] = (v1<v2)?(uint64_t) v1*mesh->vertices.num+v2 : (uint64_t) v2*mesh->vertices.num+v1;
-            edges[index+2].v[1] = 4*newGhost+0;
-
-            hullCount[threadID]++;
-          }
-        }
-      }
-    }
-  }
-
-  if(status!=HXT_STATUS_OK){
-    return status;
-  }
-
-  mesh->tetrahedra.num+=totalHullCount;
-
-  // now we have to find the adjacencies between ghosts
-  const uint64_t max = (uint64_t) mesh->vertices.num*mesh->vertices.num;
-  const uint64_t n = totalHullCount*3;
-  HXT_CHECK(  group2_sort_v0(edges, n, max) );
-
-  // connect adjacencies
-  #pragma omp parallel for
-  for (uint64_t i=0; i<n; i+=2) {
-    mesh->tetrahedra.neigh[edges[i].v[1]] = edges[i+1].v[1];
-    mesh->tetrahedra.neigh[edges[i+1].v[1]] = edges[i].v[1];
-  }
-
-  // first make a list with all edges
-  HXT_CHECK( hxtAlignedFree(&edges) );
-  HXT_CHECK( hxtFree(&hullCount) );
-
   return HXT_STATUS_OK;
 }
\ No newline at end of file
diff --git a/contrib/hxt/hxt_tetUtils.h b/contrib/hxt/hxt_tetUtils.h
index b8d382e1bb1f151c3e2e1f34fd5c7134248c1c33..d637df37a3fb60d04d458ea85ef9b421c56c3444 100644
--- a/contrib/hxt/hxt_tetUtils.h
+++ b/contrib/hxt/hxt_tetUtils.h
@@ -41,18 +41,6 @@ static inline HXTStatus hxtTetrahedraDoubleSize(HXTMesh* mesh) {
  */
 HXTStatus hxtRemoveDeleted(HXTMesh* mesh);
 
-/** Removes ghost tetrahedra */
-HXTStatus hxtRemoveGhosts(HXTMesh* mesh);
-
-/** Adds ghost tetrahedra to adjoin tet. whose neighbors are HXT_NO_ADJACENT.\n
- * THIS FUNCTION SUPPOSE 2 THINGS:
- *  - there are no ghost tetrahedra
- *  - face with 1 tetrahedra (neigh[face]==HXT_NO_ADJACENT) are all on the convex hull
- *
- * this function will not work properly in any other cases...
- */
-HXTStatus hxtAddGhosts(HXTMesh* mesh);
-
 
 #ifdef __cplusplus
 }
diff --git a/contrib/hxt/hxt_tet_aspect_ratio.c b/contrib/hxt/hxt_tet_aspect_ratio.c
index fc1a9011d3ba960231f901d818666a07092315af..3320d5915d9a5c758a7a63571ef44e82f034a773 100644
--- a/contrib/hxt/hxt_tet_aspect_ratio.c
+++ b/contrib/hxt/hxt_tet_aspect_ratio.c
@@ -67,23 +67,23 @@ double hxtTetAspectFastRatio (double a[3],  double b[3], double c[3], double d[3
     ad[i] = d[i] - a[i]; // AD
   }
  
-  double adxac0 = ad[1]*ac[2] - ad[2]*ac[1];
-  double abxad0 = ab[1]*ad[2] - ab[2]*ad[1];
-  double acxab0 = ac[1]*ab[2] - ac[2]*ab[1];
-  double volume6 = ab[0]*adxac0 + ac[0]*abxad0 + ad[0]*acxab0;
+  double acxad0 = ac[1]*ad[2] - ac[2]*ad[1];
+  double adxab0 = ad[1]*ab[2] - ad[2]*ab[1];
+  double abxac0 = ab[1]*ac[2] - ab[2]*ac[1];
+  double volume6 = ab[0]*acxad0 + ac[0]*adxab0 + ad[0]*abxac0;
  
    // abort as early as possible
   if(volume6<=0.0)
     return 0.0;
  
-  double adxac1 = ad[2]*ac[0] - ad[0]*ac[2];
-  double adxac2 = ad[0]*ac[1] - ad[1]*ac[0];
+  double acxad1 = ac[2]*ad[0] - ac[0]*ad[2];
+  double acxad2 = ac[0]*ad[1] - ac[1]*ad[0];
  
-  double abxad1 = ab[2]*ad[0] - ab[0]*ad[2];
-  double abxad2 = ab[0]*ad[1] - ab[1]*ad[0];
+  double adxab1 = ad[2]*ab[0] - ad[0]*ab[2];
+  double adxab2 = ad[0]*ab[1] - ad[1]*ab[0];
  
-  double acxab1 = ac[2]*ab[0] - ac[0]*ab[2];
-  double acxab2 = ac[0]*ab[1] - ac[1]*ab[0];
+  double abxac1 = ab[2]*ac[0] - ab[0]*ac[2];
+  double abxac2 = ab[0]*ac[1] - ab[1]*ac[0];
 
   for (int i=0; i<3; i++) {
     db[i] = b[i] - d[i]; // DB = B-D = AB-AD
@@ -91,14 +91,14 @@ double hxtTetAspectFastRatio (double a[3],  double b[3], double c[3], double d[3
     cd[i] = d[i] - c[i]; // CD = D-c = AD-AC
   }
  
-  double cdxbc0 = cd[1]*bc[2] - cd[2]*bc[1]; // = adxac0+acxab0+abxad0;
-  double cdxbc1 = cd[2]*bc[0] - cd[0]*bc[2]; // = adxac1+acxab1+abxad1;
-  double cdxbc2 = cd[0]*bc[1] - cd[1]*bc[0]; // = adxac2+acxab2+abxad2;
-
-  double areaSum = sqrt(adxac0*adxac0 + adxac1*adxac1 + adxac2*adxac2)
-                 + sqrt(abxad0*abxad0 + abxad1*abxad1 + abxad2*abxad2)
-                 + sqrt(acxab0*acxab0 + acxab1*acxab1 + acxab2*acxab2)
-                 + sqrt(cdxbc0*cdxbc0 + cdxbc1*cdxbc1 + cdxbc2*cdxbc2);
+  double bcxcd0 = bc[1]*cd[2] - bc[2]*cd[1]; // = acxad0+abxac0+adxab0;
+  double bcxcd1 = bc[2]*cd[0] - bc[0]*cd[2]; // = acxad1+abxac1+adxab1;
+  double bcxcd2 = bc[0]*cd[1] - bc[1]*cd[0]; // = acxad2+abxac2+adxab2;
+
+  double areaSum = sqrt(acxad0*acxad0 + acxad1*acxad1 + acxad2*acxad2)
+                 + sqrt(adxab0*adxab0 + adxab1*adxab1 + adxab2*adxab2)
+                 + sqrt(abxac0*abxac0 + abxac1*abxac1 + abxac2*abxac2)
+                 + sqrt(bcxcd0*bcxcd0 + bcxcd1*bcxcd1 + bcxcd2*bcxcd2);
 
   double l = ab[0]*ab[0] + ab[1]*ab[1] + ab[2]*ab[2]; // |AB|²
   double l2 = ac[0]*ac[0] + ac[1]*ac[1] + ac[2]*ac[2]; // |AC|²
diff --git a/contrib/hxt/hxt_tools.c b/contrib/hxt/hxt_tools.c
index 8ccdc28e667b701a2939658dbdf53b001dbc2ffd..26e1586edb57037d985ef49438fb82b280740528 100644
--- a/contrib/hxt/hxt_tools.c
+++ b/contrib/hxt/hxt_tools.c
@@ -1,6 +1,57 @@
 #include "hxt_api.h"
 #include "hxt_tools.h"
 
+/* Euclidean norm of the 3-vector v, written to *norm2. Always returns HXT_STATUS_OK. */
+HXTStatus hxtNorm2V3(double v[3], double* norm2){
+  *norm2 = sqrt(v[0]*v[0]+v[1]*v[1]+v[2]*v[2]);
+  return HXT_STATUS_OK;
+}
+
+/* Scales v in place to unit Euclidean length.
+ * Returns HXT_STATUS_ERROR and leaves v untouched when its norm is 0
+ * (dividing would fill v with NaN), HXT_STATUS_OK otherwise. */
+HXTStatus hxtNormalizeV3(double v[3]){
+  double norm=0.0;
+  hxtNorm2V3(v,&norm);
+  if(norm==0.0)
+    return HXT_STATUS_ERROR;
+  v[0] /= norm;
+  v[1] /= norm;
+  v[2] /= norm;
+  return HXT_STATUS_OK;
+}
+
+/* Cross product res = a x b. res must not alias a or b: its components are
+ * written while a and b are still being read. Always returns HXT_STATUS_OK. */
+HXTStatus hxtCrossProductV3(double a[3], double b[3], double res[3]){
+  res[0] = a[1]*b[2] - a[2]*b[1];
+  res[1] = a[2]*b[0] - a[0]*b[2];
+  res[2] = a[0]*b[1] - a[1]*b[0];
+  return HXT_STATUS_OK;
+}
+
+/* Determinant of the 2x2 matrix mat, written to *det. Always returns HXT_STATUS_OK. */
+HXTStatus hxtDet2x2(double mat[2][2], double* det){
+  *det = mat[0][0]*mat[1][1]-mat[0][1]*mat[1][0];
+  return HXT_STATUS_OK;
+}
+
+/* Inverse of the 2x2 matrix mat, written to inv; the determinant is written to *det.
+ * Returns HXT_STATUS_ERROR (inv left unwritten) when the determinant is exactly 0. */
+HXTStatus hxtInv2x2(double mat[2][2], double inv[2][2], double *det){
+  hxtDet2x2(mat,det);
+  if(*det) {
+    const double ud = 1. / *det;
+    inv[0][0] =  mat[1][1] * ud;
+    inv[0][1] = -mat[0][1] * ud;
+    inv[1][0] = -mat[1][0] * ud;
+    inv[1][1] =  mat[0][0] * ud;
+  } else {
+    return HXT_STATUS_ERROR;
+  }
+  return HXT_STATUS_OK;
+}
+
 HXTStatus hxtDet3x3(double mat[3][3], double *det)
 {
   *det = (mat[0][0] * (mat[1][1] * mat[2][2] - mat[1][2] * mat[2][1]) -
@@ -68,3 +108,13 @@ HXTStatus hxtInv4x4ColumnMajor(double m[16], double invOut[16], double *det)
   return HXT_STATUS_OK;
 }
 
+/* Fills mat from the 4 node coordinates of a linear tetrahedron, given per axis:
+ * column c holds node c+1 minus node 0; rows are x, y, z. Always returns HXT_STATUS_OK. */
+HXTStatus hxtJacobianLinTet(double *x , double *y, double *z , double mat[3][3]){
+  for (int col=0; col<3; col++) {
+    mat[0][col] = x[col+1] - x[0];
+    mat[1][col] = y[col+1] - y[0];
+    mat[2][col] = z[col+1] - z[0];
+  }
+  return HXT_STATUS_OK;
+}
diff --git a/contrib/hxt/hxt_tools.h b/contrib/hxt/hxt_tools.h
index 9d456fd593c7ee6e787810958b94c6c1442b3ef3..cbb7f6aaa608d0678d9c49ab27fc21dbfaca8b27 100644
--- a/contrib/hxt/hxt_tools.h
+++ b/contrib/hxt/hxt_tools.h
@@ -33,6 +33,8 @@ extern "C" {
 #define __assume_aligned(x,y)
 #endif
 
+#define HXT_UNUSED(x) (void)(x)  // evaluates x and discards it: portable way to avoid warnings about an unused variable or parameter
+
 
 /*********************************************************
  * Hextreme malloc implementation
@@ -46,7 +48,7 @@ ___
 |  int arrayLength = ...;
 |  int *array;
 |  HXT_CHECK( hxtMalloc(&array, sizeof(int)*arrayLength) );
-|
+|  
 |  array[0] = ...;
 |  [...]
 |  array[arrayLength-1] = ...;
@@ -97,19 +99,42 @@ static inline HXTStatus hxtRealloc(void* ptrToPtr, size_t size)
   return HXT_STATUS_OK;
 }
 
-// FIXME Gmsh: aligned routines do not seem to work on 32 bit machines
-#include <stdint.h>
-#if UINTPTR_MAX == 0xffffffff
-static inline HXTStatus hxtGetAlignedBlockSize(void* ptrToPtr, size_t* size){ *size = 0; return HXT_STATUS_OK; }
-static inline HXTStatus hxtAlignedMalloc(void* ptrToPtr, size_t size){ return hxtMalloc(ptrToPtr, size); }
-static inline HXTStatus hxtAlignedFree(void* ptrToPtr){ return hxtFree(ptrToPtr); }
-static inline HXTStatus hxtAlignedRealloc(void* ptrToPtr, size_t size){ return hxtRealloc(ptrToPtr, size); }
-#else
 
+#if defined ( HAVE_MSDN_ALIGNED_MALLOC ) // microsoft implementation
+#include <malloc.h>
+#include <errno.h>
+
+static inline HXTStatus hxtAlignedMalloc(void* ptrToPtr, size_t size) {
+  void** p = (void**)ptrToPtr; /* ptrToPtr is the address of the caller's pointer; the new block is stored through it */
+  *p = _aligned_malloc(size, SIMD_ALIGN);
+  if (*p == NULL && size != 0) /* errno is deliberately not tested: it is never reset here and may hold a stale ENOMEM */
+    return HXT_ERROR(HXT_STATUS_OUT_OF_MEMORY);
+  return HXT_STATUS_OK;
+}
+
+static inline HXTStatus hxtAlignedFree(void* ptrToPtr)
+{ /* release the aligned block whose address is stored at ptrToPtr, then null that pointer */
+  void** const slot = (void**)ptrToPtr;
+  _aligned_free(*slot);
+  *slot = NULL;
+  return HXT_STATUS_OK;
+}
+
+static inline HXTStatus hxtAlignedRealloc(void* ptrToPtr, size_t size)
+{
+  /* Resize the SIMD_ALIGN-aligned block whose address is stored at ptrToPtr. Failure is detected
+     from the NULL return only: errno is never reset here and may hold a stale ENOMEM. */
+  void** p = (void**)ptrToPtr;
+  void* newptr = _aligned_realloc(*p, size, SIMD_ALIGN);
+  if (newptr == NULL && size != 0) /* also catches a failed request when *p was NULL */
+    return HXT_ERROR(HXT_STATUS_OUT_OF_MEMORY); /* *p is left as it was */
+
+  *p = newptr; /* may be NULL when size == 0 */
+  return HXT_STATUS_OK;
+}
+
+#else  // HEXTREME aligned malloc implementation
 
-/*********************************************************
- * Hextreme aligned malloc implementation
- *********************************************************/
 static inline HXTStatus hxtGetAlignedBlockSize(void* ptrToPtr, size_t* size)
 {
   char** p2 = *(char***)(ptrToPtr);
@@ -181,7 +206,7 @@ static inline HXTStatus hxtAlignedRealloc(void* ptrToPtr, size_t size)
     HXT_CHECK(hxtAlignedFree(ptrToPtr));
     return HXT_STATUS_OK;
   }
-
+  
   size_t old_size;
   HXT_CHECK( hxtGetAlignedBlockSize(ptrToPtr, &old_size) );
 
@@ -199,7 +224,7 @@ static inline HXTStatus hxtAlignedRealloc(void* ptrToPtr, size_t size)
   return HXT_STATUS_OK;
 }
 
-#endif // FIXME Gmsh
+#endif // HEXTREME malloc implementation
 
 /*********************************************************
   A way to call rand with a seed to get a reproducible
@@ -207,7 +232,7 @@ static inline HXTStatus hxtAlignedRealloc(void* ptrToPtr, size_t size)
   For example, we do not call srand() each time we
   call a reproducible Delaunay, else if someone was calling
   rand(); Delaunay(); rand(); ...
-  he would always get the same result. We use
+  he would always get the same result. We use 
   hxtReproducibleRand() instead
 
   !!!! 1st seed must absolutely be 1 !!!!
@@ -218,14 +243,28 @@ static inline uint32_t hxtReproducibleLCG(uint32_t *seed)
   return *seed;
 }
 
+/*********************************************************
+ * Operations on 3D vectors
+ *********************************************************/
+HXTStatus hxtNorm2V3(double v[3], double* norm2);
+HXTStatus hxtNormalizeV3(double v[3]);
+HXTStatus hxtCrossProductV3(double a[3], double b[3], double res[3]);
+
 /*********************************************************
  * Matrix operations
  *********************************************************/
+HXTStatus hxtDet2x2(double mat[2][2], double* det);
+HXTStatus hxtInv2x2(double mat[2][2], double inv[2][2], double *det);
 HXTStatus hxtDet3x3(double mat[3][3], double *det);
 HXTStatus hxtInv3x3(double mat[3][3], double inv[3][3], double *det);
 
 HXTStatus hxtInv4x4ColumnMajor(double mat[16], double inv[16], double *det);
 
+/*********************************************************
+ * Operations on linear Tet
+ *********************************************************/
+HXTStatus hxtJacobianLinTet(double *x , double *y, double *z , double mat[3][3]);
+  
 #ifndef M_PI
   #define M_PI 3.14159265358979323846264338327950
 #endif // !M_PI
diff --git a/contrib/hxt/hxt_vertices.c b/contrib/hxt/hxt_vertices.c
index 5e1a15770c824339d67a198efa43c45e3932662d..fa176dcbb9cba886680a0febc5a42f4abca00067 100644
--- a/contrib/hxt/hxt_vertices.c
+++ b/contrib/hxt/hxt_vertices.c
@@ -160,6 +160,17 @@ HXT_ASSERT_MSG(bbox->min[0]<bbox->max[0] ||
     double vy = vertices[i].coord[1];
     double vz = vertices[i].coord[2];
 
+    if(vx<bbox->min[0] || vx>bbox->max[0] ||
+       vy<bbox->min[1] || vy>bbox->max[1] ||
+       vz<bbox->min[2] || vz>bbox->max[2]) {
+      /* If a tetrahedron contains a vertex that is outside the bounding box,
+         it will not be refined and will never be in any cavity.
+         Vertices outside the bounding box get UINT64_MAX as their Hilbert index.
+      */
+      vertices[i].padding.hilbertDist = UINT64_MAX;
+      continue;
+    }
+
     if(vx < mean[0]){
       vx = vx*div1[0]+min1[0];
     }
@@ -189,16 +200,7 @@ HXT_ASSERT_MSG(bbox->min[0]<bbox->max[0] ||
 
     uint64_t bits;
 
-// #ifdef DEBUG
-    // if(coord[0]>nmax || coord[1]>nmax || coord[2]>nmax)
-    //   printf("coordinate out of bbox\n");
-
-    // if((coord[0]<nmax/2 && vertices[i].coord[0] > mean[0]) || (coord[1]<nmax/2 && vertices[i].coord[1] > mean[1]) || (coord[2]<nmax/2 && vertices[i].coord[2] > mean[2])){
-    //   printf("coordinate on the wrong size of bbox\n");
-    // }
-// #endif
-
-    // this part is for Moore's curve... comment it to get hilbert curve :-)
+    #if 1 // this part is for Moore's curve...
     {
       uint32_t s = 1U<<(level-1);
       uint32_t rx = (x & s) != 0;
@@ -224,6 +226,7 @@ HXT_ASSERT_MSG(bbox->min[0]<bbox->max[0] ||
       SWAP(x,z);
       SWAP(x,y);
     }
+    #endif
 
     for (int j = level-2; j>=0; j--) {
       uint32_t s = 1U<<j;
@@ -275,8 +278,9 @@ HXT_ASSERT_MSG(bbox->min[0]<bbox->max[0] ||
 }
 
 
-static inline uint64_t getVertexDist64(HXTVertex* const __restrict__  v, const void* user_data)
+static inline uint64_t getVertexDist64(HXTVertex* const __restrict__  v, const void* userData)
 {
+  HXT_UNUSED(userData);
   return v->padding.hilbertDist;
 }
 
@@ -286,8 +290,9 @@ static HXTStatus hxtVerticesSort64(HXTVertex* const __restrict__  vertices, cons
   return HXT_STATUS_OK;
 }
 
-static inline uint32_t getVertexDist32(HXTVertex* const __restrict__  v, const void* user_data)
+static inline uint32_t getVertexDist32(HXTVertex* const __restrict__  v, const void* userData)
 {
+  HXT_UNUSED(userData);
   return v->padding.hilbertDist;
 }
 
@@ -319,8 +324,9 @@ HXTStatus hxtVerticesSort(HXTVertex* const __restrict__  vertices, const uint32_
   return HXT_STATUS_OK;
 }
 
-static inline uint64_t getNodeInfoDist64(hxtNodeInfo*  const __restrict__ nodeInfo, const void* user_data)
+static inline uint64_t getNodeInfoDist64(hxtNodeInfo*  const __restrict__ nodeInfo, const void* userData)
 {
+  HXT_UNUSED(userData);
   return nodeInfo->hilbertDist;
 }
 
@@ -330,8 +336,9 @@ static HXTStatus hxtNodeInfoSort64(hxtNodeInfo*  const __restrict__ array, const
   return HXT_STATUS_OK;
 }
 
-static inline uint32_t getNodeInfoDist32(hxtNodeInfo*  const __restrict__ nodeInfo, const void* user_data)
+static inline uint32_t getNodeInfoDist32(hxtNodeInfo*  const __restrict__ nodeInfo, const void* userData)
 {
+  HXT_UNUSED(userData);
   return nodeInfo->hilbertDist;
 }
 
diff --git a/contrib/hxt/predicates.c b/contrib/hxt/predicates.c
index 727a32c3cbd86a7ed9ffa5f37bd74a16ab1341d7..c9b7b5405863d38280cc0b489f8ae37431e46baa 100644
--- a/contrib/hxt/predicates.c
+++ b/contrib/hxt/predicates.c
@@ -548,43 +548,49 @@ int fast_expansion_sum(int elen, const REAL *e, int flen, const REAL *f, REAL *h
   eindex = findex = 0;
   if ((fnow > enow) == (fnow > -enow)) {
     Q = enow;
-    enow = e[++eindex];
+    ++eindex;
   } else {
     Q = fnow;
-    fnow = f[++findex];
+    ++findex;
   }
   hindex = 0;
   if ((eindex < elen) && (findex < flen)) {
+    enow = e[eindex];
+    fnow = f[findex];
     if ((fnow > enow) == (fnow > -enow)) {
       Fast_Two_Sum(enow, Q, Qnew, h[0]);
-      enow = e[++eindex];
+      ++eindex;
     } else {
       Fast_Two_Sum(fnow, Q, Qnew, h[0]);
-      fnow = f[++findex];
+      ++findex;
     }
     Q = Qnew;
     hindex = 1;
     while ((eindex < elen) && (findex < flen)) {
+      enow = e[eindex];
+      fnow = f[findex];
       if ((fnow > enow) == (fnow > -enow)) {
         Two_Sum(Q, enow, Qnew, h[hindex]);
-        enow = e[++eindex];
+        ++eindex;
       } else {
         Two_Sum(Q, fnow, Qnew, h[hindex]);
-        fnow = f[++findex];
+        ++findex;
       }
       Q = Qnew;
       hindex++;
     }
   }
   while (eindex < elen) {
+    enow = e[eindex];
     Two_Sum(Q, enow, Qnew, h[hindex]);
-    enow = e[++eindex];
+    ++eindex;
     Q = Qnew;
     hindex++;
   }
   while (findex < flen) {
+    fnow = f[findex];
     Two_Sum(Q, fnow, Qnew, h[hindex]);
-    fnow = f[++findex];
+    ++findex;
     Q = Qnew;
     hindex++;
   }
diff --git a/contrib/hxt/tetgenBR.cxx b/contrib/hxt/tetgenBR.cxx
index e8eecc4aa6854d011de2a585ea5a2f3c37dbb432..ec1969f10adfa21b8fb1a9bcf521cf56952243a4 100644
--- a/contrib/hxt/tetgenBR.cxx
+++ b/contrib/hxt/tetgenBR.cxx
@@ -200,7 +200,7 @@ bool tetgenbehavior::parse_commandline(int argc, char **argv)
             k++;
           }
           workstring[k] = '\0';
-          brio_threshold = (int) strtol(workstring, (char **) &workstring, 0);
+          brio_threshold = (int) strtol(workstring, (char**) NULL, 0);
         }
         if ((argv[i][j + 1] == '/') || (argv[i][j + 1] == ',')) {
           j++;
@@ -229,7 +229,7 @@ bool tetgenbehavior::parse_commandline(int argc, char **argv)
               k++;
             }
             workstring[k] = '\0';
-            hilbert_limit = (int) strtol(workstring, (char **) &workstring, 0);
+            hilbert_limit = (int) strtol(workstring, (char **) NULL, 0);
           }
         }
         if ((argv[i][j + 1] == '/') || (argv[i][j + 1] == ',')) {
@@ -8473,7 +8473,11 @@ void tetgenmesh::randomsample(point searchpt,triface *searchtet)
 enum tetgenmesh::locateresult 
   tetgenmesh::locate(point searchpt, triface* searchtet, int chkencflag)
 {
+#ifndef NDEBUG
+  point torg=NULL, tdest=NULL, tapex=NULL, toppo;
+#else
   point torg, tdest, tapex, toppo;
+#endif
   enum {ORGMOVE, DESTMOVE, APEXMOVE} nextmove;
   REAL ori, oriorg, oridest, oriapex;
   enum locateresult loc = OUTSIDE;
diff --git a/contrib/hxt/tetgenBR.h b/contrib/hxt/tetgenBR.h
index b88347e4de2a7511134076e3cfb697685446e1a3..d47ef862f8edee8cc58563719081be7270aaf514 100644
--- a/contrib/hxt/tetgenBR.h
+++ b/contrib/hxt/tetgenBR.h
@@ -1260,7 +1260,7 @@ public:
 
   // Comment: These three functions are implemented directly in:
   //   gmsh_wrk/Mesh/meshGRegionBoundaryRecovery.cpp
-  bool reconstructmesh(void *);
+  int reconstructmesh(void *);
   void outsurfacemesh(const char* mfilename);
   void outmesh2medit(const char* mfilename);
 
@@ -1501,6 +1501,7 @@ static selfint_event sevent;
 inline void terminatetetgen(tetgenmesh *m, int x)
 {
 #ifdef TETLIBRARY
+  (void) m; // parameter is unused (suppress warning)
   throw x;
 #else
   switch (x) {
@@ -1566,7 +1567,7 @@ inline void tetgenmesh::decode(tetrahedron ptr, triface& t) {
 
 inline void tetgenmesh::bond(triface& t1, triface& t2) {
   //  printf("%d %d %d\n",t1.ver,t2.ver,bondtbl[t1.ver][t2.ver]);
-  t1.tet[t1.ver & 3] = encode2(t2.tet, bondtbl[t1.ver][t2.ver]);
+  t1.tet[t1.ver & 3] = encode2(t2.tet, bondtbl[t1.ver][t2.ver]); // bondtbl[i][j] = (i/4*4 + j)%12
   t2.tet[t2.ver & 3] = encode2(t1.tet, bondtbl[t2.ver][t1.ver]);
 }