diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1f3e966dee3cea8f67c9a095d985ad618738b236..1686be617d8a35a3f6cbb82945202e824f126430 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1312,7 +1312,7 @@ endif(NOWARN)
 # disable compile optimization on some known problematic files
 check_cxx_compiler_flag("-O0" NOOPT)
 if(NOOPT)
-  file(GLOB_RECURSE NOOPT_SRC Numeric/robustPredicates.cpp Mesh/BDS.cpp
+  file(GLOB_RECURSE NOOPT_SRC Mesh/BDS.cpp
                               Parser/Gmsh.tab.cpp contrib/Tetgen*/predicates.cxx)
   foreach(FILE ${NOOPT_SRC})
     get_source_file_property(PROP ${FILE} COMPILE_FLAGS)
diff --git a/Mesh/delaunay3d.cpp b/Mesh/delaunay3d.cpp
index dea924898fe661474f67b61ae61b5fd7a35f6194..25eec59e26aedeecd100e47b70154725e0b89c10 100644
--- a/Mesh/delaunay3d.cpp
+++ b/Mesh/delaunay3d.cpp
@@ -346,7 +346,6 @@ static void delaunayCavity2 (Tet *t,
 }
 
 static void delaunayCavity (Tet *t,
-			    Tet *prev,
 			    Vertex *v,
 			    cavityContainer &cavity,
 			    connContainer &bnd,
@@ -361,7 +360,7 @@ static void delaunayCavity (Tet *t,
     }
     else if (neigh->inSphere(v,thread)){
       if (!(neigh->isSet(thread, iPnt))) {
-	delaunayCavity (neigh, t, v, cavity,bnd,thread, iPnt); 
+	delaunayCavity (neigh, v, cavity,bnd,thread, iPnt); 
       }
     }
     else {
@@ -529,7 +528,7 @@ void delaunayTrgl (const unsigned int numThreads,
   double totCavityGlob=0;
 #endif
 
-  double t1,t2=0,t3=0,t4=0;
+  //  double t1,t2=0,t3=0,t4=0;
 
   //  checkLocalDelaunayness(allocator, 0, "initial");
 
@@ -571,7 +570,8 @@ void delaunayTrgl (const unsigned int numThreads,
 #ifdef _HAVE_NUMA
       allocatedVerts [K] = (Vertex*)numa_alloc_local (locSizeK[K]*sizeof(Vertex));
 #else
-      allocatedVerts [K] = (Vertex*)calloc (locSizeK[K],sizeof(Vertex));
+      //      allocatedVerts [K] = (Vertex*)calloc (locSizeK[K],sizeof(Vertex));
+      allocatedVerts [K] = new Vertex [locSizeK[K]];
 #endif
       for (unsigned int iP=0 ; iP < locSizeK[K] ; iP++){
 	allocatedVerts[K][iP] = *(assignTo[K+myThread*NPTS_AT_ONCE][iP]);
@@ -591,7 +591,7 @@ void delaunayTrgl (const unsigned int numThreads,
       std::vector<Tet*> t(NPTS_AT_ONCE);
       //	  double c1 = Cpu();
       // FIND SEEDS
-      t1 = Cpu();
+      //      t1 = Cpu();
       for (unsigned int K=0; K< NPTS_AT_ONCE; K++) {
 	vToAdd[K] = iPGlob <  locSizeK[K] ? &allocatedVerts[K][iPGlob] : NULL;
 	if(vToAdd[K]){
@@ -606,10 +606,10 @@ void delaunayTrgl (const unsigned int numThreads,
 	  }
 	}
       }
-      t2+= Cpu() - t1;
+      //      t2+= Cpu() - t1;
       //      double c1 = Cpu();
       // BUILD CAVITIES
-      t1 = Cpu();
+      //      t1 = Cpu();
       for (unsigned int K=0; K< NPTS_AT_ONCE; K++) {
 	if(vToAdd[K]){
 	  cavityContainer &cavityK = cavity[K];
@@ -618,6 +618,7 @@ void delaunayTrgl (const unsigned int numThreads,
 	  for (unsigned int i=0; i<bndK.size(); i++)if(bndK[i].t)bndK[i].t->unset(myThread,K);
 	  cavityK.clear(); bndK.clear();
 	  delaunayCavity2(t[K], NULL, vToAdd[K], cavityK, bndK, myThread, K);
+	  //delaunayCavity(t[K], vToAdd[K], cavityK, bndK, myThread, K);
 	  // verify the cavity
 	  for (unsigned int i=0; i< bndK.size(); i++) {
 	    const double val =   robustPredicates::orient3d((double*)bndK[i].f.V[0],
@@ -633,7 +634,7 @@ void delaunayTrgl (const unsigned int numThreads,
 	}
       }
 
-      t3 += Cpu() - t1;
+      //      t3 += Cpu() - t1;
 
 #pragma omp barrier
 
@@ -641,7 +642,7 @@ void delaunayTrgl (const unsigned int numThreads,
 	if (!vToAdd[K])ok[K]=false;
 	else ok[K] = canWeProcessCavity (cavity[K], myThread, K);
       }
-      t1 = Cpu();
+      //      t1 = Cpu();
 
       for (unsigned int K=0; K< NPTS_AT_ONCE; K++) {
 	if (ok[K]){
@@ -676,7 +677,7 @@ void delaunayTrgl (const unsigned int numThreads,
 	  }
 	}
       }
-      t4 += Cpu() - t1;
+      //      t4 += Cpu() - t1;
     }
 #ifdef _VERBOSE
     #pragma omp critical
@@ -698,7 +699,7 @@ void delaunayTrgl (const unsigned int numThreads,
   //checkLocalDelaunayness(T,"final");
 
 
-  printf(" %12.5E %12.5E  %12.5E tot  %12.5E \n",t2,t3,t4,t2+t3+t4);
+  //  printf(" %12.5E %12.5E  %12.5E tot  %12.5E \n",t2,t3,t4,t2+t3+t4);
 
 #ifdef _VERBOSE
   printf("average searches per point  %12.5E\n",totSearchGlob/Npts);
diff --git a/Mesh/delaunay3d_private.h b/Mesh/delaunay3d_private.h
index def190ba3e0a872a298663d2f74b86047acb7149..a81270f09cddcfd2beafeb226b85d52d8497042e 100644
--- a/Mesh/delaunay3d_private.h
+++ b/Mesh/delaunay3d_private.h
@@ -10,6 +10,9 @@
 #include <math.h>
 #include "robustPredicates.h"
 #include <stdio.h>
+#ifdef _OPENMP
+#include <omp.h>
+#endif
 
 #ifndef MAX_NUM_THREADS_
 #define MAX_NUM_THREADS_ 1
@@ -292,7 +295,16 @@ class tetContainer {
   tetContainer (int nbThreads, int preallocSizePerThread) {
     // FIXME !!!
     if (nbThreads != 1) throw;
-    _perThread.push_back (new aBunchOfStuff<Tet>    (preallocSizePerThread) );
+    _perThread.resize(nbThreads);
+#pragma omp parallel num_threads(nbThreads)
+    {
+#ifdef _OPENMP
+      int  myThread = omp_get_thread_num();
+#else
+      int  myThread = 0;
+#endif
+      _perThread [myThread] = new aBunchOfStuff<Tet>    (preallocSizePerThread) ;
+    }
     //    _perThreadV.push_back(new aBunchOfStuff<Vertex> (preallocSizePerThread) );
   }
   inline Tet * newTet(int thread) {
diff --git a/Mesh/delaunay_refinement.cpp b/Mesh/delaunay_refinement.cpp
index 7f7fedf6641ed2052332cbb760b499f9507fcf03..eab55e7d8ac62268f8ecfde3db30985408f2bf5f 100644
--- a/Mesh/delaunay_refinement.cpp
+++ b/Mesh/delaunay_refinement.cpp
@@ -311,8 +311,7 @@ void edgeBasedRefinement (const int numThreads,
 
   // fill up old Datastructures
 
-  tetContainer allocator (numThreads,3000000);
-
+  tetContainer allocator (numThreads,1000000);
 
   std::vector<Vertex *> _vertices;
   edgeContainer ec;
@@ -328,13 +327,13 @@ void edgeBasedRefinement (const int numThreads,
     }
 
 
-    FILE *f = fopen ("pts_init.dat","w");
-    fprintf(f,"%d\n",all.size());
-    for (std::set<MVertex*>::iterator it = all.begin();it !=all.end(); ++it){
-      MVertex *mv = *it;      
-      fprintf(f,"%12.5E %12.5E %12.5E\n",mv->x(),mv->y(),mv->z());
-    }
-    fclose(f);
+    //    FILE *f = fopen ("pts_init.dat","w");
+    //    fprintf(f,"%d\n",all.size());
+    //    for (std::set<MVertex*>::iterator it = all.begin();it !=all.end(); ++it){
+    //      MVertex *mv = *it;      
+    //      fprintf(f,"%12.5E %12.5E %12.5E\n",mv->x(),mv->y(),mv->z());
+    //    }
+    //    fclose(f);
 
 
     _vertices.resize(all.size());
@@ -347,22 +346,25 @@ void edgeBasedRefinement (const int numThreads,
       _ma[v] = mv;
       counter++;
     }
-    connSet faceToTet;
-    // FIXME MULTITHREADING
-    for (unsigned int i=0;i<T.size();i++){
-      MTetrahedron  *tt = T[i];
-      int i0 = tt->getVertex(0)->getIndex();
-      int i1 = tt->getVertex(1)->getIndex();
-      int i2 = tt->getVertex(2)->getIndex();
-      int i3 = tt->getVertex(3)->getIndex();
-      Tet *t = allocator.newTet(0) ; t->setVertices (_vertices[i0],_vertices[i1],_vertices[i2],_vertices[i3]);
-      computeAdjacencies (t,0,faceToTet);
-      computeAdjacencies (t,1,faceToTet);
-      computeAdjacencies (t,2,faceToTet);
-      computeAdjacencies (t,3,faceToTet);
-      delete tt;
+
+    {
+      connSet faceToTet;
+      // FIXME MULTITHREADING
+      for (unsigned int i=0;i<T.size();i++){
+	MTetrahedron  *tt = T[i];
+	int i0 = tt->getVertex(0)->getIndex();
+	int i1 = tt->getVertex(1)->getIndex();
+	int i2 = tt->getVertex(2)->getIndex();
+	int i3 = tt->getVertex(3)->getIndex();
+	Tet *t = allocator.newTet(0) ; t->setVertices (_vertices[i0],_vertices[i1],_vertices[i2],_vertices[i3]);
+	computeAdjacencies (t,0,faceToTet);
+	computeAdjacencies (t,1,faceToTet);
+	computeAdjacencies (t,2,faceToTet);
+	computeAdjacencies (t,3,faceToTet);
+	delete tt;
+      }
+      T.clear();
     }
-    T.clear();
   }
 
   // do not allow to saturate boundary edges
@@ -436,7 +438,7 @@ void edgeBasedRefinement (const int numThreads,
 		(t5-t4),
 		(t5-__t__),
 		allocator.size(0));
-      printf("%d calls to inSphere\n",Tet::in_sphere_counter);
+      //      printf("%d calls to inSphere\n",Tet::in_sphere_counter);
       iter++;
     }
   }
diff --git a/Mesh/meshGRegion.cpp b/Mesh/meshGRegion.cpp
index 040db18bfbda297690dd7de31a7ab708d35b0e5b..352edc18aac790f7096feeb1d40137bb2bd4c0f4 100644
--- a/Mesh/meshGRegion.cpp
+++ b/Mesh/meshGRegion.cpp
@@ -592,7 +592,7 @@ void MeshDelaunayVolumeTetgen(std::vector<GRegion*> &regions)
 }
 
 // uncomment this to test the new code
-//#define NEW_CODE
+#define NEW_CODE
 
 static void MeshDelaunayVolumeNewCode(std::vector<GRegion*> &regions) {
   GRegion *gr = regions[0];