diff --git a/Mesh/delaunay3d.cpp b/Mesh/delaunay3d.cpp
index cbba094d890bffe8ff28b0b912244fdbe3c4f61e..8eaa669da5c6c957fe034764051279e5c303fd03 100644
--- a/Mesh/delaunay3d.cpp
+++ b/Mesh/delaunay3d.cpp
@@ -305,15 +305,6 @@ void SortHilbert (std::vector<Vertex*>& v, std::vector<int> &indices)
   h.Apply(v, indices);
 }
 
-double walltime( double *t0 )
-{
-#ifdef _OPENMP
-  return omp_get_wtime();
-#else
-  return GetTimeInSeconds();
-#endif
-}
-
 void computeAdjacencies (Tet *t, int iFace, connContainer &faceToTet){
   conn c (t->getFace(iFace), iFace, t);
   connContainer::iterator it = std::find(faceToTet.begin(),faceToTet.end(),c);
@@ -359,7 +350,7 @@ static void delaunayCavity (Tet *t,
 			    Vertex *v,
 			    cavityContainer &cavity,
 			    connContainer &bnd,
-			    int thread, int iPnt, int PP){
+			    int thread, int iPnt){
 
   t->set(thread, iPnt); // Mark the triangle
   cavity.push_back(t);
@@ -370,7 +361,7 @@ static void delaunayCavity (Tet *t,
     }
     else if (neigh->inSphere(v,thread)){
       if (!(neigh->isSet(thread, iPnt))) {
-	delaunayCavity (neigh, t, v, cavity,bnd,thread, iPnt,PP);
+	delaunayCavity (neigh, t, v, cavity,bnd,thread, iPnt); 
       }
     }
     else {
@@ -379,49 +370,6 @@ static void delaunayCavity (Tet *t,
   }
 }
 
-bool straddling_segment_intersects_triangle(Vertex *p1, Vertex *p2,
-					   Vertex *q1, Vertex *q2, Vertex *q3)
-{
-  double s1 = orientationTest(p1, p2, q2, q3);
-  double s2 = orientationTest(p1, p2, q3, q1);
-  double s3 = orientationTest(p1, p2, q1, q2);
-
-  if (s1*s2 < 0.0 || s2 * s3 < 0.0) return false;
-
-  double s4 = orientationTest(q1, q2, q3, p1);
-  double s5 = orientationTest(q3, q2, q1, p2);
-
-  return (s4*s5 >= 0) ;
-}
-
-void print (Vertex *v){
-  printf("%p ",v);
-}
-
-void print (Tet *t, bool recur = true){
-  if (recur){
-    printf("PRINT TET\n");
-  }
-  if (!t){
-    printf("(NULL)\n");
-    return;
-  }
-  printf("( ");
-  print(t->V[0]);
-  print(t->V[1]);
-  print(t->V[2]);
-  print(t->V[3]);
-  printf(")\n");
-  if (recur){
-    printf("{");
-    print(t->T[0],false);
-    print(t->T[1],false);
-    print(t->T[2],false);
-    print(t->T[3],false);
-    printf("}\n");
-  }
-}
-
 Tet* walk (Tet *t, Vertex *v, int maxx, double &totSearch, int thread){
   while (1){
     totSearch++;
@@ -468,29 +416,30 @@ void __print (const char *name, connContainer &conn){
   fclose(f);
 }
 
-void __print (const char *name, std::vector<Tet*> &T, Vertex *v){
+void __print (const char *name, int thread, tetContainer &T, Vertex *v){
   FILE *f = fopen(name,"w");
   fprintf(f,"View \"\"{\n");
   if (v)fprintf(f,"SP(%g,%g,%g){%d};\n",v->x(),v->y(),v->z(),v->getNum());
 
-  for (unsigned int i=0;i<T.size();i++){
-    if (T[i]->V[0]){
-      //      double val = robustPredicates::orient3d((double*)T[i]->V[0],(double*)T[i]->V[1],(double*)T[i]->V[2],(double*)T[i]->V[3]);
+  for (unsigned int i=0;i<T.size(thread);i++){
+    Tet *tt = T(thread,i);
+    if (tt->V[0]){
+      //      double val = robustPredicates::orient3d((double*)tt->V[0],(double*)tt->V[1],(double*)tt->V[2],(double*)tt->V[3]);
       if (!v){
 	fprintf(f,"SS(%g,%g,%g,%g,%g,%g,%g,%g,%g,%g,%g,%g){%g,%g,%g,%g};\n",
-		T[i]->V[0]->x(),T[i]->V[0]->y(),T[i]->V[0]->z(),
-		T[i]->V[1]->x(),T[i]->V[1]->y(),T[i]->V[1]->z(),
-		T[i]->V[2]->x(),T[i]->V[2]->y(),T[i]->V[2]->z(),
-		T[i]->V[3]->x(),T[i]->V[3]->y(),T[i]->V[3]->z(),
-		T[i]->V[0]->lc(),T[i]->V[1]->lc(),T[i]->V[2]->lc(),T[i]->V[3]->lc());
+		tt->V[0]->x(),tt->V[0]->y(),tt->V[0]->z(),
+		tt->V[1]->x(),tt->V[1]->y(),tt->V[1]->z(),
+		tt->V[2]->x(),tt->V[2]->y(),tt->V[2]->z(),
+		tt->V[3]->x(),tt->V[3]->y(),tt->V[3]->z(),
+		tt->V[0]->lc(),tt->V[1]->lc(),tt->V[2]->lc(),tt->V[3]->lc());
       }
       else{
 	fprintf(f,"SS(%g,%g,%g,%g,%g,%g,%g,%g,%g,%g,%g,%g){%d,%d,%d,%d};\n",
-		T[i]->V[0]->x(),T[i]->V[0]->y(),T[i]->V[0]->z(),
-		T[i]->V[1]->x(),T[i]->V[1]->y(),T[i]->V[1]->z(),
-		T[i]->V[2]->x(),T[i]->V[2]->y(),T[i]->V[2]->z(),
-		T[i]->V[3]->x(),T[i]->V[3]->y(),T[i]->V[3]->z(),
-		T[i]->V[0]->getNum(),T[i]->V[1]->getNum(),T[i]->V[2]->getNum(),T[i]->V[3]->getNum());
+		tt->V[0]->x(),tt->V[0]->y(),tt->V[0]->z(),
+		tt->V[1]->x(),tt->V[1]->y(),tt->V[1]->z(),
+		tt->V[2]->x(),tt->V[2]->y(),tt->V[2]->z(),
+		tt->V[3]->x(),tt->V[3]->y(),tt->V[3]->z(),
+		tt->V[0]->getNum(),tt->V[1]->getNum(),tt->V[2]->getNum(),tt->V[3]->getNum());
       }
     }
   }
@@ -541,89 +490,47 @@ bool canWeProcessCavity (cavityContainer &cavity, unsigned int myThread, unsigne
   return true;
 }
 
-static Tet* stoopidSearch (Tet *t, Vertex *v, int thread){
-  std::stack<Tet*> _stack;
-  _stack.push(t);
-  std::set<Tet*> all;
-  while(!_stack.empty()){
-    t = _stack.top();
-    all.insert(t);
-    _stack.pop();
-    if (t->inSphere(v,thread))return t;
-    for (int i=0;i<4;i++){
-      if (t->T[i]){
-	if (all.find(t->T[i]) == all.end())
-	  _stack.push(t->T[i]);
-      }
+bool checkLocalDelaunayness(Tet* t){
+  if (t->V[0]){
+    for (int i=0;i<4;i++){    
+      Tet *n = t->T[i];
+      if (n && n->inSphere(t->getOppositeVertex(i),0))return false;
     }
   }
-  return NULL;
+  return true;
 }
 
-static void stoopidCavity (Tet *t, Vertex *v, int thread, std::vector<Tet*> &cavity){
-  cavity.clear();
-  std::stack<Tet*> _stack;
-  _stack.push(t);
-  std::set<Tet*> all;
-  while(!_stack.empty()){
-    t = _stack.top();
-    all.insert(t);
-    _stack.pop();
-    if (t->inSphere(v,thread)){cavity.push_back(t);}
-    for (int i=0;i<4;i++){
-      if (t->T[i]){
-	if (all.find(t->T[i]) == all.end()){
-	  all.insert(t->T[i]);
-	  _stack.push(t->T[i]);
-	}
-      }
-    }
+int checkLocalDelaunayness(tetContainer &c, int thread, char *msg){
+  int nLoc = 0;
+  for (unsigned int i=0; i<c.size(thread); i++) {
+    if (!checkLocalDelaunayness(c(thread,i)))nLoc++;
   }
+  if (nLoc != 0)Msg::Info ("%s --> %d tets are not locally delaunay",msg,nLoc);
+  return nLoc ;
 }
 
-void checkCavity (std::vector<Tet*> &c, Vertex *v){
-  std::set<Face> ff;
-  for (unsigned int i=0; i<c.size(); i++) {
-    for (int j=0;j<4;j++){
-      std::set<Face>::iterator it = ff.find(c[i]->getFace(j));
-      if (it != ff.end())ff.erase(it);
-      else ff.insert(c[i]->getFace(j));
-    }
-  }
-  printf("%d free faces\n",ff.size());
-
-  std::vector<Tet*> bad;
-  for (unsigned int i=0; i<c.size(); i++) {
-    for (int j=0;j<4;j++){
-      std::set<Face>::iterator it = ff.find(c[i]->getFace(j));
-      if (it != ff.end()){
-	Tet *neigh = c[i]->T[j];
-	if (neigh){
-	  int val = neigh->inSphere(v,0);
-	  if (val){printf("arghhhh %p %p set %d\n",neigh,c[i], neigh->isSet(0,0));
-	    bad.push_back(neigh);
-	    //	    bad.push_back(c[i]);
-	  }
-	}
-	std::vector<Tet*>::iterator it2= std::find(c.begin(),c.end(),neigh);
-	if (it2 != c.end())printf("datastructure broken\n");
-      }
-    }
+static Tet* randomTet (int thread,  tetContainer &allocator ){
+  unsigned int N = allocator.size(thread);
+  //  printf("coucou random TET %d %d\n",thread,N);
+  while(1) {
+    Tet *t = allocator(thread,rand()%N);
+    if (t->V[0])return t;
   }
-  __print("bad.pos",bad);
 }
 
-void delaunayTrgl (const unsigned int numThreads,
-		   const unsigned int NPTS_AT_ONCE,
-		   unsigned int Npts,
-		   std::vector<Tet*> &T,
-		   std::vector<Vertex*> assignTo[]){
+
+void delaunayTrgl (const unsigned int numThreads, 
+		   const unsigned int NPTS_AT_ONCE, 
+		   unsigned int Npts, 
+		   std::vector<Vertex*> assignTo[],
+		   tetContainer &allocator){
+#ifdef _VERBOSE
   double totSearchGlob=0;
   double totCavityGlob=0;
-  int counter = 0;
-  int counterMiss = 0;
+#endif
+
+  //  checkLocalDelaunayness(T, "initial");
 
-  std::vector<double> cavitySize (numThreads);
   std::vector<int> invalidCavities(numThreads);
   std::vector<int> cashMisses(numThreads, 0);
 
@@ -635,13 +542,6 @@ void delaunayTrgl (const unsigned int numThreads,
 
 #pragma omp parallel num_threads(numThreads)
   {
-    double totSearch=0;
-    double totCavity=0;
-    std::vector<cavityContainer> cavity(NPTS_AT_ONCE);
-    std::vector<connContainer> bnd(NPTS_AT_ONCE);
-    connContainer faceToTet;
-    std::vector<Tet*> Choice(NPTS_AT_ONCE);
-    for (unsigned int K=0;K<NPTS_AT_ONCE;K++)Choice[K] = T[0];
 
 #ifdef _OPENMP
     int  myThread = omp_get_thread_num();
@@ -649,6 +549,15 @@ void delaunayTrgl (const unsigned int numThreads,
     int  myThread = 0;
 #endif
 
+    double totSearch=0;
+    double totCavity=0;
+    std::vector<cavityContainer> cavity(NPTS_AT_ONCE);
+    std::vector<connContainer> bnd(NPTS_AT_ONCE);
+    connContainer faceToTet;
+    std::vector<Tet*> Choice(NPTS_AT_ONCE);
+    for (unsigned int K=0;K<NPTS_AT_ONCE;K++)Choice[K] = randomTet (myThread, allocator);
+    
+
     invalidCavities [myThread] = 0;
     unsigned int locSize=0;
     std::vector<unsigned int> locSizeK(NPTS_AT_ONCE);
@@ -666,15 +575,7 @@ void delaunayTrgl (const unsigned int numThreads,
 	if (numThreads!=1) allocatedVerts[K][iP]._thread = myThread;
       }
     }
-    int allocatedSize = 12*locSize;
-#ifdef _HAVE_NUMA
-    Tet    *allocatedTable = (Tet*) numa_alloc_local (allocatedSize*sizeof(Tet));
-#else
-    //    Tet    *allocatedTable = new Tet[allocatedSize];
-    std::vector<Tet*> allocatedTable;
-#endif
 
-    int newCounter = 0;
     std::vector<Vertex*> vToAdd(NPTS_AT_ONCE);
 
 #pragma omp barrier
@@ -684,77 +585,33 @@ void delaunayTrgl (const unsigned int numThreads,
 
     for (unsigned int iPGlob=0 ; iPGlob < maxLocSizeK; iPGlob++){
 #pragma omp barrier
-      cavitySize[myThread] = 0;
       std::vector<Tet*> t(NPTS_AT_ONCE);
       //	  double c1 = Cpu();
+      // FIND SEEDS
       for (unsigned int K=0; K< NPTS_AT_ONCE; K++) {
 	vToAdd[K] = iPGlob <  locSizeK[K] ? &allocatedVerts[K][iPGlob] : NULL;
 	if(vToAdd[K]){
-	  if (!Choice[K]->V[0]){
-	    printf("HMMM I KNOW WHAT SHOULD BE DONE\n");
-	    if (Choice[K]->T[0] && Choice[K]->T[0]->V[0])Choice[K] = Choice[K]->T[0];
-	    else if (Choice[K]->T[1] && Choice[K]->T[1]->V[0])Choice[K] = Choice[K]->T[1];
-	    else if (Choice[K]->T[2] && Choice[K]->T[2]->V[0])Choice[K] = Choice[K]->T[2];
-	    else if (Choice[K]->T[3] && Choice[K]->T[3]->V[0])Choice[K] = Choice[K]->T[3];
-	    else printf("I KNOW WHAT SHOULD BE DONE ARGH\n");
-	  }
+	  // In 3D, insertion of a point may lead to deletion of tets !!
+	  if (!Choice[K]->V[0])Choice[K] = randomTet (myThread, allocator);
 	  while(1){
 	    t[K] = walk ( Choice[K] , vToAdd[K], Npts, totSearch, myThread);
 	    if (t[K])break;
-	    Tet * newChoice = NULL;
-	    int ITT=0;
-	    while(1){
-	      newChoice = T[rand() % T.size()];
-	      if (newChoice->V[0])break;
-	      ITT ++;
-	      if (ITT > T.size()) {
-		newChoice == NULL;
-		break;
-	      }
-	    }
-	    if (newChoice == NULL)break;
-	    Choice[K] = newChoice;
-	  }
-	  if (!t[K]){
-	    Msg::Warning("Jump-and-Walk Failed (non convex domain)");
-	    t[K] = stoopidSearch (Choice[K], vToAdd[K],myThread);
-	    if (!t[K]){
-	      Msg::Fatal("A point is outside the triangulation");
-	      throw;
-	    }
+	    // the domain may not be convex. we then start from a random tet and
+	    // walk from there
+	    Choice[K] = randomTet(rand() % numThreads, allocator);
 	  }
 	}
       }
       //      double c1 = Cpu();
-
-      // FIXME TEST
-      //      int NBNULL;
+      // BUILD CAVITIES
       for (unsigned int K=0; K< NPTS_AT_ONCE; K++) {
 	if(vToAdd[K]){
 	  cavityContainer &cavityK = cavity[K];
 	  connContainer   &bndK = bnd[K];
 	  for (unsigned int i=0; i<cavityK.size(); i++)cavityK[i]->unset(myThread,K);
-	  //	  for (unsigned int i=0; i<   bndK.size(); i++)if(bndK[i].t)bndK[i].t->unset(myThread,K);
 	  cavityK.clear(); bndK.clear();
-	  delaunayCavity(t[K], NULL, vToAdd[K], cavityK, bndK, myThread, K,iPGlob);
+	  delaunayCavity(t[K], NULL, vToAdd[K], cavityK, bndK, myThread, K);
 	  for (unsigned int i=0; i<cavityK.size(); i++)cavityK[i]->unset(myThread,K);
-	  //	  for (unsigned int i=0; i<   bndK.size(); i++)if(bndK[i].t)bndK[i].t->unset(myThread,K);
-	  //// FIXME TEST
-	  //	  NBNULL = 0;
-	  //	  for (unsigned int i=0; i<cavityK.size(); i++) {
-	  //	    for (int j=0;j<4;j++)if(!cavityK[i]->T[j])NBNULL++;
-	  //	  }
-	  /*	  if (iPGlob==40){
-	    char name[245];
-	    printf("cavity for point %d size %d shell %d NBNULL %d\n",
-		   iPGlob,cavityK.size(),bndK.size(),NBNULL);
-	    sprintf(name,"Cavity%d.pos",iPGlob);
-	    __print(name,cavityK,vToAdd[K]);
-	    sprintf(name,"Bnd%d.pos",iPGlob);
-	    __print(name,bndK);
-	    checkCavity(cavityK,vToAdd[K]);
-	  }
-	  */
 	  // verify the cavity
 	  for (unsigned int i=0; i< bndK.size(); i++) {
 	    double val =   robustPredicates::orient3d((double*)bndK[i].f.V[0],
@@ -762,26 +619,11 @@ void delaunayTrgl (const unsigned int numThreads,
 						      (double*)bndK[i].f.V[2],
 						      (double*)vToAdd[K]);
 	    if (val <= 0 ) {
-	      //	      	      char name[245];
-	      //	      	      sprintf(name,"invalidCavity%d.pos",invalidCavities [myThread]);
-	      //	      	      __print(name,cavityK,vToAdd[K]);
-	      //	      	      printf("val = %22.15E\n",val);
-	      //	      	      std::vector<Tet*> temp;
-	      //	      	      stoopidCavity (t[K], vToAdd[K], myThread, temp);
-	      //	      	      sprintf(name,"validCavity%d.pos",invalidCavities [myThread]);
-	      //	      	      __print(name,temp,vToAdd[K]);
-	      //	      	      printf("---> cavity %d vs. %d elements in cavity\n",temp.size(),cavityK.size());
-	      //		      sprintf(name,"Bnd%d.pos",invalidCavities [myThread]);
-	      //		      __print(name,bndK);
-		      //	      for (unsigned int i=0; i<temp.size(); i++) {
-		      //	      		print(temp[i]);
-		      //	      	      }
 	      vToAdd[K] = NULL;
 	      invalidCavities [myThread]++;
 	      break;
 	    }
 	  }
-	  cavitySize[myThread] += (cavityK.size());
 	}
       }
 
@@ -793,9 +635,7 @@ void delaunayTrgl (const unsigned int numThreads,
 	if (!vToAdd[K])ok[K]=false;
 	else ok[K] = canWeProcessCavity (cavity[K], myThread, K);
       }
-
-      //            double ck = Cpu();
-      //      std::set<Tet*> touched;
+      
       for (unsigned int K=0; K< NPTS_AT_ONCE; K++) {
 	if (ok[K]){
 	  cavityContainer &cavityK = cavity[K];
@@ -805,8 +645,6 @@ void delaunayTrgl (const unsigned int numThreads,
 	  const unsigned int bSize = bndK.size();
 	  totCavity+= cSize;
 	  Choice[K] = cavityK[0];
-	  // FIXME TEST
-	  std::vector<Tet*> _news;
 	  for (unsigned int i=0; i<bSize; i++) {
 	    // reuse memory slots of invalid elements
 	    Tet *t;
@@ -814,9 +652,7 @@ void delaunayTrgl (const unsigned int numThreads,
 	      t = cavityK[i];
 	    }
 	    else {
-	      t = new Tet;
-	      allocatedTable.push_back(t);
-	      newCounter = allocatedTable.size();
+	      t = allocator.newTet(myThread);
 	    }
 	    if (i < cSize && t->V[0]->_thread != myThread)cashMisses[myThread]++;
 	    t->setVerticesNoTest (bndK[i].f.V[0], bndK[i].f.V[1], bndK[i].f.V[2], vToAdd[K]);
@@ -828,60 +664,38 @@ void delaunayTrgl (const unsigned int numThreads,
 	      else if (neigh->getFace(1) == bndK[i].f)neigh->T[1] = t;
 	      else if (neigh->getFace(2) == bndK[i].f)neigh->T[2] = t;
 	      else if (neigh->getFace(3) == bndK[i].f)neigh->T[3] = t;
-	      else {printf("oops 1\n");throw;}
+	      else {Msg::Fatal("Datatrsucture Broken in Triangulation");}
 	    }
 	    computeAdjacencies (t,1,faceToTet);
 	    computeAdjacencies (t,2,faceToTet);
 	    computeAdjacencies (t,3,faceToTet);
-	    _news.push_back(t);
 	  }
-	  // FIXME TEST
-	  //	  int NBNULL2 = 0;
-	  //	  for (unsigned int i=0; i<_news.size(); i++) {
-	  //	    for (int j=0;j<4;j++)if(!_news[i]->T[j])NBNULL2++;
-	  //	  }
-	  //	  if (NBNULL != NBNULL2){
-	  //	    char name[245];
-	  //	    sprintf(name,"invalidConnexion%d.pos",iPGlob);
-	  //	    __print(name,_news,vToAdd[K]);
-	  //	    printf("%d %d\n",NBNULL,NBNULL2);
-	  //	  }
 	  for (unsigned int i=bSize; i<cSize; i++) {
-	    counterMiss++;
 	    cavityK[i]->V[0] = cavityK[i]->V[1] = cavityK[i]->V[2] = cavityK[i]->V[3] = NULL;
 	  }
 	}
-	else {
-	  counter++;
-	}
       }
     }
+#ifdef _VERBOSE
     #pragma omp critical
     {
       totCavityGlob+= totCavity;
       totSearchGlob+= totSearch;
-      for (int i=0;i<newCounter;i++){
-	allocatedTable[i]->setAllDeleted ();
-	if (allocatedTable[i]->V[0]){
-	  T.push_back(allocatedTable [i]);
-	}
-      }
-      //      delete [] allocatedTable;
-      //      printf("new counter = %d\n",newCounter);
     }
+#endif
     #pragma omp barrier
   }
 
   if (invalidCavities[0])Msg::Warning("%d invalid cavities",invalidCavities[0]);
 
+  //checkLocalDelaunayness(T,"final");
+
+
 #ifdef _VERBOSE
-  double _t2 = walltime(&_t);
   printf("total time for %d points %12.5E seconds\n",Npts,(double)(_t2-_t1));
   printf("%8d tets generated (%6f/sec)\n",T.size(),(double) T.size()/ ((double)(_t2-_t1)));
   printf("average searches per point  %12.5E\n",totSearchGlob/Npts);
   printf("average size for del cavity %12.5E\n",totCavityGlob/Npts);
-  printf("counter = %d\n",counter);
-  printf("counterMiss = %d\n",counterMiss);
   printf("cash misses: ");
   for (int i=0;i<numThreads;i++){
     printf("%4d ",(int)cashMisses[i]);
@@ -895,10 +709,10 @@ void delaunayTrgl (const unsigned int numThreads,
 
 static void initialCube (std::vector<Vertex*> &v,
 			 Vertex *box[8],
-			 std::vector<Tet*> &t){
+			 tetContainer & allocator){
   SBoundingBox3d bbox ;
-  bbox += SPoint3(0,0,0);
-  bbox += SPoint3(1,1,1);
+  //  bbox += SPoint3(0,0,0);
+  //  bbox += SPoint3(1,1,1);
   for (size_t i=0;i<v.size();i++){
     Vertex *pv = v[i];
     bbox += SPoint3(pv->x(),pv->y(),pv->z());
@@ -920,18 +734,13 @@ static void initialCube (std::vector<Vertex*> &v,
   Tet *t4 = new Tet (box[1],box[4],box[5],box[7]);
   Tet *t5 = new Tet (box[1],box[7],box[5],box[6]);*/
 
-  Tet *t0 = new Tet (box[7],box[2],box[3],box[1]);
-  Tet *t1 = new Tet (box[7],box[0],box[1],box[3]);
-  Tet *t2 = new Tet (box[1],box[6],box[7],box[2]);
-  Tet *t3 = new Tet (box[1],box[0],box[7],box[4]);
-  Tet *t4 = new Tet (box[4],box[1],box[5],box[7]);
-  Tet *t5 = new Tet (box[7],box[1],box[5],box[6]);
-  t.push_back(t0);
-  t.push_back(t1);
-  t.push_back(t2);
-  t.push_back(t3);
-  t.push_back(t4);
-  t.push_back(t5);
+  Tet *t0 = allocator.newTet(0); t0->setVertices(box[7],box[2],box[3],box[1]);
+  Tet *t1 = allocator.newTet(0); t1->setVertices(box[7],box[0],box[1],box[3]);
+  Tet *t2 = allocator.newTet(0); t2->setVertices(box[1],box[6],box[7],box[2]);
+  Tet *t3 = allocator.newTet(0); t3->setVertices(box[1],box[0],box[7],box[4]);
+  Tet *t4 = allocator.newTet(0); t4->setVertices(box[4],box[1],box[5],box[7]);
+  Tet *t5 = allocator.newTet(0); t5->setVertices(box[7],box[1],box[5],box[6]);
+
   connContainer ctnr;
   for (int i=0;i<4;i++){
     computeAdjacencies (t0,i,ctnr);
@@ -947,20 +756,14 @@ static void initialCube (std::vector<Vertex*> &v,
 void delaunayTriangulation (const int numThreads,
 			    const int nptsatonce,
 			    std::vector<Vertex*> &S,
-			    std::vector<Tet*> &T,
-			    Vertex *box[8]){
+			    Vertex *box[8],
+			    tetContainer & allocator){
   int N = S.size();
 
   std::vector<int> indices;
-  //  double t = 0;
-  //  double t1 = walltime(&t);
   SortHilbert(S, indices);
-  //  double t2 = walltime(&t);
-  //    print ("sorted.pos",S);
-  //  printf("total time for sorting %d points %12.5E seconds\n",S.size(),(double)(t2-t1));
-
-  if (!T.size()){
-    initialCube (S,box,T);
+  if (!allocator.size(0)){
+    initialCube (S,box,allocator);
   }
 
   int nbBlocks  = nptsatonce * numThreads;
@@ -977,8 +780,8 @@ void delaunayTriangulation (const int numThreads,
     //    printf("sizePerBlock[%3d] = %8d\n",i,sizePerBlock);
     int currentBlock = 0;
     int localCounter = 0;
-    // FIXME : something's wring here !!!
-    if (i < -4){
+    // FIXME : something's wrong here !!!
+    if (i < 1){
       for (int jPt=start;jPt<end;jPt++){
 	assignTo0[0].push_back(S[jPt]);
 	S[jPt]->_thread = numThreads*(jPt-start)/(end-start);
@@ -996,15 +799,9 @@ void delaunayTriangulation (const int numThreads,
   }
 
   S.clear();
-  // double _t = 0;
-  //  double _t1 = walltime(&_t);
-  delaunayTrgl(1,1, assignTo0[0].size(),T,assignTo0);
-  //    print ("initialTetrahedrization.pos",T);
-  delaunayTrgl(numThreads,nptsatonce, N,T,&assignTo[0]);
-  //  _t = 0;
-  //  double _t2 = walltime(&_t);
-  //  printf("WALL CLOCK TIME %12.5E\n",_t2-_t1);
-  __print ("finalTetrahedrization.pos",T);
+  delaunayTrgl(1,1, assignTo0[0].size(),assignTo0,allocator);
+  delaunayTrgl(numThreads,nptsatonce, N,&assignTo[0], allocator);
+  __print ("finalTetrahedrization.pos",0, allocator);
 }
 
 
@@ -1041,9 +838,9 @@ void delaunayTriangulation (const int numThreads,
 
   robustPredicates::exactinit(1,maxx,maxy,maxz);
 
-  std::vector<Tet*> _tets;
+  tetContainer allocator (1, S.size() * 10);
   Vertex *box[8];
-  delaunayTriangulation (numThreads, nptsatonce, _vertices, _tets, box);
+  delaunayTriangulation (numThreads, nptsatonce, _vertices, box, allocator);
 
   for (int i=0;i<8;i++){
     Vertex *v = box[i];
@@ -1054,26 +851,27 @@ void delaunayTriangulation (const int numThreads,
     S.push_back(mv);
   }
 
-  for (unsigned int i=0;i<_tets.size();i++){
-    Tet *t =  _tets[i];
-    if (t->V[0]){
-      if (_tets[i]->V[0]->getNum() &&
-	  _tets[i]->V[1]->getNum() &&
-	  _tets[i]->V[2]->getNum() &&
-	  _tets[i]->V[3]->getNum() ) {
-	MVertex *v1 = _temp[_tets[i]->V[0]->getNum()];
-	MVertex *v2 = _temp[_tets[i]->V[1]->getNum()];
-	MVertex *v3 = _temp[_tets[i]->V[2]->getNum()];
-	MVertex *v4 = _temp[_tets[i]->V[3]->getNum()];
-	MTetrahedron *tr = new MTetrahedron(v1,v2,v3,v4);
-	T.push_back(tr);
-      }
-      else {
-	printf("oops\n", _tets[i]->V[0]->getNum(), _tets[i]->V[1]->getNum() , _tets[i]->V[2]->getNum() , _tets[i]->V[3]->getNum());
+  for (int myThread=0; myThread < numThreads; myThread++) {
+    for (unsigned int i=0;i<allocator.size(myThread);i++){
+      Tet *t =  allocator(myThread,i);
+      if (t->V[0]){
+	if (t->V[0]->getNum() &&
+	    t->V[1]->getNum() &&
+	    t->V[2]->getNum() &&
+	    t->V[3]->getNum() ) {
+	  MVertex *v1 = _temp[t->V[0]->getNum()];
+	  MVertex *v2 = _temp[t->V[1]->getNum()];
+	  MVertex *v3 = _temp[t->V[2]->getNum()];
+	  MVertex *v4 = _temp[t->V[3]->getNum()];
+	  MTetrahedron *tr = new MTetrahedron(v1,v2,v3,v4);
+	  T.push_back(tr);
+	}
+	else {
+	  Msg::Fatal("Error in triangulation");
+	}
       }
     }
   }
-  // clean
+
   for (unsigned int i=0;i<_vertices.size();i++)delete _vertices[i];
-  for (unsigned int i=0;i<_tets.size();i++)delete _tets[i];
 }
diff --git a/Mesh/delaunay3d_private.h b/Mesh/delaunay3d_private.h
index 38eb9ebbf61da16cd9007b31fbf510a3082ebf8e..f1a38f2a54520641ab8651a3b5c1e87d8da23e68 100644
--- a/Mesh/delaunay3d_private.h
+++ b/Mesh/delaunay3d_private.h
@@ -68,7 +68,7 @@ inline double orientationTestFast(double *pa, double *pb, double *pc, double *pd
 inline bool inSphereTest_s (Vertex *va, Vertex *vb, Vertex *vc, Vertex *vd , Vertex *ve){
   double val = robustPredicates::insphere ((double*)va,(double*)vb,(double*)vc,(double*)vd,(double*)ve);
   if (val == 0.0){
-    printf("symbolic perturbation needed vol %22.15E\n",orientationTestFast((double*)va,(double*)vb,(double*)vc,(double*)vd));
+    Msg::Debug("symbolic perturbation needed vol %22.15E",orientationTestFast((double*)va,(double*)vb,(double*)vc,(double*)vd));
     int count;
     // symbolic perturbation
     Vertex *pt[5] = {va,vb,vc,vd,ve};
@@ -210,6 +210,11 @@ struct Tet {
     const int fac[4][3] = {{0,1,2},{1,3,2},{2,3,0},{1,0,3}};
     return Face (V[fac[k][0]],V[fac[k][1]],V[fac[k][2]]);
   }
+  inline Vertex* getOppositeVertex  (int k) const {
+    const int o[4] = {3,0,1,2};
+    return V[o[k]];
+  }
+
   inline Edge getEdge (int k) const {
     const int edg[6][2] = {{0,1},{0,2},{0,3},{1,2},{1,3},{2,3}};
     return Edge (std::min(V[edg[k][0]],V[edg[k][1]]),
@@ -239,17 +244,71 @@ struct conn {
   }
 };
 
+
+// tetrahedra (one per thread)
+struct aBunchOfTets {
+  std::vector<Tet*> _all;
+  unsigned int _current;
+  unsigned int _nbAlloc;
+  unsigned int size () {
+    return _current + (_all.size()-1) * _nbAlloc;
+  }
+  Tet* operator () (unsigned int i){
+    unsigned int _array = i / _nbAlloc;
+    unsigned int _offset = i % _nbAlloc;
+    //    printf("%d %d %d\n",i,_array,_offset);
+    return _all [_array] + _offset;
+  }
+  aBunchOfTets (unsigned int s) : _current(0), _nbAlloc (s) {
+    _all.push_back(new Tet [_nbAlloc]);
+  }
+  ~aBunchOfTets(){
+    for (unsigned int i=0;i<_all.size();i++){
+      delete [] _all[i];
+    }
+  }
+  Tet* newTet() {
+
+    if (_current == _nbAlloc){
+      _all.push_back(new Tet [_nbAlloc]);
+      printf("REALLOCATION %d\n",_all.size());
+      _current = 0;
+    }
+    _current++;
+    return _all[_all.size()-1]+(_current-1);
+  }
+};
+
+// tetAllocator owns the tets that have been allocated by itself
+class tetContainer {
+  std::vector<aBunchOfTets*> _perThread;
+public:
+  unsigned int size(int thread) const {return _perThread[thread]->size();}
+  Tet * operator () (int thread, int j) const {return (*_perThread[thread]) (j);}
+  tetContainer (int nbThreads, int preallocSizePerThread) {
+    // FIXME !!!
+    if (nbThreads != 1) throw;
+    _perThread.push_back(new aBunchOfTets (preallocSizePerThread) );
+  }
+  Tet * newTet(int thread) {
+    return  _perThread[thread]->newTet();
+  }
+  ~tetContainer(){
+    // FIXME !!!
+    delete _perThread[0];
+  }
+};
+
 typedef std::vector<Tet*> cavityContainer;
 typedef std::vector<conn>   connContainer;
 
 void SortHilbert (std::vector<Vertex*>& v, std::vector<int> &indices);
 void computeAdjacencies (Tet *t, int iFace, connContainer &faceToTet);
-void __print (const char *name, std::vector<Tet*> &T, Vertex *v = 0);
-void delaunayTrgl (const unsigned int numThreads,
-		   const unsigned int NPTS_AT_ONCE,
-		   unsigned int Npts,
-		   std::vector<Tet*> &T,
-		   std::vector<Vertex*> assignTo[]);
-
+void __print (const char *name, int thread, tetContainer &T, Vertex *v = 0);
+void delaunayTrgl (const unsigned int numThreads, 
+		   const unsigned int NPTS_AT_ONCE, 
+		   unsigned int Npts, 
+		   std::vector<Vertex*> assignTo[],
+		   tetContainer &allocator);
 
 #endif
diff --git a/Mesh/delaunay_refinement.cpp b/Mesh/delaunay_refinement.cpp
index fa24adf655bc9552fd1ffee88b1aedaec1c61fdb..d5b84701e5834989f99810f8e9f2967f64ac8215 100644
--- a/Mesh/delaunay_refinement.cpp
+++ b/Mesh/delaunay_refinement.cpp
@@ -155,17 +155,19 @@ void saturateEdge (Edge &e, std::vector<Vertex*> &S, double (*f)(const SPoint3 &
 }
 
 void saturateEdges ( edgeContainer &ec,
-		     std::vector<Tet*> &T,
+		     tetContainer &T,
 		     int nbThreads,
 		     std::vector<Vertex*> &S,
 		     double (*f)(const SPoint3 &p, void *), void *data) {
   AVGSEARCH= 0;
   int counter = 0;
-  for (int i=0;i<T.size();i++){
-    if (T[i]->V[0] && T[i]->_modified){
-      T[i]->_modified = false;
+  // FIXME
+  for (int i=0;i<T.size(0);i++){
+    Tet *t = T(0,i);
+    if (t->V[0] && t->_modified){
+      t->_modified = false;
       for (int iEdge=0;iEdge<6;iEdge++){
-	Edge ed = T[i]->getEdge(iEdge);
+	Edge ed = t->getEdge(iEdge);
 	bool isNew = ec.addNewEdge(ed);
 	counter++;
 	if (isNew){
@@ -309,14 +311,15 @@ void edgeBasedRefinement (const int numThreads,
 
   // fill up old Datastructures
 
-  std::vector<Tet*> _tets;
+  tetContainer allocator (numThreads,1000000);
+
+
   std::vector<Vertex *> _vertices;
   edgeContainer ec;
   std::map<Vertex*,MVertex*> _ma;
 
   {
     std::vector<MTetrahedron*> &T = gr->tetrahedra;
-    _tets.resize(T.size());
     std::set<MVertex *> all;
     for (int i=0;i<T.size();i++){
       for (int j=0;j<4;j++){
@@ -335,28 +338,30 @@ void edgeBasedRefinement (const int numThreads,
       counter++;
     }
     connSet faceToTet;
+    // FIXME MULTITHREADING
     for (unsigned int i=0;i<T.size();i++){
-      int i0 = T[i]->getVertex(0)->getIndex();
-      int i1 = T[i]->getVertex(1)->getIndex();
-      int i2 = T[i]->getVertex(2)->getIndex();
-      int i3 = T[i]->getVertex(3)->getIndex();
-      Tet *t = new Tet (_vertices[i0],_vertices[i1],_vertices[i2],_vertices[i3]);
+      MTetrahedron  *tt = T[i];
+      int i0 = tt->getVertex(0)->getIndex();
+      int i1 = tt->getVertex(1)->getIndex();
+      int i2 = tt->getVertex(2)->getIndex();
+      int i3 = tt->getVertex(3)->getIndex();
+      Tet *t = allocator.newTet(0) ; t->setVertices (_vertices[i0],_vertices[i1],_vertices[i2],_vertices[i3]);
       computeAdjacencies (t,0,faceToTet);
       computeAdjacencies (t,1,faceToTet);
       computeAdjacencies (t,2,faceToTet);
       computeAdjacencies (t,3,faceToTet);
-      _tets[i] = t;
-      delete T[i];
+      delete tt;
     }
     T.clear();
   }
 
   // do not allow to saturate boundary edges
   {
-    for (unsigned int i=0;i<_tets.size();i++) {
+    for (unsigned int i=0;i< allocator.size(0);i++) {
+      Tet  *tt = allocator (0,i);
       for (int j=0;j<4;j++){
-	if (!_tets[i]->T[j]){
-	  Face f = _tets[i]->getFace(j);
+	if (!tt->T[j]){
+	  Face f = tt->getFace(j);
 	  for (int k=0;k<3;k++){
 	    Vertex *vi = f.V[k];
 	    Vertex *vj = f.V[(k+1)%3];
@@ -370,9 +375,10 @@ void edgeBasedRefinement (const int numThreads,
 	}
       }
     }
-    for (unsigned int i=0;i<_tets.size();i++) {
+    for (unsigned int i=0;i< allocator.size(0);i++) {
+      Tet  *tt = allocator (0,i);
       for (int j=0;j<6;j++){
-	Edge e = _tets[i]->getEdge(j);
+	Edge e = tt->getEdge(j);
 	if(e.first->lc() == 1.e22){/*printf("coucou\n");*/e.first->lc() = e.second->lc();}
 	else if(e.second->lc() == 1.e22){/*printf("coucou\n");*/e.second->lc() = e.first->lc();}
       }
@@ -388,16 +394,9 @@ void edgeBasedRefinement (const int numThreads,
 
     double __t__ = Cpu();
     while(1){
-      char name[256];
-      //      sprintf(name,"beforeRefinement%d.pos",iter);
-      //      print (name,_tets);
-      //      printf("ITER %3d\n",iter);
       std::vector<Vertex*> add;
       double t1 = Cpu();
-      saturateEdges (ec, _tets, numThreads, add, _fx, NULL);
-      //      printf("%d points to add\n",add.size());
-      //sprintf(name,"Points%d.pos",iter);
-      //     _print (name,add);
+      saturateEdges (ec, allocator, numThreads, add, _fx, NULL);
       double t2 = Cpu();
       filterVertices (numThreads, _filter, _vertices, add, _fx, NULL);
       double t3 = Cpu();
@@ -405,9 +404,7 @@ void edgeBasedRefinement (const int numThreads,
       std::vector<int> indices;
       SortHilbert(add, indices);
       double t4 = Cpu();
-      //      sprintf(name,"PointsFiltered%d.pos",iter);
-      //      _print (name,add);
-      delaunayTrgl (1,1,add.size(), _tets, &add);
+      delaunayTrgl (1,1,add.size(), &add,allocator);  
       add_all.insert (add_all.end(), add.begin(), add.end());
       double t5 = Cpu();
       Msg::Info("IT %3d %6d points added, timings %5.2f %5.2f %5.2f %5.2f %5.2f %5d",iter,add.size(),
@@ -416,30 +413,17 @@ void edgeBasedRefinement (const int numThreads,
 		(t4-t3),
 		(t5-t4),
 		(t5-__t__),
-		_tets.size());
+		allocator.size(0));
       iter++;
     }
-    //    print ("afterRefinement.pos",_tets);
   }
 
-  std::map<Edge,double> _sizes;
-  for (unsigned int i=0; i<_tets.size();i++){
+  for (unsigned int i=0; i< allocator.size(0);i++){
+    Tet  *tt = allocator (0,i);
     MVertex *mvs[4];
-
-
-    if (_tets[i]->V[0]){
-      for (int j=0;j<6;j++){
-	Edge e =  _tets[i]->getEdge(j);
-	std::map<Edge,double>::iterator it = _sizes.find(e);
-	if (it == _sizes.end()){
-	  double l = sqrt ((e.first->x() -  e.second->x()) * (e.first->x() -  e.second->x()) +
-			   (e.first->y() -  e.second->y()) * (e.first->y() -  e.second->y()) +
-			   (e.first->z() -  e.second->z()) * (e.first->z() -  e.second->z()));
-	  _sizes[e]= 2* l / (e.first->lc() + e.second->lc());
-	}
-      }
+    if (tt->V[0]){      
       for (int j=0;j<4;j++){
-	Vertex *v = _tets[i]->V[j];
+	Vertex *v = tt->V[j];
 	std::map<Vertex*,MVertex*>::iterator it = _ma.find(v);
 	if (it == _ma.end()){
 	  MVertex *mv = new MVertex (v->x(),v->y(),v->z(),gr);
@@ -451,16 +435,35 @@ void edgeBasedRefinement (const int numThreads,
       }
       gr->tetrahedra.push_back(new MTetrahedron(mvs[0],mvs[1],mvs[2],mvs[3]));
     }
-    delete _tets[i];
   }
 
-  std::map<Edge,double>::iterator it = _sizes.begin();
-  double sum = 0;
-  for (; it !=_sizes.end();++it){
-    double d = it->second;
-    double tau = d < 1 ? d - 1 : 1./d - 1;
-    sum += tau;
+  if (Msg::GetVerbosity() == 99) {
+    std::map<Edge,double> _sizes;
+    for (unsigned int i=0; i< allocator.size(0);i++){
+      Tet  *tt = allocator (0,i);
+      MVertex *mvs[4];
+      if (tt->V[0]){      
+	for (int j=0;j<6;j++){
+	  Edge e =  tt->getEdge(j);
+	  std::map<Edge,double>::iterator it = _sizes.find(e);
+	  if (it == _sizes.end()){
+	    double l = sqrt ((e.first->x() -  e.second->x()) * (e.first->x() -  e.second->x()) +
+			     (e.first->y() -  e.second->y()) * (e.first->y() -  e.second->y()) +
+			     (e.first->z() -  e.second->z()) * (e.first->z() -  e.second->z()));
+	    _sizes[e]= 2* l / (e.first->lc() + e.second->lc());
+	  }
+	}
+      }
+    }
+    std::map<Edge,double>::iterator it = _sizes.begin();
+    double sum = 0;
+    int nbBad = 0;
+    for (; it !=_sizes.end();++it){
+      double d = it->second;
+      double tau = d < 1 ? d - 1 : 1./d - 1;
+      if (d > 2.)nbBad++;
+      sum += tau;
+    }
+    Msg::Info("MESH EFFICIENCY : %22.15E %6d edges among %d are out of range",exp (sum / _sizes.size()),nbBad,_sizes.size());
   }
-  Msg::Info("MESH EFFICIENCY : %22.15E",exp (sum / _sizes.size()));
-
 }
diff --git a/Mesh/meshGRegion.cpp b/Mesh/meshGRegion.cpp
index ca00240b669fc9ab67044c4dcb6a786077f23a38..040db18bfbda297690dd7de31a7ab708d35b0e5b 100644
--- a/Mesh/meshGRegion.cpp
+++ b/Mesh/meshGRegion.cpp
@@ -634,7 +634,10 @@ static void MeshDelaunayVolumeNewCode(std::vector<GRegion*> &regions) {
 			    GRegion *gr);
   // just to remove tets that are not to be meshed
   insertVerticesInRegion(gr,0);
-  edgeBasedRefinement (1,1,gr);
+  for(unsigned int i = 0; i < regions.size(); i++){
+    Msg::Info("Refining volume %d",regions[i]->tag());
+    edgeBasedRefinement (1,1,regions[i]);
+  }
   //  RelocateVertices (regions,-1);
 }
 
diff --git a/Mesh/meshGRegionBoundaryRecovery.cpp b/Mesh/meshGRegionBoundaryRecovery.cpp
index 7edb97b04a3824ca3fbf586c14f498b4ce5c689e..58db9690660c1ddddeb361c25a09efc262ac6cc9 100644
--- a/Mesh/meshGRegionBoundaryRecovery.cpp
+++ b/Mesh/meshGRegionBoundaryRecovery.cpp
@@ -14856,7 +14856,7 @@ void meshGRegionBoundaryRecovery::reconstructmesh(GRegion *_gr)
 
   recoverdelaunay();
 
-  optimizemesh();
+  //  optimizemesh();
 
   if ((dupverts > 0l) || (unuverts > 0l)) {
     // Remove hanging nodes.
diff --git a/benchmarks/3d/Cube-01.geo b/benchmarks/3d/Cube-01.geo
index 7540862ed303238e4869226d5fb5b3cf38dffaa1..ebb2f0b2d8e7c947ba0ba7d2fa524d5d3f3df10d 100644
--- a/benchmarks/3d/Cube-01.geo
+++ b/benchmarks/3d/Cube-01.geo
@@ -6,7 +6,7 @@ Mesh.Smoothing=1;
 //Mesh.Voronoi=1;
 
 lc = 0.1;
-Point(1) = {0.0,0.0,0.0,lc/1};         
+Point(1) = {0.0,0.0,0.0,lc/10};         
 Point(2) = {1,0.0,0.0,lc/1};         
 Point(3) = {1,1,0.0,lc};         
 Point(4) = {0,1,0.0,lc};         
diff --git a/benchmarks/3d/Sphere.geo b/benchmarks/3d/Sphere.geo
index 3e445e48d0de1595b4ceaf6b65f12a87afb1e4ed..627274a6927d10057bd3a6b2ad3b3c31455267ab 100644
--- a/benchmarks/3d/Sphere.geo
+++ b/benchmarks/3d/Sphere.geo
@@ -45,6 +45,7 @@ Ruled Surface(26) = {25};
 Line Loop(27) = {-4,12,-6};
 Ruled Surface(28) = {27};
 Surface Loop(29) = {28,26,16,14,20,24,22,18};
+Volume(1111) = {29};
 
 Point(101) = {0,0,0,lcDom};
 Point(102) = {RDom,0,0,lcDom};
@@ -87,10 +88,10 @@ Volume(200) = {129,29};
 
 
 // Wall ID = 2100
-Physical Surface(2100) = {28,26,16,14,20,24,22,18};
+//Physical Surface(2100) = {28,26,16,14,20,24,22,18};
 // Farfield CBC ID = 2222
-Physical Surface(2222) = {128,126,116,114,120,124,122,118};
+//Physical Surface(2222) = {128,126,116,114,120,124,122,118};
 // Interior ID = 1000
-Physical Volume(1000) = {200};
+//Physical Volume(1000) = {200};
 
 
diff --git a/benchmarks/3d/sphere_in_cube.geo b/benchmarks/3d/sphere_in_cube.geo
index ce9ae9a71810aa9d3810a3f45fafd09283fb04bd..b7fda0c42cbcf58b40a15b87184a328c2e90e371 100644
--- a/benchmarks/3d/sphere_in_cube.geo
+++ b/benchmarks/3d/sphere_in_cube.geo
@@ -35,7 +35,7 @@ Ruled Surface(26) = {25};
 Line Loop(27) = {-4,12,-6};
 Ruled Surface(28) = {27};
 Surface Loop(29) = {28,26,16,14,20,24,22,18};
-Volume(30) = {29};
+//Volume(30) = {29};
 
 // try also netgen:
 // Mesh.Algorithm3D = 4;