diff --git a/Fltk/highOrderToolsWindow.cpp b/Fltk/highOrderToolsWindow.cpp
index fed9cb6e865e9c96e57692ef03c37a12a3cca255..f2cc517103cc9f46536491bb6d81d0f0d67a0354 100644
--- a/Fltk/highOrderToolsWindow.cpp
+++ b/Fltk/highOrderToolsWindow.cpp
@@ -84,28 +84,28 @@ static void chooseopti_cb(Fl_Widget *w, void *data)
 
   switch(algo) {
   case 0: {                                           // Optimization
-    o->push[0]->activate();
     o->choice[0]->activate();
     o->choice[3]->activate();
-    for (int i=2;i<=11;i++) o->value[i]->activate();
-    //    o->push[1]->activate();
+    for (int i=1;i<=8;i++) o->value[i]->activate();
+    if (o->choice[3]->value() == 0)
+      for (int i=9;i<=11;i++) o->value[i]->deactivate();
+    else
+      for (int i=9;i<=11;i++) o->value[i]->activate();
     break;
   }
   case 1: {                                           // Elastic analogy
     o->choice[0]->deactivate();
     o->choice[3]->deactivate();
-    for (int i=2;i<=11;i++) o->value[i]->deactivate();
-    //   o->push[1]->deactivate();
+    for (int i=1;i<=11;i++) o->value[i]->deactivate();
     break;
   }
   case 2: {                                           // Fast curving
     o->choice[0]->deactivate();
     o->choice[3]->deactivate();
-    for (int i=2;i<=6;i++)
+    o->value[1]->deactivate();
+    o->value[2]->activate();
+    for (int i=3;i<=11;i++)
       o->value[i]->deactivate();
-    for (int i=9;i<=11;i++) o->value[i]->deactivate();
-    for (int i=7;i<=8;i++) o->value[i]->activate();
-    //   o->push[1]->deactivate();
     break;
   }
   }
@@ -115,8 +115,10 @@ static void chooseopti_cb(Fl_Widget *w, void *data)
 static void chooseopti_strategy(Fl_Widget *w, void *data)
 {
   highOrderToolsWindow *o = FlGui::instance()->highordertools;
-  if (o->choice[3]->value() == 0) for (int i=9;i<=11;i++) o->value[i]->deactivate();
-  else for (int i=9;i<=11;i++) o->value[i]->activate();
+  if (o->choice[3]->value() == 0)
+    for (int i=9;i<=11;i++) o->value[i]->deactivate();
+  else
+    for (int i=9;i<=11;i++) o->value[i]->activate();
 }
 
 static void highordertools_runopti_cb(Fl_Widget *w, void *data)
@@ -127,27 +129,26 @@ static void highordertools_runopti_cb(Fl_Widget *w, void *data)
     FlGui::instance()->graph[0]->showMessages();
 
   const int algo = o->choice[2]->value();
-  double threshold_min = o->value[1]->value();
   bool onlyVisible = (bool)o->butt[1]->value();
-  int nbLayers = (int) o->value[2]->value();
-  double threshold_max = o->value[8]->value();
 
   int NE = 0;
-  for (GModel::riter it = GModel::current()->firstRegion(); it != GModel::current()->lastRegion(); ++it){
+  for (GModel::riter it = GModel::current()->firstRegion();
+       it != GModel::current()->lastRegion(); ++it) {
     NE += (*it)->getNumMeshElements();
   }
+  int dim = GModel::current()->getDim() == 3 ? ( NE ? 3 : 2 ) :  GModel::current()->getDim();
 
 
 #if defined(HAVE_OPTHOM)
   switch(algo) {
   case 0: {                                                               // Optimization
     OptHomParameters p;
-    p.nbLayers = nbLayers;
-    p.BARRIER_MIN = threshold_min;
-    p.BARRIER_MAX = threshold_max;
+    p.nbLayers = (int) o->value[2]->value();
+    p.BARRIER_MIN = o->value[1]->value();
+    p.BARRIER_MAX = o->value[8]->value();
     p.onlyVisible = onlyVisible;
     // change dim if no 3D elements are there
-    p.dim = GModel::current()->getDim() == 3 ? ( NE ? 3 : 2 ) :  GModel::current()->getDim();
+    p.dim = dim;
     p.itMax = (int) o->value[3]->value();
     p.optPassMax = (int) o->value[4]->value();
     p.weightFixed =  o->value[5]->value();
@@ -163,16 +164,14 @@ static void highordertools_runopti_cb(Fl_Widget *w, void *data)
     break;
   }
   case 1: {                                                               // Elastic analogy
-    ElasticAnalogy(GModel::current(), threshold_min, onlyVisible);
+    ElasticAnalogy(GModel::current(), onlyVisible);
     break;
   }
   case 2: {                                                               // Fast curving
     FastCurvingParameters p;
-    p.BARRIER_MIN = threshold_min;
-    p.BARRIER_MAX = threshold_max;
     p.onlyVisible = onlyVisible;
-    p.dim = GModel::current()->getDim() == 3 ? ( NE ? 3 : 2 ) :  GModel::current()->getDim();
-    p.distanceFactor =  o->value[7]->value();
+    p.dim = dim;
+    p.maxNumLayers = (int) o->value[2]->value();
     HighOrderMeshFastCurving(GModel::current(), p);
     break;
   }
@@ -252,7 +251,7 @@ highOrderToolsWindow::highOrderToolsWindow(int deltaFontSize)
   y += BH;
 
   butt[2] = new Fl_Check_Button
-    (x, y, width - 4 * WB, BH, "Generate curvilinear elements");
+    (x, y, width - 4 * WB, BH, "Use CAD model to curve mesh");
   butt[2]->type(FL_TOGGLE_BUTTON);
   butt[2]->value(1);
 
@@ -272,12 +271,24 @@ highOrderToolsWindow::highOrderToolsWindow(int deltaFontSize)
   {
     y += BH;
     Fl_Box *b = new Fl_Box
-      (x - WB, y, width, BH, "2. Optimization of high order elements");
+      (x - WB, y, width, BH, "2. Regularization of high order elements");
     b->align(FL_ALIGN_LEFT | FL_ALIGN_INSIDE);
   }
 
   y += BH;
+  static Fl_Menu_Item menu_method[] = {
+    {"Optimization", 0, 0, 0},
+    {"Elastic Analogy", 0, 0, 0},
+    {"Fast Curving", 0, 0, 0},
+    {0}
+  };
+  choice[2] = new Fl_Choice
+    (x, y, IW, BH, "Algorithm");
+  choice[2]->align(FL_ALIGN_RIGHT);
+  choice[2]->menu(menu_method);
+  choice[2]->callback(chooseopti_cb);
 
+  y += BH;
   value[1] = new Fl_Value_Input
     (x, y, IW/2.0, BH);
   value[1]->minimum(0);
@@ -312,27 +323,12 @@ highOrderToolsWindow::highOrderToolsWindow(int deltaFontSize)
   value[7]->align(FL_ALIGN_RIGHT);
   value[7]->value(12);
 
-  static Fl_Menu_Item menu_method[] = {
-    {"Optimization", 0, 0, 0},
-    {"Elastic Analogy", 0, 0, 0},
-    {"Fast Curving", 0, 0, 0},
-    {0}
-  };
-
   y += BH;
-  choice[2] = new Fl_Choice
-    (x, y, IW, BH, "Algorithm");
-  choice[2]->align(FL_ALIGN_RIGHT);
-  choice[2]->menu(menu_method);
-  choice[2]->callback(chooseopti_cb);
-
   static Fl_Menu_Item menu_objf[] = {
     {"Fixed", 0, 0, 0},
     {"Free", 0, 0, 0},
     {0}
   };
-
-  y += BH;
   choice[0] = new Fl_Choice
     (x, y, IW, BH, "Boundary vertices");
   choice[0]->menu(menu_objf);
@@ -410,7 +406,7 @@ highOrderToolsWindow::highOrderToolsWindow(int deltaFontSize)
 
   y += 1.5*BH;
   push[1] = new Fl_Button
-    (width - BB - 2 * WB, y, BB, BH, "Optimize");
+    (width - BB - 2 * WB, y, BB, BH, "Regularize");
   push[1]->callback(highordertools_runopti_cb);
 
   // win->resizable(o);
diff --git a/Geo/MQuadrangle.h b/Geo/MQuadrangle.h
index 13edb2941bccfbaa7c7050216400a09f5ea63615..187e6c639fcdd98827b71198400fee93aa820ae6 100644
--- a/Geo/MQuadrangle.h
+++ b/Geo/MQuadrangle.h
@@ -400,6 +400,10 @@ class MQuadrangleN : public MQuadrangle {
     MQuadrangle::_getFaceVertices(v);
     for(unsigned int i = 0; i != _vs.size(); ++i) v[i + 4] = _vs[i];
   }
+  virtual const char *getStringForPOS() const
+  {
+    return (getTypeForMSH() == MSH_QUA_9) ? "SQ2" : "SQ";
+  }
   virtual int getTypeForMSH() const
   {
     if(_order== 1 && _vs.size() + 4 == 4)   return MSH_QUA_4;
diff --git a/Geo/boundaryLayersData.cpp b/Geo/boundaryLayersData.cpp
index eb3a577fc6ccd8c649fdefab5384d06298f8bf88..a96851a7539450b7399fc6509c0d7e8c84eb743f 100644
--- a/Geo/boundaryLayersData.cpp
+++ b/Geo/boundaryLayersData.cpp
@@ -146,14 +146,17 @@ void buildMeshMetric(GFace *gf, double *uv, SMetric3 &m, double metric[3])
   metric[2] = res[1][1];
 }
 
-const BoundaryLayerData & BoundaryLayerColumns::getColumn(MVertex *v, MFace f)
+const BoundaryLayerData & BoundaryLayerColumns::getColumn(MVertex *v, MFace f) const
 {
   int N = getNbColumns(v) ;
   if (N == 1) return getColumn(v, 0);
-  GFace *gf = _inverse_classification[f];
-  for (int i=0;i<N;i++){
-    const BoundaryLayerData & c = getColumn(v, i);
-    if (std::find(c._joint.begin(),c._joint.end(),gf) != c._joint.end())return c;
+  std::map<MFace, GFace*, Less_Face>::const_iterator it = _inverse_classification.find(f);
+  if (it != _inverse_classification.end()) {
+    GFace *gf = it->second;
+    for (int i=0;i<N;i++){
+      const BoundaryLayerData & c = getColumn(v, i);
+      if (std::find(c._joint.begin(),c._joint.end(),gf) != c._joint.end())return c;
+    }
   }
   static BoundaryLayerData error;
   return error;
diff --git a/Geo/boundaryLayersData.h b/Geo/boundaryLayersData.h
index 057361b1c97fb9b9aad69a7ee88243e5ca027c2e..160c1e5bf8bdd7d69263cbba25d05e934756b98d 100644
--- a/Geo/boundaryLayersData.h
+++ b/Geo/boundaryLayersData.h
@@ -113,9 +113,9 @@ public:
      return 0;
   }
 
-  const BoundaryLayerData &getColumn(MVertex *v, MFace f);
+  const BoundaryLayerData &getColumn(MVertex *v, MFace f) const;
 
-  inline const BoundaryLayerData &getColumn(MVertex *v, MEdge e)
+  inline const BoundaryLayerData &getColumn(MVertex *v, MEdge e) const
   {
     std::map<MVertex*,BoundaryLayerFan>::const_iterator it = _fans.find(v);
     int N = getNbColumns(v) ;
@@ -133,11 +133,11 @@ public:
   }
   edgeColumn getColumns(MVertex *v1, MVertex *v2 , int side);
   faceColumn getColumns(GFace *gf, MVertex *v1, MVertex *v2 , MVertex* v3, int side);
-  inline int getNbColumns(MVertex *v) { return _data.count(v); }
-  inline const BoundaryLayerData &getColumn(MVertex *v, int iColumn)
+  inline int getNbColumns(MVertex *v) const { return _data.count(v); }
+  inline const BoundaryLayerData &getColumn(MVertex *v, int iColumn) const
   {
     int count = 0;
-    for(std::multimap<MVertex*,BoundaryLayerData>::iterator itm  = _data.lower_bound(v);
+    for(std::multimap<MVertex*,BoundaryLayerData>::const_iterator itm  = _data.lower_bound(v);
         itm != _data.upper_bound(v); ++itm){
       if (count++ == iColumn) return itm->second;
     }
diff --git a/Mesh/Generator.cpp b/Mesh/Generator.cpp
index a20f880db31b8bbc3f4a900fb255a03ac6fcf1a5..a746e4136e52c28c07970f696766a9d027b31aba 100644
--- a/Mesh/Generator.cpp
+++ b/Mesh/Generator.cpp
@@ -809,7 +809,7 @@ void GenerateMesh(GModel *m, int ask)
   if(CTX::instance()->mesh.hoOptimize){
 #if defined(HAVE_OPTHOM)
     if(CTX::instance()->mesh.hoOptimize < 0){
-      ElasticAnalogy(GModel::current(), CTX::instance()->mesh.hoThresholdMin, false);
+      ElasticAnalogy(GModel::current(), false);
     }
     else{
       OptHomParameters p;
diff --git a/contrib/HighOrderMeshOptimizer/OptHOM.cpp b/contrib/HighOrderMeshOptimizer/OptHOM.cpp
index f6a39bf29cd3932136eb81e1d475e553d59f2d12..05d88b3be147c29cec22ecb26bb41bf528173dc7 100644
--- a/contrib/HighOrderMeshOptimizer/OptHOM.cpp
+++ b/contrib/HighOrderMeshOptimizer/OptHOM.cpp
@@ -243,6 +243,26 @@ void OptHOM::calcScale(alglib::real_1d_array &scale)
     for (int iPC = 0; iPC < mesh.nPCFV(iFV); iPC++)
       scale[mesh.indPCFV(iFV,iPC)] = scaleFV[iPC];
   }
+
+//  std::vector<double> inSize(mesh.nEl());
+//  mesh.elInSize(inSize);
+//  for (int iEl = 0; iEl < mesh.nEl(); iEl++) {
+////    std::cout << "DBGTT: inSize[" << iEl << "] = " << inSize[iEl] << std::endl;
+//    for (int iPCEl = 0; iPCEl < mesh.nPCEl(iEl); iPCEl++)
+//      scale[mesh.indPCEl(iEl,iPCEl)] *= inSize[iEl];
+//  }
+
+//  for (int iEl = 0; iEl < mesh.nEl(); iEl++) {
+//    std::vector<double> sJ(mesh.nBezEl(iEl)), gSJ(mesh.nBezEl(iEl)*mesh.nPCEl(iEl));
+//    mesh.scaledJacAndGradients(iEl,sJ,gSJ);
+//    for (int iPC = 0; iPC < mesh.nPCEl(iEl); iPC++) {
+//      double grad = 0.;
+//      for (int l = 0; l < mesh.nBezEl(iEl); l++) grad = std::max(grad,fabs(gSJ[mesh.indGSJ(iEl,l,iPC)]));
+//      scale[mesh.indPCEl(iEl,iPC)] *= mesh.nBezEl(iEl)/grad;
+////      std::cout << "DBGTT: scale[" << mesh.indPCEl(iEl,iPC) << "] = " << scale[mesh.indPCEl(iEl,iPC)] << std::endl;
+//    }
+//  }
+
 }
 
 void OptHOM::OptimPass(alglib::real_1d_array &x,
diff --git a/contrib/HighOrderMeshOptimizer/OptHomElastic.cpp b/contrib/HighOrderMeshOptimizer/OptHomElastic.cpp
index c32f3ea4dd7d974573569c0c8513d60652ad6337..5c4849242c29f3342a9da5b41c953beed296e13f 100644
--- a/contrib/HighOrderMeshOptimizer/OptHomElastic.cpp
+++ b/contrib/HighOrderMeshOptimizer/OptHomElastic.cpp
@@ -55,7 +55,7 @@
 
 #define SQU(a)      ((a)*(a))
 
-void ElasticAnalogy(GModel *m, double threshold, bool onlyVisible)
+void ElasticAnalogy(GModel *m, bool onlyVisible)
 {
   bool CAD, complete;
   int meshOrder;
diff --git a/contrib/HighOrderMeshOptimizer/OptHomElastic.h b/contrib/HighOrderMeshOptimizer/OptHomElastic.h
index f823c23786980866cfbdb6d318bc8c50b9ecb9dc..2295e4eeecb2fb5f5e87fa4d945bc72f468b647f 100644
--- a/contrib/HighOrderMeshOptimizer/OptHomElastic.h
+++ b/contrib/HighOrderMeshOptimizer/OptHomElastic.h
@@ -34,7 +34,7 @@
 #include "GmshMessage.h"
 #include "GModel.h"
 
-void ElasticAnalogy(GModel *m, double threshold, bool onlyVisible);
+void ElasticAnalogy(GModel *m, bool onlyVisible);
 
 #if defined(HAVE_SOLVER)
 
diff --git a/contrib/HighOrderMeshOptimizer/OptHomFastCurving.cpp b/contrib/HighOrderMeshOptimizer/OptHomFastCurving.cpp
index ea07f088b60f3c83f58f5549b86228c5c746f776..20315c69fc3262c0e0153e323bf17ddcfa0d43cf 100644
--- a/contrib/HighOrderMeshOptimizer/OptHomFastCurving.cpp
+++ b/contrib/HighOrderMeshOptimizer/OptHomFastCurving.cpp
@@ -47,6 +47,7 @@
 #include "SuperEl.h"
 #include "SVector3.h"
 #include "BasisFactory.h"
+#include "Field.h"
 
 
 
@@ -54,69 +55,7 @@ namespace {
 
 
 
-void exportMeshToDassault(GModel *gm, const std::string &fn, int dim)
-{
-  FILE *f = fopen(fn.c_str(),"w");
-
-  int numVertices = gm->indexMeshVertices(true);
-  std::vector<GEntity*> entities;
-  gm->getEntities(entities);
-  fprintf(f,"%d %d\n", numVertices, dim);
-  for(unsigned int i = 0; i < entities.size(); i++)
-    for(unsigned int j = 0; j < entities[i]->mesh_vertices.size(); j++){
-      MVertex *v = entities[i]->mesh_vertices[j];
-      if (dim == 2)
-        fprintf(f,"%d %22.15E %22.15E\n", v->getIndex(), v->x(), v->y());
-      else if (dim == 3)
-        fprintf(f,"%d %22.15E %22.15E %22.5E\n", v->getIndex(), v->x(),
-                v->y(), v->z());
-    }
-
-  if (dim == 2){
-    int nt = 0;
-    int order  = 0;
-    for (GModel::fiter itf = gm->firstFace(); itf != gm->lastFace(); ++itf){
-      std::vector<MTriangle*> &tris = (*itf)->triangles;
-      nt += tris.size();
-      if (tris.size())order = tris[0]->getPolynomialOrder();
-    }
-    fprintf(f,"%d %d\n", nt,(order+1)*(order+2)/2);
-    int count = 1;
-    for (GModel::fiter itf = gm->firstFace(); itf != gm->lastFace(); ++itf){
-      std::vector<MTriangle*> &tris = (*itf)->triangles;
-      for (size_t i=0;i<tris.size();i++){
-  MTriangle *t = tris[i];
-  fprintf(f,"%d ", count++);
-  for (int j=0;j<t->getNumVertices();j++){
-    fprintf(f,"%d ", t->getVertex(j)->getIndex());
-  }
-  fprintf(f,"\n");
-      }
-    }
-    int ne = 0;
-    for (GModel::eiter ite = gm->firstEdge(); ite != gm->lastEdge(); ++ite){
-      std::vector<MLine*> &l = (*ite)->lines;
-      ne += l.size();
-    }
-    fprintf(f,"%d %d\n", ne,(order+1));
-    count = 1;
-    for (GModel::eiter ite = gm->firstEdge(); ite != gm->lastEdge(); ++ite){
-      std::vector<MLine*> &l = (*ite)->lines;
-      for (size_t i=0;i<l.size();i++){
-  MLine *t = l[i];
-  fprintf(f,"%d ", count++);
-  for (int j=0;j<t->getNumVertices();j++){
-    fprintf(f,"%d ", t->getVertex(j)->getIndex());
-  }
-  fprintf(f,"%d \n",(*ite)->tag());
-      }
-    }
-  }
-  fclose(f);
-}
-
-
-
+// Compute vertex -> element connectivity
 void calcVertex2Elements(int dim, GEntity *entity,
                                 std::map<MVertex*, std::vector<MElement *> > &vertex2elements)
 {
@@ -130,217 +69,452 @@ void calcVertex2Elements(int dim, GEntity *entity,
 
 
 
-// Among edges connected to a given vertex, return the direction of the one that is closest to the given normal
-// Return the given normal if no edge is sufficiently close
-SVector3 getNormalEdge(MVertex *vert, const SVector3 &n,
-                       const std::map<MVertex*, std::vector<MElement*> > &vertex2elements) {
-
-//  static const double spLimit = 0.70711;                          // Limit in dot product below which we return the normal
-  static const double spLimit = 0.5;                          // Limit in dot product below which we return the normal
-
-  const std::vector<MElement*> &elts = (*vertex2elements.find(vert)).second;     // All elements connected to vertex
+// Given a starting vertex and a direction, find the next vertex in a column
+MVertex *findVertFromDir(const std::map<MVertex*, std::vector<MElement*> > &vertex2elements,
+                         MVertex *v0, MVertex *vExclude, const SVector3 &dir, double spLimit)
+{
 
+  const std::vector<MElement*> &elts = (*vertex2elements.find(v0)).second;                      // All elements connected to vertex
+  MVertex *vFound = 0;
   double spMax = 0.;
-  SVector3 normalEdge;
-//  std::cout << "DBGTT: Looking for normal edge at vertex " << vert->getNum()
-//              << " with n = (" << n.x() << ", " << n.y() << ", " << n.z() << ")\n";
-
-  for (std::vector<MElement*>::const_iterator itEl = elts.begin(); itEl != elts.end(); ++itEl)
-    for (int i=0; i<(*itEl)->getNumEdges(); i++) {
-      std::vector<MVertex*> edgeVert;
-      (*itEl)->getEdgeVertices(i,edgeVert);
-      SVector3 edge;
-      if (edgeVert[0] == vert) edge = SVector3(vert->point(),edgeVert[1]->point());
-      else if (edgeVert[1] == vert) edge = SVector3(vert->point(),edgeVert[0]->point());
+  for (std::vector<MElement*>::const_iterator itEl = elts.begin(); itEl != elts.end(); ++itEl)  // Loop over all elements
+    for (int i=0; i<(*itEl)->getNumEdges(); i++) {                                              // Loop over all edges of each element
+      MEdge edge = (*itEl)->getEdge(i);
+      MVertex *vA = edge.getVertex(0), *vB = edge.getVertex(1), *vTmp;
+      if (v0 == vA)                                                                             // Skip if edge unconnected to vertex...
+        if (vB == vExclude) continue;                                                           // ... or if connecting to preceding vertex
+        else vTmp = vB;
+      else if (v0 == vB)
+        if (vA == vExclude) continue;
+        else vTmp = vA;
       else continue;
-      edge.normalize();
-      double sp = dot(edge,n);
-      if (sp > spMax) {                                           // Retain the edge giving max. dot product with normal
+      SVector3 tanVec = edge.tangent();
+      double sp = fabs(dot(tanVec, dir));
+      if ((sp > spMax) && (sp > spLimit)){                                                      // Retain the edge giving max. dot product with normal
         spMax = sp;
-        normalEdge = edge;
+        vFound = vTmp;
       }
-//      std::cout << "DBGTT:   -> checking edge " << edgeVert[0]->getNum() << " - " << edgeVert[1]->getNum()
-//                  << ", sp = " << sp << "\n";
     }
+  return vFound;
+}
 
-  if (spMax < spLimit) { std::cout << "DBGTT: no normal edge\n"; normalEdge = n; }                            // If max. dot product is below limit, just take normal
 
-  return normalEdge;
 
+// Find a column of vertices starting with a given vertex
+template<class bndType>
+bool findColumn(const std::map<MVertex*, std::vector<MElement*> > &vertex2elements,
+                MVertex *baseVert, const SVector3 &norm,
+                const bndType &baseEd, std::vector<MVertex*> &col, int maxNumLayers)
+{
+  static const double spMin = 0.866025404;                                                // Threshold in cosine between 1st edge and normal
+  static const double spTol = 0.999;                                                      // Threshold in cosine between an edge and 1st edge
+
+  MVertex *vert = findVertFromDir(vertex2elements, baseVert, 0, norm, spMin);
+  if (!vert) {                                                                            // If 1st normal edge not found...
+    vert = findVertFromDir(vertex2elements, baseVert, 0, baseEd.normal(), spMin);         // ... tries normal to base edge/face
+    if (!vert) {
+      Msg::Warning("Could not find normal edge for vertex %i ", baseVert->getNum());
+      return false;
+    }
+  }
+  SVector3 firstEdgeDir(baseVert->point(), vert->point());
+  firstEdgeDir.normalize();
+
+  SVector3 dir(baseVert->point(), vert->point());
+  col.push_back(vert);
+  MVertex *oldVert = baseVert, *newVert;
+  for (int iLayer = 0; iLayer < maxNumLayers; iLayer++) {
+    newVert = findVertFromDir(vertex2elements, vert, oldVert, firstEdgeDir, spTol);
+    col.push_back(newVert);                                                               // Will add null pointer at the end, which is OK
+    if (!newVert) break;
+    oldVert = vert;
+    vert = newVert;
+  }
+
+  return (col.size() > 1);
 }
 
 
 
-// Detect whether edge/face is curved, and give normal
-bool isCurvedAndNormal(int type, int order, const std::vector<MVertex*> &faceVert,
-                       SVector3 &normal, double &maxDist) {
+// Add normal to edge/face to data structure for vertices
+void addNormVertEl(MElement *el, const SVector3 &norm,
+                   std::map<MVertex*, SVector3> &normVert)
+{
+  for(unsigned int iV = 0; iV<el->getNumVertices(); iV++) {
+    MVertex *vert = el->getVertex(iV);
+    std::pair<std::map<MVertex*, SVector3>::iterator, bool> insNorm =           // If nothing for vert...
+      normVert.insert(std::pair<MVertex*, SVector3>(vert, SVector3(0.)));       // ... create zero normal
+    insNorm.first->second += norm;                                              // Cumulate normals
+  }
+}
 
-//  static const double eps = 1.e-10;
-  static const double eps = 1.e-6;
 
-  // Compute HO points in straight edge/face
-  const nodalBasis *lagrange = BasisFactory::getNodalBasis(ElementType::getTag(type,order,false));
-  const nodalBasis *lagrange1 = BasisFactory::getNodalBasis(ElementType::getTag(type,1,false));
-  int nV = lagrange->points.size1();
-  int nV1 = lagrange1->points.size1();
-  SPoint3 sxyz[256];
-  for (int i = 0; i < nV1; ++i) sxyz[i] = faceVert[i]->point();
-  for (int i = nV1; i < nV; ++i) {
-    double f[256];
-    lagrange1->f(lagrange->points(i, 0), lagrange->points(i, 1), lagrange->points(i, 2), f);
-    for (int j = 0; j < nV1; ++j)
-      sxyz[i] += sxyz[j] * f[j];
-  }
 
-  // Compute unit normal to straight edge/face and its scale [length]
-  double scale;
-  const SPoint3 &p0 = sxyz[0], &p1 = sxyz[1];
-  if (type == TYPE_LIN) {
-//    normal = SVector3(p0.y()-p1.y(),p1.x()-p0.x(),0.);
-    normal = SVector3(p1.y()-p0.y(),p0.x()-p1.x(),0.);
-    scale = normal.normalize();
-  }
-  else {
-    const SPoint3 &p2 = sxyz[2];
-    SVector3 p01(p0,p1), p02(p0,p2);
-//    normal = crossprod(p01,p02);
-    normal = crossprod(p02,p01);
-    scale = sqrt(normal.normalize());
+// Compute normals to boundary edges/faces and store them per vertex
+void buildNormals(const std::map<MVertex*, std::vector<MElement*> > &vertex2elements,
+                    const std::multimap<GEntity*,GEntity*> &entities, const FastCurvingParameters &p,
+                    std::map<GEntity*, std::map<MVertex*, SVector3> > &normVertEnt)
+{
+  normVertEnt.clear();
+  for (std::multimap<GEntity*,GEntity*>::const_iterator itBE = entities.begin();
+                                                   itBE != entities.end(); itBE++) {  // Loop over model entities
+    GEntity* const &bndEnt = itBE->second;
+    std::map<MVertex*, SVector3> &normVert = normVertEnt[bndEnt];
+    if (bndEnt->dim() == 1) {
+      GEdge *ge = bndEnt->cast2Edge();
+      for(unsigned int iEl = 0; iEl<ge->lines.size(); iEl++) {
+        MLine* const &line = ge->lines[iEl];
+        SVector3 norm = line->getEdge(0).normal();
+        addNormVertEl(line, norm, normVert);
+      }
+    }
+    else if (bndEnt->dim() == 2) {
+      GFace *gf = bndEnt->cast2Face();
+      for(unsigned int iEl = 0; iEl<gf->triangles.size(); iEl++) {
+        MTriangle* const &tri = gf->triangles[iEl];
+        SVector3 norm = tri->getFace(0).normal();
+        addNormVertEl(tri, norm, normVert);
+      }
+      for(unsigned int iEl = 0; iEl<gf->quadrangles.size(); iEl++) {
+        MQuadrangle* const &quad = gf->quadrangles[iEl];
+        SVector3 norm = quad->getFace(0).normal();
+        addNormVertEl(quad, norm, normVert);
+      }
+    }
+    for (std::map<MVertex*, SVector3> ::iterator itNV =                                 // Re-normalize for each geom. entity
+         normVert.begin(); itNV != normVert.end(); itNV++)
+      itNV->second.normalize();
   }
+}
+
 
-  // Calc max. normal dist. from straight to HO points
-  maxDist = 0.;
-  for (int iV = nV1; iV < nV; iV++) {
-    const double normalDisp = dot(SVector3(sxyz[iV],faceVert[iV]->point()),normal);
-    maxDist = std::max(maxDist,fabs(normalDisp));
+
+// Get first domain element for a given boundary edge
+MElement *getFirstEl(const std::map<MVertex*, std::vector<MElement*> > &vertex2elements,
+                     const MEdge &baseEd)
+{
+  MVertex *vb0 = baseEd.getVertex(0), *vb1 = baseEd.getVertex(1);
+
+  const std::vector<MElement*> &nb0 = vertex2elements.find(vb0)->second;
+  const std::vector<MElement*> &nb1 = vertex2elements.find(vb1)->second;
+  MElement *firstEl;
+  for (int iEl=0; iEl<nb0.size(); iEl++) {
+    if (find(nb1.begin(), nb1.end(), nb0[iEl]) != nb1.end()) {
+      firstEl = nb0[iEl];
+      break;
+    }
   }
 
-//  std::cout << "DBGTT: v0 is " << faceVert[0]->getNum() << ", v1 is " << faceVert[1]->getNum()
-//            << ", v2 is " << faceVert[2]->getNum() << ", maxDist = " << maxDist
-//            << ", scale = " << scale << ", test = " << (maxDist > eps*scale) << "\n";
-  return (maxDist > eps*scale);
+  return firstEl;
+}
 
+
+
+// Get intersection of two vectors of MElement*
+void intersect(const std::vector<MElement*> &vi1, const std::vector<MElement*> &vi2,
+               std::vector<MElement*> &vo)
+{
+  vo.clear();
+  for (std::vector<MElement*>::const_iterator it = vi1.begin(); it != vi1.end(); it++)
+    if (std::find(vi2.begin(), vi2.end(), *it) != vi2.end())
+      vo.push_back(*it);
 }
 
 
 
-void makeStraight(MElement *el, const std::set<MVertex*> &movedVert) {
+// Get first domain element for a given boundary face
+MElement *getFirstEl(const std::map<MVertex*, std::vector<MElement*> > &vertex2elements,
+                     const MFace &baseFace)
+{
+  MVertex *vb0 = baseFace.getVertex(0), *vb1 = baseFace.getVertex(1);
+
+  // Get elements common to vertices 0 and 1
+  const std::vector<MElement*> &nb0 = vertex2elements.find(vb0)->second;
+  const std::vector<MElement*> &nb1 = vertex2elements.find(vb1)->second;
+  std::vector<MElement*> nb01;
+  intersect(nb0, nb1, nb01);
+  if (nb01.empty()) return 0;
+  if (nb01.size() == 1) return nb01.front();
+
+  // Get elements common to vertices 0, 1 and 2
+  MVertex *vb2 = baseFace.getVertex(2);
+  const std::vector<MElement*> &nb2 = vertex2elements.find(vb2)->second;
+  std::vector<MElement*> nb012;
+  intersect(nb01, nb2, nb012);
+  if (nb012.empty()) return 0;
+  if (nb012.size() == 1) return nb012.front();
+
+  // If quad, get elements common to all 4 vertices
+  if (baseFace.getNumVertices() == 4) {
+    MVertex *vb3 = baseFace.getVertex(3);
+    const std::vector<MElement*> &nb3 = vertex2elements.find(vb3)->second;
+    std::vector<MElement*> nb0123;
+    intersect(nb012, nb3, nb0123);
+    if (nb0123.empty()) return 0;
+    if (nb0123.size() == 1) return nb0123.front();
+  }
 
-  const nodalBasis *nb = el->getFunctionSpace();
-  const fullMatrix<double> &pts = nb->points;
+  // Too many elements, return error
+  return 0;
+}
 
-  SPoint3 p;
 
-  for(int iPt = el->getNumPrimaryVertices(); iPt < el->getNumVertices(); ++iPt) {
-    MVertex *vert = el->getVertex(iPt);
-    if (movedVert.find(vert) == movedVert.end()) {
-      el->primaryPnt(pts(iPt,0),pts(iPt,1),pts(iPt,2),p);
-      vert->setXYZ(p.x(),p.y(),p.z());
-    }
+
+// Get base vertices (in order of first element) for an edge
+bool getBaseVertices(const MEdge &baseEd, MElement *firstEl, std::vector<MVertex*> &baseVert)
+{
+  baseVert.clear();
+  for (int iEd = 0; iEd < firstEl->getNumEdges(); iEd++)
+    if (firstEl->getEdge(iEd) == baseEd)
+      firstEl->getEdgeVertices(iEd, baseVert);
+
+  if (baseVert.empty()) {
+    Msg::Error("Base edge vertices not found in getBaseVertices");
+    return false;
   }
+  else
+    return true;
+}
 
+
+
+// Get base vertices (in order of first element) for a face
+bool getBaseVertices(const MFace &baseFace, MElement *firstEl, std::vector<MVertex*> &baseVert)
+{
+  baseVert.clear();
+  for (int iFace = 0; iFace < firstEl->getNumFaces(); iFace++)
+    if (firstEl->getFace(iFace) == baseFace)
+      firstEl->getFaceVertices(iFace, baseVert);
+
+  if (baseVert.empty()) {
+    Msg::Error("Base face vertices not found in getBaseVertices");
+    return false;
+  }
+  else
+    return true;
 }
 
 
 
-std::set<MElement*> getSuperElBlob(MElement *el, const std::map<MVertex*,
-                                   std::vector<MElement*> > &vertex2elements,
-                                   const SuperEl *sEl)
+// Top primary vertices (in order corresponding to the primary base vertices)
+template<class bndType>
+bool getTopPrimVertices(std::map<MVertex*, std::vector<MElement *> > &vertex2elements,
+                        const std::map<MVertex*, SVector3> &normVert,
+                        const BoundaryLayerColumns *blc, const bndType &baseBnd,
+                        int maxNumLayers, const std::vector<MVertex*> &baseVert,
+                        std::vector<MVertex*> &topPrimVert)
 {
+  int nbVert = baseBnd.getNumVertices();
+  topPrimVert = std::vector<MVertex*>(nbVert);
+  for(int i = 0; i < nbVert; i++) {
+    if (blc && (blc->size() > 1)) {                                                             // Is there BL data?
+      const BoundaryLayerData &c = blc->getColumn(baseVert[i], baseBnd);
+      if (c._column.size() == 0) return false;                                                  // Give up element if column not found
+      topPrimVert[i] = *(c._column.end()-2);
+    }
+    else {                                                                                      // No BL data, look for columns of vertices
+      std::map<MVertex*, SVector3>::const_iterator itNorm = normVert.find(baseVert[i]);
+      if (itNorm == normVert.end()) {
+        Msg::Error("Normal to vertex not found in getTopPrimVertices");
+        itNorm == normVert.begin();
+      }
+      std::vector<MVertex*> col;
+      bool colFound = findColumn(vertex2elements, baseVert[i], itNorm->second,
+                                 baseBnd, col, maxNumLayers);
+      if (!colFound) return false;                                                              // Give up element if column not found
+      topPrimVert[i] = *(col.end()-2);
+    }
+  }
+
+  return true;
+}
 
-  static const int depth = 100;
 
-  std::set<MElement*> blob;
-  std::list<MElement*> currentLayer, lastLayer;
 
-  blob.insert(el);
-  lastLayer.push_back(el);
-  for (int d = 0; d < depth; ++d) {
+// Get blob of elements encompassed by a given meta-element
+// FIXME: Implement 3D
+void get2DBLBlob(const std::map<MVertex*, std::vector<MElement*> > &vertex2elements,
+                 MElement *firstEl, const SuperEl *supEl, std::set<MElement*> &blob)
+{
+  static const int maxDepth = 100;
+  typedef std::list<MElement*> elLType;
+  typedef std::vector<MElement*> elVType;
+
+  // Build blob by looping over layers of elements (assuming that if an el is too far, its neighbours are too far as well)
+  blob.clear();
+  elLType currentLayer, lastLayer;
+  blob.insert(firstEl);
+  lastLayer.push_back(firstEl);
+  for (int d = 0; d < maxDepth; ++d) {                                                              // Loop over layers
     currentLayer.clear();
-    for (std::list<MElement*>::iterator it = lastLayer.begin();
-         it != lastLayer.end(); ++it) {
+    for (elLType::iterator it = lastLayer.begin(); it != lastLayer.end(); ++it) {                   // Loop over elements of last layer
       for (int i = 0; i < (*it)->getNumPrimaryVertices(); ++i) {
-        const std::vector<MElement*> &neighbours = vertex2elements.find
-          ((*it)->getVertex(i))->second;
-        for (std::vector<MElement*>::const_iterator itN = neighbours.begin();
-             itN != neighbours.end(); ++itN){
-          if (sEl->isPointIn((*itN)->barycenter(true))) {
-            // Assume that if an el is too far, its neighbours are too far as well
-            if (blob.insert(*itN).second) currentLayer.push_back(*itN);
-          }
+        const elVType &neighbours = vertex2elements.find((*it)->getVertex(i))->second;
+        for (elVType::const_iterator itN = neighbours.begin(); itN != neighbours.end(); ++itN) {    // Loop over neighbours of last layer
+          SPoint3 p = (*itN)->barycenter(true);
+          if (supEl->isPointIn(p))                                                                  // Add neighbour to blob if inside test element
+            if (blob.insert(*itN).second) currentLayer.push_back(*itN);                             // Add to current layer if new in blob
         }
       }
     }
+    if (currentLayer.empty()) break;                                                                // Finished if no new element in blob
     lastLayer = currentLayer;
-    if (currentLayer.empty()) break;
   }
+}
+
+
 
-  return blob;
+void makeStraight(MElement *el, const std::set<MVertex*> &movedVert)
+{
+  const nodalBasis *nb = el->getFunctionSpace();
+  const fullMatrix<double> &pts = nb->points;
+
+  SPoint3 p;
+
+  for(int iPt = el->getNumPrimaryVertices(); iPt < el->getNumVertices(); ++iPt) {
+    MVertex *vert = el->getVertex(iPt);
+    if (movedVert.find(vert) == movedVert.end()) {
+      el->primaryPnt(pts(iPt,0),pts(iPt,1),pts(iPt,2),p);
+      vert->setXYZ(p.x(),p.y(),p.z());
+    }
+  }
 }
 
 
 
-void curveMeshFromFaces(std::map<MVertex*, std::vector<MElement *> > &vertex2elements,
-                        std::set<MElement*> &faceElements, FastCurvingParameters &p)
+inline void insertIfCurved(MElement *el, std::list<MElement*> &bndEl)
 {
+  static const double curvedTol = 1.e-3;                                              // Tolerance to consider element as curved
 
-  const int nbFaceElts = faceElements.size();
-  std::vector<MElement*> faceElts;
-  std::vector<SuperEl*> superElts;
-  faceElts.reserve(nbFaceElts);
-  superElts.reserve(nbFaceElts);
+  const double normalDispCurved = curvedTol*el->getInnerRadius();                     // Threshold in normal displacement to consider element as curved
+  const int dim = el->getDim();
 
-  std::ofstream of("dum.pos");
-  of << "View \" \"{\n";
+  // Compute unit normal to straight edge/face
+  SVector3 normal = (dim == 1) ? el->getEdge(0).normal() : el->getFace(0).normal();
+
+  // Get functional spaces
+  const nodalBasis *lagBasis = el->getFunctionSpace();
+  const fullMatrix<double> &uvw = lagBasis->points;
+  const int &nV = uvw.size1();
+  const nodalBasis *lagBasis1 = el->getFunctionSpace(1);
+  const int &nV1 = lagBasis1->points.size1();
 
-  for (std::set<MElement*>::const_iterator itFE = faceElements.begin(); itFE != faceElements.end(); ++itFE) {
-    const int dim = (*itFE)->getDim();
-    const int order = (*itFE)->getPolynomialOrder();
-    const int numPrimVert = (*itFE)->getNumPrimaryVertices();
-    const int type = (*itFE)->getType();
-    std::vector<MVertex*> faceVert;
-    (*itFE)->getVertices(faceVert);
-    double maxDist;
-    SVector3 faceNormal;
-    if (isCurvedAndNormal(type,order,faceVert,faceNormal,maxDist)) {
-      std::vector<SVector3> baseNormal;
-      for (int iV=0; iV<numPrimVert; iV++)                                // Compute normals to prim. vert. of edge/face
-        baseNormal.push_back(getNormalEdge(faceVert[iV],faceNormal,vertex2elements));
-      faceElts.push_back(*itFE);
-      superElts.push_back(new SuperEl(order,maxDist*p.distanceFactor,type,faceVert,baseNormal));
-      of << superElts.back()->printPOS();
+  // Get first-order vertices
+  std::vector<SPoint3> xyz1(nV1);
+  for (int iV = 0; iV < nV1; ++iV) xyz1[iV] = el->getVertex(iV)->point();
+
+  // Check normal displacement at each HO vertex
+  for (int iV = nV1; iV < nV; ++iV) {                                                 // Loop over HO nodes
+    double f[256];
+    lagBasis1->f(uvw(iV, 0), (dim > 1) ? uvw(iV, 1) : 0., 0., f);
+    SPoint3 xyzS(0.,0.,0.);
+    for (int iSF = 0; iSF < nV1; ++iSF) xyzS += xyz1[iSF]*f[iSF];                     // Compute location of node in straight element
+    const SVector3 vec(xyzS, el->getVertex(iV)->point());
+    const double normalDisp = dot(vec, normal);                                       // Normal component of displacement
+    if (normalDisp > normalDispCurved) {
+      bndEl.push_back(el);
+      break;
     }
   }
+}
+
 
-  of << "};\n";
-  of.close();
+
+// Curve elements adjacent to a boundary model entity
+void curveMeshFromBnd(std::map<MVertex*, std::vector<MElement *> > &vertex2elements,
+                      const std::map<MVertex*, SVector3> &normVert, BoundaryLayerColumns *blc,
+                      GEntity *bndEnt, FastCurvingParameters &p)
+{
+  // Build list of bnd. elements to consider
+  std::list<MElement*> bndEl;
+  if (bndEnt->dim() == 1) {
+    GEdge *gEd = bndEnt->cast2Edge();
+    for(unsigned int i = 0; i< gEd->lines.size(); i++)
+      insertIfCurved(gEd->lines[i], bndEl);
+  }
+  else if (bndEnt->dim() == 2) {
+    GFace *gFace = bndEnt->cast2Face();
+    for(unsigned int i = 0; i< gFace->triangles.size(); i++)
+      insertIfCurved(gFace->triangles[i], bndEl);
+    for(unsigned int i = 0; i< gFace->quadrangles.size(); i++)
+      insertIfCurved(gFace->quadrangles[i], bndEl);
+  }
+  else
+    Msg::Error("Cannot treat model entity %i of dim %i", bndEnt->tag(), bndEnt->dim());
+
+  std::ostringstream oss;
+  oss << "meta-elements_" << bndEnt->tag() << ".pos";
+  std::ofstream of(oss.str().c_str());
+  of << "View \" \"{\n";
 
   std::set<MVertex*> movedVert;
-  for (int iFE=0; iFE<faceElts.size(); ++iFE) {
-    std::set<MElement*> blob = getSuperElBlob(faceElts[iFE], vertex2elements, superElts[iFE]);
-//    std::cout << "DBGTT: Blob of bad el. " << faceElts[iBE]->getNum() << " contains elts.";
-//    for (std::set<MElement*>::iterator itE = blob.begin(); itE != blob.end(); ++itE) std::cout << " " << (*itE)->getNum();
-//    std::cout << "\n";
-//    makeStraight(faceElts[iFE],movedVert);                                             // Make bad. el. straight
+  for(std::list<MElement*>::iterator itBE = bndEl.begin();
+      itBE != bndEl.end(); itBE++) {   // Loop over bnd. elements
+    const int bndType = (*itBE)->getType();
+    int seType;
+    bool foundVert;
+    MElement *firstEl = 0;
+    std::vector<MVertex*> baseVert, topPrimVert;
+    if (bndType == TYPE_LIN) {                                                               // 1D boundary?
+      MVertex *vb0 = (*itBE)->getVertex(0);
+      MVertex *vb1 = (*itBE)->getVertex(1);
+      seType = TYPE_QUA;
+      MEdge baseEd(vb0, vb1);
+      firstEl = getFirstEl(vertex2elements, baseEd);
+      foundVert = getBaseVertices(baseEd, firstEl, baseVert);
+      if (!foundVert) continue;                                                             // Skip bnd. el. if base vertices not found
+      foundVert = getTopPrimVertices(vertex2elements, normVert, blc, baseEd,
+                                     p.maxNumLayers, baseVert, topPrimVert);
+    }
+    else {                                                                                  // 2D boundary
+      MVertex *vb0 = (*itBE)->getVertex(0);
+      MVertex *vb1 = (*itBE)->getVertex(1);
+      MVertex *vb2 = (*itBE)->getVertex(2);
+      MVertex *vb3;
+      if (bndType == TYPE_QUA) {
+        vb3 = (*itBE)->getVertex(3);
+        seType = TYPE_HEX;
+      }
+      else {
+        vb3 = 0;
+        seType = TYPE_PRI;
+      }
+      MFace baseFace(vb0, vb1, vb2, vb3);
+      firstEl = getFirstEl(vertex2elements, baseFace);
+      if (!firstEl) {
+        Msg::Error("Did not find first domain element for boundary element %i",
+                   (*itBE)->getNum());
+        continue;
+      }
+      foundVert = getBaseVertices(baseFace, firstEl, baseVert);
+      if (!foundVert) continue;                                                             // Skip bnd. el. if base vertices not found
+      foundVert = getTopPrimVertices(vertex2elements, normVert, blc,
+                                     baseFace, p.maxNumLayers,
+                                     baseVert, topPrimVert);
+    }
+    if (!foundVert) continue;                                                               // Skip bnd. el. if top vertices not found
+    int order = firstEl->getPolynomialOrder();
+    SuperEl se(seType, order, baseVert, topPrimVert);
+    of << se.printPOS();
+    std::set<MElement*> blob;
+    get2DBLBlob(vertex2elements, firstEl, &se, blob); // TODO: Implement for 3D
     for (std::set<MElement*>::iterator itE = blob.begin(); itE != blob.end(); ++itE) {
-      makeStraight(*itE,movedVert);
-      for (int i = 0; i < (*itE)->getNumVertices(); ++i) {                            // For each vert. of each el. in blob
-//      for (int i = (*itE)->getNumPrimaryVertices(); i < (*itE)->getNumVertices(); ++i) {                            // For each vert. of each el. in blob
+      makeStraight(*itE, movedVert);
+      for (int i = (*itE)->getNumPrimaryVertices(); i < (*itE)->getNumVertices(); ++i) {    // Loop over HO vert. of each el. in blob
         MVertex* vert = (*itE)->getVertex(i);
-        if (movedVert.find(vert) == movedVert.end()) {                                // If vert. not already moved
+        if (movedVert.find(vert) == movedVert.end()) {                                      // Try to move vert. not already moved
           double xyzS[3] = {vert->x(), vert->y(), vert->z()}, xyzC[3];
-          if (superElts[iFE]->straightToCurved(xyzS,xyzC)) {
-//            std::cout << "DBGTT: moving vertex " << vert->getNum() << " from (" << xyzS[0] << "," << xyzS[1] << "," << xyzS[2] << ") to (" << xyzC[0] << "," << xyzC[1] << "," << xyzC[2] << ")\n";
-            vert->setXYZ(xyzC[0],xyzC[1],xyzC[2]);
+          if (se.straightToCurved(xyzS,xyzC)) {
+            vert->setXYZ(xyzC[0], xyzC[1], xyzC[2]);
             movedVert.insert(vert);
           }
-//          else std::cout << "DBGTT: Failed to move vertex " << vert->getNum() << " with bad. el " << faceElts[iBE]->getNum() << "\n";
         }
-//        else std::cout << "DBGTT: Already moved vertex " << vert->getNum() << " with bad. el " << faceElts[iBE]->getNum() << "\n";
       }
     }
   }
 
+  of << "};\n";
+  of.close();
 }
 
 
@@ -349,35 +523,81 @@ void curveMeshFromFaces(std::map<MVertex*, std::vector<MElement *> > &vertex2ele
 
 
 
+// Main function for fast curving
 void HighOrderMeshFastCurving(GModel *gm, FastCurvingParameters &p)
 {
-
   double t1 = Cpu();
 
   Msg::StatusBar(true, "Optimizing high order mesh...");
-  std::vector<GEntity*> entities;
-  gm->getEntities(entities);
-  const int bndDim = p.dim-1;
+  std::vector<GEntity*> allEntities;
+  gm->getEntities(allEntities);
 
   // Compute vert. -> elt. connectivity
   Msg::Info("Computing connectivity...");
   std::map<MVertex*, std::vector<MElement *> > vertex2elements;
-  for (int iEnt = 0; iEnt < entities.size(); ++iEnt)
-    calcVertex2Elements(p.dim,entities[iEnt],vertex2elements);
+  for (int iEnt = 0; iEnt < allEntities.size(); ++iEnt)
+    calcVertex2Elements(p.dim, allEntities[iEnt], vertex2elements);
+
+  // Get BL field (if any)
+  BoundaryLayerField *blf = getBLField(gm);
+
+  // Build multimap of each geometric entity to its boundaries
+  std::multimap<GEntity*,GEntity*> entities;
+  if (blf) {                                                                                    // BF field?
+    for (int iEnt = 0; iEnt < allEntities.size(); ++iEnt) {
+      GEntity* &entity = allEntities[iEnt];
+      if (entity->dim() == p.dim && (!p.onlyVisible || entity->getVisibility()))                // Consider only "domain" entities
+        if (p.dim == 2) {                                                                       // "Domain" face?
+          std::list<GEdge*> edges = entity->edges();
+          for (std::list<GEdge*>::iterator itEd = edges.begin(); itEd != edges.end(); itEd++)   // Loop over model boundary edges
+            if (blf->isEdgeBL((*itEd)->tag()))                                                  // Already skip model edge if no BL there
+              entities.insert(std::pair<GEntity*,GEntity*>(entity, *itEd));
+        }
+        else if (p.dim == 3) {                                                                  // "Domain" region?
+          std::list<GFace*> faces = entity->faces();
+          for (std::list<GFace*>::iterator itF = faces.begin(); itF != faces.end(); itF++)      // Loop over model boundary faces
+            if (blf->isFaceBL((*itF)->tag()))                                                   // Already skip model face if no BL there
+              entities.insert(std::pair<GEntity*,GEntity*>(entity, *itF));
+        }
+    }
+  }
+  else {                                                                                        // No BL field
+    for (int iEnt = 0; iEnt < allEntities.size(); ++iEnt) {
+      GEntity* &entity = allEntities[iEnt];
+      if (entity->dim() == p.dim-1 && (!p.onlyVisible || entity->getVisibility()))              // Consider boundary entities
+        entities.insert(std::pair<GEntity*,GEntity*>(0, entity));
+    }
+  }
+
+  // Build normals if necessary
+  std::map<GEntity*, std::map<MVertex*, SVector3> > normVertEnt;                                // Normal to each vertex for each geom. entity
+  if (!blf) {
+    Msg::Warning("Boundary layer data not found, trying to detect columns");
+    buildNormals(vertex2elements, entities, p, normVertEnt);
+  }
 
   // Loop over geometric entities
-  for (int iEnt = 0; iEnt < entities.size(); ++iEnt) {
-    GEntity* &entity = entities[iEnt];
-    if (entity->dim() != bndDim || (p.onlyVisible && !entity->getVisibility())) continue;
-    Msg::Info("Curving elements for entity %d...",entity->tag());
-    std::set<MElement*> faceElements;
-    for (int iEl = 0; iEl < entity->getNumMeshElements();iEl++)
-      faceElements.insert(entity->getMeshElement(iEl));
-    curveMeshFromFaces(vertex2elements, faceElements, p);
+  for (std::multimap<GEntity*,GEntity*>::iterator itBE = entities.begin();
+       itBE != entities.end(); itBE++) {
+    GEntity *domEnt = itBE->first, *bndEnt = itBE->second;
+    BoundaryLayerColumns *blc = 0;
+    if (blf) {
+      Msg::Info("Curving elements for entity %d bounding entity %d...",
+                bndEnt->tag(), domEnt->tag());
+      if (p.dim == 2)
+        blc = domEnt->cast2Face()->getColumns();
+      else if (p.dim == 3)
+        blc = domEnt->cast2Region()->getColumns();
+      else
+        Msg::Error("Fast curving implemented only in dim. 2 and 3");
+    }
+    else
+      Msg::Info("Curving elements for boundary entity %d...", bndEnt->tag());
+    std::map<MVertex*, SVector3> &normVert = normVertEnt[bndEnt];
+    curveMeshFromBnd(vertex2elements, normVert, blc, bndEnt, p);
   }
 
   double t2 = Cpu();
 
   Msg::StatusBar(true, "Done curving high order mesh (%g s)", t2-t1);
-
 }
diff --git a/contrib/HighOrderMeshOptimizer/OptHomFastCurving.h b/contrib/HighOrderMeshOptimizer/OptHomFastCurving.h
index f9ae4a56b6107f40ccf8c9c0995fa261e144a27f..9b114915daaf47b0d43f6cb0a85c46022e61f5a2 100644
--- a/contrib/HighOrderMeshOptimizer/OptHomFastCurving.h
+++ b/contrib/HighOrderMeshOptimizer/OptHomFastCurving.h
@@ -33,17 +33,12 @@
 class GModel;
 
 struct FastCurvingParameters {
-  // INPUT ------>
-  double BARRIER_MIN ; // minimum scaled jcaobian
-  double BARRIER_MAX ; // maximum scaled jcaobian
-  int dim ; // which dimension to optimize
-  bool onlyVisible ; // apply optimization to visible entities ONLY
-  double distanceFactor; // filter elements such that no elements further away
-                         // than DistanceFactor times the max distance to
-                         // straight sided version of an element are optimized
+  int dim ;           // Which dimension to curve
+  bool onlyVisible ;  // Apply curving to visible entities ONLY
+  int maxNumLayers;   // Maximum number of element layers to consider when trying to detect BL
 
   FastCurvingParameters ()
-    : BARRIER_MIN(0.1), BARRIER_MAX(2.0), dim(3) , onlyVisible(true), distanceFactor(12)
+    : dim(3) , onlyVisible(true), maxNumLayers(6)
   {
   }
 };
diff --git a/contrib/HighOrderMeshOptimizer/OptHomMesh.cpp b/contrib/HighOrderMeshOptimizer/OptHomMesh.cpp
index 4322850bbeaee9e7d1ffd1fd0e137c96126c0bec..3d10e1aca3a63fa62c00cc5dff0f77ab32d55fc1 100644
--- a/contrib/HighOrderMeshOptimizer/OptHomMesh.cpp
+++ b/contrib/HighOrderMeshOptimizer/OptHomMesh.cpp
@@ -222,6 +222,12 @@ void Mesh::elSizeSq(std::vector<double> &sSq)
   }
 }
 
+void Mesh::elInSize(std::vector<double> &s)
+{
+  for (int iEl = 0; iEl < nEl(); iEl++)
+    s[iEl] = fabs(_el[iEl]->getInnerRadius());
+}
+
 void Mesh::updateGEntityPositions()
 {
   for (int iV = 0; iV < nVert(); iV++)
@@ -374,7 +380,7 @@ void Mesh::writeMSH(const char *filename)
   fprintf(f, "$Elements\n");
   fprintf(f, "%d\n", nEl());
   for (int iEl = 0; iEl < nEl(); iEl++) {
-    fprintf(f, "%d %d 2 0 0", iEl+1, _el[iEl]->getTypeForMSH());
+    fprintf(f, "%d %d 2 0 0", _el[iEl]->getNum(), _el[iEl]->getTypeForMSH());
     for (size_t iVEl = 0; iVEl < _el2V[iEl].size(); iVEl++)
       fprintf(f, " %d", _el2V[iEl][iVEl] + 1);
     fprintf(f, "\n");
diff --git a/contrib/HighOrderMeshOptimizer/OptHomMesh.h b/contrib/HighOrderMeshOptimizer/OptHomMesh.h
index ea54b5b2cc41668f3e9bc0652ea990fc3df6d9e0..8a6d6bdb2e244fd33044cff8963e7d4f126979a7 100644
--- a/contrib/HighOrderMeshOptimizer/OptHomMesh.h
+++ b/contrib/HighOrderMeshOptimizer/OptHomMesh.h
@@ -69,6 +69,7 @@ public:
   void updateMesh(const double *it);
   void distSqToStraight(std::vector<double> &dSq);
   void elSizeSq(std::vector<double> &sSq);
+  void elInSize(std::vector<double> &s);
 
   void updateGEntityPositions();
   void writeMSH(const char *filename);
diff --git a/contrib/HighOrderMeshOptimizer/OptHomRun.cpp b/contrib/HighOrderMeshOptimizer/OptHomRun.cpp
index 2e0df6963df6a879994486dd17c9e44a10d42d84..821a8ead4ba505b6bb6450fc862b573cf4670486 100644
--- a/contrib/HighOrderMeshOptimizer/OptHomRun.cpp
+++ b/contrib/HighOrderMeshOptimizer/OptHomRun.cpp
@@ -160,6 +160,15 @@ static std::set<MElement*> getSurroundingBlob
           }
         }
       }
+//      for (int i = 0; i < (*it)->getNumPrimaryVertices(); ++i) {
+//        if ((d < forceDepth) || (p.distance((*it)->getVertex(i)->point()) < limDist)) {
+//          const std::vector<MElement*> &neighbours = vertex2elements.find((*it)->getVertex(i))->second;
+//          for (std::vector<MElement*>::const_iterator itN = neighbours.begin(); itN != neighbours.end(); ++itN) {
+//              // Assume that if an el is too far, its neighbours are too far as well
+//              if (blob.insert(*itN).second) currentLayer.push_back(*itN);
+//          }
+//        }
+//      }
     }
     lastLayer = currentLayer;
   }
@@ -287,13 +296,14 @@ static void optimizeConnectedBlobs
 
   //#pragma omp parallel for schedule(dynamic, 1)
   for (int i = 0; i < toOptimize.size(); ++i) {
+//    if (toOptimize[i].first.size() > 10000) continue;
     Msg::Info("Optimizing a blob %i/%i composed of %4d elements", i+1,
               toOptimize.size(), toOptimize[i].first.size());
     fflush(stdout);
     OptHOM temp(element2entity, toOptimize[i].first, toOptimize[i].second, p.fixBndNodes);
-    //std::ostringstream ossI1;
-    //ossI1 << "initial_ITER_" << i << ".msh";
-    //temp.mesh.writeMSH(ossI1.str().c_str());
+    std::ostringstream ossI1;
+    ossI1 << "initial_blob-" << i << ".msh";
+    temp.mesh.writeMSH(ossI1.str().c_str());
     int success = -1;
     if (temp.mesh.nPC() == 0)
       Msg::Info("Blob %i has no degree of freedom, skipping", i+1);
@@ -537,9 +547,9 @@ static void optimizeOneByOne
                 toOptimize.size());
       fflush(stdout);
       OptHOM *opt = new OptHOM(element2entity, toOptimize, toFix, p.fixBndNodes);
-      //std::ostringstream ossI1;
-      //ossI1 << "initial_corrective_" << iter << ".msh";
-      //opt->mesh.writeMSH(ossI1.str().c_str());
+      std::ostringstream ossI1;
+      ossI1 << "initial_blob-" << iBadEl << ".msh";
+      opt->mesh.writeMSH(ossI1.str().c_str());
       success = opt->optimize(p.weightFixed, p.weightFree, p.BARRIER_MIN,
                               p.BARRIER_MAX, false, samples, p.itMax, p.optPassMax);
       if (success >= 0 && p.BARRIER_MIN_METRIC > 0) {
@@ -568,7 +578,7 @@ static void optimizeOneByOne
                   iBadEl, iterBlob);
 //        if (iterBlob == p.maxAdaptBlob-1) {
           std::ostringstream ossI2;
-          ossI2 << "final_" << iBadEl << ".msh";
+          ossI2 << "final_blob-" << iBadEl << ".msh";
           opt->mesh.writeMSH(ossI2.str().c_str());
 //        }
       }
diff --git a/contrib/HighOrderMeshOptimizer/SuperEl.cpp b/contrib/HighOrderMeshOptimizer/SuperEl.cpp
index 295968872cea5eb3aa84b11f900a734b172629f5..59382a442a9a5216f6e3eea94e91a27dd4cc24b7 100644
--- a/contrib/HighOrderMeshOptimizer/SuperEl.cpp
+++ b/contrib/HighOrderMeshOptimizer/SuperEl.cpp
@@ -34,212 +34,163 @@
 #include "MQuadrangle.h"
 #include "MPrism.h"
 #include "MHexahedron.h"
+#include "BasisFactory.h"
 #include "SuperEl.h"
 
 
 
-SuperEl::SuperEl(int order, double dist, int type, const std::vector<MVertex*> &baseVert,
-                 const std::vector<SVector3> &normals) {
+std::map<int,SuperEl::superInfoType> SuperEl::_superInfo;
 
-//  std::cout << "DBGTT: badEl = " << badEl->getNum() << ", _superEl = " << _superEl << std::endl;
 
+SuperEl::superInfoType::superInfoType(int type, int order) {
+
+  int iBaseFace = 0, iTopFace = 0;
   switch (type) {
-    case TYPE_LIN:
-      createSuperElQuad(order, dist, baseVert, normals[0], normals[1]);
+    case TYPE_QUA:
+      iBaseFace = 0; iTopFace = 2;
       break;
-    case TYPE_TRI:
-      createSuperElPrism(order, dist, baseVert, normals[0], normals[1], normals[2]);
+    case TYPE_PRI:
+      iBaseFace = 0; iTopFace = 1;
       break;
-    case TYPE_QUA:
-      createSuperElHex(order, dist, baseVert, normals[0], normals[1], normals[2], normals[3]);
+    case TYPE_HEX:
+      iBaseFace = 0; iTopFace = 5;
       break;
     default:
       std::cout << "ERROR: SuperEl not implemented for element of type " << type << std::endl;
-      _superEl = 0;
-      break;
+      nV = 0;
+      return;
   }
 
-  if (!_superEl) std::cout << "ERROR: SuperEl not created" << std::endl;
+  // Get HO nodal basis
+  const int tag = ElementType::getTag(type, order, true);                 // Get tag for incomplete element type
+//  const int tag = ElementType::getTag(type, order);                     // Get tag for complete element type
+  const nodalBasis *basis = tag ? BasisFactory::getNodalBasis(tag) : 0;
+
+  nV = basis->getNumShapeFunctions();
+//  _superInfo[type].nV1 = basis->getNumShapeFunctions();
+  points = basis->points;
+
+  baseInd = basis->getClosure(basis->getClosureId(iBaseFace,1,0));
+  topInd = basis->getClosure(basis->getClosureId(iTopFace,0,0));
+  otherInd.reserve(nV-baseInd.size()-topInd.size());
+  for(int i=0; i<nV; ++i) {
+    const std::vector<int>::iterator inBaseFace = find(baseInd.begin(),baseInd.end(),i);
+    const std::vector<int>::iterator inTopFace = find(topInd.begin(),topInd.end(),i);
+    if (inBaseFace == baseInd.end() && inTopFace == topInd.end()) otherInd.push_back(i);
+  }
 
 }
 
 
 
-void SuperEl::createSuperElQuad(int order, double dist, const std::vector<MVertex*> &baseVert,
-                                  const SVector3 &n0, const SVector3 &n1) {
-
-
-  if (baseVert.empty()) {
-    std::cout << "ERROR: Base edge not given for SuperEl\n";
-    _superEl = 0;
-    return;
-  }
-
-  // First-order vertices for super-element
-  MVertex *v0 = new MVertex(*baseVert[0]);
-  MVertex *v1 = new MVertex(*baseVert[1]);
-//  double dist = distFactor*distMaxStraight(badEl);
-  double v2x = v1->x()+n1.x()*dist, v2y = v1->y()+n1.y()*dist;
-  MVertex *v2 = new MVertex(v2x,v2y,0.);
-  double v3x = v0->x()+n0.x()*dist, v3y = v0->y()+n0.y()*dist;
-  MVertex *v3 = new MVertex(v3x,v3y,0.);
-//  std::cout << "DBGTT: createSuperElQuad: v0 = (" << v0->x() << "," << v0->y()
-//              << "), v1 = (" << v1->x() << "," << v1->y()
-//              << "), v2 = (" << v2->x() << "," << v2->y()
-//              << "), v3 = (" << v3->x() << "," << v3->y() << ")" << std::endl;
-
-  // Get basis functions
-  MQuadrangle quad1(v0,v1,v2,v3);
-  const nodalBasis *quadBasis = quad1.getFunctionSpace(order,true);           // Get HO serendip nodal basis
-  const int Nv = quadBasis->getNumShapeFunctions();
-
-  // Store/create all vertices for super-element
-  _superVert.resize(Nv);
-  _superVert[0] = v0;                                                         // First-order vertices
-  _superVert[1] = v1;
-  _superVert[2] = v2;
-  _superVert[3] = v3;
-  for (int i=2; i<order+1; ++i) _superVert[2+i] = new MVertex(*baseVert[i]);  // HO vertices on base edge
-  for (int i=3+order; i<Nv; ++i) {                                            // Additional HO vertices
-    SPoint3 p;
-    quad1.pnt(quadBasis->points(i,0),quadBasis->points(i,1),0.,p);
-    _superVert[i] = new MVertex(p.x(),p.y(),0.);
-//    std::cout << "DBGTT: createSuperElQuad: add vertex (" << _superVert[i]->x()
-//                << "," << _superVert[i]->y()<< "," << _superVert[i]->z() << ")" << std::endl;
-  }
-
-  _superEl = new MQuadrangleN(_superVert,order);
-  _superEl0 = new MQuadrangle(_superVert[0],_superVert[1],_superVert[2],_superVert[3]);
-//  std::cout << "Created SuperEl at address " << _superEl << std::endl;
-
-}
+SuperEl::SuperEl(int type, int order, const std::vector<MVertex*> &baseVert,
+                 const std::vector<MVertex*> &topPrimVert)
+{
 
+  // Get useful info on meta-element type if not already there
+  std::map<int,SuperEl::superInfoType>::iterator itSInfo = _superInfo.find(type);
+  if (itSInfo == _superInfo.end())
+    itSInfo = _superInfo.insert(std::pair<int,superInfoType>(type,superInfoType(type, order))).first;
+  SuperEl::superInfoType &sInfo = itSInfo->second;
 
+  // Exit if unknown type
+  if (sInfo.nV == 0) return;
 
-void SuperEl::createSuperElPrism(int order, double dist, const std::vector<MVertex*> &baseVert,
-                                 const SVector3 &n0, const SVector3 &n1, const SVector3 &n2) {
+  // References for easier writing
+  const int &nV = sInfo.nV;
+  const fullMatrix<double> &points = sInfo.points;
+  const std::vector<int> &baseInd = sInfo.baseInd;
+  const std::vector<int> &topInd = sInfo.topInd;
+  const std::vector<int> &otherInd = sInfo.otherInd;
 
+  // Add copies of vertices in base & top faces (only first-order vertices for top face)
+  _superVert.resize(nV);
+  for (int i=0; i<baseInd.size(); ++i) _superVert[baseInd[i]] = new MVertex(*baseVert[i]);
+  for (int i=0; i<topPrimVert.size(); ++i) _superVert[topInd[i]] = new MVertex(*topPrimVert[i]);
 
-  if (baseVert.empty()) {
-    std::cout << "ERROR: Base edge not given for SuperEl\n";
-    _superEl = 0;
-    return;
+  // Create first-order meta-element
+  switch (type) {
+    case TYPE_QUA:
+      _superEl0 = new MQuadrangle(_superVert);
+      break;
+    case TYPE_PRI:
+      _superEl0 = new MPrism(_superVert);
+      break;
+    case TYPE_HEX:
+      _superEl0 = new MHexahedron(_superVert);
+      break;
+    default:
+      std::cout << "ERROR: SuperEl not implemented for element of type " << type << std::endl;
+      _superEl0 = 0;
+      return;
+      break;
   }
 
-  // First-order vertices for super-element
-  MVertex *v0 = new MVertex(*baseVert[0]);
-  MVertex *v1 = new MVertex(*baseVert[1]);
-  MVertex *v2 = new MVertex(*baseVert[2]);
-  double v3x = v0->x()+n0.x()*dist, v3y = v0->y()+n0.y()*dist,  v3z = v0->z()+n0.z()*dist;
-  MVertex *v3 = new MVertex(v3x,v3y,v3z);
-  double v4x = v1->x()+n1.x()*dist, v4y = v1->y()+n1.y()*dist,  v4z = v1->z()+n1.z()*dist;
-  MVertex *v4 = new MVertex(v4x,v4y,v4z);
-  double v5x = v2->x()+n2.x()*dist, v5y = v2->y()+n2.y()*dist,  v5z = v2->z()+n2.z()*dist;
-  MVertex *v5 = new MVertex(v5x,v5y,v5z);
-
-  // Get basis functions
-  MPrism prism1(v0,v1,v2,v3,v4,v5);
-  const nodalBasis *prismBasis = prism1.getFunctionSpace(order,true);       // Get HO serendip nodal basis
-  const int Nv = prismBasis->getNumShapeFunctions();                        // Number of vertices in HO prism
-
-  // Store/create all vertices for super-element
-  if (order == 2) {
-    _superVert.resize(18);
-    _superVert[0] = v0;                                                     // First-order vertices
-    _superVert[1] = v1;
-    _superVert[2] = v2;
-    _superVert[3] = v3;
-    _superVert[4] = v4;
-    _superVert[5] = v5;
-    _superVert[6] = new MVertex(*baseVert[3]);                              // HO vertices on base face
-    _superVert[9] = new MVertex(*baseVert[4]);
-    _superVert[7] = new MVertex(*baseVert[5]);
-    const int indAddVerts[6] = {8,10,11,12,13,14};                          // Additional HO vertices
+  // Add HO vertices in top face
+  for (int i=topPrimVert.size(); i<topInd.size(); ++i) {
     SPoint3 p;
-    for (int i=0; i<6; ++i) {
-      const int &ind = indAddVerts[i];
-      prism1.pnt(prismBasis->points(ind,0),prismBasis->points(ind,1),prismBasis->points(ind,2),p);
-      _superVert[ind] = new MVertex(p.x(),p.y(),p.z());
-    }
-    _superEl = new MPrism15(_superVert);
-  }
-  else {
-    std::cout << "ERROR: Prismatic superEl. of order " << order << " not supported\n";
-    _superEl = 0;
-    _superEl0 = 0;
-    return;
+    const int ind = topInd[i];
+    _superEl0->pnt(points(ind,0),points(ind,1),points(ind,2),p);
+    _superVert[ind] = new MVertex(p.x(),p.y(),p.z());
   }
 
-  _superEl0 = new MPrism(_superVert[0],_superVert[1],_superVert[2],
-                         _superVert[3],_superVert[4],_superVert[5]);
-
-}
-
-
-
-void SuperEl::createSuperElHex(int order, double dist, const std::vector<MVertex*> &baseVert,
-                               const SVector3 &n0, const SVector3 &n1, const SVector3 &n2, const SVector3 &n3) {
-
+  // Add vertices not in base & top faces
+  for (int i=0; i<otherInd.size(); ++i) {
+    SPoint3 p;
+    const int ind = otherInd[i];
+    _superEl0->pnt(points(ind,0),points(ind,1),points(ind,2),p);
+    _superVert[ind] = new MVertex(p.x(),p.y(),p.z());
+  }
 
-  if (baseVert.empty()) {
-    std::cout << "ERROR: Base edge not given for SuperEl\n";
-    _superEl = 0;
-    return;
+  // Create high-order meta-element
+  switch (type) {
+    case TYPE_QUA:
+      _superEl = new MQuadrangleN(_superVert, order);
+      break;
+    case TYPE_PRI:
+      _superEl = new MPrismN(_superVert, order);
+      break;
+    case TYPE_HEX:
+      _superEl = new MHexahedronN(_superVert, order);
+      break;
   }
 
-  // First-order vertices for super-element
-  MVertex *v0 = new MVertex(*baseVert[0]);
-  MVertex *v1 = new MVertex(*baseVert[1]);
-  MVertex *v2 = new MVertex(*baseVert[2]);
-  MVertex *v3 = new MVertex(*baseVert[3]);
-  double v4x = v0->x()+n0.x()*dist, v4y = v0->y()+n0.y()*dist,  v4z = v0->z()+n0.z()*dist;
-  MVertex *v4 = new MVertex(v4x,v4y,v4z);
-  double v5x = v1->x()+n1.x()*dist, v5y = v1->y()+n1.y()*dist,  v5z = v1->z()+n1.z()*dist;
-  MVertex *v5 = new MVertex(v5x,v5y,v5z);
-  double v6x = v2->x()+n2.x()*dist, v6y = v2->y()+n2.y()*dist,  v6z = v2->z()+n2.z()*dist;
-  MVertex *v6 = new MVertex(v6x,v6y,v6z);
-  double v7x = v3->x()+n3.x()*dist, v7y = v3->y()+n3.y()*dist,  v7z = v3->z()+n3.z()*dist;
-  MVertex *v7 = new MVertex(v7x,v7y,v7z);
-
-  // Get basis functions
-  MHexahedron *hex1 = new MHexahedron(v0,v1,v2,v3,v4,v5,v6,v7);
-  const nodalBasis *prismBasis = hex1->getFunctionSpace(order,true);         // Get HO serendip nodal basis
-  const int Nv = prismBasis->getNumShapeFunctions();                         // Number of vertices in HO hex
-
-  // Store/create all vertices for super-element
-  if (order == 2) {
-    _superVert.resize(27);
-    _superVert[0] = v0;                                                      // First-order vertices
-    _superVert[1] = v1;
-    _superVert[2] = v2;
-    _superVert[3] = v3;
-    _superVert[4] = v4;
-    _superVert[5] = v5;
-    _superVert[6] = v6;
-    _superVert[7] = v7;
-    _superVert[8] = new MVertex(*baseVert[4]);                                // HO vertices on base face
-    _superVert[11] = new MVertex(*baseVert[5]);                               // HO vertices on base face
-    _superVert[13] = new MVertex(*baseVert[6]);                               // HO vertices on base face
-    _superVert[9] = new MVertex(*baseVert[7]);
-    _superVert[20] = new MVertex(*baseVert[8]);
-    const int indAddVerts[8] = {10,12,14,15,16,17,18,19};                     // Additional HO vertices
-    SPoint3 p;
-    for (int i=0; i<8; ++i) {
-      const int &ind = indAddVerts[i];
-      hex1->pnt(prismBasis->points(ind,0),prismBasis->points(ind,1),prismBasis->points(ind,2),p);
-      _superVert[ind] = new MVertex(p.x(),p.y(),p.z());
+  // Scale meta-element if not valid
+  // TODO: Scale depending on target Jmin?
+  // TODO: Could be improved by using complete meta-element and use optimization
+  for (int iter = 0; iter < 10; iter++) {
+    if (_superEl->distoShapeMeasure() > 0) break;
+    if (iter == 0) Msg::Warning("A meta-element has a negative distortion, trying to scale");
+    for (int i=0; i<topPrimVert.size(); ++i) {                                                // Move top primary vert.
+      MVertex *&vb = _superVert[baseInd[i]], *&vt = _superVert[topInd[i]];
+      SPoint3 pb = vb->point(), pt = vt->point();
+      pt += SPoint3(0.25*(pt-pb));
+      vt->setXYZ(pt.x(), pt.y(), pt.z());
+    }
+    for (int i=topPrimVert.size(); i<topInd.size(); ++i) {                                    // Recompute HO vert. in top face
+      SPoint3 p;
+      const int ind = topInd[i];
+      _superEl0->pnt(points(ind,0),points(ind,1),points(ind,2),p);
+      _superVert[ind]->setXYZ(p.x(),p.y(),p.z());
+    }
+    for (int i=0; i<otherInd.size(); ++i) {                                                   // Recompute vert. not in base & top faces
+      SPoint3 p;
+      const int ind = otherInd[i];
+      _superEl0->pnt(points(ind,0),points(ind,1),points(ind,2),p);
+      _superVert[ind]->setXYZ(p.x(),p.y(),p.z());
     }
-    _superEl = new MHexahedron20(_superVert);
-  }
-  else {
-    std::cout << "ERROR: Hex. superEl. of order " << order << " not supported\n";
-    _superEl = 0;
-    _superEl0 = 0;
-    return;
   }
 
-  _superEl0 = hex1;
+}
+
+
 
+SuperEl::~SuperEl()
+{
+  for (int i = 0; i < _superVert.size(); i++) delete _superVert[i];
+  _superVert.clear();
+  delete _superEl;
+  delete _superEl0;
 }
 
 
@@ -275,9 +226,10 @@ bool SuperEl::straightToCurved(double *xyzS, double *xyzC) const {
 std::string SuperEl::printPOS() {
 
   std::vector<MVertex*> verts;
-  _superEl0->getVertices(verts);
-  std::string posStr(_superEl0->getStringForPOS());
-  int n = _superEl0->getNumVertices();
+  _superEl->getVertices(verts);
+//  std::string posStr = _superEl->getStringForPOS();
+  std::string posStr = _superEl0->getStringForPOS();
+  int n = (posStr[posStr.size()-1]=='2' ? _superEl : _superEl0)->getNumVertices();
 
   std::ostringstream oss;
 
@@ -286,8 +238,8 @@ std::string SuperEl::printPOS() {
     if(i) oss << ",";
     oss << _superVert[i]->x() << "," <<  _superVert[i]->y() << "," <<  _superVert[i]->z();
   }
-  oss << "){0";
-  for(int i = 0; i < n; i++) oss << ",0.";
+  oss << "){0.";
+  for(int i = 1; i < n; i++) oss << ",0.";
   oss << "};\n";
 
   return oss.str();
diff --git a/contrib/HighOrderMeshOptimizer/SuperEl.h b/contrib/HighOrderMeshOptimizer/SuperEl.h
index e61cf6b1a3261d5ae72e9dcb2ac9f050ebea07dd..54ba6991a6f11545987330a3e862969be517657e 100644
--- a/contrib/HighOrderMeshOptimizer/SuperEl.h
+++ b/contrib/HighOrderMeshOptimizer/SuperEl.h
@@ -33,13 +33,15 @@
 #include <string>
 #include "MElement.h"
 
+
+
 class SuperEl
 {
 public:
 
-  SuperEl(int order, double dist, int type, const std::vector<MVertex*> &baseVert,
-          const std::vector<SVector3> &normals);
-  ~SuperEl() { _superVert.clear(); delete _superEl; delete _superEl0; }
+  SuperEl(int type, int order, const std::vector<MVertex*> &baseVert,
+          const std::vector<MVertex*> &topPrimVert);
+  ~SuperEl();
 
   bool isOK() const { return _superEl; }
   bool isPointIn(const SPoint3 p) const;
@@ -58,17 +60,17 @@ public:
 
 private:
 
-  std::vector<MVertex*> _superVert;
-  MElement *_superEl, *_superEl0;
-
-  void createSuperElQuad(int order, double dist, const std::vector<MVertex*> &baseVert,
-                         const SVector3 &n0, const SVector3 &n1);
-  void createSuperElPrism(int order, double dist, const std::vector<MVertex*> &baseVert,
-                          const SVector3 &n0, const SVector3 &n1, const SVector3 &n2);
-  void createSuperElHex(int order, double dist, const std::vector<MVertex*> &baseVert,
-                        const SVector3 &n0, const SVector3 &n1, const SVector3 &n2, const SVector3 &n3);
+  struct superInfoType {
+    int nV;
+    fullMatrix<double> points;
+    std::vector<int> baseInd, topInd, otherInd;
+    superInfoType(int type, int order);
+  };
 
+  static std::map<int,superInfoType> _superInfo;
 
+  std::vector<MVertex*> _superVert;
+  MElement *_superEl, *_superEl0;
 };