diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7df4d78308276d6aa28ac683afe2e090c212ffb8..2cc0e1268f32904b68135746f6dab20b53ed06e6 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -135,10 +135,13 @@ endif ()
 ######################################################################
 ## Compiler optimization options
 
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Weverything -Wno-c++98-compat -Wno-c++98-compat-pedantic -Wno-zero-as-null-pointer-constant -Wno-global-constructors -Wno-padded -Wno-shorten-64-to-32 -Wno-sign-conversion -Wno-old-style-cast -Wno-sign-compare -Wno-c99-extensions -Wno-extra-semi-stmt")
-
-set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g")
-set(CMAKE_CXX_FLAGS_RELEASE "-O3 -g -DNDEBUG")
+if (COMPILER_IS_CLANG)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Weverything -Wno-c++98-compat -Wno-c++98-compat-pedantic -Wno-zero-as-null-pointer-constant -Wno-global-constructors -Wno-padded -Wno-shorten-64-to-32 -Wno-sign-conversion -Wno-old-style-cast -Wno-sign-compare -Wno-c99-extensions -Wno-extra-semi-stmt")
+else()
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall")
+endif()
+set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g -fpermissive")
+set(CMAKE_CXX_FLAGS_RELEASE "-O3 -g -DNDEBUG -fpermissive")
 set(CMAKE_CXX_FLAGS_RELEASEASSERT "-O3 -g -fpermissive")
 
 option(OPT_ENABLE_OPENMP "Enable OpenMP parallelization" OFF)
diff --git a/entity.cpp b/entity.cpp
index a06c7430102f1695991f2ccaea72e37df79e84b2..f7847288ef0d67fa97c8e05a9128c4e86d998b69 100644
--- a/entity.cpp
+++ b/entity.cpp
@@ -151,7 +151,7 @@ entity::sort_by_orientation(void)
         auto op1 = e1.original_position();
         auto op2 = e2.original_position();
 
-        if ( oe1 < oe2 and op1 < op2 )
+        if ( oe1 < oe2 or (oe1 == oe2 and op1 < op2) )
             return true;
 
         return false;
@@ -224,8 +224,8 @@ entity::populate_differentiation_matrices(entity_data_cpu<1>& ed) const
             vecxd dv_phi = dphi.col(1);
             vecxd dw_phi = dphi.col(2);
 
-            dm.block(0,           0, ed.num_bf, ed.num_bf) += w * phi * du_phi.transpose();
-            dm.block(0,   ed.num_bf, ed.num_bf, ed.num_bf) += w * phi * dv_phi.transpose();
+            dm.block(0, 0*ed.num_bf, ed.num_bf, ed.num_bf) += w * phi * du_phi.transpose();
+            dm.block(0, 1*ed.num_bf, ed.num_bf, ed.num_bf) += w * phi * dv_phi.transpose();
             dm.block(0, 2*ed.num_bf, ed.num_bf, ed.num_bf) += w * phi * dw_phi.transpose();
         }
 
diff --git a/gmsh_io.cpp b/gmsh_io.cpp
index 3b4defe325d301b297bdfed5e7f9217522170884..b3850594ca936485bda0edfbb619fa9d67909259 100644
--- a/gmsh_io.cpp
+++ b/gmsh_io.cpp
@@ -134,7 +134,7 @@ model::import_gmsh_entities(void)
         for (auto& elemType : elemTypes)
         {
             entity e({.dim = dim, .tag = tag, .etype = elemType,
-                      .aorder = approximation_order, .gorder = geometric_order});
+                      .gorder = geometric_order, .aorder = approximation_order});
             entities.push_back( std::move(e) );
         }
     }
diff --git a/kernels_cpu.cpp b/kernels_cpu.cpp
index e8bec0f861f27916c9e682b968a0e8b648e45cfe..83a987b48067aadd3022f435e3f008b6112c7c2e 100644
--- a/kernels_cpu.cpp
+++ b/kernels_cpu.cpp
@@ -28,6 +28,10 @@ void compute_field_derivatives(const entity_data_cpu<1>& ed,
             compute_field_derivatives_kernel<5>(ed, f, df_dx, df_dy, df_dz);
             break;
 
+        case 6:
+            compute_field_derivatives_kernel<6>(ed, f, df_dx, df_dy, df_dz);
+            break;
+
         default:
             std::cout << "compute_field_derivatives: invalid order" << std::endl;
     }
diff --git a/kernels_cpu.h b/kernels_cpu.h
index 339bbe9b3c41d73d658a02c00312f3741d84a12d..b0f4ffb2a3f44a92ca0c520d9816b9aa01ead90e 100644
--- a/kernels_cpu.h
+++ b/kernels_cpu.h
@@ -21,7 +21,6 @@ struct kernel_cpu_sizes
     static const size_t num_all_fluxes = num_fluxes * num_faces;
 };
 
-
 template<size_t AK>
 void compute_field_derivatives_kernel(const entity_data_cpu<1>& ed,
     const vecxd& f, vecxd& df_dx, vecxd& df_dy, vecxd& df_dz)
@@ -49,17 +48,28 @@ void compute_field_derivatives_kernel(const entity_data_cpu<1>& ed,
                 double acc_df_dy = 0.0;
                 double acc_df_dz = 0.0;
 
-                for (size_t refsys_dir = 0; refsys_dir < 3; refsys_dir++)
+                for (size_t idof = 0; idof < KS::num_bf; idof++)
                 {
-                    for (size_t idof = 0; idof < KS::num_bf; idof++)
-                    {
-                        auto dm_row = iO*ed.num_bf + odof;
-                        auto dm_col = refsys_dir*ed.num_bf + idof;
-                        double D = ed.differentiation_matrix(dm_row, dm_col);
-                        acc_df_dx += ed.jacobians(3*ent_iT + 0, refsys_dir) * D * f(dofofs+idof);
-                        acc_df_dy += ed.jacobians(3*ent_iT + 1, refsys_dir) * D * f(dofofs+idof);
-                        acc_df_dz += ed.jacobians(3*ent_iT + 2, refsys_dir) * D * f(dofofs+idof);
-                    }
+                    double F = f(dofofs+idof);
+                    auto dm_row = iO*KS::num_bf + odof;
+                    auto dm_col0 = 0*KS::num_bf + idof;
+                    auto dm_col1 = 1*KS::num_bf + idof;
+                    auto dm_col2 = 2*KS::num_bf + idof;
+                    double v0 = ed.differentiation_matrix(dm_row, dm_col0) * F;
+                    double v1 = ed.differentiation_matrix(dm_row, dm_col1) * F;
+                    double v2 = ed.differentiation_matrix(dm_row, dm_col2) * F;
+                    
+                    acc_df_dx += ed.jacobians(3*ent_iT + 0, 0) * v0;
+                    acc_df_dx += ed.jacobians(3*ent_iT + 0, 1) * v1;
+                    acc_df_dx += ed.jacobians(3*ent_iT + 0, 2) * v2;
+
+                    acc_df_dy += ed.jacobians(3*ent_iT + 1, 0) * v0;
+                    acc_df_dy += ed.jacobians(3*ent_iT + 1, 1) * v1;
+                    acc_df_dy += ed.jacobians(3*ent_iT + 1, 2) * v2;
+
+                    acc_df_dz += ed.jacobians(3*ent_iT + 2, 0) * v0;
+                    acc_df_dz += ed.jacobians(3*ent_iT + 2, 1) * v1;
+                    acc_df_dz += ed.jacobians(3*ent_iT + 2, 2) * v2;
                 }
 
                 df_dx(dofofs+odof) = acc_df_dx;
@@ -72,5 +82,6 @@ void compute_field_derivatives_kernel(const entity_data_cpu<1>& ed,
     }
 }
 
+
 void compute_field_derivatives(const entity_data_cpu<1>& ed,
     const vecxd& f, vecxd& df_dx, vecxd& df_dy, vecxd& df_dz);
\ No newline at end of file
diff --git a/reference_element.cpp b/reference_element.cpp
index b91ae6aeed23b6cd6833420544a1b247fae211d1..7d6ad5cc5ca58cba46732901ff62b9c6106b6201 100644
--- a/reference_element.cpp
+++ b/reference_element.cpp
@@ -137,7 +137,7 @@ reference_elements_factory::get_elements()
         new_re.m_orientation = iO;
         new_re.m_parent_entity_tag = tag;
         new_re.m_basis_functions = vecxd::Zero(num_qp*num_bf);
-        new_re.m_basis_gradients = matxd::Zero(num_bf*num_qp, 3);
+        new_re.m_basis_gradients = matxd::Zero(num_qp*num_bf, 3);
         new_re.m_num_bf = num_bf;
         new_re.m_gmsh_elem_type = elemType;
 
@@ -147,14 +147,25 @@ reference_elements_factory::get_elements()
         {
             for (size_t ibf = 0; ibf < num_bf; ibf++)
             {
+                auto ofs = bf_base + num_bf*iQp + ibf;
+                assert(ofs < basisFunctions.size());
                 new_re.m_basis_functions(num_bf*iQp + ibf) =
-                    basisFunctions[bf_base + num_bf*iQp + ibf];
+                    basisFunctions[ofs];
+
+                ofs = bg_base + 3*num_bf*iQp + 3*ibf + 0;
+                assert(ofs < basisGradients.size());
                 new_re.m_basis_gradients(num_bf*iQp + ibf, 0) =
-                    basisGradients[bg_base + 3*num_bf*iQp + 3*ibf + 0];
+                    basisGradients[ofs];
+                
+                ofs = bg_base + 3*num_bf*iQp + 3*ibf + 1;
+                assert(ofs < basisGradients.size());
                 new_re.m_basis_gradients(num_bf*iQp + ibf, 1) =
-                    basisGradients[bg_base + 3*num_bf*iQp + 3*ibf + 1];
+                    basisGradients[ofs];
+
+                ofs = bg_base + 3*num_bf*iQp + 3*ibf + 2;
+                assert(ofs < basisGradients.size());
                 new_re.m_basis_gradients(num_bf*iQp + ibf, 2) =
-                    basisGradients[bg_base + 3*num_bf*iQp + 3*ibf + 2];
+                    basisGradients[ofs];
             }
         }
 
@@ -174,7 +185,7 @@ reference_elements_factory::get_elements()
             for (size_t iQp = 0; iQp < num_qp; iQp++)
             {
                 auto w = new_re.m_quadpoints[iQp].weight();
-                vecxd phi = new_re.m_basis_functions.segment(num_bf*iQp, num_bf);
+                vecxd phi = new_re.basis_functions(iQp);
                 new_re.m_mass_matrix += w * phi * phi.transpose();
             }
         }
diff --git a/test.cpp b/test.cpp
index 63ff5158d905defc025d634e253efda02adffe0b..4deadfaeaa361d5b878f30f595446041f8406e0c 100644
--- a/test.cpp
+++ b/test.cpp
@@ -29,16 +29,18 @@ int main(void)
             failed_tests += test_basics(go, ao);
         }
     }
-  
+
+
     std::cout << Bmagentafg << " *** TESTING: PROJECTIONS ***" << reset << std::endl;
     for (size_t go = 1; go < 5; go++)
     {
         for (size_t ao = go; ao < 5; ao++)
             failed_tests += test_mass_convergence(go, ao);
     }
-#endif  
+#endif
+
     std::cout << Bmagentafg << " *** TESTING: DIFFERENTIATION ***" << reset << std::endl;
-    for (size_t ao = 1; ao < 5; ao++)
+    for (size_t ao = 1; ao < 7; ao++)
         failed_tests += test_differentiation_convergence(1, ao);
 
     /*****************************************/
diff --git a/test_differentiation.cpp b/test_differentiation.cpp
index 987345998d283c7f62c2e45bf84d80a5c706afb4..d12e902d570c527d591c958fcc992e0d9ef8e0c5 100644
--- a/test_differentiation.cpp
+++ b/test_differentiation.cpp
@@ -71,7 +71,7 @@ int test_differentiation_convergence(int geometric_order, int approximation_orde
         compute_field_derivatives(ed, Pf_e0, df_dx_e0, df_dy_e0, df_dz_e0);
         double time = tc.toc();
         std::cout << "Kernel runtime: " << time << " seconds. Estimated performance: ";
-        size_t flops = 3*ed.num_bf*3*ed.num_bf*e0.num_elements();
+        double flops = 21*ed.num_bf*ed.num_bf*e0.num_elements();
         std::cout << flops/(1e9*time) << " GFlops/s" << std::endl;
 
         vecxd Pdf_dx_e0 = e0.project(df_dx);
@@ -96,8 +96,8 @@ int test_differentiation_convergence(int geometric_order, int approximation_orde
             err_y += diff_y.dot(mass*diff_y);
             err_z += diff_z.dot(mass*diff_z);
         }
-
-        std::cout << std::sqrt(err_x) << " " << std::sqrt(err_y) << " " << std::sqrt(err_z) << std::endl;
+        std::cout << "Errors: " << std::sqrt(err_x) << " " << std::sqrt(err_y);
+        std::cout << " " << std::sqrt(err_z) << std::endl;
     
         errs_x.push_back( std::sqrt(err_x) );
         errs_y.push_back( std::sqrt(err_y) );