From 115d241c6ddb2583fe8cc1d854939a4e7da04228 Mon Sep 17 00:00:00 2001
From: Matteo Cicuttin <datafl4sh@toxicnet.eu>
Date: Mon, 12 Apr 2021 17:56:58 +0200
Subject: [PATCH] Differentiation finally fixed.

---
 CMakeLists.txt           | 11 +++++++----
 entity.cpp               |  6 +++---
 gmsh_io.cpp              |  2 +-
 kernels_cpu.cpp          |  4 ++++
 kernels_cpu.h            | 33 ++++++++++++++++++++++-----------
 reference_element.cpp    | 23 +++++++++++++++++------
 test.cpp                 |  8 +++++---
 test_differentiation.cpp |  6 +++---
 8 files changed, 62 insertions(+), 31 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7df4d78..2cc0e12 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -135,10 +135,13 @@ endif ()
 ######################################################################
 ## Compiler optimization options
 
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Weverything -Wno-c++98-compat -Wno-c++98-compat-pedantic -Wno-zero-as-null-pointer-constant -Wno-global-constructors -Wno-padded -Wno-shorten-64-to-32 -Wno-sign-conversion -Wno-old-style-cast -Wno-sign-compare -Wno-c99-extensions -Wno-extra-semi-stmt")
-
-set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g")
-set(CMAKE_CXX_FLAGS_RELEASE "-O3 -g -DNDEBUG")
+if (COMPILER_IS_CLANG)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Weverything -Wno-c++98-compat -Wno-c++98-compat-pedantic -Wno-zero-as-null-pointer-constant -Wno-global-constructors -Wno-padded -Wno-shorten-64-to-32 -Wno-sign-conversion -Wno-old-style-cast -Wno-sign-compare -Wno-c99-extensions -Wno-extra-semi-stmt")
+else()
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall")
+endif()
+set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g -fpermissive")
+set(CMAKE_CXX_FLAGS_RELEASE "-O3 -g -DNDEBUG -fpermissive")
 set(CMAKE_CXX_FLAGS_RELEASEASSERT "-O3 -g -fpermissive")
 
 option(OPT_ENABLE_OPENMP "Enable OpenMP parallelization" OFF)
diff --git a/entity.cpp b/entity.cpp
index a06c743..f784728 100644
--- a/entity.cpp
+++ b/entity.cpp
@@ -151,7 +151,7 @@ entity::sort_by_orientation(void)
         auto op1 = e1.original_position();
         auto op2 = e2.original_position();
 
-        if ( oe1 < oe2 and op1 < op2 )
+        if ( oe1 < oe2 or (oe1 == oe2 and op1 < op2) )
             return true;
 
         return false;
@@ -224,8 +224,8 @@ entity::populate_differentiation_matrices(entity_data_cpu<1>& ed) const
             vecxd dv_phi = dphi.col(1);
             vecxd dw_phi = dphi.col(2);
 
-            dm.block(0,           0, ed.num_bf, ed.num_bf) += w * phi * du_phi.transpose();
-            dm.block(0,   ed.num_bf, ed.num_bf, ed.num_bf) += w * phi * dv_phi.transpose();
+            dm.block(0, 0*ed.num_bf, ed.num_bf, ed.num_bf) += w * phi * du_phi.transpose();
+            dm.block(0, 1*ed.num_bf, ed.num_bf, ed.num_bf) += w * phi * dv_phi.transpose();
             dm.block(0, 2*ed.num_bf, ed.num_bf, ed.num_bf) += w * phi * dw_phi.transpose();
         }
 
diff --git a/gmsh_io.cpp b/gmsh_io.cpp
index 3b4defe..b385059 100644
--- a/gmsh_io.cpp
+++ b/gmsh_io.cpp
@@ -134,7 +134,7 @@ model::import_gmsh_entities(void)
         for (auto& elemType : elemTypes)
         {
             entity e({.dim = dim, .tag = tag, .etype = elemType,
-                      .aorder = approximation_order, .gorder = geometric_order});
+                      .gorder = geometric_order, .aorder = approximation_order});
             entities.push_back( std::move(e) );
         }
     }
diff --git a/kernels_cpu.cpp b/kernels_cpu.cpp
index e8bec0f..83a987b 100644
--- a/kernels_cpu.cpp
+++ b/kernels_cpu.cpp
@@ -28,6 +28,10 @@ void compute_field_derivatives(const entity_data_cpu<1>& ed,
             compute_field_derivatives_kernel<5>(ed, f, df_dx, df_dy, df_dz);
             break;
 
+        case 6:
+            compute_field_derivatives_kernel<6>(ed, f, df_dx, df_dy, df_dz);
+            break;
+
         default:
             std::cout << "compute_field_derivatives: invalid order" << std::endl;
     }
diff --git a/kernels_cpu.h b/kernels_cpu.h
index 339bbe9..b0f4ffb 100644
--- a/kernels_cpu.h
+++ b/kernels_cpu.h
@@ -21,7 +21,6 @@ struct kernel_cpu_sizes
     static const size_t num_all_fluxes = num_fluxes * num_faces;
 };
 
-
 template<size_t AK>
 void compute_field_derivatives_kernel(const entity_data_cpu<1>& ed,
     const vecxd& f, vecxd& df_dx, vecxd& df_dy, vecxd& df_dz)
@@ -49,17 +48,28 @@ void compute_field_derivatives_kernel(const entity_data_cpu<1>& ed,
                 double acc_df_dy = 0.0;
                 double acc_df_dz = 0.0;
 
-                for (size_t refsys_dir = 0; refsys_dir < 3; refsys_dir++)
+                for (size_t idof = 0; idof < KS::num_bf; idof++)
                 {
-                    for (size_t idof = 0; idof < KS::num_bf; idof++)
-                    {
-                        auto dm_row = iO*ed.num_bf + odof;
-                        auto dm_col = refsys_dir*ed.num_bf + idof;
-                        double D = ed.differentiation_matrix(dm_row, dm_col);
-                        acc_df_dx += ed.jacobians(3*ent_iT + 0, refsys_dir) * D * f(dofofs+idof);
-                        acc_df_dy += ed.jacobians(3*ent_iT + 1, refsys_dir) * D * f(dofofs+idof);
-                        acc_df_dz += ed.jacobians(3*ent_iT + 2, refsys_dir) * D * f(dofofs+idof);
-                    }
+                    double F = f(dofofs+idof);
+                    auto dm_row = iO*KS::num_bf + odof;
+                    auto dm_col0 = 0*KS::num_bf + idof;
+                    auto dm_col1 = 1*KS::num_bf + idof;
+                    auto dm_col2 = 2*KS::num_bf + idof;
+                    double v0 = ed.differentiation_matrix(dm_row, dm_col0) * F;
+                    double v1 = ed.differentiation_matrix(dm_row, dm_col1) * F;
+                    double v2 = ed.differentiation_matrix(dm_row, dm_col2) * F;
+                    
+                    acc_df_dx += ed.jacobians(3*ent_iT + 0, 0) * v0;
+                    acc_df_dx += ed.jacobians(3*ent_iT + 0, 1) * v1;
+                    acc_df_dx += ed.jacobians(3*ent_iT + 0, 2) * v2;
+
+                    acc_df_dy += ed.jacobians(3*ent_iT + 1, 0) * v0;
+                    acc_df_dy += ed.jacobians(3*ent_iT + 1, 1) * v1;
+                    acc_df_dy += ed.jacobians(3*ent_iT + 1, 2) * v2;
+
+                    acc_df_dz += ed.jacobians(3*ent_iT + 2, 0) * v0;
+                    acc_df_dz += ed.jacobians(3*ent_iT + 2, 1) * v1;
+                    acc_df_dz += ed.jacobians(3*ent_iT + 2, 2) * v2;
                 }
 
                 df_dx(dofofs+odof) = acc_df_dx;
@@ -72,5 +82,6 @@ void compute_field_derivatives_kernel(const entity_data_cpu<1>& ed,
     }
 }
 
+
 void compute_field_derivatives(const entity_data_cpu<1>& ed,
     const vecxd& f, vecxd& df_dx, vecxd& df_dy, vecxd& df_dz);
\ No newline at end of file
diff --git a/reference_element.cpp b/reference_element.cpp
index b91ae6a..7d6ad5c 100644
--- a/reference_element.cpp
+++ b/reference_element.cpp
@@ -137,7 +137,7 @@ reference_elements_factory::get_elements()
         new_re.m_orientation = iO;
         new_re.m_parent_entity_tag = tag;
         new_re.m_basis_functions = vecxd::Zero(num_qp*num_bf);
-        new_re.m_basis_gradients = matxd::Zero(num_bf*num_qp, 3);
+        new_re.m_basis_gradients = matxd::Zero(num_qp*num_bf, 3);
         new_re.m_num_bf = num_bf;
         new_re.m_gmsh_elem_type = elemType;
 
@@ -147,14 +147,25 @@ reference_elements_factory::get_elements()
         {
             for (size_t ibf = 0; ibf < num_bf; ibf++)
             {
+                auto ofs = bf_base + num_bf*iQp + ibf;
+                assert(ofs < basisFunctions.size());
                 new_re.m_basis_functions(num_bf*iQp + ibf) =
-                    basisFunctions[bf_base + num_bf*iQp + ibf];
+                    basisFunctions[ofs];
+
+                ofs = bg_base + 3*num_bf*iQp + 3*ibf + 0;
+                assert(ofs < basisGradients.size());
                 new_re.m_basis_gradients(num_bf*iQp + ibf, 0) =
-                    basisGradients[bg_base + 3*num_bf*iQp + 3*ibf + 0];
+                    basisGradients[ofs];
+                
+                ofs = bg_base + 3*num_bf*iQp + 3*ibf + 1;
+                assert(ofs < basisGradients.size());
                 new_re.m_basis_gradients(num_bf*iQp + ibf, 1) =
-                    basisGradients[bg_base + 3*num_bf*iQp + 3*ibf + 1];
+                    basisGradients[ofs];
+
+                ofs = bg_base + 3*num_bf*iQp + 3*ibf + 2;
+                assert(ofs < basisGradients.size());
                 new_re.m_basis_gradients(num_bf*iQp + ibf, 2) =
-                    basisGradients[bg_base + 3*num_bf*iQp + 3*ibf + 2];
+                    basisGradients[ofs];
             }
         }
 
@@ -174,7 +185,7 @@ reference_elements_factory::get_elements()
             for (size_t iQp = 0; iQp < num_qp; iQp++)
             {
                 auto w = new_re.m_quadpoints[iQp].weight();
-                vecxd phi = new_re.m_basis_functions.segment(num_bf*iQp, num_bf);
+                vecxd phi = new_re.basis_functions(iQp);
                 new_re.m_mass_matrix += w * phi * phi.transpose();
             }
         }
diff --git a/test.cpp b/test.cpp
index 63ff515..4deadfa 100644
--- a/test.cpp
+++ b/test.cpp
@@ -29,16 +29,18 @@ int main(void)
             failed_tests += test_basics(go, ao);
         }
     }
-  
+
+
     std::cout << Bmagentafg << " *** TESTING: PROJECTIONS ***" << reset << std::endl;
     for (size_t go = 1; go < 5; go++)
     {
         for (size_t ao = go; ao < 5; ao++)
             failed_tests += test_mass_convergence(go, ao);
     }
-#endif  
+#endif
+
     std::cout << Bmagentafg << " *** TESTING: DIFFERENTIATION ***" << reset << std::endl;
-    for (size_t ao = 1; ao < 5; ao++)
+    for (size_t ao = 1; ao < 7; ao++)
         failed_tests += test_differentiation_convergence(1, ao);
 
     /*****************************************/
diff --git a/test_differentiation.cpp b/test_differentiation.cpp
index 9873459..d12e902 100644
--- a/test_differentiation.cpp
+++ b/test_differentiation.cpp
@@ -71,7 +71,7 @@ int test_differentiation_convergence(int geometric_order, int approximation_orde
         compute_field_derivatives(ed, Pf_e0, df_dx_e0, df_dy_e0, df_dz_e0);
         double time = tc.toc();
         std::cout << "Kernel runtime: " << time << " seconds. Estimated performance: ";
-        size_t flops = 3*ed.num_bf*3*ed.num_bf*e0.num_elements();
+        double flops = 21*ed.num_bf*ed.num_bf*e0.num_elements();
         std::cout << flops/(1e9*time) << " GFlops/s" << std::endl;
 
         vecxd Pdf_dx_e0 = e0.project(df_dx);
@@ -96,8 +96,8 @@ int test_differentiation_convergence(int geometric_order, int approximation_orde
             err_y += diff_y.dot(mass*diff_y);
             err_z += diff_z.dot(mass*diff_z);
         }
-
-        std::cout << std::sqrt(err_x) << " " << std::sqrt(err_y) << " " << std::sqrt(err_z) << std::endl;
+        std::cout << "Errors: " << std::sqrt(err_x) << " " << std::sqrt(err_y);
+        std::cout << " " << std::sqrt(err_z) << std::endl;
     
         errs_x.push_back( std::sqrt(err_x) );
         errs_y.push_back( std::sqrt(err_y) );
-- 
GitLab