Optimized curls computation

5b1986be · Marco D'Antonio · 9cba41a3 · 5b1986be · 5b1986be · 5b1986be
Commit 5b1986be authored Feb 26, 2022 by Marco D'Antonio
--- a/.gitignore
+++ b/.gitignore
+CMakeLists.txt.user
+CMakeCache.txt
+CMakeFiles
+CMakeScripts
+Testing
+Makefile
+cmake_install.cmake
+install_manifest.txt
+compile_commands.json
+CTestTestfile.cmake
+_deps
+*.out
+
+build
+share/**/*.silo
+.vscode
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -281,6 +281,12 @@ option(OPT_EXPENSIVE_ASSERTS "Enable expensive assert()s" OFF)
 if (OPT_EXPENSIVE_ASSERTS)
    add_definitions(-DEXPENSIVE_ASSERTS)
 endif()
+
+option(NEW_CURLS "Use the new method for computing curls" OFF)
+if (NEW_CURLS)
+    add_compile_definitions(NEW_CURLS)
+endif()
+
 ######################################################################
 ## Enable/disable solver modules
 option(OPT_DEBUG_PRINT "Print debug information")
@@ -317,6 +323,9 @@ if (OPT_BUILD_TESTS)
    add_executable(test_mass tests/test_mass.cpp)
    target_link_libraries(test_mass gmshdg ${LINK_LIBS})

+    add_executable(test_curls tests/test_curls.cpp)
+    target_link_libraries(test_curls gmshdg ${LINK_LIBS})
+
    add_executable(test_differentiation tests/test_differentiation.cpp)
    target_link_libraries(test_differentiation gmshdg ${LINK_LIBS})


--- a/include/libgmshdg/kernels_cpu.h
+++ b/include/libgmshdg/kernels_cpu.h
--- a/src/libgmshdg/kernels_cpu.cpp
+++ b/src/libgmshdg/kernels_cpu.cpp
@@ -10,6 +10,113 @@
 #include "libgmshdg/entity_data.h"
 #include "libgmshdg/kernels_cpu.h"

+void compute_field_curls_sources(const entity_data_cpu& ed,
+    const vecxd& fx, const vecxd& fy, const vecxd& fz, double alpha,
+    vecxd& curl_fx, vecxd& curl_fy, vecxd& curl_fz,
+    const vecxd& jx, const vecxd& jy, const vecxd& jz)
+{
+    switch (ed.a_order)
+    {
+        case 1:
+            if (ed.g_order == 1)
+                compute_curls_sources_kernel_planar<1>(ed, fx, fy, fz, alpha, curl_fx, curl_fy, curl_fz, jx, jy, jz);
+            else
+                compute_curls_sources_kernel_curved<1>(ed, fx, fy, fz, alpha, curl_fx, curl_fy, curl_fz, jx, jy, jz);
+            break;
+
+        case 2:
+            if (ed.g_order == 1)
+                compute_curls_sources_kernel_planar<2>(ed, fx, fy, fz, alpha, curl_fx, curl_fy, curl_fz, jx, jy, jz);
+            else
+                compute_curls_sources_kernel_curved<2>(ed, fx, fy, fz, alpha, curl_fx, curl_fy, curl_fz, jx, jy, jz);
+            break;
+
+        case 3:
+            if (ed.g_order == 1)
+                compute_curls_sources_kernel_planar<3>(ed, fx, fy, fz, alpha, curl_fx, curl_fy, curl_fz, jx, jy, jz);
+            else
+                compute_curls_sources_kernel_curved<3>(ed, fx, fy, fz, alpha, curl_fx, curl_fy, curl_fz, jx, jy, jz);
+            break;
+
+        case 4:
+            if (ed.g_order == 1)
+                compute_curls_sources_kernel_planar<4>(ed, fx, fy, fz, alpha, curl_fx, curl_fy, curl_fz, jx, jy, jz);
+            else
+                compute_curls_sources_kernel_curved<4>(ed, fx, fy, fz, alpha, curl_fx, curl_fy, curl_fz, jx, jy, jz);
+            break;
+        
+        case 5:
+            if (ed.g_order == 1)
+                compute_curls_sources_kernel_planar<5>(ed, fx, fy, fz, alpha, curl_fx, curl_fy, curl_fz, jx, jy, jz);
+            else
+                compute_curls_sources_kernel_curved<5>(ed, fx, fy, fz, alpha, curl_fx, curl_fy, curl_fz, jx, jy, jz);
+            break;
+
+        case 6:
+            if (ed.g_order == 1)
+                compute_curls_sources_kernel_planar<6>(ed, fx, fy, fz, alpha, curl_fx, curl_fy, curl_fz, jx, jy, jz);
+            else
+                compute_curls_sources_kernel_curved<6>(ed, fx, fy, fz, alpha, curl_fx, curl_fy, curl_fz, jx, jy, jz);
+            break;
+
+        default:
+            std::cout << "compute_curls_sources: invalid order" << std::endl;
+    }
+}
+
+void compute_field_curls(const entity_data_cpu& ed,
+    const vecxd& fx, const vecxd& fy, const vecxd& fz, double alpha,
+    vecxd& curl_fx, vecxd& curl_fy, vecxd& curl_fz)
+{
+    switch (ed.a_order)
+    {
+        case 1:
+            if (ed.g_order == 1)
+                compute_curls_kernel_planar<1>(ed, fx, fy, fz, alpha, curl_fx, curl_fy, curl_fz);
+            else
+                compute_curls_kernel_curved<1>(ed, fx, fy, fz, alpha, curl_fx, curl_fy, curl_fz);
+            break;
+
+        case 2:
+            if (ed.g_order == 1)
+                compute_curls_kernel_planar<2>(ed, fx, fy, fz, alpha, curl_fx, curl_fy, curl_fz);
+            else
+                compute_curls_kernel_curved<2>(ed, fx, fy, fz, alpha, curl_fx, curl_fy, curl_fz);
+            break;
+
+        case 3:
+            if (ed.g_order == 1)
+                compute_curls_kernel_planar<3>(ed, fx, fy, fz, alpha, curl_fx, curl_fy, curl_fz);
+            else
+                compute_curls_kernel_curved<3>(ed, fx, fy, fz, alpha, curl_fx, curl_fy, curl_fz);
+            break;
+
+        case 4:
+            if (ed.g_order == 1)
+                compute_curls_kernel_planar<4>(ed, fx, fy, fz, alpha, curl_fx, curl_fy, curl_fz);
+            else
+                compute_curls_kernel_curved<4>(ed, fx, fy, fz, alpha, curl_fx, curl_fy, curl_fz);
+            break;
+        
+        case 5:
+            if (ed.g_order == 1)
+                compute_curls_kernel_planar<5>(ed, fx, fy, fz, alpha, curl_fx, curl_fy, curl_fz);
+            else
+                compute_curls_kernel_curved<5>(ed, fx, fy, fz, alpha, curl_fx, curl_fy, curl_fz);
+            break;
+
+        case 6:
+            if (ed.g_order == 1)
+                compute_curls_kernel_planar<6>(ed, fx, fy, fz, alpha, curl_fx, curl_fy, curl_fz);
+            else
+                compute_curls_kernel_curved<6>(ed, fx, fy, fz, alpha, curl_fx, curl_fy, curl_fz);
+            break;
+
+        default:
+            std::cout << "compute_curls: invalid order" << std::endl;
+    }
+}
+
 /* Field derivatives kernel, case for elements with planar faces */
 void compute_field_derivatives(const entity_data_cpu& ed,
    const vecxd& f, vecxd& df_dx, vecxd& df_dy, vecxd& df_dz)

--- a/src/maxwell/maxwell_cpu.cpp
+++ b/src/maxwell/maxwell_cpu.cpp
@@ -112,6 +112,55 @@ init_from_model(const model& mod, solver_state& state, const maxwell::parameter_
 /**************************************************************************
 * Computational kernels: curls
 */
+#ifdef NEW_CURLS
+static void
+compute_curls(solver_state& state, const field& curr, field& next)
+{
+    for (const auto& ed : state.eds)
+    {
+        compute_field_curls(ed, curr.Ex, curr.Ey, curr.Ez, next.Hx, next.Hy, next.Hz);
+        #ifndef ADD_SOURCES
+        compute_field_curls(ed, curr.Hx, curr.Hy, curr.Hz, 1.0, next.Ex, next.Ey, next.Ez);
+        #else
+        compute_field_curls_sources(ed, curr.Hx, curr.Hy, curr.Hz, next.Ex, next.Ey, next.Ez, state.Jx_src, state.Jy_src, state.Jz_src);
+        #endif
+    }
+
+    #ifndef ADD_SOURCES
+    next.Ex -= state.Jx_src;
+    next.Ey -= state.Jy_src;
+    next.Ez -= state.Jz_src;
+    #endif
+}
+
+static void
+compute_curls_E(solver_state& state, const field& curr, field& next)
+{
+    for (const auto& ed : state.eds)
+    {
+        compute_field_curls(ed, curr.Ex, curr.Ey, curr.Ez, next.Hx, next.Hy, next.Hz);
+    }
+}
+
+static void
+compute_curls_H(solver_state& state, const field& curr, field& next)
+{
+    for (const auto& ed : state.eds)
+    {
+        #ifndef ADD_SOURCES
+        compute_field_curls(ed, curr.Hx, curr.Hy, curr.Hz, 1.0, next.Ex, next.Ey, next.Ez);
+        #else
+        compute_field_curls_sources(ed, curr.Hx, curr.Hy, curr.Hz, next.Ex, next.Ey, next.Ez, state.Jx_src, state.Jy_src, state.Jz_src);
+        #endif
+    }
+
+    #ifndef ADD_SOURCES
+    next.Ex -= state.Jx_src;
+    next.Ey -= state.Jy_src;
+    next.Ez -= state.Jz_src;
+    #endif
+}
+#else
 static void
 compute_curls(solver_state& state, const field& curr, field& next)
 {
@@ -162,6 +211,8 @@ compute_curls_H(solver_state& state, const field& curr, field& next)
    next.Ey = state.dz.Hx - state.dx.Hz - state.Jy_src;
    next.Ez = state.dx.Hy - state.dy.Hx - state.Jz_src;
 }
+#endif
+


 /**************************************************************************

--- a/tests/test_curls.cpp
+++ b/tests/test_curls.cpp
+/* This is GMSH/DG, a GPU-Accelerated Nodal Discontinuous Galerkin
+ * solver for Conservation Laws.
+ *
+ * Copyright (C) 2020-2022 Matteo Cicuttin - University of Liège
+ * 
+ * This code is released under GNU AGPLv3 license, see LICENSE.txt for details.
+ */
+
+#include <iostream>
+#include <iomanip>
+
+#include <unistd.h>
+
+#include "test.h"
+#include "sgr.hpp"
+#include "timecounter.h"
+#include "libgmshdg/gmsh_io.h"
+#include "libgmshdg/entity_data.h"
+#include "libgmshdg/kernels_cpu.h"
+#include "libgmshdg/silo_output.hpp"
+
+using namespace sgr;
+
+static void
+make_geometry(int order, double mesh_h)
+{
+    gm::add("difftest");
+
+    std::vector<std::pair<int,int>> objects;
+
+    objects.push_back(
+        std::make_pair(3, gmo::addBox(0.0, 0.0, 0.0, 1.0, 1.0, 0.5) )
+        );
+    objects.push_back(
+        std::make_pair(3, gmo::addBox(0.0, 0.0, 0.5, 0.5, 0.5, 0.5) )
+        );
+
+    std::vector<std::pair<int, int>> tools;
+    gmsh::vectorpair odt;
+    std::vector<gmsh::vectorpair> odtm;
+    gmo::fragment(objects, tools, odt, odtm);
+
+    gmo::synchronize();
+
+    gvp_t vp;
+    gm::getEntities(vp);
+    gmm::setSize(vp, mesh_h);
+}
+
+int test_curls_convergence(int geometric_order, int approximation_order)
+{
+    std::vector<double> sizes({ 0.32, 0.16, 0.08, 0.04 });
+    std::vector<double> errors;
+
+    std::cout << std::endl << cyanfg << "Testing geometric order " << geometric_order;
+    std::cout << ", approximation order = " << approximation_order << nofg;
+    std::cout << std::endl;
+
+    auto fx = [](const point_3d& pt) -> double {
+        return std::sin(M_PI*pt.y())*std::sin(M_PI*pt.z());
+    };
+    auto fy = [](const point_3d& pt) -> double {
+      return std::sin(M_PI*pt.z())*std::sin(M_PI*pt.x());
+    };
+    auto fz = [](const point_3d& pt) -> double {
+      return std::sin(M_PI*pt.x())*std::sin(M_PI*pt.y());
+    };
+    
+    auto cfx = [](const point_3d& pt) -> double {
+        return M_PI*std::sin(M_PI*pt.x())*std::cos(M_PI*pt.y()) - M_PI*std::cos(M_PI*pt.z())*std::sin(M_PI*pt.x());
+    };
+    auto cfy = [](const point_3d& pt) -> double {
+      return M_PI*std::sin(M_PI*pt.y())*std::cos(M_PI*pt.z()) - M_PI*std::cos(M_PI*pt.x())*std::sin(M_PI*pt.y());
+    };
+    auto cfz = [](const point_3d& pt) -> double {
+      return M_PI*std::sin(M_PI*pt.z())*std::cos(M_PI*pt.x()) - M_PI*std::cos(M_PI*pt.y())*std::sin(M_PI*pt.z());
+    };
+
+    std::vector<double> errs_x;
+    std::vector<double> errs_y;
+    std::vector<double> errs_z;
+
+    for (size_t i = 0; i < sizes.size(); i++)
+    {
+        double h = sizes[i];
+        make_geometry(0,h);
+        gmm::generate( DIMENSION(3) );
+
+        model mod(geometric_order, approximation_order);
+        mod.build();
+
+        std::stringstream ss;
+        ss << "diff_go_" << geometric_order << "_ao_" << approximation_order;
+        ss << "_seq_" << i << ".silo";
+
+#ifdef WRITE_TEST_OUTPUTS
+        silo silodb;
+        silodb.create_db(ss.str());
+        silodb.import_mesh_from_gmsh();
+        silodb.write_mesh();
+#endif
+        auto model_num_dofs = mod.num_dofs();
+
+        vecxd proj_fx = vecxd::Zero(model_num_dofs);
+        vecxd proj_fy = vecxd::Zero(model_num_dofs);
+        vecxd proj_fz = vecxd::Zero(model_num_dofs);
+        
+        vecxd proj_cfx = vecxd::Zero(model_num_dofs);
+        vecxd proj_cfy = vecxd::Zero(model_num_dofs);
+        vecxd proj_cfz = vecxd::Zero(model_num_dofs);
+
+        vecxd comp_cfx = vecxd::Zero(model_num_dofs);
+        vecxd comp_cfy = vecxd::Zero(model_num_dofs);
+        vecxd comp_cfz = vecxd::Zero(model_num_dofs);
+
+        std::vector<entity_data_cpu> eds;
+
+        for (auto& e : mod)
+        {
+            e.project(fx, proj_fx);
+            e.project(fy, proj_fy);
+            e.project(fz, proj_fz);
+            
+            e.project(cfx, proj_cfx);
+            e.project(cfy, proj_cfy);
+            e.project(cfz, proj_cfz);
+
+            entity_data_cpu ed;
+            e.populate_entity_data(ed, mod);
+            eds.push_back( std::move(ed) );
+        }
+
+        for (auto& ed : eds)
+        {
+            timecounter tc;
+            tc.tic();
+            compute_field_curls(ed, proj_fx, proj_fy, proj_fz, 1.0, comp_cfx, comp_cfy, comp_cfz);
+            double time = tc.toc();
+
+            auto num_cells = num_elems_all_orientations(ed);
+            auto num_bf = num_dofs_3D(approximation_order);
+            auto field_size = num_cells*num_bf;
+
+            if (geometric_order == 1)
+            {
+                std::cout  << "Kernel runtime: " << time << " seconds. Estimated performance: ";
+                double flops = (35*ed.num_bf+6)*ed.num_bf*num_cells;
+                std::cout << flops/(1e9*time) << " GFlops/s" << std::endl;
+
+
+                double gbytes = (3*field_size    // output curls
+                    +9*field_size*ed.num_bf    // input curls and accumulators
+                    +ed.num_orientations*ed.num_bf*ed.num_bf*ed.num_bf   // differentiation matrix
+                    +num_cells*3*3)*8/1e9;
+                std::cout << "Kernel bandwidth: " << gbytes/time << " GB/s" << std::endl;
+            }
+            else
+            {
+                std::cout << "Kernel runtime: " << time << " seconds. Estimated performance: ";
+                double flops = ((35*ed.num_bf+12)*ed.num_bf*ed.num_qp + 6*((2*ed.num_bf-1) + 1)*ed.num_bf)*num_cells;
+                std::cout << flops/(1e9*time) << " GFlops/s" << std::endl;
+            }
+        }
+        
+        double err_x = 0.0;
+        double err_y = 0.0;
+        double err_z = 0.0;
+
+#ifdef WRITE_TEST_OUTPUTS
+        auto model_num_cells = mod.num_cells();
+        std::vector<double> var_Pf(model_num_cells);
+        std::vector<double> var_df_dx(model_num_cells);
+        std::vector<double> var_df_dy(model_num_cells);
+        std::vector<double> var_df_dz(model_num_cells);
+#endif
+
+        for (auto& e : mod)
+        {
+            for (size_t iT = 0; iT < e.num_cells(); iT++)
+            {
+                auto& pe = e.cell(iT);
+                auto& re = e.cell_refelem(pe);
+                matxd mass = e.mass_matrix(iT);
+                auto num_bf = re.num_basis_functions();
+                auto ofs = e.cell_model_dof_offset(iT);
+                vecxd diff_x = proj_cfx.segment(ofs, num_bf) - comp_cfx.segment(ofs, num_bf); 
+                vecxd diff_y = proj_cfy.segment(ofs, num_bf) - comp_cfy.segment(ofs, num_bf); 
+                vecxd diff_z = proj_cfz.segment(ofs, num_bf) - comp_cfz.segment(ofs, num_bf); 
+            
+                err_x += diff_x.dot(mass*diff_x);
+                err_y += diff_y.dot(mass*diff_y);
+                err_z += diff_z.dot(mass*diff_z);
+
+#ifdef WRITE_TEST_OUTPUTS
+                auto gi = e.cell_global_index_by_gmsh(iT);
+                assert(gi < model_num_cells);
+                vecxd phi_bar = re.basis_functions({1./3., 1./3., 1./3.});
+                var_df_dx[gi] = Cdf_dx.segment(ofs, num_bf).dot(phi_bar);
+                var_df_dy[gi] = Cdf_dy.segment(ofs, num_bf).dot(phi_bar);
+                var_df_dz[gi] = Cdf_dz.segment(ofs, num_bf).dot(phi_bar);
+#endif
+            }
+            std::cout << "Errors: " << std::sqrt(err_x) << " " << std::sqrt(err_y);
+            std::cout << " " << std::sqrt(err_z) << std::endl;
+        }
+
+#ifdef WRITE_TEST_OUTPUTS
+        silodb.write_zonal_variable("df_dx", var_df_dx);
+        silodb.write_zonal_variable("df_dy", var_df_dy);
+        silodb.write_zonal_variable("df_dz", var_df_dz);
+#endif
+    
+        errs_x.push_back( std::sqrt(err_x) );
+        errs_y.push_back( std::sqrt(err_y) );
+        errs_z.push_back( std::sqrt(err_z) );
+        std::cout << std::endl;
+    }
+
+    double rate_x = 0.0;
+    double rate_y = 0.0;
+    double rate_z = 0.0;
+
+    std::cout << Byellowfg << "rate df/dx  rate df/dy  rate df/dz" << reset << std::endl;
+    for (size_t i = 1; i < sizes.size(); i++)
+    {
+        std::cout << (rate_x = std::log2(errs_x[i-1]/errs_x[i]) ) << " ";
+        std::cout << (rate_y = std::log2(errs_y[i-1]/errs_y[i]) ) << " ";
+        std::cout << (rate_z = std::log2(errs_z[i-1]/errs_z[i]) ) << std::endl;
+    }
+
+    COMPARE_VALUES_ABSOLUTE("df/dx", rate_x, double(approximation_order), 0.2);
+    COMPARE_VALUES_ABSOLUTE("df/dy", rate_y, double(approximation_order), 0.2);
+    COMPARE_VALUES_ABSOLUTE("df/dz", rate_z, double(approximation_order), 0.2);
+
+    return 0;
+}
+
+int main(void)
+{
+    gmsh::initialize();
+    gmsh::option::setNumber("General.Terminal", 0);
+    gmsh::option::setNumber("Mesh.Algorithm", 1);
+    gmsh::option::setNumber("Mesh.Algorithm3D", 1);
+
+
+    int failed_tests = 0;
+
+    std::cout << Bmagentafg << " *** TESTING: CURLS ***" << reset << std::endl;
+    for (size_t go = 1; go < 2; go++)
+        for (size_t ao = go; ao < 5; ao++)
+            failed_tests += test_curls_convergence(go, ao);
+
+    return failed_tests;
+}
--- a/tests/test_differentiation.cpp
+++ b/tests/test_differentiation.cpp
@@ -52,7 +52,7 @@ int test_differentiation_convergence(int geometric_order, int approximation_orde
    std::vector<double> sizes({ 0.32, 0.16, 0.08, 0.04 });
    std::vector<double> errors;

-    std::cout << cyanfg << "Testing geometric order " << geometric_order;
+    std::cout << std::endl << cyanfg << "Testing geometric order " << geometric_order;
    std::cout << ", approximation order = " << approximation_order << nofg;
    std::cout << std::endl;

@@ -188,6 +188,8 @@ int test_differentiation_convergence(int geometric_order, int approximation_orde
        errs_x.push_back( std::sqrt(err_x) );
        errs_y.push_back( std::sqrt(err_y) );
        errs_z.push_back( std::sqrt(err_z) );
+
+        std::cout << std::endl;
    }

    double rate_x = 0.0;