Skip to content
Snippets Groups Projects
Commit 5b1986be authored by Marco D'Antonio's avatar Marco D'Antonio
Browse files

Optimized curls computation

parent 9cba41a3
No related branches found
No related tags found
No related merge requests found
CMakeLists.txt.user
CMakeCache.txt
CMakeFiles
CMakeScripts
Testing
Makefile
cmake_install.cmake
install_manifest.txt
compile_commands.json
CTestTestfile.cmake
_deps
*.out
build
share/**/*.silo
.vscode
......@@ -281,6 +281,12 @@ option(OPT_EXPENSIVE_ASSERTS "Enable expensive assert()s" OFF)
if (OPT_EXPENSIVE_ASSERTS)
add_definitions(-DEXPENSIVE_ASSERTS)
endif()
option(NEW_CURLS "Use the new method for computing curls" OFF)
if (NEW_CURLS)
add_compile_definitions(NEW_CURLS)
endif()
######################################################################
## Enable/disable solver modules
option(OPT_DEBUG_PRINT "Print debug information")
......@@ -317,6 +323,9 @@ if (OPT_BUILD_TESTS)
add_executable(test_mass tests/test_mass.cpp)
target_link_libraries(test_mass gmshdg ${LINK_LIBS})
add_executable(test_curls tests/test_curls.cpp)
target_link_libraries(test_curls gmshdg ${LINK_LIBS})
add_executable(test_differentiation tests/test_differentiation.cpp)
target_link_libraries(test_differentiation gmshdg ${LINK_LIBS})
......
This diff is collapsed.
......@@ -10,6 +10,113 @@
#include "libgmshdg/entity_data.h"
#include "libgmshdg/kernels_cpu.h"
void compute_field_curls_sources(const entity_data_cpu& ed,
const vecxd& fx, const vecxd& fy, const vecxd& fz, double alpha,
vecxd& curl_fx, vecxd& curl_fy, vecxd& curl_fz,
const vecxd& jx, const vecxd& jy, const vecxd& jz)
{
switch (ed.a_order)
{
case 1:
if (ed.g_order == 1)
compute_curls_sources_kernel_planar<1>(ed, fx, fy, fz, alpha, curl_fx, curl_fy, curl_fz, jx, jy, jz);
else
compute_curls_sources_kernel_curved<1>(ed, fx, fy, fz, alpha, curl_fx, curl_fy, curl_fz, jx, jy, jz);
break;
case 2:
if (ed.g_order == 1)
compute_curls_sources_kernel_planar<2>(ed, fx, fy, fz, alpha, curl_fx, curl_fy, curl_fz, jx, jy, jz);
else
compute_curls_sources_kernel_curved<2>(ed, fx, fy, fz, alpha, curl_fx, curl_fy, curl_fz, jx, jy, jz);
break;
case 3:
if (ed.g_order == 1)
compute_curls_sources_kernel_planar<3>(ed, fx, fy, fz, alpha, curl_fx, curl_fy, curl_fz, jx, jy, jz);
else
compute_curls_sources_kernel_curved<3>(ed, fx, fy, fz, alpha, curl_fx, curl_fy, curl_fz, jx, jy, jz);
break;
case 4:
if (ed.g_order == 1)
compute_curls_sources_kernel_planar<4>(ed, fx, fy, fz, alpha, curl_fx, curl_fy, curl_fz, jx, jy, jz);
else
compute_curls_sources_kernel_curved<4>(ed, fx, fy, fz, alpha, curl_fx, curl_fy, curl_fz, jx, jy, jz);
break;
case 5:
if (ed.g_order == 1)
compute_curls_sources_kernel_planar<5>(ed, fx, fy, fz, alpha, curl_fx, curl_fy, curl_fz, jx, jy, jz);
else
compute_curls_sources_kernel_curved<5>(ed, fx, fy, fz, alpha, curl_fx, curl_fy, curl_fz, jx, jy, jz);
break;
case 6:
if (ed.g_order == 1)
compute_curls_sources_kernel_planar<6>(ed, fx, fy, fz, alpha, curl_fx, curl_fy, curl_fz, jx, jy, jz);
else
compute_curls_sources_kernel_curved<6>(ed, fx, fy, fz, alpha, curl_fx, curl_fy, curl_fz, jx, jy, jz);
break;
default:
std::cout << "compute_curls_sources: invalid order" << std::endl;
}
}
void compute_field_curls(const entity_data_cpu& ed,
const vecxd& fx, const vecxd& fy, const vecxd& fz, double alpha,
vecxd& curl_fx, vecxd& curl_fy, vecxd& curl_fz)
{
switch (ed.a_order)
{
case 1:
if (ed.g_order == 1)
compute_curls_kernel_planar<1>(ed, fx, fy, fz, alpha, curl_fx, curl_fy, curl_fz);
else
compute_curls_kernel_curved<1>(ed, fx, fy, fz, alpha, curl_fx, curl_fy, curl_fz);
break;
case 2:
if (ed.g_order == 1)
compute_curls_kernel_planar<2>(ed, fx, fy, fz, alpha, curl_fx, curl_fy, curl_fz);
else
compute_curls_kernel_curved<2>(ed, fx, fy, fz, alpha, curl_fx, curl_fy, curl_fz);
break;
case 3:
if (ed.g_order == 1)
compute_curls_kernel_planar<3>(ed, fx, fy, fz, alpha, curl_fx, curl_fy, curl_fz);
else
compute_curls_kernel_curved<3>(ed, fx, fy, fz, alpha, curl_fx, curl_fy, curl_fz);
break;
case 4:
if (ed.g_order == 1)
compute_curls_kernel_planar<4>(ed, fx, fy, fz, alpha, curl_fx, curl_fy, curl_fz);
else
compute_curls_kernel_curved<4>(ed, fx, fy, fz, alpha, curl_fx, curl_fy, curl_fz);
break;
case 5:
if (ed.g_order == 1)
compute_curls_kernel_planar<5>(ed, fx, fy, fz, alpha, curl_fx, curl_fy, curl_fz);
else
compute_curls_kernel_curved<5>(ed, fx, fy, fz, alpha, curl_fx, curl_fy, curl_fz);
break;
case 6:
if (ed.g_order == 1)
compute_curls_kernel_planar<6>(ed, fx, fy, fz, alpha, curl_fx, curl_fy, curl_fz);
else
compute_curls_kernel_curved<6>(ed, fx, fy, fz, alpha, curl_fx, curl_fy, curl_fz);
break;
default:
std::cout << "compute_curls: invalid order" << std::endl;
}
}
/* Field derivatives kernel, case for elements with planar faces */
void compute_field_derivatives(const entity_data_cpu& ed,
const vecxd& f, vecxd& df_dx, vecxd& df_dy, vecxd& df_dz)
......
......@@ -112,6 +112,55 @@ init_from_model(const model& mod, solver_state& state, const maxwell::parameter_
/**************************************************************************
* Computational kernels: curls
*/
#ifdef NEW_CURLS
static void
compute_curls(solver_state& state, const field& curr, field& next)
{
for (const auto& ed : state.eds)
{
compute_field_curls(ed, curr.Ex, curr.Ey, curr.Ez, next.Hx, next.Hy, next.Hz);
#ifndef ADD_SOURCES
compute_field_curls(ed, curr.Hx, curr.Hy, curr.Hz, 1.0, next.Ex, next.Ey, next.Ez);
#else
compute_field_curls_sources(ed, curr.Hx, curr.Hy, curr.Hz, next.Ex, next.Ey, next.Ez, state.Jx_src, state.Jy_src, state.Jz_src);
#endif
}
#ifndef ADD_SOURCES
next.Ex -= state.Jx_src;
next.Ey -= state.Jy_src;
next.Ez -= state.Jz_src;
#endif
}
static void
compute_curls_E(solver_state& state, const field& curr, field& next)
{
for (const auto& ed : state.eds)
{
compute_field_curls(ed, curr.Ex, curr.Ey, curr.Ez, next.Hx, next.Hy, next.Hz);
}
}
static void
compute_curls_H(solver_state& state, const field& curr, field& next)
{
for (const auto& ed : state.eds)
{
#ifndef ADD_SOURCES
compute_field_curls(ed, curr.Hx, curr.Hy, curr.Hz, 1.0, next.Ex, next.Ey, next.Ez);
#else
compute_field_curls_sources(ed, curr.Hx, curr.Hy, curr.Hz, next.Ex, next.Ey, next.Ez, state.Jx_src, state.Jy_src, state.Jz_src);
#endif
}
#ifndef ADD_SOURCES
next.Ex -= state.Jx_src;
next.Ey -= state.Jy_src;
next.Ez -= state.Jz_src;
#endif
}
#else
static void
compute_curls(solver_state& state, const field& curr, field& next)
{
......@@ -162,6 +211,8 @@ compute_curls_H(solver_state& state, const field& curr, field& next)
next.Ey = state.dz.Hx - state.dx.Hz - state.Jy_src;
next.Ez = state.dx.Hy - state.dy.Hx - state.Jz_src;
}
#endif
/**************************************************************************
......
/* This is GMSH/DG, a GPU-Accelerated Nodal Discontinuous Galerkin
* solver for Conservation Laws.
*
* Copyright (C) 2020-2022 Matteo Cicuttin - University of Liège
*
* This code is released under GNU AGPLv3 license, see LICENSE.txt for details.
*/
#include <iostream>
#include <iomanip>
#include <unistd.h>
#include "test.h"
#include "sgr.hpp"
#include "timecounter.h"
#include "libgmshdg/gmsh_io.h"
#include "libgmshdg/entity_data.h"
#include "libgmshdg/kernels_cpu.h"
#include "libgmshdg/silo_output.hpp"
using namespace sgr;
static void
make_geometry(int order, double mesh_h)
{
gm::add("difftest");
std::vector<std::pair<int,int>> objects;
objects.push_back(
std::make_pair(3, gmo::addBox(0.0, 0.0, 0.0, 1.0, 1.0, 0.5) )
);
objects.push_back(
std::make_pair(3, gmo::addBox(0.0, 0.0, 0.5, 0.5, 0.5, 0.5) )
);
std::vector<std::pair<int, int>> tools;
gmsh::vectorpair odt;
std::vector<gmsh::vectorpair> odtm;
gmo::fragment(objects, tools, odt, odtm);
gmo::synchronize();
gvp_t vp;
gm::getEntities(vp);
gmm::setSize(vp, mesh_h);
}
int test_curls_convergence(int geometric_order, int approximation_order)
{
std::vector<double> sizes({ 0.32, 0.16, 0.08, 0.04 });
std::vector<double> errors;
std::cout << std::endl << cyanfg << "Testing geometric order " << geometric_order;
std::cout << ", approximation order = " << approximation_order << nofg;
std::cout << std::endl;
auto fx = [](const point_3d& pt) -> double {
return std::sin(M_PI*pt.y())*std::sin(M_PI*pt.z());
};
auto fy = [](const point_3d& pt) -> double {
return std::sin(M_PI*pt.z())*std::sin(M_PI*pt.x());
};
auto fz = [](const point_3d& pt) -> double {
return std::sin(M_PI*pt.x())*std::sin(M_PI*pt.y());
};
auto cfx = [](const point_3d& pt) -> double {
return M_PI*std::sin(M_PI*pt.x())*std::cos(M_PI*pt.y()) - M_PI*std::cos(M_PI*pt.z())*std::sin(M_PI*pt.x());
};
auto cfy = [](const point_3d& pt) -> double {
return M_PI*std::sin(M_PI*pt.y())*std::cos(M_PI*pt.z()) - M_PI*std::cos(M_PI*pt.x())*std::sin(M_PI*pt.y());
};
auto cfz = [](const point_3d& pt) -> double {
return M_PI*std::sin(M_PI*pt.z())*std::cos(M_PI*pt.x()) - M_PI*std::cos(M_PI*pt.y())*std::sin(M_PI*pt.z());
};
std::vector<double> errs_x;
std::vector<double> errs_y;
std::vector<double> errs_z;
for (size_t i = 0; i < sizes.size(); i++)
{
double h = sizes[i];
make_geometry(0,h);
gmm::generate( DIMENSION(3) );
model mod(geometric_order, approximation_order);
mod.build();
std::stringstream ss;
ss << "diff_go_" << geometric_order << "_ao_" << approximation_order;
ss << "_seq_" << i << ".silo";
#ifdef WRITE_TEST_OUTPUTS
silo silodb;
silodb.create_db(ss.str());
silodb.import_mesh_from_gmsh();
silodb.write_mesh();
#endif
auto model_num_dofs = mod.num_dofs();
vecxd proj_fx = vecxd::Zero(model_num_dofs);
vecxd proj_fy = vecxd::Zero(model_num_dofs);
vecxd proj_fz = vecxd::Zero(model_num_dofs);
vecxd proj_cfx = vecxd::Zero(model_num_dofs);
vecxd proj_cfy = vecxd::Zero(model_num_dofs);
vecxd proj_cfz = vecxd::Zero(model_num_dofs);
vecxd comp_cfx = vecxd::Zero(model_num_dofs);
vecxd comp_cfy = vecxd::Zero(model_num_dofs);
vecxd comp_cfz = vecxd::Zero(model_num_dofs);
std::vector<entity_data_cpu> eds;
for (auto& e : mod)
{
e.project(fx, proj_fx);
e.project(fy, proj_fy);
e.project(fz, proj_fz);
e.project(cfx, proj_cfx);
e.project(cfy, proj_cfy);
e.project(cfz, proj_cfz);
entity_data_cpu ed;
e.populate_entity_data(ed, mod);
eds.push_back( std::move(ed) );
}
for (auto& ed : eds)
{
timecounter tc;
tc.tic();
compute_field_curls(ed, proj_fx, proj_fy, proj_fz, 1.0, comp_cfx, comp_cfy, comp_cfz);
double time = tc.toc();
auto num_cells = num_elems_all_orientations(ed);
auto num_bf = num_dofs_3D(approximation_order);
auto field_size = num_cells*num_bf;
if (geometric_order == 1)
{
std::cout << "Kernel runtime: " << time << " seconds. Estimated performance: ";
double flops = (35*ed.num_bf+6)*ed.num_bf*num_cells;
std::cout << flops/(1e9*time) << " GFlops/s" << std::endl;
double gbytes = (3*field_size // output curls
+9*field_size*ed.num_bf // input curls and accumulators
+ed.num_orientations*ed.num_bf*ed.num_bf*ed.num_bf // differentiation matrix
+num_cells*3*3)*8/1e9;
std::cout << "Kernel bandwidth: " << gbytes/time << " GB/s" << std::endl;
}
else
{
std::cout << "Kernel runtime: " << time << " seconds. Estimated performance: ";
double flops = ((35*ed.num_bf+12)*ed.num_bf*ed.num_qp + 6*((2*ed.num_bf-1) + 1)*ed.num_bf)*num_cells;
std::cout << flops/(1e9*time) << " GFlops/s" << std::endl;
}
}
double err_x = 0.0;
double err_y = 0.0;
double err_z = 0.0;
#ifdef WRITE_TEST_OUTPUTS
auto model_num_cells = mod.num_cells();
std::vector<double> var_Pf(model_num_cells);
std::vector<double> var_df_dx(model_num_cells);
std::vector<double> var_df_dy(model_num_cells);
std::vector<double> var_df_dz(model_num_cells);
#endif
for (auto& e : mod)
{
for (size_t iT = 0; iT < e.num_cells(); iT++)
{
auto& pe = e.cell(iT);
auto& re = e.cell_refelem(pe);
matxd mass = e.mass_matrix(iT);
auto num_bf = re.num_basis_functions();
auto ofs = e.cell_model_dof_offset(iT);
vecxd diff_x = proj_cfx.segment(ofs, num_bf) - comp_cfx.segment(ofs, num_bf);
vecxd diff_y = proj_cfy.segment(ofs, num_bf) - comp_cfy.segment(ofs, num_bf);
vecxd diff_z = proj_cfz.segment(ofs, num_bf) - comp_cfz.segment(ofs, num_bf);
err_x += diff_x.dot(mass*diff_x);
err_y += diff_y.dot(mass*diff_y);
err_z += diff_z.dot(mass*diff_z);
#ifdef WRITE_TEST_OUTPUTS
auto gi = e.cell_global_index_by_gmsh(iT);
assert(gi < model_num_cells);
vecxd phi_bar = re.basis_functions({1./3., 1./3., 1./3.});
var_df_dx[gi] = Cdf_dx.segment(ofs, num_bf).dot(phi_bar);
var_df_dy[gi] = Cdf_dy.segment(ofs, num_bf).dot(phi_bar);
var_df_dz[gi] = Cdf_dz.segment(ofs, num_bf).dot(phi_bar);
#endif
}
std::cout << "Errors: " << std::sqrt(err_x) << " " << std::sqrt(err_y);
std::cout << " " << std::sqrt(err_z) << std::endl;
}
#ifdef WRITE_TEST_OUTPUTS
silodb.write_zonal_variable("df_dx", var_df_dx);
silodb.write_zonal_variable("df_dy", var_df_dy);
silodb.write_zonal_variable("df_dz", var_df_dz);
#endif
errs_x.push_back( std::sqrt(err_x) );
errs_y.push_back( std::sqrt(err_y) );
errs_z.push_back( std::sqrt(err_z) );
std::cout << std::endl;
}
double rate_x = 0.0;
double rate_y = 0.0;
double rate_z = 0.0;
std::cout << Byellowfg << "rate df/dx rate df/dy rate df/dz" << reset << std::endl;
for (size_t i = 1; i < sizes.size(); i++)
{
std::cout << (rate_x = std::log2(errs_x[i-1]/errs_x[i]) ) << " ";
std::cout << (rate_y = std::log2(errs_y[i-1]/errs_y[i]) ) << " ";
std::cout << (rate_z = std::log2(errs_z[i-1]/errs_z[i]) ) << std::endl;
}
COMPARE_VALUES_ABSOLUTE("df/dx", rate_x, double(approximation_order), 0.2);
COMPARE_VALUES_ABSOLUTE("df/dy", rate_y, double(approximation_order), 0.2);
COMPARE_VALUES_ABSOLUTE("df/dz", rate_z, double(approximation_order), 0.2);
return 0;
}
int main(void)
{
gmsh::initialize();
gmsh::option::setNumber("General.Terminal", 0);
gmsh::option::setNumber("Mesh.Algorithm", 1);
gmsh::option::setNumber("Mesh.Algorithm3D", 1);
int failed_tests = 0;
std::cout << Bmagentafg << " *** TESTING: CURLS ***" << reset << std::endl;
for (size_t go = 1; go < 2; go++)
for (size_t ao = go; ao < 5; ao++)
failed_tests += test_curls_convergence(go, ao);
return failed_tests;
}
......@@ -52,7 +52,7 @@ int test_differentiation_convergence(int geometric_order, int approximation_orde
std::vector<double> sizes({ 0.32, 0.16, 0.08, 0.04 });
std::vector<double> errors;
std::cout << cyanfg << "Testing geometric order " << geometric_order;
std::cout << std::endl << cyanfg << "Testing geometric order " << geometric_order;
std::cout << ", approximation order = " << approximation_order << nofg;
std::cout << std::endl;
......@@ -188,6 +188,8 @@ int test_differentiation_convergence(int geometric_order, int approximation_orde
errs_x.push_back( std::sqrt(err_x) );
errs_y.push_back( std::sqrt(err_y) );
errs_z.push_back( std::sqrt(err_z) );
std::cout << std::endl;
}
double rate_x = 0.0;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment