/* This is GMSH/DG, a GPU-Accelerated Nodal Discontinuous Galerkin
* solver for Conservation Laws.
*
* Copyright (C) 2020-2022 Matteo Cicuttin - University of Liège
*
 * This code is released under the GNU AGPLv3 license; see LICENSE.txt for details.
*/
#include <iostream>
#include <sstream>
#include <string>
#include <utility>
#include "libgmshdg/gmsh_io.h"
#include "libgmshdg/kernels_cpu.h"
#include "libgmshdg/kernels_gpu.h"
#include "libgmshdg/silo_output.hpp"
#include "maxwell/maxwell_interface.h"
#include "maxwell/maxwell_common.h"
#include "timecounter.h"
namespace maxwell {
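/* Evaluate the material parameters at the barycenter of every cell, expand
 * them to one value per volume DOF (inverse epsilon, inverse mu, sigma and
 * sigma/epsilon) and upload the resulting arrays to the GPU, together with
 * the flux coefficients aE/bE/aH/bH and the boundary condition coefficients. */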
void
init_matparams(const model& mod, solver_state_gpu& state,
const parameter_loader& mpl)
{
state.matparams.num_dofs = mod.num_dofs();
state.matparams.num_fluxes = mod.num_fluxes();
vecxd inv_eps = vecxd::Zero( mod.num_dofs() );
vecxd inv_mu = vecxd::Zero( mod.num_dofs() );
vecxd sigma = vecxd::Zero( mod.num_dofs() );
vecxd sigma_over_eps = vecxd::Zero( mod.num_dofs() );
for (auto& e : mod)
{
auto etag = e.material_tag();
for (size_t iT = 0; iT < e.num_cells(); iT++)
{
auto& pe = e.cell(iT);
auto& re = e.cell_refelem(pe);
auto bar = pe.barycenter();
auto v_epsilon = mpl.epsilon(etag, bar);
auto v_mu = mpl.mu(etag, bar);
auto v_sigma = mpl.sigma(etag, bar);
auto ofs = e.cell_model_dof_offset(iT);
for (size_t iD = 0; iD < re.num_basis_functions(); iD++)
{
inv_eps(ofs+iD) = 1./v_epsilon;
inv_mu(ofs+iD) = 1./v_mu;
sigma(ofs+iD) = v_sigma;
sigma_over_eps(ofs+iD) = v_sigma/v_epsilon;
}
}
}
state.matparams.inv_epsilon.copyin( inv_eps.data(), inv_eps.size() );
state.matparams.inv_mu.copyin( inv_mu.data(), inv_mu.size() );
state.matparams.sigma.copyin( sigma.data(), sigma.size() );
state.matparams.sigma_over_epsilon.copyin( sigma_over_eps.data(), sigma_over_eps.size() );
vecxd aE, bE, aH, bH;
init_lax_milgram(mod, mpl, aE, bE, aH, bH);
state.matparams.aE.copyin(aE.data(), aE.size());
state.matparams.bE.copyin(bE.data(), bE.size());
state.matparams.aH.copyin(aH.data(), aH.size());
state.matparams.bH.copyin(bH.data(), bH.size());
vecxd bcc;
init_boundary_conditions(mod, mpl, bcc);
state.matparams.bc_coeffs.copyin(bcc.data(), bcc.size());
}
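
/* Boundary and interface source data is nonzero only on faces that carry a
 * source (plane-wave boundaries, imposed E-field or surface-current
 * interfaces). Record the global flux offsets of the DOFs of those faces:
 * compress_bndsrc() gathers exactly these entries into a contiguous pinned
 * buffer, so only they have to be transferred to the GPU at each timestep. */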
static void
build_source_compression_tables(const model& mod, solver_state_gpu& state, const parameter_loader& mpl)
{
auto& bds = mod.boundary_descriptors();
size_t face_num_base = 0;
for (auto& e : mod)
{
for (size_t iF = 0; iF < e.num_faces(); iF++)
{
auto& pf = e.face(iF);
auto& rf = e.face_refelem(pf);
auto num_fluxes = rf.num_basis_functions();
auto gfnum = face_num_base + iF;
auto bd = bds[gfnum];
if (bd.type == face_type::INTERFACE)
{
switch ( mpl.interface_type(bd.material_tag()) )
{
case interface_condition::E_FIELD:
case interface_condition::SURFACE_CURRENT:
for (size_t i = 0; i < num_fluxes; i++)
state.bndsrcs_decomp_table_cpu.push_back(gfnum*num_fluxes+i);
break;
default:
break;
}
}
if (bd.type == face_type::BOUNDARY)
{
switch ( mpl.boundary_type(bd.material_tag()) )
{
case boundary_condition::PLANE_WAVE_E:
for (size_t i = 0; i < num_fluxes; i++)
state.bndsrcs_decomp_table_cpu.push_back(gfnum*num_fluxes+i);
break;
default:
break;
}
}
}
face_num_base += e.num_faces();
}
state.bndsrcs_decomp_table.copyin( state.bndsrcs_decomp_table_cpu.data(),
state.bndsrcs_decomp_table_cpu.size() );
state.bndsrcs_cpu.resize( state.bndsrcs_decomp_table_cpu.size() );
state.bndsrcs_buf.resize( state.bndsrcs_decomp_table_cpu.size() );
}
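
/* Allocate the solver state: field arrays, derivative/jump/flux scratch
 * buffers, volume and boundary source buffers, and the per-entity data in
 * CPU and GPU form. The RK4 stage vectors k1..k4 are allocated only when
 * the RK4 integrator was requested. */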
void
init_from_model(const model& mod, solver_state_gpu& state, const maxwell::parameter_loader& mpl)
{
state.emf_curr.resize( mod.num_dofs() );
state.emf_next.resize( mod.num_dofs() );
state.dx.resize( mod.num_dofs() );
state.dy.resize( mod.num_dofs() );
state.dz.resize( mod.num_dofs() );
state.jumps.resize( mod.num_fluxes() );
state.fluxes.resize( mod.num_fluxes() );
if ( mpl.sim_timeIntegrator() == time_integrator_type::RK4 )
{
state.k1.resize( mod.num_dofs() );
state.k2.resize( mod.num_dofs() );
state.k3.resize( mod.num_dofs() );
state.k4.resize( mod.num_dofs() );
}
state.tmp.resize( mod.num_dofs() );
state.Jx_src.resize( mod.num_dofs() );
state.Jy_src.resize( mod.num_dofs() );
state.Jz_src.resize( mod.num_dofs() );
state.Jx_src_gpu.resize( mod.num_dofs() );
state.Jy_src_gpu.resize( mod.num_dofs() );
state.Jz_src_gpu.resize( mod.num_dofs() );
state.Jx_src_gpu_buf.resize( mod.num_dofs() );
state.Jy_src_gpu_buf.resize( mod.num_dofs() );
state.Jz_src_gpu_buf.resize( mod.num_dofs() );
state.bndsrcs.resize( mod.num_fluxes() );
state.bndsrcs_decomp_cpu.resize( mod.num_fluxes() );
for (auto& e : mod)
{
entity_data_cpu ed;
e.populate_entity_data(ed, mod);
entity_data_gpu edg(ed);
state.eds.push_back( std::move(ed) );
state.edgs.push_back( std::move(edg) );
}
build_source_compression_tables(mod, state, mpl);
state.curr_time = 0.0;
state.curr_timestep = 0;
}
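
/* Gather the sparse boundary source field into a contiguous pinned buffer,
 * following the offsets recorded by build_source_compression_tables(). */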
void
compress_bndsrc(const solver_state_gpu& state, const field& srcs, pinned_field& csrcs)
{
for (size_t i = 0; i < csrcs.num_dofs; i++)
{
auto from_ofs = state.bndsrcs_decomp_table_cpu[i];
csrcs.Ex[i] = srcs.Ex[from_ofs];
csrcs.Ey[i] = srcs.Ey[from_ofs];
csrcs.Ez[i] = srcs.Ez[from_ofs];
csrcs.Hx[i] = srcs.Hx[from_ofs];
csrcs.Hy[i] = srcs.Hy[from_ofs];
csrcs.Hz[i] = srcs.Hz[from_ofs];
}
}
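
/* Volume part of the DG operator: compute the spatial derivatives of all
 * six field components entity by entity, then assemble the curls. Signs
 * follow the semi-discrete Maxwell system
 *     dE/dt =  (curl H - J - sigma E)/epsilon,
 *     dH/dt = -(curl E)/mu,
 * where the material scalings and the conductivity term are applied later
 * by the time-integration kernels. */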
static void
compute_curls(solver_state_gpu& state, const field_gpu& curr, field_gpu& next, bool use_sources)
{
auto currp = curr.data();
auto nextp = next.data();
auto dxp = state.dx.data();
auto dyp = state.dy.data();
auto dzp = state.dz.data();
for (const auto& edg : state.edgs)
{
gpu_compute_field_derivatives(edg, currp.Ex, dxp.Ex, dyp.Ex, dzp.Ex, 1.0, state.compute_stream);
gpu_compute_field_derivatives(edg, currp.Ey, dxp.Ey, dyp.Ey, dzp.Ey, 1.0, state.compute_stream);
gpu_compute_field_derivatives(edg, currp.Ez, dxp.Ez, dyp.Ez, dzp.Ez, 1.0, state.compute_stream);
gpu_compute_field_derivatives(edg, currp.Hx, dxp.Hx, dyp.Hx, dzp.Hx, 1.0, state.compute_stream);
gpu_compute_field_derivatives(edg, currp.Hy, dxp.Hy, dyp.Hy, dzp.Hy, 1.0, state.compute_stream);
gpu_compute_field_derivatives(edg, currp.Hz, dxp.Hz, dyp.Hz, dzp.Hz, 1.0, state.compute_stream);
}
auto Jx = state.Jx_src_gpu.data();
auto Jy = state.Jy_src_gpu.data();
auto Jz = state.Jz_src_gpu.data();
if (use_sources)
{
gpu_curl(nextp.Ex, dyp.Hz, dzp.Hy, Jx, nextp.num_dofs, state.compute_stream);
gpu_curl(nextp.Ey, dzp.Hx, dxp.Hz, Jy, nextp.num_dofs, state.compute_stream);
gpu_curl(nextp.Ez, dxp.Hy, dyp.Hx, Jz, nextp.num_dofs, state.compute_stream);
}
else
{
gpu_curl(nextp.Ex, dyp.Hz, dzp.Hy, nextp.num_dofs, state.compute_stream);
gpu_curl(nextp.Ey, dzp.Hx, dxp.Hz, nextp.num_dofs, state.compute_stream);
gpu_curl(nextp.Ez, dxp.Hy, dyp.Hx, nextp.num_dofs, state.compute_stream);
}
gpu_curl(nextp.Hx, dzp.Ey, dyp.Ez, nextp.num_dofs, state.compute_stream);
gpu_curl(nextp.Hy, dxp.Ez, dzp.Ex, nextp.num_dofs, state.compute_stream);
gpu_curl(nextp.Hz, dyp.Ex, dxp.Ey, nextp.num_dofs, state.compute_stream);
}
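
/* E-side variant for the leapfrog scheme: differentiate only E and write
 * the right-hand side of the H update. */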
static void
compute_curls_E(solver_state_gpu& state, const field_gpu& curr, field_gpu& next)
{
auto currp = curr.data();
auto nextp = next.data();
auto dxp = state.dx.data();
auto dyp = state.dy.data();
auto dzp = state.dz.data();
for (const auto& edg : state.edgs)
{
gpu_compute_field_derivatives(edg, currp.Ex, dxp.Ex, dyp.Ex, dzp.Ex, 1.0, state.compute_stream);
gpu_compute_field_derivatives(edg, currp.Ey, dxp.Ey, dyp.Ey, dzp.Ey, 1.0, state.compute_stream);
gpu_compute_field_derivatives(edg, currp.Ez, dxp.Ez, dyp.Ez, dzp.Ez, 1.0, state.compute_stream);
}
gpu_curl(nextp.Hx, dzp.Ey, dyp.Ez, nextp.num_dofs, state.compute_stream);
gpu_curl(nextp.Hy, dxp.Ez, dzp.Ex, nextp.num_dofs, state.compute_stream);
gpu_curl(nextp.Hz, dyp.Ex, dxp.Ey, nextp.num_dofs, state.compute_stream);
}
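
/* H-side variant for the leapfrog scheme: differentiate only H and write
 * the right-hand side of the E update, including the volume current J when
 * sources are enabled. */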
static void
compute_curls_H(solver_state_gpu& state, const field_gpu& curr, field_gpu& next, bool use_sources)
{
auto currp = curr.data();
auto nextp = next.data();
auto dxp = state.dx.data();
auto dyp = state.dy.data();
auto dzp = state.dz.data();
for (const auto& edg : state.edgs)
{
gpu_compute_field_derivatives(edg, currp.Hx, dxp.Hx, dyp.Hx, dzp.Hx, 1.0, state.compute_stream);
gpu_compute_field_derivatives(edg, currp.Hy, dxp.Hy, dyp.Hy, dzp.Hy, 1.0, state.compute_stream);
gpu_compute_field_derivatives(edg, currp.Hz, dxp.Hz, dyp.Hz, dzp.Hz, 1.0, state.compute_stream);
}
if (use_sources)
{
auto Jx = state.Jx_src_gpu.data();
auto Jy = state.Jy_src_gpu.data();
auto Jz = state.Jz_src_gpu.data();
gpu_curl(nextp.Ex, dyp.Hz, dzp.Hy, Jx, nextp.num_dofs, state.compute_stream);
gpu_curl(nextp.Ey, dzp.Hx, dxp.Hz, Jy, nextp.num_dofs, state.compute_stream);
gpu_curl(nextp.Ez, dxp.Hy, dyp.Hx, Jz, nextp.num_dofs, state.compute_stream);
}
else
{
gpu_curl(nextp.Ex, dyp.Hz, dzp.Hy, nextp.num_dofs, state.compute_stream);
gpu_curl(nextp.Ey, dzp.Hx, dxp.Hz, nextp.num_dofs, state.compute_stream);
gpu_curl(nextp.Ez, dxp.Hy, dyp.Hx, nextp.num_dofs, state.compute_stream);
}
}
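
/* Surface part of the DG operator: compute the field jumps across faces,
 * evaluate the numerical fluxes (folding in boundary sources when enabled)
 * and lift the fluxes back to volume DOFs. */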
static void
compute_fluxes(solver_state_gpu& state, const field_gpu& in, field_gpu& out, bool use_sources)
{
auto matparams_ptrs = state.matparams.data();
double *bc_coeffs = matparams_ptrs.bc_coeffs;
for (const auto& edg : state.edgs)
maxwell::gpu_compute_jumps(edg, in, state.jumps, bc_coeffs, state.compute_stream);
for (const auto& edg : state.edgs)
maxwell::gpu_compute_fluxes(edg, state.jumps, state.fluxes, state.bndsrcs,
state.matparams, use_sources, state.compute_stream);
auto fluxesp = state.fluxes.data();
auto outp = out.data();
for (auto& edg : state.edgs)
{
gpu_compute_flux_lifting(edg, fluxesp.Ex, outp.Ex, state.compute_stream);
gpu_compute_flux_lifting(edg, fluxesp.Ey, outp.Ey, state.compute_stream);
gpu_compute_flux_lifting(edg, fluxesp.Ez, outp.Ez, state.compute_stream);
gpu_compute_flux_lifting(edg, fluxesp.Hx, outp.Hx, state.compute_stream);
gpu_compute_flux_lifting(edg, fluxesp.Hy, outp.Hy, state.compute_stream);
gpu_compute_flux_lifting(edg, fluxesp.Hz, outp.Hz, state.compute_stream);
}
}
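
/* E-jump variant for the leapfrog scheme: only the H components of the
 * right-hand side receive a flux contribution. */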
static void
compute_fluxes_E(solver_state_gpu& state, const field_gpu& in, field_gpu& out, bool use_sources)
{
auto matparams_ptrs = state.matparams.data();
double *bc_coeffs = matparams_ptrs.bc_coeffs;
for (const auto& edg : state.edgs)
maxwell::gpu_compute_jumps_E(edg, in, state.jumps, bc_coeffs, state.compute_stream);
for (const auto& edg : state.edgs)
maxwell::gpu_compute_fluxes_E(edg, state.jumps, state.fluxes, state.bndsrcs,
state.matparams, use_sources, state.compute_stream);
auto fluxesp = state.fluxes.data();
auto outp = out.data();
for (auto& edg : state.edgs)
{
gpu_compute_flux_lifting(edg, fluxesp.Hx, outp.Hx, state.compute_stream);
gpu_compute_flux_lifting(edg, fluxesp.Hy, outp.Hy, state.compute_stream);
gpu_compute_flux_lifting(edg, fluxesp.Hz, outp.Hz, state.compute_stream);
}
}
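
/* H-jump variant for the leapfrog scheme: only the E components of the
 * right-hand side receive a flux contribution. */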
static void
compute_fluxes_H(solver_state_gpu& state, const field_gpu& in, field_gpu& out, bool use_sources)
{
auto matparams_ptrs = state.matparams.data();
double *bc_coeffs = matparams_ptrs.bc_coeffs;
for (const auto& edg : state.edgs)
maxwell::gpu_compute_jumps_H(edg, in, state.jumps, bc_coeffs, state.compute_stream);
for (const auto& edg : state.edgs)
maxwell::gpu_compute_fluxes_H(edg, state.jumps, state.fluxes, state.bndsrcs,
state.matparams, use_sources, state.compute_stream);
auto fluxesp = state.fluxes.data();
auto outp = out.data();
for (auto& edg : state.edgs)
{
gpu_compute_flux_lifting(edg, fluxesp.Ex, outp.Ex, state.compute_stream);
gpu_compute_flux_lifting(edg, fluxesp.Ey, outp.Ey, state.compute_stream);
gpu_compute_flux_lifting(edg, fluxesp.Ez, outp.Ez, state.compute_stream);
}
}
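
/* Leapfrog time integration: advance E from curl H plus fluxes (with the
 * conductivity term handled inside gpu_compute_leapfrog_update), then
 * advance H from the freshly updated E. Here r, eps and mu are the per-DOF
 * arrays sigma/epsilon, 1/epsilon and 1/mu. */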
static void
leapfrog(solver_state_gpu& state, const parameter_loader& mpl)
{
double *r = state.matparams.sigma_over_epsilon.data();
double *eps = state.matparams.inv_epsilon.data();
double *mu = state.matparams.inv_mu.data();
auto currp = state.emf_curr.data();
auto nextp = state.emf_next.data();
auto tmpp = state.tmp.data();
bool ve = mpl.volume_sources_enabled();
compute_curls_H(state, state.emf_curr, state.tmp, ve);
auto be = mpl.boundary_sources_enabled();
auto ie = mpl.interface_sources_enabled();
compute_fluxes_H(state, state.emf_curr, state.tmp, be or ie);
auto dt = state.delta_t;
gpu_compute_leapfrog_update(nextp.Ex, currp.Ex, r, tmpp.Ex, eps, nextp.num_dofs,
dt, state.compute_stream);
gpu_compute_leapfrog_update(nextp.Ey, currp.Ey, r, tmpp.Ey, eps, nextp.num_dofs,
dt, state.compute_stream);
gpu_compute_leapfrog_update(nextp.Ez, currp.Ez, r, tmpp.Ez, eps, nextp.num_dofs,
dt, state.compute_stream);
compute_curls_E(state, state.emf_next, state.tmp);
compute_fluxes_E(state, state.emf_next, state.tmp, be or ie);
gpu_compute_leapfrog_update(nextp.Hx, currp.Hx, tmpp.Hx, mu, nextp.num_dofs,
dt, state.compute_stream);
gpu_compute_leapfrog_update(nextp.Hy, currp.Hy, tmpp.Hy, mu, nextp.num_dofs,
dt, state.compute_stream);
gpu_compute_leapfrog_update(nextp.Hz, currp.Hz, tmpp.Hz, mu, nextp.num_dofs,
dt, state.compute_stream);
}
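
/* Full semi-discrete DG operator next = f(curr): volume curls plus lifted
 * surface fluxes, as used by the Euler and RK4 integrators. */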
static void
apply_operator(solver_state_gpu& state, const parameter_loader& mpl, const field_gpu& curr,
field_gpu& next)
{
bool ve = mpl.volume_sources_enabled();
auto be = mpl.boundary_sources_enabled();
auto ie = mpl.interface_sources_enabled();
compute_curls(state, curr, next, ve);
compute_fluxes(state, curr, next, be or ie);
}
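
/* Euler-type update out = y + dt*k with the material scalings applied in
 * the kernel: the E components use 1/epsilon and the loss ratio
 * r = sigma/epsilon, the H components use 1/mu. Also used to build the
 * intermediate RK4 stage values. */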
static void
compute_euler_update(solver_state_gpu& state, const field_gpu& y,
const field_gpu& k, double dt, field_gpu& out)
{
double *r = state.matparams.sigma_over_epsilon.data();
double *eps = state.matparams.inv_epsilon.data();
double *mu = state.matparams.inv_mu.data();
auto outp = out.data();
auto yp = y.data();
auto kp = k.data();
gpu_compute_euler_update(outp.Ex, yp.Ex, kp.Ex, r, eps, out.num_dofs, dt, state.compute_stream);
gpu_compute_euler_update(outp.Ey, yp.Ey, kp.Ey, r, eps, out.num_dofs, dt, state.compute_stream);
gpu_compute_euler_update(outp.Ez, yp.Ez, kp.Ez, r, eps, out.num_dofs, dt, state.compute_stream);
gpu_compute_euler_update(outp.Hx, yp.Hx, kp.Hx, mu, out.num_dofs, dt, state.compute_stream);
gpu_compute_euler_update(outp.Hy, yp.Hy, kp.Hy, mu, out.num_dofs, dt, state.compute_stream);
gpu_compute_euler_update(outp.Hz, yp.Hz, kp.Hz, mu, out.num_dofs, dt, state.compute_stream);
}
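
/* Final RK4 combination, presumably the classical weights
 * out = in + dt/6 * (k1 + 2*k2 + 2*k3 + k4), applied inside the kernel
 * together with the same per-DOF material scalings as above. */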
static void
compute_rk4_weighted_sum(solver_state_gpu& state, const field_gpu& in,
double dt, field_gpu& out)
{
double *r = state.matparams.sigma_over_epsilon.data();
double *eps = state.matparams.inv_epsilon.data();
double *mu = state.matparams.inv_mu.data();
auto inp = in.data();
auto outp = out.data();
auto k1p = state.k1.data();
auto k2p = state.k2.data();
auto k3p = state.k3.data();
auto k4p = state.k4.data();
gpu_compute_rk4_weighted_sum(outp.Ex, inp.Ex, k1p.Ex, k2p.Ex, k3p.Ex, k4p.Ex, r, eps, out.num_dofs, dt, state.compute_stream);
gpu_compute_rk4_weighted_sum(outp.Ey, inp.Ey, k1p.Ey, k2p.Ey, k3p.Ey, k4p.Ey, r, eps, out.num_dofs, dt, state.compute_stream);
gpu_compute_rk4_weighted_sum(outp.Ez, inp.Ez, k1p.Ez, k2p.Ez, k3p.Ez, k4p.Ez, r, eps, out.num_dofs, dt, state.compute_stream);
gpu_compute_rk4_weighted_sum(outp.Hx, inp.Hx, k1p.Hx, k2p.Hx, k3p.Hx, k4p.Hx, mu, out.num_dofs, dt, state.compute_stream);
gpu_compute_rk4_weighted_sum(outp.Hy, inp.Hy, k1p.Hy, k2p.Hy, k3p.Hy, k4p.Hy, mu, out.num_dofs, dt, state.compute_stream);
gpu_compute_rk4_weighted_sum(outp.Hz, inp.Hz, k1p.Hz, k2p.Hz, k3p.Hz, k4p.Hz, mu, out.num_dofs, dt, state.compute_stream);
}
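
/* Advance the solution by one timestep with the requested time integrator:
 * forward Euler, classical four-stage RK4, or leapfrog. */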
void
timestep(const model& mod, solver_state_gpu& state, const parameter_loader& mpl, time_integrator_type ti)
{
//timecounter tc;
//tc.tic();
if (ti == time_integrator_type::EULER)
{
apply_operator(state, mpl, state.emf_curr, state.tmp);
compute_euler_update(state, state.emf_curr, state.tmp, state.delta_t, state.emf_next);
}
if (ti == time_integrator_type::RK4)
{
apply_operator(state, mpl, state.emf_curr, state.k1);
compute_euler_update(state, state.emf_curr, state.k1, 0.5*state.delta_t, state.tmp);
apply_operator(state, mpl, state.tmp, state.k2);
compute_euler_update(state, state.emf_curr, state.k2, 0.5*state.delta_t, state.tmp);
apply_operator(state, mpl, state.tmp, state.k3);
compute_euler_update(state, state.emf_curr, state.k3, state.delta_t, state.tmp);
apply_operator(state, mpl, state.tmp, state.k4);
compute_rk4_weighted_sum(state, state.emf_curr, state.delta_t, state.emf_next);
}
if (ti == time_integrator_type::LEAPFROG)
{
leapfrog(state, mpl);
}
state.curr_time += state.delta_t;
state.curr_timestep += 1;
//double time = tc.toc();
//double dofs_per_sec = 6*state.emf_curr.num_dofs/time;
//std::cout << "Timestep " << state.curr_timestep << ", " << dofs_per_sec << " DOFs/s" << std::endl;
}
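
/* Evaluate the boundary/interface sources once before time-stepping starts,
 * compress them, upload them and expand them to full flux size on the
 * device. */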
void
prepare_sources(const model& mod, maxwell::solver_state_gpu& state,
maxwell::parameter_loader& mpl)
{
if ( mpl.boundary_sources_enabled() )
maxwell::eval_boundary_sources_new(mod, mpl, state, state.bndsrcs_decomp_cpu);
if ( mpl.interface_sources_enabled() )
maxwell::eval_interface_sources(mod, mpl, state, state.bndsrcs_decomp_cpu);
compress_bndsrc(state, state.bndsrcs_decomp_cpu, state.bndsrcs_cpu);
state.bndsrcs_buf.copyin(state.bndsrcs_cpu, state.memcpy_stream);
state.memcpy_stream.wait();
decompress_bndsrc(state, state.bndsrcs_buf, state.bndsrcs);
}
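
/* Asynchronously upload the volume current DOFs of a single entity to the
 * GPU staging buffers. */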
static void
copyin_volume_sources(const entity& e, solver_state_gpu& state)
{
auto ofs = e.dof_base();
auto num = e.num_dofs();
state.Jx_src_gpu_buf.copyin( state.Jx_src.data(), ofs, num, state.memcpy_stream );
state.Jy_src_gpu_buf.copyin( state.Jy_src.data(), ofs, num, state.memcpy_stream );
state.Jz_src_gpu_buf.copyin( state.Jz_src.data(), ofs, num, state.memcpy_stream );
}
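
/* Project the user-defined volume current J onto the DG basis of every
 * entity that carries a source, then stage the result for upload. */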
static void
eval_volume_sources(const model& mod, const parameter_loader& mpl, solver_state_gpu& state)
{
for (auto& e : mod)
{
auto etag = e.material_tag();
if ( not mpl.volume_has_source(etag) )
continue;
auto fJ = [&](const point_3d& pt) -> vec3d {
return mpl.eval_volume_source(etag, pt, state.curr_time);
};
e.project(fJ, state.Jx_src.data(), state.Jy_src.data(), state.Jz_src.data());
copyin_volume_sources(e, state);
}
}
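
/* Per-timestep source handling: if a source changed state (e.g. was
 * switched off), zero all source buffers first; then re-evaluate the
 * enabled sources on the CPU, compress them and stage them for an
 * asynchronous upload on memcpy_stream. swap() makes them current. */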
void
do_sources(const model& mod, maxwell::solver_state_gpu& state,
maxwell::parameter_loader& mpl)
{
if ( mpl.source_has_changed_state() )
{
state.Jx_src.zero();
state.Jy_src.zero();
state.Jz_src.zero();
state.Jx_src_gpu.zero();
state.Jy_src_gpu.zero();
state.Jz_src_gpu.zero();
state.Jx_src_gpu_buf.zero();
state.Jy_src_gpu_buf.zero();
state.Jz_src_gpu_buf.zero();
state.bndsrcs_decomp_cpu.zero();
state.bndsrcs_cpu.zero();
mpl.source_was_cleared();
}
auto be = mpl.boundary_sources_enabled();
auto ie = mpl.interface_sources_enabled();
auto ve = mpl.volume_sources_enabled();
if ( ve )
eval_volume_sources(mod, mpl, state);
if ( be )
eval_boundary_sources_new(mod, mpl, state, state.bndsrcs_decomp_cpu);
if ( ie )
eval_interface_sources(mod, mpl, state, state.bndsrcs_decomp_cpu);
if ( be or ie )
{
compress_bndsrc(state, state.bndsrcs_decomp_cpu, state.bndsrcs_cpu);
state.bndsrcs_buf.copyin(state.bndsrcs_cpu, state.memcpy_stream);
}
}
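
/* Publish the buffers filled during this timestep: wait for the
 * asynchronous copies, expand the staged boundary sources on the device,
 * then swap the volume source double buffers and the field arrays. */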
void
swap(solver_state_gpu& state, const parameter_loader& mpl)
{
/* Wait for all async copies to finish */
checkGPU( gpuDeviceSynchronize() );
auto be = mpl.boundary_sources_enabled();
auto ie = mpl.interface_sources_enabled();
if ( be or ie )
decompress_bndsrc(state, state.bndsrcs_buf, state.bndsrcs);
std::swap(state.Jx_src_gpu, state.Jx_src_gpu_buf);
std::swap(state.Jy_src_gpu, state.Jy_src_gpu_buf);
std::swap(state.Jz_src_gpu, state.Jz_src_gpu_buf);
std::swap(state.emf_curr, state.emf_next);
/* Wait for decompression to finish */
checkGPU( gpuDeviceSynchronize() );
}
#ifdef USE_MPI
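/* Stub: gathering the field on a single rank is not implemented in the
 * GPU version. */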
void
gather_field(const model& mod, const maxwell::field_gpu& in, maxwell::field_gpu& f,
int root, MPI_Comm comm)
{}
#endif /* USE_MPI */
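
/* Copy the current fields back to the host and export E, H and J = sigma*E
 * to a Silo database for visualization. */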
void
export_fields_to_silo(const model& mod, maxwell::solver_state_gpu& state,
const maxwell::parameter_loader& mpl, std::string basename)
{
/* This is supposed to be called after swap */
if (basename == "")
basename = "timestep";
std::stringstream ss;
ss << mpl.sim_name() << "/" << basename << "_" << state.curr_timestep << ".silo";
silo sdb;
sdb.create_db(ss.str());
sdb.import_mesh_from_gmsh();
sdb.write_mesh(state.curr_time, state.curr_timestep);
maxwell::field f;
f.resize( mod.num_dofs() );
state.emf_curr.copyout(f);
auto E_exportmode = mpl.postpro_fieldExportMode("E");
export_vector_field(mod, sdb, f.Ex, f.Ey, f.Ez, "E", E_exportmode);
auto H_exportmode = mpl.postpro_fieldExportMode("H");
export_vector_field(mod, sdb, f.Hx, f.Hy, f.Hz, "H", H_exportmode);
auto sigma = [&](int tag, const point_3d& pt) -> double { return mpl.sigma(tag, pt); };
auto J_exportmode = mpl.postpro_fieldExportMode("J");
export_vector_field(mod, sdb, f.Ex, f.Ey, f.Ez, sigma, "J", J_exportmode);
}
} // namespace maxwell