Skip to content
Snippets Groups Projects
Commit 27bb4c44 authored by Matteo Cicuttin's avatar Matteo Cicuttin
Browse files

Merge branch 'MC/bndsrc_compression' into devel

parents bfde71f6 044712a8
No related branches found
No related tags found
No related merge requests found
......@@ -281,7 +281,13 @@ struct solver_state_gpu
field_gpu bndsrcs;
field_gpu bndsrcs_buf;
field bndsrcs_decomp_cpu;
pinned_field bndsrcs_cpu;
std::vector<size_t> bndsrcs_decomp_table_cpu;
device_vector<size_t> bndsrcs_decomp_table;
stream memcpy_stream;
stream compute_stream;
};
......@@ -384,4 +390,7 @@ void gpu_compute_fluxes_E(const entity_data_gpu&, const field_gpu&, field_gpu&,
void gpu_compute_fluxes_H(const entity_data_gpu&, const field_gpu&, field_gpu&,
const field_gpu&, const material_params_gpu&, gpuStream_t stream = 0);
void decompress_bndsrc(const solver_state_gpu& state, const field_gpu& csrcs,
field_gpu& srcs);
} // namespace maxwell
......@@ -80,8 +80,7 @@ void init_from_model(const model& mod, solver_state_gpu& state)
state.tmp.resize( mod.num_dofs() );
state.bndsrcs.resize( mod.num_fluxes() );
state.bndsrcs_buf.resize( mod.num_fluxes() );
state.bndsrcs_cpu.resize( mod.num_fluxes() );
state.bndsrcs_decomp_cpu.resize( mod.num_fluxes() );
for (auto& e : mod)
{
......@@ -92,10 +91,53 @@ void init_from_model(const model& mod, solver_state_gpu& state)
state.edgs.push_back( std::move(edg) );
}
auto& bds = mod.boundary_descriptors();
size_t face_num_base = 0;
for (auto& e : mod)
{
for (size_t iF = 0; iF < e.num_faces(); iF++)
{
auto& pf = e.face(iF);
auto& rf = e.face_refelem(pf);
auto num_fluxes = rf.num_basis_functions();
auto gfnum = face_num_base + iF;
auto bd = bds[gfnum];
if (bd.type == face_type::BOUNDARY or bd.type == face_type::INTERFACE)
{
for (size_t i = 0; i < num_fluxes; i++)
{
state.bndsrcs_decomp_table_cpu.push_back(gfnum*num_fluxes+i);
}
}
}
face_num_base += e.num_faces();
}
state.bndsrcs_decomp_table.copyin( state.bndsrcs_decomp_table_cpu.data(),
state.bndsrcs_decomp_table_cpu.size() );
state.bndsrcs_cpu.resize( state.bndsrcs_decomp_table_cpu.size() );
state.bndsrcs_buf.resize( state.bndsrcs_decomp_table_cpu.size() );
state.curr_time = 0.0;
state.curr_timestep = 0;
}
void
compress_bndsrc(const solver_state_gpu& state, const field& srcs, pinned_field& csrcs)
{
for (size_t i = 0; i < csrcs.num_dofs; i++)
{
auto from_ofs = state.bndsrcs_decomp_table_cpu[i];
csrcs.Ex[i] = srcs.Ex[from_ofs];
csrcs.Ey[i] = srcs.Ey[from_ofs];
csrcs.Ez[i] = srcs.Ez[from_ofs];
csrcs.Hx[i] = srcs.Hx[from_ofs];
csrcs.Hy[i] = srcs.Hy[from_ofs];
csrcs.Hz[i] = srcs.Hz[from_ofs];
}
}
static void
compute_curls(solver_state_gpu& state, const field_gpu& curr, field_gpu& next)
......@@ -373,14 +415,15 @@ prepare_sources(const model& mod, maxwell::solver_state_gpu& state,
maxwell::parameter_loader& mpl)
{
if ( mpl.boundary_sources_enabled() )
maxwell::eval_boundary_sources(mod, mpl, state, state.bndsrcs_cpu);
maxwell::eval_boundary_sources(mod, mpl, state, state.bndsrcs_decomp_cpu);
if ( mpl.interface_sources_enabled() )
maxwell::eval_interface_sources(mod, mpl, state, state.bndsrcs_cpu);
maxwell::eval_interface_sources(mod, mpl, state, state.bndsrcs_decomp_cpu);
compress_bndsrc(state, state.bndsrcs_decomp_cpu, state.bndsrcs_cpu);
state.bndsrcs_buf.copyin(state.bndsrcs_cpu, state.memcpy_stream);
state.memcpy_stream.wait();
std::swap(state.bndsrcs, state.bndsrcs_buf);
decompress_bndsrc(state, state.bndsrcs_buf, state.bndsrcs);
}
void
......@@ -389,6 +432,7 @@ do_sources(const model& mod, maxwell::solver_state_gpu& state,
{
if ( mpl.source_has_changed_state() )
{
state.bndsrcs_decomp_cpu.zero();
state.bndsrcs_cpu.zero();
mpl.source_was_cleared();
}
......@@ -398,20 +442,23 @@ do_sources(const model& mod, maxwell::solver_state_gpu& state,
auto ve = mpl.volume_sources_enabled();
if ( be )
maxwell::eval_boundary_sources(mod, mpl, state, state.bndsrcs_cpu);
maxwell::eval_boundary_sources(mod, mpl, state, state.bndsrcs_decomp_cpu);
if ( ie )
maxwell::eval_interface_sources(mod, mpl, state, state.bndsrcs_cpu);
maxwell::eval_interface_sources(mod, mpl, state, state.bndsrcs_decomp_cpu);
if ( be or ie or ve )
{
compress_bndsrc(state, state.bndsrcs_decomp_cpu, state.bndsrcs_cpu);
state.bndsrcs_buf.copyin(state.bndsrcs_cpu, state.memcpy_stream);
}
}
void
swap(maxwell::solver_state_gpu& state)
{
checkGPU( gpuDeviceSynchronize() );
std::swap(state.bndsrcs, state.bndsrcs_buf);
decompress_bndsrc(state, state.bndsrcs_buf, state.bndsrcs);
std::swap(state.emf_curr, state.emf_next);
}
......
......@@ -400,4 +400,38 @@ gpu_compute_fluxes_H(const entity_data_gpu& edg, const field_gpu& jumps,
}
}
__global__ void
gpu_bndsrcs_decompress_kernel(const size_t *dtable, const field_gpu::const_raw_ptrs csrcs,
field_gpu::raw_ptrs srcs)
{
auto cdof = blockIdx.x * blockDim.x + threadIdx.x;
if (cdof >= csrcs.num_dofs)
return;
auto ddof = dtable[cdof];
srcs.Ex[ddof] = csrcs.Ex[cdof];
srcs.Ey[ddof] = csrcs.Ey[cdof];
srcs.Ez[ddof] = csrcs.Ez[cdof];
srcs.Hx[ddof] = csrcs.Hx[cdof];
srcs.Hy[ddof] = csrcs.Hy[cdof];
srcs.Hz[ddof] = csrcs.Hz[cdof];
}
void
decompress_bndsrc(const solver_state_gpu& state, const field_gpu& csrcs, field_gpu& srcs)
{
static const size_t DECOMPRESS_THREADS = 256;
auto num_cdofs = csrcs.num_dofs;
auto gs = num_cdofs/DECOMPRESS_THREADS;
if (num_cdofs % DECOMPRESS_THREADS != 0)
gs += 1;
dim3 grid_size(gs);
gpu_bndsrcs_decompress_kernel<<<gs, DECOMPRESS_THREADS, 0, state.compute_stream>>>(
state.bndsrcs_decomp_table.data(), csrcs.data(), srcs.data());
}
} // namespace maxwell
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment