Skip to content
Snippets Groups Projects
Commit 27bb4c44 authored by Matteo Cicuttin's avatar Matteo Cicuttin
Browse files

Merge branch 'MC/bndsrc_compression' into devel

parents bfde71f6 044712a8
Branches
Tags
No related merge requests found
...@@ -281,7 +281,13 @@ struct solver_state_gpu ...@@ -281,7 +281,13 @@ struct solver_state_gpu
field_gpu bndsrcs; field_gpu bndsrcs;
field_gpu bndsrcs_buf; field_gpu bndsrcs_buf;
field bndsrcs_decomp_cpu;
pinned_field bndsrcs_cpu; pinned_field bndsrcs_cpu;
std::vector<size_t> bndsrcs_decomp_table_cpu;
device_vector<size_t> bndsrcs_decomp_table;
stream memcpy_stream; stream memcpy_stream;
stream compute_stream; stream compute_stream;
}; };
...@@ -384,4 +390,7 @@ void gpu_compute_fluxes_E(const entity_data_gpu&, const field_gpu&, field_gpu&, ...@@ -384,4 +390,7 @@ void gpu_compute_fluxes_E(const entity_data_gpu&, const field_gpu&, field_gpu&,
void gpu_compute_fluxes_H(const entity_data_gpu&, const field_gpu&, field_gpu&, void gpu_compute_fluxes_H(const entity_data_gpu&, const field_gpu&, field_gpu&,
const field_gpu&, const material_params_gpu&, gpuStream_t stream = 0); const field_gpu&, const material_params_gpu&, gpuStream_t stream = 0);
void decompress_bndsrc(const solver_state_gpu& state, const field_gpu& csrcs,
field_gpu& srcs);
} // namespace maxwell } // namespace maxwell
...@@ -80,8 +80,7 @@ void init_from_model(const model& mod, solver_state_gpu& state) ...@@ -80,8 +80,7 @@ void init_from_model(const model& mod, solver_state_gpu& state)
state.tmp.resize( mod.num_dofs() ); state.tmp.resize( mod.num_dofs() );
state.bndsrcs.resize( mod.num_fluxes() ); state.bndsrcs.resize( mod.num_fluxes() );
state.bndsrcs_buf.resize( mod.num_fluxes() ); state.bndsrcs_decomp_cpu.resize( mod.num_fluxes() );
state.bndsrcs_cpu.resize( mod.num_fluxes() );
for (auto& e : mod) for (auto& e : mod)
{ {
...@@ -92,10 +91,53 @@ void init_from_model(const model& mod, solver_state_gpu& state) ...@@ -92,10 +91,53 @@ void init_from_model(const model& mod, solver_state_gpu& state)
state.edgs.push_back( std::move(edg) ); state.edgs.push_back( std::move(edg) );
} }
auto& bds = mod.boundary_descriptors();
size_t face_num_base = 0;
for (auto& e : mod)
{
for (size_t iF = 0; iF < e.num_faces(); iF++)
{
auto& pf = e.face(iF);
auto& rf = e.face_refelem(pf);
auto num_fluxes = rf.num_basis_functions();
auto gfnum = face_num_base + iF;
auto bd = bds[gfnum];
if (bd.type == face_type::BOUNDARY or bd.type == face_type::INTERFACE)
{
for (size_t i = 0; i < num_fluxes; i++)
{
state.bndsrcs_decomp_table_cpu.push_back(gfnum*num_fluxes+i);
}
}
}
face_num_base += e.num_faces();
}
state.bndsrcs_decomp_table.copyin( state.bndsrcs_decomp_table_cpu.data(),
state.bndsrcs_decomp_table_cpu.size() );
state.bndsrcs_cpu.resize( state.bndsrcs_decomp_table_cpu.size() );
state.bndsrcs_buf.resize( state.bndsrcs_decomp_table_cpu.size() );
state.curr_time = 0.0; state.curr_time = 0.0;
state.curr_timestep = 0; state.curr_timestep = 0;
} }
void
compress_bndsrc(const solver_state_gpu& state, const field& srcs, pinned_field& csrcs)
{
for (size_t i = 0; i < csrcs.num_dofs; i++)
{
auto from_ofs = state.bndsrcs_decomp_table_cpu[i];
csrcs.Ex[i] = srcs.Ex[from_ofs];
csrcs.Ey[i] = srcs.Ey[from_ofs];
csrcs.Ez[i] = srcs.Ez[from_ofs];
csrcs.Hx[i] = srcs.Hx[from_ofs];
csrcs.Hy[i] = srcs.Hy[from_ofs];
csrcs.Hz[i] = srcs.Hz[from_ofs];
}
}
static void static void
compute_curls(solver_state_gpu& state, const field_gpu& curr, field_gpu& next) compute_curls(solver_state_gpu& state, const field_gpu& curr, field_gpu& next)
...@@ -373,14 +415,15 @@ prepare_sources(const model& mod, maxwell::solver_state_gpu& state, ...@@ -373,14 +415,15 @@ prepare_sources(const model& mod, maxwell::solver_state_gpu& state,
maxwell::parameter_loader& mpl) maxwell::parameter_loader& mpl)
{ {
if ( mpl.boundary_sources_enabled() ) if ( mpl.boundary_sources_enabled() )
maxwell::eval_boundary_sources(mod, mpl, state, state.bndsrcs_cpu); maxwell::eval_boundary_sources(mod, mpl, state, state.bndsrcs_decomp_cpu);
if ( mpl.interface_sources_enabled() ) if ( mpl.interface_sources_enabled() )
maxwell::eval_interface_sources(mod, mpl, state, state.bndsrcs_cpu); maxwell::eval_interface_sources(mod, mpl, state, state.bndsrcs_decomp_cpu);
compress_bndsrc(state, state.bndsrcs_decomp_cpu, state.bndsrcs_cpu);
state.bndsrcs_buf.copyin(state.bndsrcs_cpu, state.memcpy_stream); state.bndsrcs_buf.copyin(state.bndsrcs_cpu, state.memcpy_stream);
state.memcpy_stream.wait(); state.memcpy_stream.wait();
std::swap(state.bndsrcs, state.bndsrcs_buf); decompress_bndsrc(state, state.bndsrcs_buf, state.bndsrcs);
} }
void void
...@@ -389,6 +432,7 @@ do_sources(const model& mod, maxwell::solver_state_gpu& state, ...@@ -389,6 +432,7 @@ do_sources(const model& mod, maxwell::solver_state_gpu& state,
{ {
if ( mpl.source_has_changed_state() ) if ( mpl.source_has_changed_state() )
{ {
state.bndsrcs_decomp_cpu.zero();
state.bndsrcs_cpu.zero(); state.bndsrcs_cpu.zero();
mpl.source_was_cleared(); mpl.source_was_cleared();
} }
...@@ -398,20 +442,23 @@ do_sources(const model& mod, maxwell::solver_state_gpu& state, ...@@ -398,20 +442,23 @@ do_sources(const model& mod, maxwell::solver_state_gpu& state,
auto ve = mpl.volume_sources_enabled(); auto ve = mpl.volume_sources_enabled();
if ( be ) if ( be )
maxwell::eval_boundary_sources(mod, mpl, state, state.bndsrcs_cpu); maxwell::eval_boundary_sources(mod, mpl, state, state.bndsrcs_decomp_cpu);
if ( ie ) if ( ie )
maxwell::eval_interface_sources(mod, mpl, state, state.bndsrcs_cpu); maxwell::eval_interface_sources(mod, mpl, state, state.bndsrcs_decomp_cpu);
if ( be or ie or ve ) if ( be or ie or ve )
{
compress_bndsrc(state, state.bndsrcs_decomp_cpu, state.bndsrcs_cpu);
state.bndsrcs_buf.copyin(state.bndsrcs_cpu, state.memcpy_stream); state.bndsrcs_buf.copyin(state.bndsrcs_cpu, state.memcpy_stream);
}
} }
void void
swap(maxwell::solver_state_gpu& state) swap(maxwell::solver_state_gpu& state)
{ {
checkGPU( gpuDeviceSynchronize() ); checkGPU( gpuDeviceSynchronize() );
std::swap(state.bndsrcs, state.bndsrcs_buf); decompress_bndsrc(state, state.bndsrcs_buf, state.bndsrcs);
std::swap(state.emf_curr, state.emf_next); std::swap(state.emf_curr, state.emf_next);
} }
......
...@@ -400,4 +400,38 @@ gpu_compute_fluxes_H(const entity_data_gpu& edg, const field_gpu& jumps, ...@@ -400,4 +400,38 @@ gpu_compute_fluxes_H(const entity_data_gpu& edg, const field_gpu& jumps,
} }
} }
__global__ void
gpu_bndsrcs_decompress_kernel(const size_t *dtable, const field_gpu::const_raw_ptrs csrcs,
field_gpu::raw_ptrs srcs)
{
auto cdof = blockIdx.x * blockDim.x + threadIdx.x;
if (cdof >= csrcs.num_dofs)
return;
auto ddof = dtable[cdof];
srcs.Ex[ddof] = csrcs.Ex[cdof];
srcs.Ey[ddof] = csrcs.Ey[cdof];
srcs.Ez[ddof] = csrcs.Ez[cdof];
srcs.Hx[ddof] = csrcs.Hx[cdof];
srcs.Hy[ddof] = csrcs.Hy[cdof];
srcs.Hz[ddof] = csrcs.Hz[cdof];
}
void
decompress_bndsrc(const solver_state_gpu& state, const field_gpu& csrcs, field_gpu& srcs)
{
static const size_t DECOMPRESS_THREADS = 256;
auto num_cdofs = csrcs.num_dofs;
auto gs = num_cdofs/DECOMPRESS_THREADS;
if (num_cdofs % DECOMPRESS_THREADS != 0)
gs += 1;
dim3 grid_size(gs);
gpu_bndsrcs_decompress_kernel<<<gs, DECOMPRESS_THREADS, 0, state.compute_stream>>>(
state.bndsrcs_decomp_table.data(), csrcs.data(), srcs.data());
}
} // namespace maxwell } // namespace maxwell
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment