Commit ba874e27 authored by Matteo Cicuttin

Implemented leapfrog on GPU.

parent 3c44d0ed
@@ -172,3 +172,14 @@ gpu_compute_rk4_weighted_sum(double *out, const double *in, const double *k1,
const double *k2, const double *k3, const double *k4, const double *reaction,
const double *material, size_t num_dofs, double dt,
gpuStream_t stream = 0);
void
gpu_compute_leapfrog_update(double *next, const double *curr, const double *soe,
const double *tmp, const double *ie, size_t num_dofs, double dt,
cudaStream_t stream = 0);
void
gpu_compute_leapfrog_update(double *next, const double *curr,
const double *tmp, const double *im, size_t num_dofs, double dt,
cudaStream_t stream = 0);
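The two overloads above are intended to be used as a pair. A hypothetical usage sketch (not part of the commit, names are illustrative) showing how they are meant to be called, mirroring the leapfrog() driver added further down; all pointers are device arrays of length num_dofs, soe holds sigma/epsilon, ie holds 1/epsilon and im holds 1/mu as prepared by init_matparams():

```cpp
#include <cuda_runtime.h>
#include <cstddef>

// Hypothetical driver: pair the conductive E-field overload with the
// lossless H-field overload of gpu_compute_leapfrog_update().
void leapfrog_step_sketch(double *E_next, const double *E_curr, const double *rhs_E,
                          double *H_next, const double *H_curr, const double *rhs_H,
                          const double *soe, const double *ie, const double *im,
                          std::size_t num_dofs, double dt, cudaStream_t stream)
{
    // E-field update: overload with the conductivity term (semi-implicit sigma).
    gpu_compute_leapfrog_update(E_next, E_curr, soe, rhs_E, ie, num_dofs, dt, stream);
    // H-field update: lossless overload, plain explicit step.
    gpu_compute_leapfrog_update(H_next, H_curr, rhs_H, im, num_dofs, dt, stream);
}
```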
@@ -363,12 +363,25 @@ init_H_field(const model& mod, solver_state_gpu& state, const Function& H)
state.emf_curr.Hz.copyin(Hz.data(), Hz.size());
}
#endif /* ENABLE_GPU_SOLVER */
void gpu_compute_jumps(const entity_data_gpu&, const field_gpu&, field_gpu&,
double *, gpuStream_t stream = 0);
void gpu_compute_jumps_E(const entity_data_gpu&, const field_gpu&, field_gpu&,
double *, gpuStream_t stream = 0);
void gpu_compute_jumps_H(const entity_data_gpu&, const field_gpu&, field_gpu&,
double *, gpuStream_t stream = 0);
void gpu_compute_fluxes(const entity_data_gpu&, const field_gpu&, field_gpu&,
const field_gpu&, const material_params_gpu&, gpuStream_t stream = 0);
#endif /* ENABLE_GPU_SOLVER */
void gpu_compute_fluxes_E(const entity_data_gpu&, const field_gpu&, field_gpu&,
const field_gpu&, const material_params_gpu&, gpuStream_t stream = 0);
void gpu_compute_fluxes_H(const entity_data_gpu&, const field_gpu&, field_gpu&,
const field_gpu&, const material_params_gpu&, gpuStream_t stream = 0);
} // namespace maxwell
@@ -435,3 +435,63 @@ gpu_compute_rk4_weighted_sum(double *out, const double *in, const double *k1,
gpu_rk4_weighted_sum_kernel<<<gs, RK4_THREADS, 0, stream>>>(out, in, k1, k2, k3, k4,
reaction, material, num_dofs, dt);
}
void __global__
gpu_leapfrog_update_kernel(double * __restrict__ next, const double * __restrict__ curr,
const double * __restrict__ soe, const double * __restrict__ tmp,
const double * __restrict__ ie, size_t num_dofs, double dt)
{
int32_t dof = blockIdx.x * blockDim.x + threadIdx.x;
if (dof < num_dofs)
{
double dt_sigma_over_2eps = 0.5*dt*soe[dof];
double CR = (1.0 - dt_sigma_over_2eps)/(1.0 + dt_sigma_over_2eps);
double CC = dt*ie[dof]/(1.0 + dt_sigma_over_2eps);
next[dof] = curr[dof]*CR + tmp[dof]*CC;
}
}
void
gpu_compute_leapfrog_update(double *next, const double *curr, const double *soe,
const double *tmp, const double *ie, size_t num_dofs, double dt,
cudaStream_t stream)
{
static const size_t EULER_THREADS = 256;
auto gs = num_dofs/EULER_THREADS;
if (num_dofs % EULER_THREADS != 0)
gs += 1;
gpu_leapfrog_update_kernel<<<gs, EULER_THREADS, 0, stream>>>(next, curr, soe,
tmp, ie, num_dofs, dt);
}
void __global__
gpu_leapfrog_update_kernel(double * __restrict__ next, const double * __restrict__ curr,
const double * __restrict__ tmp, const double * __restrict__ im,
size_t num_dofs, double dt)
{
int32_t dof = blockIdx.x * blockDim.x + threadIdx.x;
if (dof < num_dofs)
{
double CC = dt*im[dof];
next[dof] = curr[dof] + tmp[dof]*CC;
}
}
void
gpu_compute_leapfrog_update(double *next, const double *curr,
const double *tmp, const double *im, size_t num_dofs, double dt,
cudaStream_t stream)
{
static const size_t EULER_THREADS = 256;
auto gs = num_dofs/EULER_THREADS;
if (num_dofs % EULER_THREADS != 0)
gs += 1;
gpu_leapfrog_update_kernel<<<gs, EULER_THREADS, 0, stream>>>(next, curr,
tmp, im, num_dofs, dt);
}
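For reference, reading the two kernels above with the arrays the solver passes in (soe = sigma/epsilon and ie = 1/epsilon in the first overload, im = 1/mu in the second), the pointwise updates are:

```latex
% First kernel (E-type update, semi-implicit treatment of the conductivity):
u^{n+1} \;=\; \frac{1 - \frac{\Delta t\,\sigma}{2\varepsilon}}{1 + \frac{\Delta t\,\sigma}{2\varepsilon}}\, u^{n}
\;+\; \frac{\Delta t/\varepsilon}{1 + \frac{\Delta t\,\sigma}{2\varepsilon}}\, \mathrm{tmp}
% Second kernel (H-type update, lossless):
u^{n+1} \;=\; u^{n} \;+\; \frac{\Delta t}{\mu}\, \mathrm{tmp}
```

Here tmp is the curl-plus-lifted-flux contribution computed by the solver before calling the update.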
@@ -15,14 +15,13 @@ void
init_matparams(const model& mod, solver_state_gpu& state,
const parameter_loader& mpl)
{
state.matparams.num_dofs = mod.num_dofs();
state.matparams.num_fluxes = mod.num_fluxes();
vecxd inv_eps = vecxd::Zero( mod.num_dofs() );
vecxd inv_mu = vecxd::Zero( mod.num_dofs() );
vecxd sigma = vecxd::Zero( mod.num_dofs() );
vecxd sigma_over_eps = vecxd::Zero( mod.num_dofs() );
vecxd Z = vecxd::Zero( mod.num_dofs() );
vecxd Y = vecxd::Zero( mod.num_dofs() );
for (auto& e : mod)
{
auto tag = e.gmsh_tag();
@@ -126,6 +125,50 @@ compute_curls(solver_state_gpu& state, const field_gpu& curr, field_gpu& next)
gpu_subtract(nextp.Hz, dyp.Ex, dxp.Ey, nextp.num_dofs, state.compute_stream);
}
static void
compute_curls_E(solver_state_gpu& state, const field_gpu& curr, field_gpu& next)
{
auto currp = curr.data();
auto nextp = next.data();
auto dxp = state.dx.data();
auto dyp = state.dy.data();
auto dzp = state.dz.data();
for (const auto& edg : state.edgs)
{
gpu_compute_field_derivatives(edg, currp.Ex, dxp.Ex, dyp.Ex, dzp.Ex, 1.0, state.compute_stream);
gpu_compute_field_derivatives(edg, currp.Ey, dxp.Ey, dyp.Ey, dzp.Ey, 1.0, state.compute_stream);
gpu_compute_field_derivatives(edg, currp.Ez, dxp.Ez, dyp.Ez, dzp.Ez, 1.0, state.compute_stream);
}
gpu_subtract(nextp.Hx, dzp.Ey, dyp.Ez, nextp.num_dofs, state.compute_stream);
gpu_subtract(nextp.Hy, dxp.Ez, dzp.Ex, nextp.num_dofs, state.compute_stream);
gpu_subtract(nextp.Hz, dyp.Ex, dxp.Ey, nextp.num_dofs, state.compute_stream);
}
static void
compute_curls_H(solver_state_gpu& state, const field_gpu& curr, field_gpu& next)
{
auto currp = curr.data();
auto nextp = next.data();
auto dxp = state.dx.data();
auto dyp = state.dy.data();
auto dzp = state.dz.data();
for (const auto& edg : state.edgs)
{
gpu_compute_field_derivatives(edg, currp.Hx, dxp.Hx, dyp.Hx, dzp.Hx, 1.0, state.compute_stream);
gpu_compute_field_derivatives(edg, currp.Hy, dxp.Hy, dyp.Hy, dzp.Hy, 1.0, state.compute_stream);
gpu_compute_field_derivatives(edg, currp.Hz, dxp.Hz, dyp.Hz, dzp.Hz, 1.0, state.compute_stream);
}
gpu_subtract(nextp.Ex, dyp.Hz, dzp.Hy, nextp.num_dofs, state.compute_stream);
gpu_subtract(nextp.Ey, dzp.Hx, dxp.Hz, nextp.num_dofs, state.compute_stream);
gpu_subtract(nextp.Ez, dxp.Hy, dyp.Hx, nextp.num_dofs, state.compute_stream);
}
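A brief note on the sign conventions, assuming gpu_subtract(out, a, b, ...) computes out = a - b as in the pre-existing compute_curls: compute_curls_H stores +curl(H) in the E components of next, while compute_curls_E stores -curl(E) in the H components, which is what the leapfrog updates below expect:

```latex
% compute_curls_H: next.E = \nabla \times H
(\nabla\times H)_x = \partial_y H_z - \partial_z H_y, \quad \text{etc.}
% compute_curls_E: next.H = -\,\nabla \times E
-(\nabla\times E)_x = \partial_z E_y - \partial_y E_z, \quad \text{etc.}
```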
static void
compute_fluxes(solver_state_gpu& state, const field_gpu& in, field_gpu& out)
{
@@ -153,6 +196,87 @@ compute_fluxes(solver_state_gpu& state, const field_gpu& in, field_gpu& out)
}
}
static void
compute_fluxes_E(solver_state_gpu& state, const field_gpu& in, field_gpu& out)
{
auto matparams_ptrs = state.matparams.data();
double *bc_coeffs = matparams_ptrs.bc_coeffs;
for (const auto& edg : state.edgs)
maxwell::gpu_compute_jumps_E(edg, in, state.jumps, bc_coeffs, state.compute_stream);
for (const auto& edg : state.edgs)
maxwell::gpu_compute_fluxes_E(edg, state.jumps, state.fluxes, state.bndsrcs,
state.matparams, state.compute_stream);
auto fluxesp = state.fluxes.data();
auto outp = out.data();
for (auto& edg : state.edgs)
{
gpu_compute_flux_lifting(edg, fluxesp.Hx, outp.Hx, state.compute_stream);
gpu_compute_flux_lifting(edg, fluxesp.Hy, outp.Hy, state.compute_stream);
gpu_compute_flux_lifting(edg, fluxesp.Hz, outp.Hz, state.compute_stream);
}
}
static void
compute_fluxes_H(solver_state_gpu& state, const field_gpu& in, field_gpu& out)
{
auto matparams_ptrs = state.matparams.data();
double *bc_coeffs = matparams_ptrs.bc_coeffs;
for (const auto& edg : state.edgs)
maxwell::gpu_compute_jumps_H(edg, in, state.jumps, bc_coeffs, state.compute_stream);
for (const auto& edg : state.edgs)
maxwell::gpu_compute_fluxes_H(edg, state.jumps, state.fluxes, state.bndsrcs,
state.matparams, state.compute_stream);
auto fluxesp = state.fluxes.data();
auto outp = out.data();
for (auto& edg : state.edgs)
{
gpu_compute_flux_lifting(edg, fluxesp.Ex, outp.Ex, state.compute_stream);
gpu_compute_flux_lifting(edg, fluxesp.Ey, outp.Ey, state.compute_stream);
gpu_compute_flux_lifting(edg, fluxesp.Ez, outp.Ez, state.compute_stream);
}
}
static void
leapfrog(solver_state_gpu& state)
{
double *r = state.matparams.sigma_over_epsilon.data();
double *eps = state.matparams.inv_epsilon.data();
double *mu = state.matparams.inv_mu.data();
auto currp = state.emf_curr.data();
auto nextp = state.emf_next.data();
auto tmpp = state.tmp.data();
auto dt = state.delta_t;
compute_curls_H(state, state.emf_curr, state.tmp);
compute_fluxes_H(state, state.emf_curr, state.tmp);
gpu_compute_leapfrog_update(nextp.Ex, currp.Ex, r, tmpp.Ex, eps, nextp.num_dofs,
dt, state.compute_stream);
gpu_compute_leapfrog_update(nextp.Ey, currp.Ey, r, tmpp.Ey, eps, nextp.num_dofs,
dt, state.compute_stream);
gpu_compute_leapfrog_update(nextp.Ez, currp.Ez, r, tmpp.Ez, eps, nextp.num_dofs,
dt, state.compute_stream);
compute_curls_E(state, state.emf_next, state.tmp);
compute_fluxes_E(state, state.emf_next, state.tmp);
gpu_compute_leapfrog_update(nextp.Hx, currp.Hx, tmpp.Hx, mu, nextp.num_dofs,
dt, state.compute_stream);
gpu_compute_leapfrog_update(nextp.Hy, currp.Hy, tmpp.Hy, mu, nextp.num_dofs,
dt, state.compute_stream);
gpu_compute_leapfrog_update(nextp.Hz, currp.Hz, tmpp.Hz, mu, nextp.num_dofs,
dt, state.compute_stream);
}
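A sketch of the sequencing implemented by leapfrog(), assuming tmp accumulates the curl plus the lifted fluxes produced by the compute_curls_*/compute_fluxes_* pairs: the E field is advanced first from the current H field, and the H field is then advanced using the freshly updated E field held in emf_next:

```latex
E^{n+1} \;=\; C_R\, E^{n} \;+\; C_C\,\big(\nabla\times H^{n} + \text{lifted fluxes}\big)
H^{n+1} \;=\; H^{n} \;+\; \frac{\Delta t}{\mu}\,\big(-\nabla\times E^{n+1} + \text{lifted fluxes}\big)
```

with C_R and C_C the coefficients of the first leapfrog update kernel above.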
static void
apply_operator(solver_state_gpu& state, const field_gpu& curr, field_gpu& next)
{
@@ -231,6 +355,11 @@ timestep(solver_state_gpu& state, time_integrator_type ti)
compute_rk4_weighted_sum(state, state.emf_curr, state.delta_t, state.emf_next);
}
if (ti == time_integrator_type::LEAPFROG)
{
leapfrog(state);
}
state.curr_time += state.delta_t;
state.curr_timestep += 1;
...
@@ -86,6 +86,70 @@ gpu_compute_jumps(const entity_data_gpu& edg, const field_gpu& in, field_gpu& ju
checkGPU(cudaPeekAtLastError());
}
void
gpu_compute_jumps_E(const entity_data_gpu& edg, const field_gpu& in, field_gpu& jumps,
double *bc_coeffs, cudaStream_t stream)
{
static const size_t JUMP_THREADS = 256;
auto num_all_fluxes = edg.num_all_elems*edg.num_fluxes*edg.num_faces_per_elem;
auto gs = num_all_fluxes/JUMP_THREADS;
if (num_all_fluxes % JUMP_THREADS != 0)
gs += 1;
dim3 grid_size(gs);
dim3 threads_per_block(JUMP_THREADS);
/* Compute E-field jumps */
gpu_compute_jumps_kernel<true><<<gs, threads_per_block, 0, stream>>>(in.Ex.data(),
edg.fluxdofs_mine.data(), edg.fluxdofs_neigh.data(), jumps.Ex.data(),
bc_coeffs, edg.flux_base, num_all_fluxes);
checkGPU(cudaPeekAtLastError());
gpu_compute_jumps_kernel<true><<<gs, threads_per_block, 0, stream>>>(in.Ey.data(),
edg.fluxdofs_mine.data(), edg.fluxdofs_neigh.data(), jumps.Ey.data(),
bc_coeffs, edg.flux_base, num_all_fluxes);
checkGPU(cudaPeekAtLastError());
gpu_compute_jumps_kernel<true><<<gs, threads_per_block, 0, stream>>>(in.Ez.data(),
edg.fluxdofs_mine.data(), edg.fluxdofs_neigh.data(), jumps.Ez.data(),
bc_coeffs, edg.flux_base, num_all_fluxes);
checkGPU(cudaPeekAtLastError());
}
void
gpu_compute_jumps_H(const entity_data_gpu& edg, const field_gpu& in, field_gpu& jumps,
double *bc_coeffs, cudaStream_t stream)
{
static const size_t JUMP_THREADS = 256;
auto num_all_fluxes = edg.num_all_elems*edg.num_fluxes*edg.num_faces_per_elem;
auto gs = num_all_fluxes/JUMP_THREADS;
if (num_all_fluxes % JUMP_THREADS != 0)
gs += 1;
dim3 grid_size(gs);
dim3 threads_per_block(JUMP_THREADS);
/* Compute H-field jumps */
gpu_compute_jumps_kernel<false><<<gs, threads_per_block, 0, stream>>>(in.Hx.data(),
edg.fluxdofs_mine.data(), edg.fluxdofs_neigh.data(), jumps.Hx.data(),
bc_coeffs, edg.flux_base, num_all_fluxes);
checkGPU(cudaPeekAtLastError());
gpu_compute_jumps_kernel<false><<<gs, threads_per_block, 0, stream>>>(in.Hy.data(),
edg.fluxdofs_mine.data(), edg.fluxdofs_neigh.data(), jumps.Hy.data(),
bc_coeffs, edg.flux_base, num_all_fluxes);
checkGPU(cudaPeekAtLastError());
gpu_compute_jumps_kernel<false><<<gs, threads_per_block, 0, stream>>>(in.Hz.data(),
edg.fluxdofs_mine.data(), edg.fluxdofs_neigh.data(), jumps.Hz.data(),
bc_coeffs, edg.flux_base, num_all_fluxes);
checkGPU(cudaPeekAtLastError());
}
template<size_t K>
__global__ void
gpu_compute_fluxes_kernel_planar(field_gpu::const_raw_ptrs jump, field_gpu::raw_ptrs flux,
@@ -131,8 +195,84 @@ gpu_compute_fluxes_kernel_planar(field_gpu::const_raw_ptrs jump, field_gpu::raw_
}
template<size_t K>
__global__ void
gpu_compute_fluxes_kernel_planar_E(field_gpu::const_raw_ptrs jump, field_gpu::raw_ptrs flux,
field_gpu::const_raw_ptrs bndsrc, material_params_gpu::const_raw_ptrs fcp,
const double * __restrict__ face_dets, const double * __restrict__ face_normals,
size_t flux_base, size_t num_fluxes)
{
using KS = kernel_gpu_sizes<K>;
auto local_dof_pos = blockIdx.x * blockDim.x + threadIdx.x;
auto global_dof_pos = flux_base + local_dof_pos;
auto entry_num = local_dof_pos/KS::num_fluxes;
if (local_dof_pos >= num_fluxes)
return;
auto face_det = face_dets[entry_num];
auto nx = face_normals[3*entry_num + 0];
auto ny = face_normals[3*entry_num + 1];
auto nz = face_normals[3*entry_num + 2];
auto jEx = jump.Ex[global_dof_pos] - bndsrc.Ex[global_dof_pos];
auto jEy = jump.Ey[global_dof_pos] - bndsrc.Ey[global_dof_pos];
auto jEz = jump.Ez[global_dof_pos] - bndsrc.Ez[global_dof_pos];
auto jHx = jump.Hx[global_dof_pos] + bndsrc.Hx[global_dof_pos];
auto jHy = jump.Hy[global_dof_pos] + bndsrc.Hy[global_dof_pos];
auto jHz = jump.Hz[global_dof_pos] + bndsrc.Hz[global_dof_pos];
auto ndotE = nx*jEx + ny*jEy + nz*jEz;
auto aE = face_det * fcp.aE[global_dof_pos];
auto bE = face_det * fcp.bE[global_dof_pos];
flux.Ex[global_dof_pos] = aE*(nz*jHy - ny*jHz) + bE*(ndotE*nx - jEx);
flux.Ey[global_dof_pos] = aE*(nx*jHz - nz*jHx) + bE*(ndotE*ny - jEy);
flux.Ez[global_dof_pos] = aE*(ny*jHx - nx*jHy) + bE*(ndotE*nz - jEz);
}
template<size_t K>
__global__ void
gpu_compute_fluxes_kernel_planar_H(field_gpu::const_raw_ptrs jump, field_gpu::raw_ptrs flux,
field_gpu::const_raw_ptrs bndsrc, material_params_gpu::const_raw_ptrs fcp,
const double * __restrict__ face_dets, const double * __restrict__ face_normals,
size_t flux_base, size_t num_fluxes)
{
using KS = kernel_gpu_sizes<K>;
auto local_dof_pos = blockIdx.x * blockDim.x + threadIdx.x;
auto global_dof_pos = flux_base + local_dof_pos;
auto entry_num = local_dof_pos/KS::num_fluxes;
if (local_dof_pos >= num_fluxes)
return;
auto face_det = face_dets[entry_num];
auto nx = face_normals[3*entry_num + 0];
auto ny = face_normals[3*entry_num + 1];
auto nz = face_normals[3*entry_num + 2];
auto jEx = jump.Ex[global_dof_pos] - bndsrc.Ex[global_dof_pos];
auto jEy = jump.Ey[global_dof_pos] - bndsrc.Ey[global_dof_pos];
auto jEz = jump.Ez[global_dof_pos] - bndsrc.Ez[global_dof_pos];
auto jHx = jump.Hx[global_dof_pos] + bndsrc.Hx[global_dof_pos];
auto jHy = jump.Hy[global_dof_pos] + bndsrc.Hy[global_dof_pos];
auto jHz = jump.Hz[global_dof_pos] + bndsrc.Hz[global_dof_pos];
auto ndotH = nx*jHx + ny*jHy + nz*jHz;
auto aH = face_det * fcp.aH[global_dof_pos];
auto bH = face_det * fcp.bH[global_dof_pos];
flux.Hx[global_dof_pos] = aH*(ny*jEz - nz*jEy) + bH*(ndotH*nx - jHx);
flux.Hy[global_dof_pos] = aH*(nz*jEx - nx*jEz) + bH*(ndotH*ny - jHy);
flux.Hz[global_dof_pos] = aH*(nx*jEy - ny*jEx) + bH*(ndotH*nz - jHz);
}
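Reading the two kernels, with [.] denoting the face jumps adjusted by the boundary sources (jE = jump - bndsrc, jH = jump + bndsrc), n the face normal and a/b the face-determinant-scaled coefficients taken from fcp, the computed flux contributions are:

```latex
F_E \;=\; a_E\,\big([H]\times n\big) \;+\; b_E\,\big((n\cdot[E])\,n - [E]\big)
F_H \;=\; a_H\,\big(n\times[E]\big) \;+\; b_H\,\big((n\cdot[H])\,n - [H]\big)
```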
template<size_t K, typename Kernel>
void
launch_compute_fluxes_kernel(Kernel& kernel, const entity_data_gpu& edg, const field_gpu& jumps,
field_gpu& fluxes, const field_gpu& bndsrc, const material_params_gpu& fcp,
cudaStream_t stream)
{
@@ -150,7 +290,7 @@ launch_compute_fluxes_kernel(const entity_data_gpu& edg, const field_gpu& jumps,
auto pp = fcp.data();
auto bs = bndsrc.data();
kernel<<<gs, FLUX_THREADS, 0, stream>>>(jp, fp, bs, pp,
edg.face_determinants.data(), edg.normals.data(), edg.flux_base,
num_all_fluxes);
}
@@ -162,12 +302,100 @@ gpu_compute_fluxes(const entity_data_gpu& edg, const field_gpu& jumps,
{
switch( edg.a_order )
{
case 1: launch_compute_fluxes_kernel<1>(gpu_compute_fluxes_kernel_planar<1>,
edg, jumps, fluxes, bndsrc, fcp, stream);
break;
case 2: launch_compute_fluxes_kernel<2>(gpu_compute_fluxes_kernel_planar<2>,
edg, jumps, fluxes, bndsrc, fcp, stream);
break;
case 3: launch_compute_fluxes_kernel<3>(gpu_compute_fluxes_kernel_planar<3>,
edg, jumps, fluxes, bndsrc, fcp, stream);
break;
case 4: launch_compute_fluxes_kernel<4>(gpu_compute_fluxes_kernel_planar<4>,
edg, jumps, fluxes, bndsrc, fcp, stream);
break;
case 5: launch_compute_fluxes_kernel<5>(gpu_compute_fluxes_kernel_planar<5>,
edg, jumps, fluxes, bndsrc, fcp, stream);
break;
case 6: launch_compute_fluxes_kernel<6>(gpu_compute_fluxes_kernel_planar<6>,
edg, jumps, fluxes, bndsrc, fcp, stream);
break;
default: throw std::invalid_argument("unsupported order");
}
}
void
gpu_compute_fluxes_E(const entity_data_gpu& edg, const field_gpu& jumps,
field_gpu& fluxes, const field_gpu& bndsrc, const material_params_gpu& fcp,
cudaStream_t stream)
{
switch( edg.a_order )
{
case 1: launch_compute_fluxes_kernel<1>(gpu_compute_fluxes_kernel_planar_E<1>,
edg, jumps, fluxes, bndsrc, fcp, stream);
break;
case 2: launch_compute_fluxes_kernel<2>(gpu_compute_fluxes_kernel_planar_E<2>,
edg, jumps, fluxes, bndsrc, fcp, stream);
break;
case 3: launch_compute_fluxes_kernel<3>(gpu_compute_fluxes_kernel_planar_E<3>,
edg, jumps, fluxes, bndsrc, fcp, stream);
break;
case 4: launch_compute_fluxes_kernel<4>(gpu_compute_fluxes_kernel_planar_E<4>,
edg, jumps, fluxes, bndsrc, fcp, stream);
break;
case 5: launch_compute_fluxes_kernel<5>(gpu_compute_fluxes_kernel_planar_E<5>,
edg, jumps, fluxes, bndsrc, fcp, stream);
break;
case 6: launch_compute_fluxes_kernel<6>(gpu_compute_fluxes_kernel_planar_E<6>,
edg, jumps, fluxes, bndsrc, fcp, stream);
break;
default: throw std::invalid_argument("unsupported order");
}
}
void
gpu_compute_fluxes_H(const entity_data_gpu& edg, const field_gpu& jumps,
field_gpu& fluxes, const field_gpu& bndsrc, const material_params_gpu& fcp,
cudaStream_t stream)
{
switch( edg.a_order )
{
case 1: launch_compute_fluxes_kernel<1>(gpu_compute_fluxes_kernel_planar_H<1>,
edg, jumps, fluxes, bndsrc, fcp, stream);
break;
case 2: launch_compute_fluxes_kernel<2>(gpu_compute_fluxes_kernel_planar_H<2>,
edg, jumps, fluxes, bndsrc, fcp, stream);
break;
case 3: launch_compute_fluxes_kernel<3>(gpu_compute_fluxes_kernel_planar_H<3>,
edg, jumps, fluxes, bndsrc, fcp, stream);
break;
case 4: launch_compute_fluxes_kernel<4>(gpu_compute_fluxes_kernel_planar_H<4>,
edg, jumps, fluxes, bndsrc, fcp, stream);
break;
case 5: launch_compute_fluxes_kernel<5>(gpu_compute_fluxes_kernel_planar_H<5>,
edg, jumps, fluxes, bndsrc, fcp, stream);
break;
case 6: launch_compute_fluxes_kernel<6>(gpu_compute_fluxes_kernel_planar_H<6>,
edg, jumps, fluxes, bndsrc, fcp, stream);
break;
default: throw std::invalid_argument("unsupported order");
}
}
...