diff --git a/src/kernels_cuda.cu b/src/kernels_cuda.cu index d4d0939237085ac8f48accbd18f20c33d55191ac..b3e83015e787e713876bcdbe58b93a690d4e134a 100644 --- a/src/kernels_cuda.cu +++ b/src/kernels_cuda.cu @@ -52,10 +52,12 @@ gpu_deriv_planar(const double *F, const double *J, gpuTextureObject_t DM_tex, dF_dz[cur_dof_offset] = accm_dF_dz; } +template<size_t K> void -gpu_compute_field_derivatives(entity_data_gpu& edg, +launch_deriv_kernel(entity_data_gpu& edg, const double* f, double *df_dx, double* df_dy, double* df_dz) { + const auto THREADS_PER_BLOCK = kernel_gpu_sizes<K>::deriv_threads; auto num_blocks = edg.num_bf*edg.num_all_elems/THREADS_PER_BLOCK; if (edg.num_bf*edg.num_all_elems % THREADS_PER_BLOCK) num_blocks += 1; @@ -66,54 +68,43 @@ gpu_compute_field_derivatives(entity_data_gpu& edg, auto orients = edg.element_orientation.data(); auto num_orients = edg.element_orientation.size(); + if (edg.g_order == 1) + gpu_deriv_planar<K><<<num_blocks, THREADS_PER_BLOCK>>>(f, J, + Dtex, df_dx, df_dy, df_dz, num_elems, orients); + //else + // compute_field_derivatives_kernel_curved<1>(ed, f, df_dx, df_dy, df_dz); +} + +void +gpu_compute_field_derivatives(entity_data_gpu& edg, + const double* f, double *df_dx, double* df_dy, double* df_dz) +{ + + switch (edg.a_order) { case 1: - if (edg.g_order == 1) - gpu_deriv_planar<1><<<num_blocks, kernel_gpu_sizes<1>::deriv_threads>>>(f, J, - Dtex, df_dx, df_dy, df_dz, num_elems, orients); - //else - // compute_field_derivatives_kernel_curved<1>(ed, f, df_dx, df_dy, df_dz); + launch_deriv_kernel<1>(edg, f, df_dx, df_dy, df_dz); break; case 2: - if (edg.g_order == 1) - gpu_deriv_planar<2><<<num_blocks, kernel_gpu_sizes<2>::deriv_threads>>>(f, J, - Dtex, df_dx, df_dy, df_dz, num_elems, orients); - //else - // compute_field_derivatives_kernel_curved<2>(ed, f, df_dx, df_dy, df_dz); + launch_deriv_kernel<2>(edg, f, df_dx, df_dy, df_dz); break; case 3: - if (edg.g_order == 1) - gpu_deriv_planar<3><<<num_blocks, kernel_gpu_sizes<3>::deriv_threads>>>(f, J, - Dtex, df_dx, df_dy, df_dz, num_elems, orients); - //else - // compute_field_derivatives_kernel_curved<3>(ed, f, df_dx, df_dy, df_dz); + launch_deriv_kernel<3>(edg, f, df_dx, df_dy, df_dz); break; case 4: - if (edg.g_order == 1) - gpu_deriv_planar<4><<<num_blocks, kernel_gpu_sizes<4>::deriv_threads>>>(f, J, - Dtex, df_dx, df_dy, df_dz, num_elems, orients); - //else - // compute_field_derivatives_kernel_curved<4>(ed, f, df_dx, df_dy, df_dz); + launch_deriv_kernel<4>(edg, f, df_dx, df_dy, df_dz); break; case 5: - if (edg.g_order == 1) - gpu_deriv_planar<5><<<num_blocks, kernel_gpu_sizes<5>::deriv_threads>>>(f, J, - Dtex, df_dx, df_dy, df_dz, num_elems, orients); - //else - // compute_field_derivatives_kernel_curved<5>(ed, f, df_dx, df_dy, df_dz); + launch_deriv_kernel<5>(edg, f, df_dx, df_dy, df_dz); break; case 6: - if (edg.g_order == 1) - gpu_deriv_planar<6><<<num_blocks, kernel_gpu_sizes<6>::deriv_threads>>>(f, J, - Dtex, df_dx, df_dy, df_dz, num_elems, orients); - //else - // compute_field_derivatives_kernel_curved<5>(ed, f, df_dx, df_dy, df_dz); + launch_deriv_kernel<6>(edg, f, df_dx, df_dy, df_dz); break; default: