// Patch summary (annotation only; the diff text that follows is unchanged).
//
// include/kernels_gpu.h
//   Adds an explicit specialization kernel_gpu_sizes<5> right after the
//   existing kernel_gpu_sizes<4>, with:
//     num_bf           = 56
//     cells_per_dblock = 12
//     dblock_bf        = num_bf * cells_per_dblock  (56 * 12 = 672)
//     dblock_size      = 128
//     parallel_dblocks = 1
//   NOTE(review): what these fields mean, and how dblock_bf is expected to
//   relate to dblock_size, is not visible in this hunk -- confirm against the
//   lower-order specializations before relying on these values.
//
// src/kernels_cuda.cu (hunk located in gpu_compute_field_derivatives)
//   - Removes the `#if 0` that preceded `case 5:` and adds one in front of
//     `case 6:`; presumably this enables case 5 while leaving case 6 disabled
//     (the matching #endif is outside the hunk -- TODO confirm).
//   - When edg.g_order == 1, replaces the call
//       gpu_compute_field_derivatives_kernel_planar<5>(edg, f, df_dx, df_dy, df_dz)
//     with the kernel launch
//       gpu_deriv_planar<5><<<num_blocks, THREADS_PER_BLOCK>>>(f, J, Dtex,
//           df_dx, df_dy, df_dz, num_elems, orients)
//     num_blocks, J, Dtex, num_elems and orients are defined outside the hunk.
//   - The commented-out `else` branch (curved kernel) is left as it was, so
//     nothing is launched for case 5 when edg.g_order != 1.
//
// NOTE(review): as stored here the whole diff sits on a single line. Its hunk
// headers declare 6 -> 16 and 14 -> 15 lines, so the original line breaks
// would have to be restored before this text could be applied as a patch.
diff --git a/include/kernels_gpu.h b/include/kernels_gpu.h index b7f95c14a515ae5234a0464ea251a71175c9ab71..f3a4c59a2475695cb287e62ed792f59f0b73c08a 100644 --- a/include/kernels_gpu.h +++ b/include/kernels_gpu.h @@ -51,6 +51,16 @@ struct kernel_gpu_sizes<4> static const size_t parallel_dblocks = 1; }; +template<> +struct kernel_gpu_sizes<5> +{ + static const size_t num_bf = 56; + static const size_t cells_per_dblock = 12; + static const size_t dblock_bf = num_bf * cells_per_dblock; + static const size_t dblock_size = 128; + static const size_t parallel_dblocks = 1; +}; + struct kernel_gpu_sizes_runtime { size_t num_bf; diff --git a/src/kernels_cuda.cu b/src/kernels_cuda.cu index b37d3e8dd8885ac4db1e08ed72105a50efe635f3..80fb6880b784a08ca58bb09e43d27556e995e849 100644 --- a/src/kernels_cuda.cu +++ b/src/kernels_cuda.cu @@ -100,14 +100,15 @@ gpu_compute_field_derivatives(entity_data_gpu& edg, //else // compute_field_derivatives_kernel_curved<4>(ed, f, df_dx, df_dy, df_dz); break; -#if 0 + case 5: if (edg.g_order == 1) - gpu_compute_field_derivatives_kernel_planar<5>(edg, f, df_dx, df_dy, df_dz); + gpu_deriv_planar<5><<<num_blocks, THREADS_PER_BLOCK>>>(f, J, + Dtex, df_dx, df_dy, df_dz, num_elems, orients); //else // compute_field_derivatives_kernel_curved<5>(ed, f, df_dx, df_dy, df_dz); break; - +#if 0 case 6: if (edg.g_order == 1) gpu_compute_field_derivatives_kernel_planar<6>(edg, f, df_dx, df_dy, df_dz);