Skip to content
Snippets Groups Projects
Commit a4993694 authored by Matteo Cicuttin
Browse files

Fixed thread number for each differentiation degree.

parent 580dcac0
No related branches found
No related tags found
No related merge requests found
......@@ -19,6 +19,8 @@ struct kernel_gpu_sizes<1>
static const size_t dblock_bf = num_bf * cells_per_dblock;
static const size_t dblock_size = 128;
static const size_t parallel_dblocks = 4;
static const size_t deriv_threads = 128;
};
template<>
......@@ -29,6 +31,8 @@ struct kernel_gpu_sizes<2>
static const size_t dblock_bf = num_bf * cells_per_dblock;
static const size_t dblock_size = 128;
static const size_t parallel_dblocks = 1;
static const size_t deriv_threads = 128;
};
template<>
......@@ -39,6 +43,8 @@ struct kernel_gpu_sizes<3>
static const size_t dblock_bf = num_bf * cells_per_dblock;
static const size_t dblock_size = 128;
static const size_t parallel_dblocks = 1;
static const size_t deriv_threads = 128;
};
template<>
......@@ -49,6 +55,8 @@ struct kernel_gpu_sizes<4>
static const size_t dblock_bf = num_bf * cells_per_dblock;
static const size_t dblock_size = 128;
static const size_t parallel_dblocks = 1;
static const size_t deriv_threads = 512;
};
template<>
......@@ -59,6 +67,20 @@ struct kernel_gpu_sizes<5>
static const size_t dblock_bf = num_bf * cells_per_dblock;
static const size_t dblock_size = 128;
static const size_t parallel_dblocks = 1;
static const size_t deriv_threads = 1024;
};
// Compile-time GPU kernel sizing constants for order 6
// (explicit specialization of kernel_gpu_sizes; the sibling specializations
// for the other orders expose the same set of members).
template<>
struct kernel_gpu_sizes<6>
{
// Presumably the number of basis functions per element at this order
// (84 == (6+1)*(6+2)*(6+3)/6) -- TODO confirm against the reference element.
static const size_t num_bf = 84;
// Presumably the number of cells grouped into one "dblock" -- TODO confirm.
static const size_t cells_per_dblock = 12;
// Product of the two values above: 84 * 12 = 1008.
// NOTE(review): dblock_size below is 128; how dblock_bf and dblock_size are
// related is not visible in this chunk -- confirm these values are intended.
static const size_t dblock_bf = num_bf * cells_per_dblock;
static const size_t dblock_size = 128;
static const size_t parallel_dblocks = 1;
// Threads-per-block value passed as the second launch-configuration argument
// of gpu_deriv_planar<6> in gpu_compute_field_derivatives.
// NOTE(review): 1024 is the per-block thread limit on current CUDA devices,
// so this value leaves no headroom -- verify the kernel's resource usage
// allows a launch at this size.
static const size_t deriv_threads = 1024;
};
struct kernel_gpu_sizes_runtime
......
......@@ -56,7 +56,6 @@ void
gpu_compute_field_derivatives(entity_data_gpu& edg,
const double* f, double *df_dx, double* df_dy, double* df_dz)
{
const auto THREADS_PER_BLOCK = 128;
auto num_blocks = edg.num_bf*edg.num_all_elems/THREADS_PER_BLOCK;
if (edg.num_bf*edg.num_all_elems % THREADS_PER_BLOCK)
num_blocks += 1;
......@@ -71,7 +70,7 @@ gpu_compute_field_derivatives(entity_data_gpu& edg,
{
case 1:
if (edg.g_order == 1)
gpu_deriv_planar<1><<<num_blocks, THREADS_PER_BLOCK>>>(f, J,
gpu_deriv_planar<1><<<num_blocks, kernel_gpu_sizes<1>::deriv_threads>>>(f, J,
Dtex, df_dx, df_dy, df_dz, num_elems, orients);
//else
// compute_field_derivatives_kernel_curved<1>(ed, f, df_dx, df_dy, df_dz);
......@@ -79,7 +78,7 @@ gpu_compute_field_derivatives(entity_data_gpu& edg,
case 2:
if (edg.g_order == 1)
gpu_deriv_planar<2><<<num_blocks, THREADS_PER_BLOCK>>>(f, J,
gpu_deriv_planar<2><<<num_blocks, kernel_gpu_sizes<2>::deriv_threads>>>(f, J,
Dtex, df_dx, df_dy, df_dz, num_elems, orients);
//else
// compute_field_derivatives_kernel_curved<2>(ed, f, df_dx, df_dy, df_dz);
......@@ -87,7 +86,7 @@ gpu_compute_field_derivatives(entity_data_gpu& edg,
case 3:
if (edg.g_order == 1)
gpu_deriv_planar<3><<<num_blocks, THREADS_PER_BLOCK>>>(f, J,
gpu_deriv_planar<3><<<num_blocks, kernel_gpu_sizes<3>::deriv_threads>>>(f, J,
Dtex, df_dx, df_dy, df_dz, num_elems, orients);
//else
// compute_field_derivatives_kernel_curved<3>(ed, f, df_dx, df_dy, df_dz);
......@@ -95,7 +94,7 @@ gpu_compute_field_derivatives(entity_data_gpu& edg,
case 4:
if (edg.g_order == 1)
gpu_deriv_planar<4><<<num_blocks, THREADS_PER_BLOCK>>>(f, J,
gpu_deriv_planar<4><<<num_blocks, kernel_gpu_sizes<4>::deriv_threads>>>(f, J,
Dtex, df_dx, df_dy, df_dz, num_elems, orients);
//else
// compute_field_derivatives_kernel_curved<4>(ed, f, df_dx, df_dy, df_dz);
......@@ -103,19 +102,19 @@ gpu_compute_field_derivatives(entity_data_gpu& edg,
case 5:
if (edg.g_order == 1)
gpu_deriv_planar<5><<<num_blocks, THREADS_PER_BLOCK>>>(f, J,
gpu_deriv_planar<5><<<num_blocks, kernel_gpu_sizes<5>::deriv_threads>>>(f, J,
Dtex, df_dx, df_dy, df_dz, num_elems, orients);
//else
// compute_field_derivatives_kernel_curved<5>(ed, f, df_dx, df_dy, df_dz);
break;
#if 0
case 6:
if (edg.g_order == 1)
gpu_compute_field_derivatives_kernel_planar<6>(edg, f, df_dx, df_dy, df_dz);
gpu_deriv_planar<6><<<num_blocks, kernel_gpu_sizes<6>::deriv_threads>>>(f, J,
Dtex, df_dx, df_dy, df_dz, num_elems, orients);
//else
// compute_field_derivatives_kernel_curved<6>(ed, f, df_dx, df_dy, df_dz);
// compute_field_derivatives_kernel_curved<5>(ed, f, df_dx, df_dy, df_dz);
break;
#endif
default:
std::cout << "compute_field_derivatives: invalid order" << std::endl;
......
......@@ -162,7 +162,7 @@ int main(void)
std::cout << Bmagentafg << " *** TESTING: DIFFERENTIATION ***" << reset << std::endl;
for (size_t go = 1; go < 2; go++)
for (size_t ao = go; ao < 5; ao++)
for (size_t ao = go; ao < 6; ao++)
failed_tests += test_differentiation_convergence(go, ao);
return failed_tests;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment