diff --git a/src/libgmshdg/kernels_cuda.cu b/src/libgmshdg/kernels_cuda.cu index 58810b754c0eb422b9faf54e9feadf84c1f45267..ac9b391ced4ba938772f9f9213583150cfaefc86 100644 --- a/src/libgmshdg/kernels_cuda.cu +++ b/src/libgmshdg/kernels_cuda.cu @@ -138,19 +138,6 @@ launch_deriv_kernel(const entity_data_gpu& edg, using KS = kernel_gpu_sizes<K>; -#ifdef USE_BLOCKED_GPU_KERNELS - size_t num_blocks = edg.num_all_elems / (KS::cells_per_dblock * KS::parallel_dblocks); - if (num_blocks % (KS::cells_per_dblock * KS::parallel_dblocks)) - num_blocks += 1; - - dim3 grid_size(1, num_blocks); - dim3 block_size(KS::dblock_size, KS::parallel_dblocks); - if (edg.g_order == 1) - gpu_deriv_planar_blocked<K><<<grid_size, block_size, stream>>>(f, J, - Dtex, df_dx, df_dy, df_dz, num_elems, orients, edg.dof_base); - //else - // compute_field_derivatives_kernel_curved<1>(ed, f, df_dx, df_dy, df_dz); -#else auto num_blocks = edg.num_bf*edg.num_all_elems/KS::deriv_threads; if (edg.num_bf*edg.num_all_elems % KS::deriv_threads) num_blocks += 1; @@ -160,7 +147,6 @@ launch_deriv_kernel(const entity_data_gpu& edg, Dtex, df_dx, df_dy, df_dz, alpha, num_elems, orients, edg.dof_base); //else // compute_field_derivatives_kernel_curved<1>(ed, f, df_dx, df_dy, df_dz); -#endif } void diff --git a/tests/test_differentiation_gpu.cpp b/tests/test_differentiation_gpu.cpp index 3e65e5d731c2b4686290588f1a80b71a2530f935..e6cbe5371aaf7b2cbe8a5a09497a2ac3a3c96a05 100644 --- a/tests/test_differentiation_gpu.cpp +++ b/tests/test_differentiation_gpu.cpp @@ -53,7 +53,7 @@ int test_differentiation_convergence(int geometric_order, int approximation_orde std::vector<double> sizes({ 0.32, 0.16, 0.08, 0.04 }); std::vector<double> errors; - std::cout << cyanfg << "Testing geometric order " << geometric_order; + std::cout << std::endl << cyanfg << "Testing geometric order " << geometric_order; std::cout << ", approximation order = " << approximation_order << nofg; std::cout << std::endl; @@ -101,11 +101,6 @@ int test_differentiation_convergence(int geometric_order, int approximation_orde vecxd Pdf_dz = vecxd::Zero(model_num_dofs); std::vector<entity_data_gpu> edgs; - -#ifdef USE_BLOCKED_GPU_KERNELS - std::vector<entity_data_cpu> eds; - size_t model_num_dofs_gpu = 0; -#else size_t model_num_dofs_gpu = model_num_dofs; #endif for (auto& e : mod) @@ -117,24 +112,13 @@ int test_differentiation_convergence(int geometric_order, int approximation_orde entity_data_cpu ed; e.populate_entity_data(ed, mod); entity_data_gpu edg(ed); -#ifdef USE_BLOCKED_GPU_KERNELS - edg.dof_base = model_num_dofs_gpu; - model_num_dofs_gpu += gpu_dblocks_dofs(ed); - eds.push_back( std::move(ed) ); -#endif edgs.push_back( std::move(edg) ); } /* Prepare I/O vectors and call kernel */ -#ifdef USE_BLOCKED_GPU_KERNELS - assert(eds.size() == edgs.size()); - vecxd Pf_reshaped = vecxd::Zero(model_num_dofs_gpu); - for (size_t i = 0; i < eds.size(); i++) - reshape_dofs(eds[i], edgs[i], Pf, Pf_reshaped, true); - texture_allocator<double> Pf_gpu(Pf_reshaped.data(), Pf_reshaped.size()); -#else + device_vector<double> Pf_gpu(Pf.data(), Pf.size()); -#endif + device_vector<double> df_dx_gpu(model_num_dofs_gpu); device_vector<double> df_dy_gpu(model_num_dofs_gpu); device_vector<double> df_dz_gpu(model_num_dofs_gpu); @@ -161,30 +145,6 @@ int test_differentiation_convergence(int geometric_order, int approximation_orde std::cout << flops/(1e9*time) << " GFlops/s" << std::endl; } } - -#ifdef USE_BLOCKED_GPU_KERNELS - vecxd Cdf_dx_exp = vecxd::Zero(model_num_dofs_gpu); - vecxd Cdf_dy_exp = vecxd::Zero(model_num_dofs_gpu); - vecxd Cdf_dz_exp = vecxd::Zero(model_num_dofs_gpu); - - df_dx_gpu.copyout( Cdf_dx_exp.data() ); - df_dy_gpu.copyout( Cdf_dy_exp.data() ); - df_dz_gpu.copyout( Cdf_dz_exp.data() ); - - vecxd Cdf_dx = vecxd::Zero(model_num_dofs); - vecxd Cdf_dy = vecxd::Zero(model_num_dofs); - vecxd Cdf_dz = vecxd::Zero(model_num_dofs); - - assert(eds.size() == edgs.size()); - Pf = vecxd::Zero(model_num_dofs); - for (size_t i = 0; i < eds.size(); i++) - { - reshape_dofs(eds[i], edgs[i], Cdf_dx_exp, Cdf_dx, false); - reshape_dofs(eds[i], edgs[i], Cdf_dy_exp, Cdf_dy, false); - reshape_dofs(eds[i], edgs[i], Cdf_dz_exp, Cdf_dz, false); - reshape_dofs(eds[i], edgs[i], Pf_reshaped, Pf, false); - } -#else vecxd Cdf_dx = vecxd::Zero(model_num_dofs_gpu); vecxd Cdf_dy = vecxd::Zero(model_num_dofs_gpu); vecxd Cdf_dz = vecxd::Zero(model_num_dofs_gpu); @@ -192,7 +152,6 @@ int test_differentiation_convergence(int geometric_order, int approximation_orde df_dx_gpu.copyout( Cdf_dx.data() ); df_dy_gpu.copyout( Cdf_dy.data() ); df_dz_gpu.copyout( Cdf_dz.data() ); -#endif double err_x = 0.0; double err_y = 0.0; @@ -247,6 +206,8 @@ int test_differentiation_convergence(int geometric_order, int approximation_orde errs_x.push_back( std::sqrt(err_x) ); errs_y.push_back( std::sqrt(err_y) ); errs_z.push_back( std::sqrt(err_z) ); + + std::cout << std::endl; } double rate_x = 0.0;