diff --git a/CMakeLists.txt b/CMakeLists.txt index 727b412268ccc4275bd94617ef8f6b232d219d68..a4e22120f4275435d52c5c5fe3a3cba5cff03041 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -66,7 +66,6 @@ link_directories(${GMSH_LIB}) ###################################################################### ## Use or not use GPU solvers - option(OPT_ENABLE_GPU_SOLVER "Enable GPU solvers" OFF) if (OPT_ENABLE_GPU_SOLVER) add_definitions(-DENABLE_GPU_SOLVER) @@ -117,9 +116,14 @@ if (OPT_ENABLE_GPU_SOLVER) add_definitions(-D__HIP_PLATFORM_HCC__) endif() endif() - endif() +if (OPT_ENABLE_GPU_SOLVER) + option(OPT_DEBUG_GPU "Enable GPU debugging" OFF) + if (OPT_DEBUG_GPU) + add_definitions(-DGPU_DEBUG) + endif() +endif() ###################################################################### ## Compiler identification stuff @@ -235,12 +239,15 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include) set(COMMON_SOURCES gmsh_io.cpp reference_element.cpp physical_element.cpp entity.cpp kernels_cpu.cpp connectivity.cpp entity_data.cpp) set(LIBGMSHDG_SOURCES src/gmsh_io.cpp + src/entity.cpp src/reference_element.cpp src/physical_element.cpp - src/entity.cpp - src/kernels_cpu.cpp src/connectivity.cpp - src/entity_data.cpp) + src/entity_data.cpp + src/kernels_cpu.cpp + src/kernels_gpu_mi.cpp + src/kernels_cuda.cu + ) add_library(gmshdg SHARED ${LIBGMSHDG_SOURCES}) target_link_libraries(gmshdg ${LINK_LIBS}) diff --git a/include/eigen.h b/include/eigen.h index 8475f3f459d0463d5bf592141b04d9ea67bccc1b..16d1dd5926953853b5663c473b065649b091b13f 100644 --- a/include/eigen.h +++ b/include/eigen.h @@ -13,4 +13,5 @@ template<typename T> using MatxCM = Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor>; template<typename T> -using MatxRM = Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>; \ No newline at end of file +using MatxRM = Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>; + diff --git a/include/entity_data.h b/include/entity_data.h index 01c968e463b7a383966f7d569c78361ef9c51384..43681cdb18c242b9abe193f36a83ca5321d774cf 100644 --- a/include/entity_data.h +++ b/include/entity_data.h @@ -1,5 +1,7 @@ #pragma once +#include <numeric> + #include "eigen.h" #include "types.h" @@ -57,13 +59,20 @@ struct entity_data_cpu vecxd face_determinants; }; +inline size_t +num_elems_all_orientations(const entity_data_cpu& ed) +{ + assert(ed.num_elems.size() == ed.num_orientations); + return std::accumulate(ed.num_elems.begin(), ed.num_elems.end(), 0); +} + #ifdef ENABLE_GPU_SOLVER struct entity_data_gpu { int g_order; int a_order; - size_t* num_elems; - size_t num_orientations; + size_t num_all_elems; + size_t * element_orientation; size_t num_bf; size_t dof_base; @@ -79,13 +88,12 @@ struct entity_data_gpu ~entity_data_gpu(); }; - template<typename T> void gpu_copyin(const std::vector<T> src, T** dst) { checkGPU( gpuMalloc( (void**)dst, src.size()*sizeof(T) ) ); - checkGPU( gpuMemcpy( dst, src.data(), src.size()*sizeof(T), gpuMemcpyHostToDevice) ); + checkGPU( gpuMemcpy( *dst, src.data(), src.size()*sizeof(T), gpuMemcpyHostToDevice) ); } template<typename T> @@ -93,7 +101,7 @@ void gpu_copyin(const MatxCM<T>& src, T** dst) { checkGPU( gpuMalloc( (void**)dst, src.size()*sizeof(T) ) ); - checkGPU( gpuMemcpy( dst, src.data(), src.size()*sizeof(T), gpuMemcpyHostToDevice) ); + checkGPU( gpuMemcpy( *dst, src.data(), src.size()*sizeof(T), gpuMemcpyHostToDevice) ); } template<typename T> @@ -101,7 +109,7 @@ void gpu_copyin(const MatxRM<T>& src, T** dst) { checkGPU( gpuMalloc( (void**)dst, src.size()*sizeof(T) ) ); - checkGPU( gpuMemcpy( dst, src.data(), src.size()*sizeof(T), gpuMemcpyHostToDevice) ); + checkGPU( gpuMemcpy( *dst, src.data(), src.size()*sizeof(T), gpuMemcpyHostToDevice) ); } template<typename T> @@ -109,7 +117,9 @@ void gpu_copyin(const Eigen::Matrix<T, Eigen::Dynamic, 1>& src, double** dst) { checkGPU( gpuMalloc( (void**)dst, src.size()*sizeof(T) ) ); - checkGPU( gpuMemcpy( dst, src.data(), src.size()*sizeof(T), gpuMemcpyHostToDevice) ); + checkGPU( gpuMemcpy( *dst, src.data(), src.size()*sizeof(T), gpuMemcpyHostToDevice) ); } -#endif /* ENABLE_GPU_SOLVER */ \ No newline at end of file +#endif /* ENABLE_GPU_SOLVER */ + + diff --git a/include/gpu_support.hpp b/include/gpu_support.hpp index 26963c7878c5a9d60fcbcc2ad7b28900e2d717a9..758c2e28992759c04ecac92e22a394e4a9265176 100644 --- a/include/gpu_support.hpp +++ b/include/gpu_support.hpp @@ -168,10 +168,10 @@ public: device_vector(T *src, size_t size) : d_vptr(nullptr), vsize(0) { - init(src, size); + copyin(src, size); } - void init(T *src, size_t size) + void copyin(T *src, size_t size) { if (d_vptr != nullptr) (void) gpuFree(d_vptr); @@ -181,6 +181,14 @@ public: vsize = size; } + void copyout(T *dst) + { + if (d_vptr == nullptr) + return; + + (void)checkGPU( gpuMemcpy(dst, d_vptr, vsize*sizeof(T), gpuMemcpyDeviceToHost) ); + } + void resize(size_t size) { if (d_vptr != nullptr) @@ -353,7 +361,7 @@ public: float elapsed; gpuEventSynchronize(stop); gpuEventElapsedTime(&elapsed, start, stop); - return elapsed/1000.0; + return double(elapsed)/1000.0; } }; diff --git a/include/kernels_gpu.h b/include/kernels_gpu.h index 1d6b79779534348a7eb68c958162eff36cbd25c2..1c45af64c9d13acad4d6a03797ca48fa8f900418 100644 --- a/include/kernels_gpu.h +++ b/include/kernels_gpu.h @@ -2,9 +2,12 @@ #include "entity_data.h" -template<size_t K> -struct kernel_gpu_sizes; -/* Compile-time constants configuring sizes for the kernels */ +/* Compile-time constants configuring sizes for the kernels. + * Elements are organized in blocks of cells_per_dblock elements each. + * Each dblock contains dblock_size dofs, where dblock_size must be a + * multiple of the warp size. This means that some dofs in the dblock + * will be padding. The effective dofs used are dblock_bf.*/ + template<size_t K> struct kernel_gpu_sizes; @@ -13,7 +16,7 @@ struct kernel_gpu_sizes<1> { static const size_t num_bf = 4; static const size_t cells_per_dblock = 32; - static const size_t dblock_bf = num_bf * vols_per_ublock; + static const size_t dblock_bf = num_bf * cells_per_dblock; static const size_t dblock_size = 128; static const size_t parallel_dblocks = 4; }; @@ -23,7 +26,17 @@ struct kernel_gpu_sizes<2> { static const size_t num_bf = 10; static const size_t cells_per_dblock = 12; - static const size_t dblock_bf = num_bf * vols_per_ublock; + static const size_t dblock_bf = num_bf * cells_per_dblock; + static const size_t dblock_size = 128; + static const size_t parallel_dblocks = 1; +}; + +template<> +struct kernel_gpu_sizes<3> +{ + static const size_t num_bf = 20; + static const size_t cells_per_dblock = 12; + static const size_t dblock_bf = num_bf * cells_per_dblock; static const size_t dblock_size = 128; static const size_t parallel_dblocks = 1; }; @@ -38,8 +51,7 @@ struct kernel_gpu_sizes_runtime }; template<size_t N> -kernel_gpu_sizes_runtime -get_kernel_gpu_sizes() +kernel_gpu_sizes_runtime get_kernel_gpu_sizes() { kernel_gpu_sizes_runtime ret; @@ -47,13 +59,12 @@ get_kernel_gpu_sizes() ret.cells_per_dblock = kernel_gpu_sizes<N>::cells_per_dblock; ret.dblock_bf = kernel_gpu_sizes<N>::dblock_bf; ret.dblock_size = kernel_gpu_sizes<N>::dblock_size; - ret.parallel_dblocks = kernel_gpu_sizes<N>::slice_size; + ret.parallel_dblocks = kernel_gpu_sizes<N>::parallel_dblocks; return ret; }; -inline -kernel_gpu_sizes_runtime +inline kernel_gpu_sizes_runtime get_kernel_gpu_sizes(int approx_order) { switch (approx_order) @@ -65,49 +76,15 @@ get_kernel_gpu_sizes(int approx_order) throw std::invalid_argument("Can't retrieve info for this element"); } -/* Return the number of dofs that this entity will take on GPU, - * including all the padding dofs */ -size_t gpu_dofs(const entity_data_cpu& ed) -{ - auto ks = get_kernel_gpu_sizes(ed.a_order); - - size_t num_dblocks = tot_elems / ks.cells_per_dblock; - if (num_dblocks % ks.cells_per_dblock) - num_dblocks += 1; - - return num_dblocks * ks.dblock_size; -} +size_t gpu_dblocks(const entity_data_cpu&); +size_t gpu_dblocks_dofs(const entity_data_cpu&); #define TO_GPU true #define FROM_GPU false +void reshape_dofs(const entity_data_cpu&, const entity_data_gpu&, const vecxd&, vecxd&, bool); -void reshape_dofs(const entity_data_cpu& ed, const entity_data_gpu& edg, - const vecxd& in_field, vecxd& out_field, bool to_gpu) -{ - auto ks = get_kernel_gpu_sizes(ed.a_order); - size_t num_dblocks = tot_elems / ks.cells_per_dblock; - if (num_dblocks % ks.cells_per_dblock) - num_dblocks += 1; - - for (size_t i = 0; i < num_dblocks; i++) - { - for (size_t j = 0; j < ks.cells_per_dblock; j++) - { - auto cur_elem = ks.cells_per_dblock*i + j; - if (cur_elem >= tot_elems) - break; - - auto cpu_base = ed.dof_base + cur_elem * ed.num_bf; - auto gpu_base = edg.dof_base + ks.dblock_size*i + ks.num_bf*j; - - if (to_gpu) - for (size_t k = 0; k < ks.num_bf; j++) - out_field(gpu_base + k) = in_field(cpu_base + k); - else - for (size_t k = 0; k < ks.num_bf; j++) - out_field(cpu_base + k) = in_field(gpu_base + k); - } - } -} +void +gpu_compute_field_derivatives(entity_data_gpu& edg, + const double* F, double *dF_dx, double* dF_dy, double* dF_dz); diff --git a/src/entity_data.cpp b/src/entity_data.cpp index 910445e3b3014825c211d25b4391b5b64d579338..fc9c7edbe732085a57a6603d87560177f9245e31 100644 --- a/src/entity_data.cpp +++ b/src/entity_data.cpp @@ -2,7 +2,7 @@ entity_data_gpu::entity_data_gpu() { - num_elems = nullptr; + element_orientation = nullptr; differentiation_matrices = nullptr; jacobians = nullptr; cell_determinants = nullptr; @@ -13,8 +13,18 @@ entity_data_gpu::entity_data_gpu(const entity_data_cpu& ed) g_order = ed.g_order; a_order = ed.a_order; - gpu_copyin(ed.num_elems, &num_elems); - num_orientations = ed.num_orientations; + num_all_elems = num_elems_all_orientations(ed); + + std::vector<size_t> eo; + eo.reserve(num_all_elems); + for (size_t iO = 0; iO < ed.num_orientations; iO++) + { + for (size_t iT = 0; iT < ed.num_elems[iO]; iT++) + eo.push_back(iO); + } + assert(eo.size() == num_all_elems); + gpu_copyin(eo, &element_orientation); + num_bf = ed.num_bf; dof_base = 0; @@ -35,9 +45,10 @@ entity_data_gpu::entity_data_gpu(const entity_data_cpu& ed) entity_data_gpu::~entity_data_gpu() { - if (num_elems) - checkGPU( gpuFree(num_elems) ); - + /* + if (element_orientation) + checkGPU( gpuFree(element_orientation) ); + if (differentiation_matrices) checkGPU( gpuFree(differentiation_matrices) ); @@ -46,5 +57,6 @@ entity_data_gpu::~entity_data_gpu() if (cell_determinants) checkGPU( gpuFree(cell_determinants) ); + */ } diff --git a/src/kernels_cuda.cu b/src/kernels_cuda.cu new file mode 100644 index 0000000000000000000000000000000000000000..0b131ae7c4450ee7b40c117667b2c04f7367a800 --- /dev/null +++ b/src/kernels_cuda.cu @@ -0,0 +1,125 @@ +#include "entity_data.h" +#include "kernels_gpu.h" + +template<size_t K> +__global__ void +gpu_compute_field_derivatives_kernel_planar(entity_data_gpu edg, + const double* F, double *dF_dx, double* dF_dy, double* dF_dz) +{ + using KS = kernel_gpu_sizes<K>; + + int ofs_in_entity = (blockIdx.x * blockDim.x + threadIdx.x); + + int32_t cur_dof_offset = edg.dof_base + ofs_in_entity; + if (cur_dof_offset >= edg.num_all_elems*edg.num_bf) + return; + + int32_t iT = cur_dof_offset / KS::num_bf; + int32_t elem_dof_base = edg.dof_base + iT*edg.num_bf; + int32_t iO = edg.element_orientation[iT]; + int32_t DM_row = ofs_in_entity % KS::num_bf; + int32_t DM_orient = 3*KS::num_bf*iO*KS::num_bf; + double *DM = edg.differentiation_matrices + DM_orient; + + double accm_dF_dx = 0.0; + double accm_dF_dy = 0.0; + double accm_dF_dz = 0.0; + + int32_t jac_ofs = 9*iT; + double J[9]; + for (int32_t i = 0; i < 9; i++) + J[i] = edg.jacobians[jac_ofs+i]; + + for (int32_t dof = 0; dof < KS::num_bf; dof++) + { + int32_t d_ofs = DM_row + KS::num_bf*dof; + int32_t f_ofs = elem_dof_base + dof; + double v = DM[d_ofs] * F[f_ofs]; + accm_dF_dx += J[0] * v; + accm_dF_dy += J[3] * v; + accm_dF_dz += J[6] * v; + } + + for (int32_t dof = 0; dof < KS::num_bf; dof++) + { + int32_t d_ofs = DM_row + KS::num_bf*KS::num_bf + KS::num_bf*dof; + int32_t f_ofs = elem_dof_base + dof; + double v = DM[d_ofs] * F[f_ofs]; + accm_dF_dx += J[1] * v; + accm_dF_dy += J[4] * v; + accm_dF_dz += J[7] * v; + } + + for (int32_t dof = 0; dof < KS::num_bf; dof++) + { + int32_t d_ofs = DM_row + 2*KS::num_bf*KS::num_bf + KS::num_bf*dof; + int32_t f_ofs = elem_dof_base + dof; + double v = DM[d_ofs] * F[f_ofs]; + accm_dF_dx += J[2] * v; + accm_dF_dy += J[5] * v; + accm_dF_dz += J[8] * v; + } + + dF_dx[cur_dof_offset] = accm_dF_dx; + dF_dy[cur_dof_offset] = accm_dF_dy; + dF_dz[cur_dof_offset] = accm_dF_dz; +} + +void +gpu_compute_field_derivatives(entity_data_gpu& edg, + const double* f, double *df_dx, double* df_dy, double* df_dz) +{ + const auto THREADS_PER_BLOCK = 256; + auto num_blocks = edg.num_bf*edg.num_all_elems/THREADS_PER_BLOCK; + if (edg.num_bf*edg.num_all_elems % THREADS_PER_BLOCK) + num_blocks += 1; + + switch (edg.a_order) + { + case 1: + if (edg.g_order == 1) + gpu_compute_field_derivatives_kernel_planar<1><<<num_blocks, THREADS_PER_BLOCK>>>(edg, f, df_dx, df_dy, df_dz); + //else + // compute_field_derivatives_kernel_curved<1>(ed, f, df_dx, df_dy, df_dz); + break; + + case 2: + if (edg.g_order == 1) + gpu_compute_field_derivatives_kernel_planar<2><<<num_blocks, THREADS_PER_BLOCK>>>(edg, f, df_dx, df_dy, df_dz); + //else + // compute_field_derivatives_kernel_curved<2>(ed, f, df_dx, df_dy, df_dz); + break; + + case 3: + if (edg.g_order == 1) + gpu_compute_field_derivatives_kernel_planar<3><<<num_blocks, THREADS_PER_BLOCK>>>(edg, f, df_dx, df_dy, df_dz); + //else + // compute_field_derivatives_kernel_curved<3>(ed, f, df_dx, df_dy, df_dz); + break; + #if 0 + case 4: + if (edg.g_order == 1) + gpu_compute_field_derivatives_kernel_planar<4>(edg, f, df_dx, df_dy, df_dz); + //else + // compute_field_derivatives_kernel_curved<4>(ed, f, df_dx, df_dy, df_dz); + break; + + case 5: + if (edg.g_order == 1) + gpu_compute_field_derivatives_kernel_planar<5>(edg, f, df_dx, df_dy, df_dz); + //else + // compute_field_derivatives_kernel_curved<5>(ed, f, df_dx, df_dy, df_dz); + break; + + case 6: + if (edg.g_order == 1) + gpu_compute_field_derivatives_kernel_planar<6>(edg, f, df_dx, df_dy, df_dz); + //else + // compute_field_derivatives_kernel_curved<6>(ed, f, df_dx, df_dy, df_dz); + break; +#endif + + default: + std::cout << "compute_field_derivatives: invalid order" << std::endl; + } +} \ No newline at end of file diff --git a/src/kernels_gpu_mi.cpp b/src/kernels_gpu_mi.cpp index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..76587a505425ec53cd5d0fbbb0fed2f65bbd2728 100644 --- a/src/kernels_gpu_mi.cpp +++ b/src/kernels_gpu_mi.cpp @@ -0,0 +1,54 @@ +#include "kernels_gpu.h" + +/* Return how many dblocks are needed to store this entity on GPU. */ +size_t gpu_dblocks(const entity_data_cpu& ed) +{ + auto ks = get_kernel_gpu_sizes(ed.a_order); + + size_t tot_elems = num_elems_all_orientations(ed); + size_t num_dblocks = tot_elems / ks.cells_per_dblock; + if (num_dblocks % ks.cells_per_dblock) + num_dblocks += 1; + + return num_dblocks; +} + +/* Return the total number of dofs (actual + padding) that are contained + * in the dblocks making up this entity. */ +size_t gpu_dblocks_dofs(const entity_data_cpu& ed) +{ + auto ks = get_kernel_gpu_sizes(ed.a_order); + size_t num_dblocks = gpu_dblocks(ed); + return num_dblocks * ks.dblock_size; +} + +void reshape_dofs(const entity_data_cpu& ed, const entity_data_gpu& edg, + const vecxd& in_field, vecxd& out_field, bool to_gpu) +{ + auto ks = get_kernel_gpu_sizes(ed.a_order); + + size_t tot_elems = num_elems_all_orientations(ed); + size_t num_dblocks = tot_elems / ks.cells_per_dblock; + if (num_dblocks % ks.cells_per_dblock) + num_dblocks += 1; + + for (size_t i = 0; i < num_dblocks; i++) + { + for (size_t j = 0; j < ks.cells_per_dblock; j++) + { + auto cur_elem = ks.cells_per_dblock*i + j; + if (cur_elem >= tot_elems) + break; + + auto cpu_base = ed.dof_base + cur_elem * ed.num_bf; + auto gpu_base = edg.dof_base + ks.dblock_size*i + ks.num_bf*j; + + if (to_gpu) + for (size_t k = 0; k < ks.num_bf; k++) + out_field(gpu_base + k) = in_field(cpu_base + k); + else + for (size_t k = 0; k < ks.num_bf; k++) + out_field(cpu_base + k) = in_field(gpu_base + k); + } + } +} diff --git a/tests/test_differentiation_gpu.cpp b/tests/test_differentiation_gpu.cpp index c4e65650307234c019ec9412d4822b50fa57aa03..ac01742014f617149c02eee0f5a5d9f20b5d7f9a 100644 --- a/tests/test_differentiation_gpu.cpp +++ b/tests/test_differentiation_gpu.cpp @@ -10,6 +10,8 @@ #include "kernels_cpu.h" #include "timecounter.h" +#include "kernels_gpu.h" + using namespace sgr; static void @@ -58,22 +60,34 @@ int test_differentiation_convergence(int geometric_order, int approximation_orde auto& e0 = mod.entity_at(0); e0.sort_by_orientation(); - vecxd Pf_e0 = e0.project(f); - vecxd df_dx_e0 = vecxd::Zero( Pf_e0.rows() ); - vecxd df_dy_e0 = vecxd::Zero( Pf_e0.rows() ); - vecxd df_dz_e0 = vecxd::Zero( Pf_e0.rows() ); + vecxd Pf_cpu = e0.project(f); + /* Make CPU entity data */ entity_data_cpu ed; e0.populate_entity_data(ed); - + /* Derive GPU entity data */ entity_data_gpu edg(ed); -#if 0 - timecounter tc; + /* Prepare I/O vectors and call kernel */ + device_vector<double> Pf_gpu(Pf_cpu.data(), Pf_cpu.size()); + device_vector<double> df_dx_gpu(Pf_cpu.size()); + device_vector<double> df_dy_gpu(Pf_cpu.size()); + device_vector<double> df_dz_gpu(Pf_cpu.size()); + + timecounter_gpu tc; tc.tic(); - compute_field_derivatives(ed, Pf_e0, df_dx_e0, df_dy_e0, df_dz_e0); + gpu_compute_field_derivatives(edg, Pf_gpu.data(), df_dx_gpu.data(), + df_dy_gpu.data(), df_dz_gpu.data()); double time = tc.toc(); + vecxd df_dx_cpu = vecxd::Zero( Pf_cpu.size() ); + vecxd df_dy_cpu = vecxd::Zero( Pf_cpu.size() ); + vecxd df_dz_cpu = vecxd::Zero( Pf_cpu.size() ); + + df_dx_gpu.copyout( df_dx_cpu.data() ); + df_dy_gpu.copyout( df_dy_cpu.data() ); + df_dz_gpu.copyout( df_dz_cpu.data() ); + if (geometric_order == 1) { std::cout << "Kernel runtime: " << time << " seconds. Estimated performance: "; @@ -101,9 +115,9 @@ int test_differentiation_convergence(int geometric_order, int approximation_orde auto& re = e0.cell_refelem(pe); matxd mass = e0.mass_matrix(iT); auto num_bf = re.num_basis_functions(); - vecxd diff_x = Pdf_dx_e0.segment(iT*num_bf, num_bf) - df_dx_e0.segment(iT*num_bf, num_bf); - vecxd diff_y = Pdf_dy_e0.segment(iT*num_bf, num_bf) - df_dy_e0.segment(iT*num_bf, num_bf); - vecxd diff_z = Pdf_dz_e0.segment(iT*num_bf, num_bf) - df_dz_e0.segment(iT*num_bf, num_bf); + vecxd diff_x = Pdf_dx_e0.segment(iT*num_bf, num_bf) - df_dx_cpu.segment(iT*num_bf, num_bf); + vecxd diff_y = Pdf_dy_e0.segment(iT*num_bf, num_bf) - df_dy_cpu.segment(iT*num_bf, num_bf); + vecxd diff_z = Pdf_dz_e0.segment(iT*num_bf, num_bf) - df_dz_cpu.segment(iT*num_bf, num_bf); err_x += diff_x.dot(mass*diff_x); err_y += diff_y.dot(mass*diff_y); @@ -115,14 +129,12 @@ int test_differentiation_convergence(int geometric_order, int approximation_orde errs_x.push_back( std::sqrt(err_x) ); errs_y.push_back( std::sqrt(err_y) ); errs_z.push_back( std::sqrt(err_z) ); -#endif } double rate_x = 0.0; double rate_y = 0.0; double rate_z = 0.0; -#if 0 std::cout << Byellowfg << "rate df/dx rate df/dy rate df/dz" << reset << std::endl; for (size_t i = 1; i < sizes.size(); i++) { @@ -134,7 +146,6 @@ int test_differentiation_convergence(int geometric_order, int approximation_orde COMPARE_VALUES_ABSOLUTE("df/dx", rate_x, double(approximation_order), 0.2); COMPARE_VALUES_ABSOLUTE("df/dy", rate_y, double(approximation_order), 0.2); COMPARE_VALUES_ABSOLUTE("df/dz", rate_z, double(approximation_order), 0.2); -#endif return 0; } @@ -150,8 +161,8 @@ int main(void) int failed_tests = 0; std::cout << Bmagentafg << " *** TESTING: DIFFERENTIATION ***" << reset << std::endl; - for (size_t go = 1; go < 5; go++) - for (size_t ao = go; ao < 5; ao++) + for (size_t go = 1; go < 2; go++) + for (size_t ao = go; ao < 4; ao++) failed_tests += test_differentiation_convergence(go, ao); return failed_tests;