diff --git a/CMakeLists.txt b/CMakeLists.txt index 909b1e5e4d4e274d0a631b1f9bb3421123709bc4..a256dd0dad26554e6230fedddefe068706d27853 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -260,15 +260,18 @@ target_link_libraries(test_mass gmshdg) add_executable(test_differentiation tests/test_differentiation.cpp) target_link_libraries(test_differentiation gmshdg) -add_executable(test_lifting tests/test_lifting.cpp) -target_link_libraries(test_lifting gmshdg) - add_executable(test_differentiation_gpu tests/test_differentiation_gpu.cpp) target_link_libraries(test_differentiation_gpu gmshdg) add_executable(profile_differentiation_gpu tests/profile_differentiation_gpu.cpp) target_link_libraries(profile_differentiation_gpu gmshdg) +add_executable(test_lifting tests/test_lifting.cpp) +target_link_libraries(test_lifting gmshdg) + +add_executable(test_lifting_gpu tests/test_lifting_gpu.cpp) +target_link_libraries(test_lifting_gpu gmshdg) + #add_executable(test test.cpp test_basics.cpp test_mass.cpp test_differentiation.cpp test_differentiation_gpu.cpp test_lifting.cpp ${COMMON_SOURCES}) #target_link_libraries(test ${LINK_LIBS}) diff --git a/include/entity_data.h b/include/entity_data.h index 6d96b3710ca93cc2cc1e68a76a84c29308e22b71..ba78e14ce44d5461dde37390e318f19ea68fc7a2 100644 --- a/include/entity_data.h +++ b/include/entity_data.h @@ -74,12 +74,14 @@ struct entity_data_gpu size_t num_all_elems; device_vector<int32_t> element_orientation; size_t num_bf; + size_t num_fluxes; size_t dof_base; size_t flux_base; texture_allocator<double> differentiation_matrices; + texture_allocator<double> lifting_matrices; device_vector<double> jacobians; device_vector<double> cell_determinants; diff --git a/include/gpu_support.hpp b/include/gpu_support.hpp index 758c2e28992759c04ecac92e22a394e4a9265176..052243d3dec8382d010ae9c0378a2eae187ebc29 100644 --- a/include/gpu_support.hpp +++ b/include/gpu_support.hpp @@ -165,13 +165,13 @@ public: } /* allocate memory and copyin data at ptr */ - device_vector(T *src, size_t size) + device_vector(const T *src, size_t size) : d_vptr(nullptr), vsize(0) { copyin(src, size); } - void copyin(T *src, size_t size) + void copyin(const T *src, size_t size) { if (d_vptr != nullptr) (void) gpuFree(d_vptr); diff --git a/include/kernels_cpu.h b/include/kernels_cpu.h index 575b1489d32857ca45791668b2f55ea9a87ac486..c40ca0987606c4143b8b9e34cae8c503951b471b 100644 --- a/include/kernels_cpu.h +++ b/include/kernels_cpu.h @@ -1,24 +1,12 @@ #pragma once -//#include <omp.h> - -template<size_t AK> -struct num_dofs_3D -{ - static const size_t value = ((AK+3)*(AK+2)*(AK+1))/6; -}; - -template<size_t AK> -struct num_dofs_2D -{ - static const size_t value = ((AK+2)*(AK+1))/2; -}; +#include "types.h" template<size_t AK> struct kernel_cpu_sizes { - static const size_t num_bf = num_dofs_3D<AK>::value; - static const size_t num_fluxes = num_dofs_2D<AK>::value; + static const size_t num_bf = num_dofs_3D(AK); + static const size_t num_fluxes = num_dofs_2D(AK); static const size_t num_faces = 4; static const size_t num_all_fluxes = num_fluxes * num_faces; }; diff --git a/include/kernels_gpu.h b/include/kernels_gpu.h index 36c8448e68396fb636dc4f2024959e1333de42b9..f38f35b38eeb2ce8de3347c8156c0881ffa99ee1 100644 --- a/include/kernels_gpu.h +++ b/include/kernels_gpu.h @@ -1,12 +1,8 @@ #pragma once - +#include "types.h" #include "entity_data.h" -/* Compile-time constants configuring sizes for the kernels. - * Elements are organized in blocks of cells_per_dblock elements each. - * Each dblock contains dblock_size dofs, where dblock_size must be a - * multiple of the warp size. This means that some dofs in the dblock - * will be padding. The effective dofs used are dblock_bf.*/ +/* Compile-time constants configuring sizes for the kernels. */ template<size_t K> struct kernel_gpu_sizes; @@ -14,82 +10,66 @@ struct kernel_gpu_sizes; template<> struct kernel_gpu_sizes<1> { - static const size_t num_bf = 4; - static const size_t cells_per_dblock = 32; - static const size_t dblock_bf = num_bf * cells_per_dblock; - static const size_t dblock_size = 128; - static const size_t parallel_dblocks = 4; + static const size_t num_bf = num_dofs_3D(1); + static const size_t num_fluxes = num_dofs_2D(1); static const size_t deriv_threads = 128; + static const size_t lifting_threads = 128; }; template<> struct kernel_gpu_sizes<2> { - static const size_t num_bf = 10; - static const size_t cells_per_dblock = 12; - static const size_t dblock_bf = num_bf * cells_per_dblock; - static const size_t dblock_size = 128; - static const size_t parallel_dblocks = 1; + static const size_t num_bf = num_dofs_3D(2); + static const size_t num_fluxes = num_dofs_2D(2); static const size_t deriv_threads = 128; + static const size_t lifting_threads = 128; }; template<> struct kernel_gpu_sizes<3> { - static const size_t num_bf = 20; - static const size_t cells_per_dblock = 12; - static const size_t dblock_bf = num_bf * cells_per_dblock; - static const size_t dblock_size = 128; - static const size_t parallel_dblocks = 1; + static const size_t num_bf = num_dofs_3D(3); + static const size_t num_fluxes = num_dofs_2D(3); static const size_t deriv_threads = 128; + static const size_t lifting_threads = 128; }; template<> struct kernel_gpu_sizes<4> { - static const size_t num_bf = 35; - static const size_t cells_per_dblock = 12; - static const size_t dblock_bf = num_bf * cells_per_dblock; - static const size_t dblock_size = 128; - static const size_t parallel_dblocks = 1; + static const size_t num_bf = num_dofs_3D(4); + static const size_t num_fluxes = num_dofs_2D(4); static const size_t deriv_threads = 512; + static const size_t lifting_threads = 128; }; template<> struct kernel_gpu_sizes<5> { - static const size_t num_bf = 56; - static const size_t cells_per_dblock = 12; - static const size_t dblock_bf = num_bf * cells_per_dblock; - static const size_t dblock_size = 128; - static const size_t parallel_dblocks = 1; + static const size_t num_bf = num_dofs_3D(5); + static const size_t num_fluxes = num_dofs_2D(5); static const size_t deriv_threads = 1024; + static const size_t lifting_threads = 128; }; template<> struct kernel_gpu_sizes<6> { - static const size_t num_bf = 84; - static const size_t cells_per_dblock = 12; - static const size_t dblock_bf = num_bf * cells_per_dblock; - static const size_t dblock_size = 128; - static const size_t parallel_dblocks = 1; + static const size_t num_bf = num_dofs_3D(6); + static const size_t num_fluxes = num_dofs_2D(6); static const size_t deriv_threads = 1024; + static const size_t lifting_threads = 128; }; struct kernel_gpu_sizes_runtime { size_t num_bf; - size_t cells_per_dblock; - size_t dblock_bf; - size_t dblock_size; - size_t parallel_dblocks; }; template<size_t N> @@ -98,10 +78,7 @@ kernel_gpu_sizes_runtime get_kernel_gpu_sizes() kernel_gpu_sizes_runtime ret; ret.num_bf = kernel_gpu_sizes<N>::num_bf; - ret.cells_per_dblock = kernel_gpu_sizes<N>::cells_per_dblock; - ret.dblock_bf = kernel_gpu_sizes<N>::dblock_bf; - ret.dblock_size = kernel_gpu_sizes<N>::dblock_size; - ret.parallel_dblocks = kernel_gpu_sizes<N>::parallel_dblocks; + return ret; }; @@ -130,3 +107,6 @@ void reshape_dofs(const entity_data_cpu&, const entity_data_gpu&, const vecxd&, void gpu_compute_field_derivatives(entity_data_gpu& edg, gpuTextureObject_t F, double *dF_dx, double* dF_dy, double* dF_dz); + +void +gpu_compute_flux_lifting(entity_data_gpu& edg, gpuTextureObject_t f, double *out); \ No newline at end of file diff --git a/include/types.h b/include/types.h index cb89d6cccb39cfb2cf3ae102278b8fc2be7faaf1..be5bbf76d0c6ca6fbede4c3cc86ed6665a9dcc2f 100644 --- a/include/types.h +++ b/include/types.h @@ -24,3 +24,12 @@ struct entity_params int aorder; }; +constexpr size_t num_dofs_3D(size_t AK) +{ + return ((AK+3)*(AK+2)*(AK+1))/6; +} + +constexpr size_t num_dofs_2D(size_t AK) +{ + return ((AK+2)*(AK+1))/2; +}; \ No newline at end of file diff --git a/src/entity_data.cpp b/src/entity_data.cpp index d89975c296dffd7d2d0a5afa371e30eaf1737cee..6f1d826a4d43f317a8a8301fa2a2b7bf202906b8 100644 --- a/src/entity_data.cpp +++ b/src/entity_data.cpp @@ -22,6 +22,7 @@ entity_data_gpu::entity_data_gpu(const entity_data_cpu& ed) element_orientation.copyin(eo.data(), eo.size()); num_bf = ed.num_bf; + num_fluxes = ed.num_fluxes; dof_base = ed.dof_base; flux_base = ed.flux_base; @@ -39,9 +40,18 @@ entity_data_gpu::entity_data_gpu(const entity_data_cpu& ed) dm.block(i*num_bf, 2*num_bf, num_bf, num_bf) = ed.differentiation_matrices.block(i*num_bf, 2*num_bf, num_bf, num_bf).transpose(); } - differentiation_matrices.init(dm.data(), dm.size()); + auto LMrows = ed.num_bf; + auto LMcols = 4*ed.num_fluxes*ed.num_orientations; + MatxCM<double> lm = MatxCM<double>::Zero(LMrows, LMcols); + for (size_t i = 0; i < ed.num_orientations; i++) + { + lm.block(0, 4*i*ed.num_fluxes, ed.num_bf, 4*ed.num_fluxes) = + ed.lifting_matrices.block(i*ed.num_bf, 0, ed.num_bf, 4*ed.num_fluxes); + } + lifting_matrices.init(lm.data(), lm.size()); + jacobians.copyin(ed.jacobians.data(), ed.jacobians.size()); cell_determinants.copyin(ed.cell_determinants.data(), ed.cell_determinants.size()); diff --git a/src/kernels_cuda.cu b/src/kernels_cuda.cu index 6d01a0615a59550ced383bd09101a494700a61c6..71e440537d89af5b9e2ce4f9ec636d29d548bcfb 100644 --- a/src/kernels_cuda.cu +++ b/src/kernels_cuda.cu @@ -111,4 +111,93 @@ gpu_compute_field_derivatives(entity_data_gpu& edg, default: throw std::invalid_argument("compute_field_derivatives: invalid order"); } -} \ No newline at end of file +} + +template<size_t K> +__global__ void +gpu_lift_planar(gpuTextureObject_t flux, gpuTextureObject_t LM_tex, + const double * __restrict__ dets, double * __restrict__ lifted_flux, + int32_t num_all_elems, int32_t* orients, int32_t dof_base, int32_t flux_base) +{ + using KS = kernel_gpu_sizes<K>; + + /* One thread per *output* dof */ + int ofs_in_entity = (blockIdx.x * blockDim.x + threadIdx.x); + if (ofs_in_entity >= num_all_elems*KS::num_bf) + return; + + int32_t cur_dof_offset = dof_base + ofs_in_entity; + + int32_t iT = ofs_in_entity / KS::num_bf; + int32_t elem_flux_base = flux_base + 4*iT*KS::num_fluxes; + int32_t iO = orients[iT]; + int32_t LM_row = ofs_in_entity % KS::num_bf; + int32_t LM_orient = 4*KS::num_bf*KS::num_fluxes*iO; + double inv_det = 1./dets[iT]; + + double acc = 0.0; + for (int32_t dof = 0; dof < 4*KS::num_fluxes; dof++) + { + int32_t l_ofs = LM_orient + LM_row + KS::num_bf*dof; + int32_t f_ofs = elem_flux_base + dof; + acc += inv_det * fetch_tex(LM_tex, l_ofs) * fetch_tex(flux, f_ofs); + } + + lifted_flux[cur_dof_offset] += acc; +} + +template<size_t K> +void +launch_lift_kernel(entity_data_gpu& edg, gpuTextureObject_t f, double *out) +{ + const auto THREADS_PER_BLOCK = 256;//kernel_gpu_sizes<K>::deriv_threads; + auto num_blocks = edg.num_bf*edg.num_all_elems/THREADS_PER_BLOCK; + if (edg.num_bf*edg.num_all_elems % THREADS_PER_BLOCK) + num_blocks += 1; + + auto Ltex = edg.lifting_matrices.get_texture(); + int32_t num_elems = edg.num_all_elems; + auto orients = edg.element_orientation.data(); + auto num_orients = edg.element_orientation.size(); + auto dets = edg.cell_determinants.data(); + + if (edg.g_order == 1) + gpu_lift_planar<K><<<num_blocks, THREADS_PER_BLOCK>>>(f, Ltex, + dets, out, num_elems, orients, edg.dof_base, edg.flux_base); + //else + // compute_field_derivatives_kernel_curved<1>(ed, f, df_dx, df_dy, df_dz); +} + +void +gpu_compute_flux_lifting(entity_data_gpu& edg, gpuTextureObject_t f, double *out) +{ + switch (edg.a_order) + { + case 1: + launch_lift_kernel<1>(edg, f, out); + break; + + case 2: + launch_lift_kernel<2>(edg, f, out); + break; + + case 3: + launch_lift_kernel<3>(edg, f, out); + break; + + case 4: + launch_lift_kernel<4>(edg, f, out); + break; + + case 5: + launch_lift_kernel<5>(edg, f, out); + break; + + case 6: + launch_lift_kernel<6>(edg, f, out); + break; + + default: + throw std::invalid_argument("compute_field_derivatives: invalid order"); + } +} diff --git a/src/kernels_gpu_mi.cpp b/src/kernels_gpu_mi.cpp index 76587a505425ec53cd5d0fbbb0fed2f65bbd2728..4983265fddf7c3046bae0028c447b546781f899a 100644 --- a/src/kernels_gpu_mi.cpp +++ b/src/kernels_gpu_mi.cpp @@ -1,5 +1,6 @@ #include "kernels_gpu.h" +#if 0 /* Return how many dblocks are needed to store this entity on GPU. */ size_t gpu_dblocks(const entity_data_cpu& ed) { @@ -52,3 +53,4 @@ void reshape_dofs(const entity_data_cpu& ed, const entity_data_gpu& edg, } } } +#endif diff --git a/tests/test_lifting.cpp b/tests/test_lifting.cpp index 1a93cb992173f492ad0b8084fdf5969378d05c13..836bcc62485ed604da13028ecd605323741e95e6 100644 --- a/tests/test_lifting.cpp +++ b/tests/test_lifting.cpp @@ -39,8 +39,6 @@ make_geometry(int order, double mesh_h) gmm::setSize(vp, mesh_h); } -#define WRITE_TEST_OUTPUTS - int test_lifting(int geometric_order, int approximation_order) { std::vector<double> sizes({ 0.32, 0.16, 0.08, 0.04 }); @@ -170,8 +168,10 @@ int test_lifting(int geometric_order, int approximation_order) std::vector<double> var_vol( mod.num_cells() ); #endif - double err = 0.0; + /* Compute (∇∙F,g)_T = (F∙n,g)_∂T - (F,∇g)_T on each element T + * to verify that the lifting computation is correct */ + double err = 0.0; for (auto& e : mod) { for (size_t iT = 0; iT < e.num_cells(); iT++) @@ -216,16 +216,24 @@ int test_lifting(int geometric_order, int approximation_order) } } + errors.push_back( std::sqrt(err) ); + #ifdef WRITE_TEST_OUTPUTS silodb.write_zonal_variable("expected_div_F", var_ediv_F); silodb.write_zonal_variable("div_F", var_div_F); silodb.write_zonal_variable("lift", var_lift); silodb.write_zonal_variable("vol", var_vol); #endif - std::cout << std::sqrt(err) << std::endl; + std::cout << "Error: " << std::sqrt(err) << std::endl; + } + double rate = 0.0; - } + std::cout << Byellowfg << "rate" << reset << std::endl; + for (size_t i = 1; i < sizes.size(); i++) + std::cout << (rate = std::log2(errors[i-1]/errors[i]) ) << std::endl; + + COMPARE_VALUES_ABSOLUTE("df/dx", rate, double(approximation_order), 0.2); return 0; } diff --git a/tests/test_lifting_gpu.cpp b/tests/test_lifting_gpu.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c9277d143e99b68893f31e9a4653cef0335169fd --- /dev/null +++ b/tests/test_lifting_gpu.cpp @@ -0,0 +1,263 @@ +#include <iostream> +#include <iomanip> +#include <sstream> +#include <unistd.h> + +#include "test.h" +#include "gmsh_io.h" +#include "sgr.hpp" +#include "entity_data.h" +#include "kernels_cpu.h" +#include "timecounter.h" +#include "silo_output.hpp" +#include "kernels_gpu.h" + +using namespace sgr; + +static void +make_geometry(int order, double mesh_h) +{ + gm::add("difftest"); + + std::vector<std::pair<int,int>> objects; + + objects.push_back( + std::make_pair(3, gmo::addBox(0.0, 0.0, 0.0, 1.0, 1.0, 0.5) ) + ); + objects.push_back( + std::make_pair(3, gmo::addBox(0.0, 0.0, 0.5, 0.5, 0.5, 0.5) ) + ); + + std::vector<std::pair<int, int>> tools; + gmsh::vectorpair odt; + std::vector<gmsh::vectorpair> odtm; + gmo::fragment(objects, tools, odt, odtm); + + gmo::synchronize(); + + gvp_t vp; + gm::getEntities(vp); + gmm::setSize(vp, mesh_h); +} + +int test_lifting(int geometric_order, int approximation_order) +{ + std::vector<double> sizes({ 0.32, 0.16, 0.08, 0.04 }); + std::vector<double> errors; + + std::cout << cyanfg << "Testing geometric order " << geometric_order; + std::cout << ", approximation order = " << approximation_order << reset; + std::cout << std::endl; + + /* Test field */ + auto Fx = [](const point_3d& pt) -> double { + return std::sin(M_PI*pt.x()); + }; + + auto Fy = [](const point_3d& pt) -> double { + return std::sin(M_PI*pt.y()); + }; + + auto Fz = [](const point_3d& pt) -> double { + return std::sin(M_PI*pt.z()); + }; + + auto F = [&](const point_3d& pt) -> vecxd { + vec3d Fret; + Fret(0) = Fx(pt); + Fret(1) = Fy(pt); + Fret(2) = Fz(pt); + return Fret; + }; + + /* Expected divergence */ + auto div_F = [](const point_3d& pt) -> double { + auto dFx_dx = M_PI*std::cos(M_PI*pt.x()); + auto dFy_dy = M_PI*std::cos(M_PI*pt.y()); + auto dFz_dz = M_PI*std::cos(M_PI*pt.z()); + return dFx_dx + dFy_dy + dFz_dz; + }; + + for (size_t sz = 0; sz < sizes.size(); sz++) + { + double h = sizes[sz]; + make_geometry(0,h); + model mod(geometric_order, approximation_order); + +#ifdef WRITE_TEST_OUTPUTS + std::stringstream ss; + ss << "lift_go_" << geometric_order << "_ao_" << approximation_order; + ss << "_seq_" << sz << ".silo"; + + silo silodb; + silodb.create_db(ss.str()); + silodb.import_mesh_from_gmsh(); + silodb.write_mesh(); +#endif + + auto model_num_dofs = mod.num_dofs(); + auto model_num_fluxes = mod.num_fluxes(); + + vecxd Pdiv_F = vecxd::Zero(model_num_dofs); + vecxd PFdotn = vecxd::Zero(model_num_fluxes); + vecxd LiftF = vecxd::Zero(model_num_dofs); + + std::vector<entity_data_gpu> edgs; + + for (auto& e : mod) + { + e.project(div_F, Pdiv_F); + entity_data_cpu ed; + e.populate_entity_data(ed); + + vecxd PFx = e.project(Fx); + vecxd PFy = e.project(Fy); + vecxd PFz = e.project(Fz); + + for (size_t iT = 0; iT < e.num_cells(); iT++) + { + for (size_t iF = 0; iF < 4; iF++) + { + auto face_det = ed.face_determinants[4*iT+iF]; + vec3d n = ed.normals.row(4*iT+iF); + for (size_t k = 0; k < ed.num_fluxes; k++) + { + auto base = e.flux_base(); + auto ofs = (4*iT+iF)*ed.num_fluxes + k; + auto Fx = PFx( ed.fluxdofs_mine[ofs] ); + auto Fy = PFy( ed.fluxdofs_mine[ofs] ); + auto Fz = PFz( ed.fluxdofs_mine[ofs] ); + PFdotn(base+ofs) = face_det*Fx*n(0) + + face_det*Fy*n(1) + + face_det*Fz*n(2); + } + } + } + entity_data_gpu edg(ed); + edgs.push_back( std::move(edg) ); + } + + texture_allocator<double> PFdotn_gpu(PFdotn.data(), PFdotn.size()); + device_vector<double> LiftF_gpu(LiftF.data(), LiftF.size()); + + for (auto& edg : edgs) + { + timecounter_gpu tc; + tc.tic(); + gpu_compute_flux_lifting(edg, PFdotn_gpu.get_texture(), LiftF_gpu.data()); + double time = tc.toc(); + + auto num_cells = edg.num_all_elems; + if (geometric_order == 1) + { + std::cout << "Kernel runtime: " << time << " seconds. Estimated performance: "; + double flops = 3*(edg.num_bf)*4*edg.num_fluxes*num_cells; + std::cout << flops/(1e9*time) << " GFlops/s" << std::endl; + } + else + { + //std::cout << "Kernel runtime: " << time << " seconds. Estimated performance: "; + //double flops = ((21*edg.num_bf+6)*edg.num_bf*edg.num_qp + 3*(2*edg.num_bf-1)*edg.num_bf)*num_cells; + //std::cout << flops/(1e9*time) << " GFlops/s" << std::endl; + } + } + + LiftF_gpu.copyout(LiftF.data()); + +#ifdef WRITE_TEST_OUTPUTS + std::vector<double> var_ediv_F( mod.num_cells() ); + std::vector<double> var_div_F( mod.num_cells() ); + std::vector<double> var_lift( mod.num_cells() ); + std::vector<double> var_vol( mod.num_cells() ); +#endif + + /* Compute (∇∙F,g)_T = (F∙n,g)_∂T - (F,∇g)_T on each element T + * to verify that the lifting computation is correct */ + + double err = 0.0; + for (auto& e : mod) + { + for (size_t iT = 0; iT < e.num_cells(); iT++) + { + auto& pe = e.cell(iT); + auto& re = e.cell_refelem(pe); + auto num_bf = re.num_basis_functions(); + matxd mass = matxd::Zero(num_bf, num_bf); + vecxd vol = vecxd::Zero(num_bf); + + const auto pqps = pe.integration_points(); + for(size_t iQp = 0; iQp < pqps.size(); iQp++) + { + const auto& pqp = pqps[iQp]; + const vecxd phi = re.basis_functions(iQp); + const matxd dphi = re.basis_gradients(iQp); + const mat3d J = pe.jacobian(iQp); + mass += pqp.weight() * phi * phi.transpose(); + vol += pqp.weight() * (dphi*J.inverse()) * F(pqp.point()); + } + + auto ofs = e.cell_global_dof_offset(iT); + vecxd excpected_div_F = Pdiv_F.segment(ofs, num_bf); + vecxd Plf = LiftF.segment(ofs, num_bf); + vecxd Pvol = mass.ldlt().solve(vol); + + + vecxd computed_div_F = Plf - Pvol; + vecxd diff = computed_div_F - excpected_div_F; + double loc_err = diff.dot(mass*diff); + err += loc_err; + + auto bar = pe.barycenter(); + vecxd phi_bar = re.basis_functions({1./3., 1./3., 1./3.}); +#ifdef WRITE_TEST_OUTPUTS + auto gi = e.cell_global_index_by_gmsh(iT); + var_ediv_F[gi] = div_F(bar); + var_div_F[gi] = computed_div_F.dot(phi_bar); + var_lift[gi] = Plf.dot(phi_bar); + var_vol[gi] = Pvol.dot(phi_bar); +#endif + } + } + + errors.push_back( std::sqrt(err) ); + +#ifdef WRITE_TEST_OUTPUTS + silodb.write_zonal_variable("expected_div_F", var_ediv_F); + silodb.write_zonal_variable("div_F", var_div_F); + silodb.write_zonal_variable("lift", var_lift); + silodb.write_zonal_variable("vol", var_vol); +#endif + std::cout << "Error: " << std::sqrt(err) << std::endl; + } + + double rate = 0.0; + + std::cout << Byellowfg << "rate" << reset << std::endl; + for (size_t i = 1; i < sizes.size(); i++) + std::cout << (rate = std::log2(errors[i-1]/errors[i]) ) << std::endl; + + COMPARE_VALUES_ABSOLUTE("df/dx", rate, double(approximation_order), 0.2); + + return 0; +} + +int main(void) +{ + gmsh::initialize(); + gmsh::option::setNumber("General.Terminal", 0); + gmsh::option::setNumber("Mesh.Algorithm", 1); + gmsh::option::setNumber("Mesh.Algorithm3D", 1); + + + int failed_tests = 0; + + std::cout << Bmagentafg << " *** TESTING: LIFTING ***" << reset << std::endl; + for (size_t ao = 1; ao < 2; ao++) + test_lifting(1, ao); + + gmsh::finalize(); + + return failed_tests; +} + +