Commit 81b5ae3b authored by Matteo Cicuttin

GPU lifting kernel
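
Add a GPU implementation of the DG flux lifting: an order-templated gpu_lift_planar kernel with a gpu_compute_flux_lifting() dispatch, lifting matrices repacked per orientation and uploaded as a texture in entity_data_gpu, num_dofs_2D/num_dofs_3D turned into constexpr functions shared by the CPU and GPU size tables, the unused dblock machinery dropped, and a test_lifting_gpu test that checks the lifted fluxes through the identity (∇∙F,g)_T = (F∙n,g)_∂T - (F,∇g)_T and the expected convergence rate.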

parent 0c50ab25
@@ -260,15 +260,18 @@ target_link_libraries(test_mass gmshdg)

add_executable(test_differentiation tests/test_differentiation.cpp)
target_link_libraries(test_differentiation gmshdg)

-add_executable(test_lifting tests/test_lifting.cpp)
-target_link_libraries(test_lifting gmshdg)
-
add_executable(test_differentiation_gpu tests/test_differentiation_gpu.cpp)
target_link_libraries(test_differentiation_gpu gmshdg)

add_executable(profile_differentiation_gpu tests/profile_differentiation_gpu.cpp)
target_link_libraries(profile_differentiation_gpu gmshdg)

+add_executable(test_lifting tests/test_lifting.cpp)
+target_link_libraries(test_lifting gmshdg)
+
+add_executable(test_lifting_gpu tests/test_lifting_gpu.cpp)
+target_link_libraries(test_lifting_gpu gmshdg)
+
#add_executable(test test.cpp test_basics.cpp test_mass.cpp test_differentiation.cpp test_differentiation_gpu.cpp test_lifting.cpp ${COMMON_SOURCES})
#target_link_libraries(test ${LINK_LIBS})
...
@@ -74,12 +74,14 @@ struct entity_data_gpu
    size_t num_all_elems;
    device_vector<int32_t> element_orientation;
    size_t num_bf;
+   size_t num_fluxes;
    size_t dof_base;
    size_t flux_base;

    texture_allocator<double> differentiation_matrices;
+   texture_allocator<double> lifting_matrices;

    device_vector<double> jacobians;
    device_vector<double> cell_determinants;
...
@@ -165,13 +165,13 @@ public:
    }

    /* allocate memory and copyin data at ptr */
-   device_vector(T *src, size_t size)
+   device_vector(const T *src, size_t size)
        : d_vptr(nullptr), vsize(0)
    {
        copyin(src, size);
    }

-   void copyin(T *src, size_t size)
+   void copyin(const T *src, size_t size)
    {
        if (d_vptr != nullptr)
            (void) gpuFree(d_vptr);
...
#pragma once

-//#include <omp.h>
+#include "types.h"

-template<size_t AK>
-struct num_dofs_3D
-{
-    static const size_t value = ((AK+3)*(AK+2)*(AK+1))/6;
-};
-
-template<size_t AK>
-struct num_dofs_2D
-{
-    static const size_t value = ((AK+2)*(AK+1))/2;
-};

template<size_t AK>
struct kernel_cpu_sizes
{
-   static const size_t num_bf = num_dofs_3D<AK>::value;
-   static const size_t num_fluxes = num_dofs_2D<AK>::value;
+   static const size_t num_bf = num_dofs_3D(AK);
+   static const size_t num_fluxes = num_dofs_2D(AK);
    static const size_t num_faces = 4;
    static const size_t num_all_fluxes = num_fluxes * num_faces;
};
...
#pragma once

+#include "types.h"
#include "entity_data.h"

-/* Compile-time constants configuring sizes for the kernels.
- * Elements are organized in blocks of cells_per_dblock elements each.
- * Each dblock contains dblock_size dofs, where dblock_size must be a
- * multiple of the warp size. This means that some dofs in the dblock
- * will be padding. The effective dofs used are dblock_bf. */
+/* Compile-time constants configuring sizes for the kernels. */

template<size_t K>
struct kernel_gpu_sizes;
@@ -14,82 +10,66 @@ struct kernel_gpu_sizes;

template<>
struct kernel_gpu_sizes<1>
{
-   static const size_t num_bf = 4;
-   static const size_t cells_per_dblock = 32;
-   static const size_t dblock_bf = num_bf * cells_per_dblock;
-   static const size_t dblock_size = 128;
-   static const size_t parallel_dblocks = 4;
+   static const size_t num_bf = num_dofs_3D(1);
+   static const size_t num_fluxes = num_dofs_2D(1);
    static const size_t deriv_threads = 128;
+   static const size_t lifting_threads = 128;
};

template<>
struct kernel_gpu_sizes<2>
{
-   static const size_t num_bf = 10;
-   static const size_t cells_per_dblock = 12;
-   static const size_t dblock_bf = num_bf * cells_per_dblock;
-   static const size_t dblock_size = 128;
-   static const size_t parallel_dblocks = 1;
+   static const size_t num_bf = num_dofs_3D(2);
+   static const size_t num_fluxes = num_dofs_2D(2);
    static const size_t deriv_threads = 128;
+   static const size_t lifting_threads = 128;
};

template<>
struct kernel_gpu_sizes<3>
{
-   static const size_t num_bf = 20;
-   static const size_t cells_per_dblock = 12;
-   static const size_t dblock_bf = num_bf * cells_per_dblock;
-   static const size_t dblock_size = 128;
-   static const size_t parallel_dblocks = 1;
+   static const size_t num_bf = num_dofs_3D(3);
+   static const size_t num_fluxes = num_dofs_2D(3);
    static const size_t deriv_threads = 128;
+   static const size_t lifting_threads = 128;
};

template<>
struct kernel_gpu_sizes<4>
{
-   static const size_t num_bf = 35;
-   static const size_t cells_per_dblock = 12;
-   static const size_t dblock_bf = num_bf * cells_per_dblock;
-   static const size_t dblock_size = 128;
-   static const size_t parallel_dblocks = 1;
+   static const size_t num_bf = num_dofs_3D(4);
+   static const size_t num_fluxes = num_dofs_2D(4);
    static const size_t deriv_threads = 512;
+   static const size_t lifting_threads = 128;
};

template<>
struct kernel_gpu_sizes<5>
{
-   static const size_t num_bf = 56;
-   static const size_t cells_per_dblock = 12;
-   static const size_t dblock_bf = num_bf * cells_per_dblock;
-   static const size_t dblock_size = 128;
-   static const size_t parallel_dblocks = 1;
+   static const size_t num_bf = num_dofs_3D(5);
+   static const size_t num_fluxes = num_dofs_2D(5);
    static const size_t deriv_threads = 1024;
+   static const size_t lifting_threads = 128;
};

template<>
struct kernel_gpu_sizes<6>
{
-   static const size_t num_bf = 84;
-   static const size_t cells_per_dblock = 12;
-   static const size_t dblock_bf = num_bf * cells_per_dblock;
-   static const size_t dblock_size = 128;
-   static const size_t parallel_dblocks = 1;
+   static const size_t num_bf = num_dofs_3D(6);
+   static const size_t num_fluxes = num_dofs_2D(6);
    static const size_t deriv_threads = 1024;
+   static const size_t lifting_threads = 128;
};

struct kernel_gpu_sizes_runtime
{
    size_t num_bf;
-   size_t cells_per_dblock;
-   size_t dblock_bf;
-   size_t dblock_size;
-   size_t parallel_dblocks;
};

template<size_t N>
@@ -98,10 +78,7 @@ kernel_gpu_sizes_runtime get_kernel_gpu_sizes()
    kernel_gpu_sizes_runtime ret;
    ret.num_bf = kernel_gpu_sizes<N>::num_bf;
-   ret.cells_per_dblock = kernel_gpu_sizes<N>::cells_per_dblock;
-   ret.dblock_bf = kernel_gpu_sizes<N>::dblock_bf;
-   ret.dblock_size = kernel_gpu_sizes<N>::dblock_size;
-   ret.parallel_dblocks = kernel_gpu_sizes<N>::parallel_dblocks;
    return ret;
};
@@ -130,3 +107,6 @@ void reshape_dofs(const entity_data_cpu&, const entity_data_gpu&, const vecxd&,
void
gpu_compute_field_derivatives(entity_data_gpu& edg,
    gpuTextureObject_t F, double *dF_dx, double* dF_dy, double* dF_dz);

+void
+gpu_compute_flux_lifting(entity_data_gpu& edg, gpuTextureObject_t f, double *out);
\ No newline at end of file
...
@@ -24,3 +24,12 @@ struct entity_params
    int aorder;
};
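+
+/* Dimension of the degree-AK polynomial space on a tetrahedron,
+ * i.e. the binomial coefficient C(AK+3, 3). */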
+constexpr size_t num_dofs_3D(size_t AK)
+{
+    return ((AK+3)*(AK+2)*(AK+1))/6;
+}
+constexpr size_t num_dofs_2D(size_t AK)
+{
+    return ((AK+2)*(AK+1))/2;
+}
\ No newline at end of file
...
@@ -22,6 +22,7 @@ entity_data_gpu::entity_data_gpu(const entity_data_cpu& ed)
    element_orientation.copyin(eo.data(), eo.size());

    num_bf = ed.num_bf;
+   num_fluxes = ed.num_fluxes;
    dof_base = ed.dof_base;
    flux_base = ed.flux_base;
@@ -39,9 +40,18 @@ entity_data_gpu::entity_data_gpu(const entity_data_cpu& ed)
        dm.block(i*num_bf, 2*num_bf, num_bf, num_bf) =
            ed.differentiation_matrices.block(i*num_bf, 2*num_bf, num_bf, num_bf).transpose();
    }
    differentiation_matrices.init(dm.data(), dm.size());
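+
+   /* The CPU-side lifting matrices are stacked as per-orientation row blocks
+    * of size num_bf x 4*num_fluxes; repack them side by side into a single
+    * column-major matrix so that each orientation becomes one contiguous
+    * block in the texture. */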
+   auto LMrows = ed.num_bf;
+   auto LMcols = 4*ed.num_fluxes*ed.num_orientations;
+   MatxCM<double> lm = MatxCM<double>::Zero(LMrows, LMcols);
+   for (size_t i = 0; i < ed.num_orientations; i++)
+   {
+       lm.block(0, 4*i*ed.num_fluxes, ed.num_bf, 4*ed.num_fluxes) =
+           ed.lifting_matrices.block(i*ed.num_bf, 0, ed.num_bf, 4*ed.num_fluxes);
+   }
+   lifting_matrices.init(lm.data(), lm.size());

    jacobians.copyin(ed.jacobians.data(), ed.jacobians.size());
    cell_determinants.copyin(ed.cell_determinants.data(), ed.cell_determinants.size());
...
@@ -111,4 +111,93 @@ gpu_compute_field_derivatives(entity_data_gpu& edg,
    default:
        throw std::invalid_argument("compute_field_derivatives: invalid order");
    }
-}
\ No newline at end of file
+}
+
+template<size_t K>
+__global__ void
+gpu_lift_planar(gpuTextureObject_t flux, gpuTextureObject_t LM_tex,
+    const double * __restrict__ dets, double * __restrict__ lifted_flux,
+    int32_t num_all_elems, int32_t* orients, int32_t dof_base, int32_t flux_base)
+{
+    using KS = kernel_gpu_sizes<K>;
+
+    /* One thread per *output* dof */
+    int ofs_in_entity = (blockIdx.x * blockDim.x + threadIdx.x);
+    if (ofs_in_entity >= num_all_elems*KS::num_bf)
+        return;
+
+    int32_t cur_dof_offset = dof_base + ofs_in_entity;
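+
+    /* Each element stores its flux dofs contiguously (4 faces x num_fluxes
+     * values); the lifting-matrix row this thread computes is given by the
+     * dof's position within its element, the column block by the element
+     * orientation. */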
+    int32_t iT = ofs_in_entity / KS::num_bf;
+    int32_t elem_flux_base = flux_base + 4*iT*KS::num_fluxes;
+    int32_t iO = orients[iT];
+    int32_t LM_row = ofs_in_entity % KS::num_bf;
+    int32_t LM_orient = 4*KS::num_bf*KS::num_fluxes*iO;
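+
+    /* On planar (affine) cells the physical mass matrix is det(J) times the
+     * reference one, so applying its inverse reduces to a scale by 1/det;
+     * the face Jacobian determinants are already folded into the flux values. */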
+    double inv_det = 1./dets[iT];
+    double acc = 0.0;
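+
+    /* Dot one row of this orientation's (column-major) lifting-matrix block
+     * against the element's 4*num_fluxes flux values. */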
+    for (int32_t dof = 0; dof < 4*KS::num_fluxes; dof++)
+    {
+        int32_t l_ofs = LM_orient + LM_row + KS::num_bf*dof;
+        int32_t f_ofs = elem_flux_base + dof;
+        acc += inv_det * fetch_tex(LM_tex, l_ofs) * fetch_tex(flux, f_ofs);
+    }
+
+    lifted_flux[cur_dof_offset] += acc;
+}
+
+template<size_t K>
+void
+launch_lift_kernel(entity_data_gpu& edg, gpuTextureObject_t f, double *out)
+{
+    const auto THREADS_PER_BLOCK = 256;//kernel_gpu_sizes<K>::deriv_threads;
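+    /* One thread per output dof: round the block count up so all dofs are covered. */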
+    auto num_blocks = edg.num_bf*edg.num_all_elems/THREADS_PER_BLOCK;
+    if (edg.num_bf*edg.num_all_elems % THREADS_PER_BLOCK)
+        num_blocks += 1;
+
+    auto Ltex = edg.lifting_matrices.get_texture();
+    int32_t num_elems = edg.num_all_elems;
+    auto orients = edg.element_orientation.data();
+    auto num_orients = edg.element_orientation.size();
+    auto dets = edg.cell_determinants.data();
+
+    if (edg.g_order == 1)
+        gpu_lift_planar<K><<<num_blocks, THREADS_PER_BLOCK>>>(f, Ltex,
+            dets, out, num_elems, orients, edg.dof_base, edg.flux_base);
+    //else
+    //    compute_field_derivatives_kernel_curved<1>(ed, f, df_dx, df_dy, df_dz);
+}
+
+void
+gpu_compute_flux_lifting(entity_data_gpu& edg, gpuTextureObject_t f, double *out)
+{
+    switch (edg.a_order)
+    {
+    case 1:
+        launch_lift_kernel<1>(edg, f, out);
+        break;
+    case 2:
+        launch_lift_kernel<2>(edg, f, out);
+        break;
+    case 3:
+        launch_lift_kernel<3>(edg, f, out);
+        break;
+    case 4:
+        launch_lift_kernel<4>(edg, f, out);
+        break;
+    case 5:
+        launch_lift_kernel<5>(edg, f, out);
+        break;
+    case 6:
+        launch_lift_kernel<6>(edg, f, out);
+        break;
+    default:
+        throw std::invalid_argument("gpu_compute_flux_lifting: invalid order");
+    }
+}
#include "kernels_gpu.h" #include "kernels_gpu.h"
#if 0
/* Return how many dblocks are needed to store this entity on GPU. */ /* Return how many dblocks are needed to store this entity on GPU. */
size_t gpu_dblocks(const entity_data_cpu& ed) size_t gpu_dblocks(const entity_data_cpu& ed)
{ {
@@ -52,3 +53,4 @@ void reshape_dofs(const entity_data_cpu& ed, const entity_data_gpu& edg,
        }
    }
}
+#endif
...
@@ -39,8 +39,6 @@ make_geometry(int order, double mesh_h)
    gmm::setSize(vp, mesh_h);
}

-#define WRITE_TEST_OUTPUTS
-
int test_lifting(int geometric_order, int approximation_order)
{
    std::vector<double> sizes({ 0.32, 0.16, 0.08, 0.04 });
@@ -170,8 +168,10 @@ int test_lifting(int geometric_order, int approximation_order)
        std::vector<double> var_vol( mod.num_cells() );
#endif

+       /* Compute (∇∙F,g)_T = (F∙n,g)_∂T - (F,∇g)_T on each element T
+        * to verify that the lifting computation is correct */
        double err = 0.0;
        for (auto& e : mod)
        {
            for (size_t iT = 0; iT < e.num_cells(); iT++)
@@ -216,16 +216,24 @@ int test_lifting(int geometric_order, int approximation_order)
            }
        }

+       errors.push_back( std::sqrt(err) );

#ifdef WRITE_TEST_OUTPUTS
        silodb.write_zonal_variable("expected_div_F", var_ediv_F);
        silodb.write_zonal_variable("div_F", var_div_F);
        silodb.write_zonal_variable("lift", var_lift);
        silodb.write_zonal_variable("vol", var_vol);
#endif

-       std::cout << std::sqrt(err) << std::endl;
+       std::cout << "Error: " << std::sqrt(err) << std::endl;
    }

+   double rate = 0.0;
+   std::cout << Byellowfg << "rate" << reset << std::endl;
+   for (size_t i = 1; i < sizes.size(); i++)
+       std::cout << (rate = std::log2(errors[i-1]/errors[i]) ) << std::endl;
+   COMPARE_VALUES_ABSOLUTE("lifting", rate, double(approximation_order), 0.2);
+
    return 0;
}
...
#include <iostream>
#include <iomanip>
#include <sstream>
#include <unistd.h>
#include "test.h"
#include "gmsh_io.h"
#include "sgr.hpp"
#include "entity_data.h"
#include "kernels_cpu.h"
#include "timecounter.h"
#include "silo_output.hpp"
#include "kernels_gpu.h"
using namespace sgr;
static void
make_geometry(int order, double mesh_h)
{
gm::add("difftest");
std::vector<std::pair<int,int>> objects;
objects.push_back(
std::make_pair(3, gmo::addBox(0.0, 0.0, 0.0, 1.0, 1.0, 0.5) )
);
objects.push_back(
std::make_pair(3, gmo::addBox(0.0, 0.0, 0.5, 0.5, 0.5, 0.5) )
);
std::vector<std::pair<int, int>> tools;
gmsh::vectorpair odt;
std::vector<gmsh::vectorpair> odtm;
gmo::fragment(objects, tools, odt, odtm);
gmo::synchronize();
gvp_t vp;
gm::getEntities(vp);
gmm::setSize(vp, mesh_h);
}
int test_lifting(int geometric_order, int approximation_order)
{
    std::vector<double> sizes({ 0.32, 0.16, 0.08, 0.04 });
    std::vector<double> errors;

    std::cout << cyanfg << "Testing geometric order " << geometric_order;
    std::cout << ", approximation order = " << approximation_order << reset;
    std::cout << std::endl;

    /* Test field */
    auto Fx = [](const point_3d& pt) -> double {
        return std::sin(M_PI*pt.x());
    };
    auto Fy = [](const point_3d& pt) -> double {
        return std::sin(M_PI*pt.y());
    };
    auto Fz = [](const point_3d& pt) -> double {
        return std::sin(M_PI*pt.z());
    };
    auto F = [&](const point_3d& pt) -> vecxd {
        vec3d Fret;
        Fret(0) = Fx(pt);
        Fret(1) = Fy(pt);
        Fret(2) = Fz(pt);
        return Fret;
    };

    /* Expected divergence */
    auto div_F = [](const point_3d& pt) -> double {
        auto dFx_dx = M_PI*std::cos(M_PI*pt.x());
        auto dFy_dy = M_PI*std::cos(M_PI*pt.y());
        auto dFz_dz = M_PI*std::cos(M_PI*pt.z());
        return dFx_dx + dFy_dy + dFz_dz;
    };
    for (size_t sz = 0; sz < sizes.size(); sz++)
    {
        double h = sizes[sz];
        make_geometry(0,h);
        model mod(geometric_order, approximation_order);

#ifdef WRITE_TEST_OUTPUTS
        std::stringstream ss;
        ss << "lift_go_" << geometric_order << "_ao_" << approximation_order;
        ss << "_seq_" << sz << ".silo";

        silo silodb;
        silodb.create_db(ss.str());
        silodb.import_mesh_from_gmsh();
        silodb.write_mesh();
#endif

        auto model_num_dofs = mod.num_dofs();
        auto model_num_fluxes = mod.num_fluxes();

        vecxd Pdiv_F = vecxd::Zero(model_num_dofs);
        vecxd PFdotn = vecxd::Zero(model_num_fluxes);
        vecxd LiftF = vecxd::Zero(model_num_dofs);

        std::vector<entity_data_gpu> edgs;
        for (auto& e : mod)
        {
            e.project(div_F, Pdiv_F);
            entity_data_cpu ed;
            e.populate_entity_data(ed);

            vecxd PFx = e.project(Fx);
            vecxd PFy = e.project(Fy);
            vecxd PFz = e.project(Fz);
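
            /* Build the boundary data for the lifting kernel: at each face
             * flux dof store F∙n premultiplied by the face Jacobian determinant. */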
            for (size_t iT = 0; iT < e.num_cells(); iT++)
            {
                for (size_t iF = 0; iF < 4; iF++)
                {
                    auto face_det = ed.face_determinants[4*iT+iF];
                    vec3d n = ed.normals.row(4*iT+iF);
                    for (size_t k = 0; k < ed.num_fluxes; k++)
                    {
                        auto base = e.flux_base();
                        auto ofs = (4*iT+iF)*ed.num_fluxes + k;
                        auto Fx = PFx( ed.fluxdofs_mine[ofs] );
                        auto Fy = PFy( ed.fluxdofs_mine[ofs] );
                        auto Fz = PFz( ed.fluxdofs_mine[ofs] );
                        PFdotn(base+ofs) = face_det*Fx*n(0) +
                                           face_det*Fy*n(1) +
                                           face_det*Fz*n(2);
                    }
                }
            }

            entity_data_gpu edg(ed);
            edgs.push_back( std::move(edg) );
        }
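
        /* Upload the flux values as a texture and allocate the output dof
         * vector on the device. */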
        texture_allocator<double> PFdotn_gpu(PFdotn.data(), PFdotn.size());
        device_vector<double> LiftF_gpu(LiftF.data(), LiftF.size());

        for (auto& edg : edgs)
        {
            timecounter_gpu tc;
            tc.tic();
            gpu_compute_flux_lifting(edg, PFdotn_gpu.get_texture(), LiftF_gpu.data());
            double time = tc.toc();

            auto num_cells = edg.num_all_elems;
            if (geometric_order == 1)
            {
                std::cout << "Kernel runtime: " << time << " seconds. Estimated performance: ";
                double flops = 3*(edg.num_bf)*4*edg.num_fluxes*num_cells;
                std::cout << flops/(1e9*time) << " GFlops/s" << std::endl;
            }
            else
            {
                //std::cout << "Kernel runtime: " << time << " seconds. Estimated performance: ";
                //double flops = ((21*edg.num_bf+6)*edg.num_bf*edg.num_qp + 3*(2*edg.num_bf-1)*edg.num_bf)*num_cells;
                //std::cout << flops/(1e9*time) << " GFlops/s" << std::endl;
            }
        }

        LiftF_gpu.copyout(LiftF.data());

#ifdef WRITE_TEST_OUTPUTS
        std::vector<double> var_ediv_F( mod.num_cells() );
        std::vector<double> var_div_F( mod.num_cells() );
        std::vector<double> var_lift( mod.num_cells() );
        std::vector<double> var_vol( mod.num_cells() );
#endif

        /* Compute (∇∙F,g)_T = (F∙n,g)_∂T - (F,∇g)_T on each element T
         * to verify that the lifting computation is correct */
        double err = 0.0;
        for (auto& e : mod)
        {
            for (size_t iT = 0; iT < e.num_cells(); iT++)
            {
                auto& pe = e.cell(iT);
                auto& re = e.cell_refelem(pe);
                auto num_bf = re.num_basis_functions();

                matxd mass = matxd::Zero(num_bf, num_bf);
                vecxd vol = vecxd::Zero(num_bf);

                const auto pqps = pe.integration_points();
                for (size_t iQp = 0; iQp < pqps.size(); iQp++)
                {
                    const auto& pqp = pqps[iQp];
                    const vecxd phi = re.basis_functions(iQp);
                    const matxd dphi = re.basis_gradients(iQp);
                    const mat3d J = pe.jacobian(iQp);
                    mass += pqp.weight() * phi * phi.transpose();
                    vol += pqp.weight() * (dphi*J.inverse()) * F(pqp.point());
                }

                auto ofs = e.cell_global_dof_offset(iT);
                vecxd expected_div_F = Pdiv_F.segment(ofs, num_bf);
                vecxd Plf = LiftF.segment(ofs, num_bf);
                vecxd Pvol = mass.ldlt().solve(vol);
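
                /* DG identity: the lifted boundary term minus the mass-inverted
                 * volume term must reproduce the projected divergence. */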
                vecxd computed_div_F = Plf - Pvol;
                vecxd diff = computed_div_F - expected_div_F;
                double loc_err = diff.dot(mass*diff);
                err += loc_err;

                auto bar = pe.barycenter();
                vecxd phi_bar = re.basis_functions({1./3., 1./3., 1./3.});
#ifdef WRITE_TEST_OUTPUTS
                auto gi = e.cell_global_index_by_gmsh(iT);
                var_ediv_F[gi] = div_F(bar);
                var_div_F[gi] = computed_div_F.dot(phi_bar);
                var_lift[gi] = Plf.dot(phi_bar);
                var_vol[gi] = Pvol.dot(phi_bar);
#endif
            }
        }

        errors.push_back( std::sqrt(err) );

#ifdef WRITE_TEST_OUTPUTS
        silodb.write_zonal_variable("expected_div_F", var_ediv_F);
        silodb.write_zonal_variable("div_F", var_div_F);
        silodb.write_zonal_variable("lift", var_lift);
        silodb.write_zonal_variable("vol", var_vol);
#endif

        std::cout << "Error: " << std::sqrt(err) << std::endl;
    }

    double rate = 0.0;
    std::cout << Byellowfg << "rate" << reset << std::endl;
    for (size_t i = 1; i < sizes.size(); i++)
        std::cout << (rate = std::log2(errors[i-1]/errors[i]) ) << std::endl;
    COMPARE_VALUES_ABSOLUTE("lifting", rate, double(approximation_order), 0.2);

    return 0;
}
int main(void)
{
    gmsh::initialize();
    gmsh::option::setNumber("General.Terminal", 0);
    gmsh::option::setNumber("Mesh.Algorithm", 1);
    gmsh::option::setNumber("Mesh.Algorithm3D", 1);

    int failed_tests = 0;

    std::cout << Bmagentafg << " *** TESTING: LIFTING ***" << reset << std::endl;
    for (size_t ao = 1; ao < 2; ao++)
        test_lifting(1, ao);

    gmsh::finalize();

    return failed_tests;
}