Commit 81b5ae3b authored by Matteo Cicuttin

GPU lifting kernel

parent 0c50ab25
CMakeLists.txt
@@ -260,15 +260,18 @@ target_link_libraries(test_mass gmshdg)
 add_executable(test_differentiation tests/test_differentiation.cpp)
 target_link_libraries(test_differentiation gmshdg)
 
-add_executable(test_lifting tests/test_lifting.cpp)
-target_link_libraries(test_lifting gmshdg)
-
 add_executable(test_differentiation_gpu tests/test_differentiation_gpu.cpp)
 target_link_libraries(test_differentiation_gpu gmshdg)
 
 add_executable(profile_differentiation_gpu tests/profile_differentiation_gpu.cpp)
 target_link_libraries(profile_differentiation_gpu gmshdg)
 
+add_executable(test_lifting tests/test_lifting.cpp)
+target_link_libraries(test_lifting gmshdg)
+
+add_executable(test_lifting_gpu tests/test_lifting_gpu.cpp)
+target_link_libraries(test_lifting_gpu gmshdg)
+
 #add_executable(test test.cpp test_basics.cpp test_mass.cpp test_differentiation.cpp test_differentiation_gpu.cpp test_lifting.cpp ${COMMON_SOURCES})
 #target_link_libraries(test ${LINK_LIBS})

@@ -74,12 +74,14 @@ struct entity_data_gpu
     size_t num_all_elems;
     device_vector<int32_t> element_orientation;
     size_t num_bf;
+    size_t num_fluxes;
     size_t dof_base;
     size_t flux_base;
 
     texture_allocator<double> differentiation_matrices;
+    texture_allocator<double> lifting_matrices;
 
     device_vector<double> jacobians;
     device_vector<double> cell_determinants;

@@ -165,13 +165,13 @@ public:
     }
 
     /* allocate memory and copyin data at ptr */
-    device_vector(T *src, size_t size)
+    device_vector(const T *src, size_t size)
         : d_vptr(nullptr), vsize(0)
     {
        copyin(src, size);
     }
 
-    void copyin(T *src, size_t size)
+    void copyin(const T *src, size_t size)
     {
         if (d_vptr != nullptr)
             (void) gpuFree(d_vptr);

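The const-qualified overloads matter because device buffers are now filled from read-only host data. A minimal editorial usage sketch (the helper `upload` is hypothetical, `vecxd` is the project's Eigen vector typedef):

/* Sketch, not part of the commit: with the const overloads, uploading from
 * a const Eigen vector compiles without a const_cast, since .data() on a
 * const vecxd returns const double*. */
void upload(const vecxd& PFdotn)
{
    device_vector<double> dv(PFdotn.data(), PFdotn.size());
}
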
 #pragma once
 
-//#include <omp.h>
-
-template<size_t AK>
-struct num_dofs_3D
-{
-    static const size_t value = ((AK+3)*(AK+2)*(AK+1))/6;
-};
-
-template<size_t AK>
-struct num_dofs_2D
-{
-    static const size_t value = ((AK+2)*(AK+1))/2;
-};
+#include "types.h"
 
 template<size_t AK>
 struct kernel_cpu_sizes
 {
-    static const size_t num_bf = num_dofs_3D<AK>::value;
-    static const size_t num_fluxes = num_dofs_2D<AK>::value;
+    static const size_t num_bf = num_dofs_3D(AK);
+    static const size_t num_fluxes = num_dofs_2D(AK);
     static const size_t num_faces = 4;
     static const size_t num_all_fluxes = num_fluxes * num_faces;
 };

 #pragma once
 
+#include "types.h"
 #include "entity_data.h"
 
-/* Compile-time constants configuring sizes for the kernels.
- * Elements are organized in blocks of cells_per_dblock elements each.
- * Each dblock contains dblock_size dofs, where dblock_size must be a
- * multiple of the warp size. This means that some dofs in the dblock
- * will be padding. The effective dofs used are dblock_bf.*/
+/* Compile-time constants configuring sizes for the kernels. */
 template<size_t K>
 struct kernel_gpu_sizes;
 
@@ -14,82 +10,66 @@ struct kernel_gpu_sizes;
 template<>
 struct kernel_gpu_sizes<1>
 {
-    static const size_t num_bf = 4;
-    static const size_t cells_per_dblock = 32;
-    static const size_t dblock_bf = num_bf * cells_per_dblock;
-    static const size_t dblock_size = 128;
-    static const size_t parallel_dblocks = 4;
+    static const size_t num_bf = num_dofs_3D(1);
+    static const size_t num_fluxes = num_dofs_2D(1);
 
     static const size_t deriv_threads = 128;
+    static const size_t lifting_threads = 128;
 };
 
 template<>
 struct kernel_gpu_sizes<2>
 {
-    static const size_t num_bf = 10;
-    static const size_t cells_per_dblock = 12;
-    static const size_t dblock_bf = num_bf * cells_per_dblock;
-    static const size_t dblock_size = 128;
-    static const size_t parallel_dblocks = 1;
+    static const size_t num_bf = num_dofs_3D(2);
+    static const size_t num_fluxes = num_dofs_2D(2);
 
     static const size_t deriv_threads = 128;
+    static const size_t lifting_threads = 128;
 };
 
 template<>
 struct kernel_gpu_sizes<3>
 {
-    static const size_t num_bf = 20;
-    static const size_t cells_per_dblock = 12;
-    static const size_t dblock_bf = num_bf * cells_per_dblock;
-    static const size_t dblock_size = 128;
-    static const size_t parallel_dblocks = 1;
+    static const size_t num_bf = num_dofs_3D(3);
+    static const size_t num_fluxes = num_dofs_2D(3);
 
     static const size_t deriv_threads = 128;
+    static const size_t lifting_threads = 128;
 };
 
 template<>
 struct kernel_gpu_sizes<4>
 {
-    static const size_t num_bf = 35;
-    static const size_t cells_per_dblock = 12;
-    static const size_t dblock_bf = num_bf * cells_per_dblock;
-    static const size_t dblock_size = 128;
-    static const size_t parallel_dblocks = 1;
+    static const size_t num_bf = num_dofs_3D(4);
+    static const size_t num_fluxes = num_dofs_2D(4);
 
     static const size_t deriv_threads = 512;
+    static const size_t lifting_threads = 128;
 };
 
 template<>
 struct kernel_gpu_sizes<5>
 {
-    static const size_t num_bf = 56;
-    static const size_t cells_per_dblock = 12;
-    static const size_t dblock_bf = num_bf * cells_per_dblock;
-    static const size_t dblock_size = 128;
-    static const size_t parallel_dblocks = 1;
+    static const size_t num_bf = num_dofs_3D(5);
+    static const size_t num_fluxes = num_dofs_2D(5);
 
     static const size_t deriv_threads = 1024;
+    static const size_t lifting_threads = 128;
 };
 
 template<>
 struct kernel_gpu_sizes<6>
 {
-    static const size_t num_bf = 84;
-    static const size_t cells_per_dblock = 12;
-    static const size_t dblock_bf = num_bf * cells_per_dblock;
-    static const size_t dblock_size = 128;
-    static const size_t parallel_dblocks = 1;
+    static const size_t num_bf = num_dofs_3D(6);
+    static const size_t num_fluxes = num_dofs_2D(6);
 
     static const size_t deriv_threads = 1024;
+    static const size_t lifting_threads = 128;
 };
 
 struct kernel_gpu_sizes_runtime
 {
     size_t num_bf;
-    size_t cells_per_dblock;
-    size_t dblock_bf;
-    size_t dblock_size;
-    size_t parallel_dblocks;
 };
 
 template<size_t N>
@@ -98,10 +78,7 @@ kernel_gpu_sizes_runtime get_kernel_gpu_sizes()
     kernel_gpu_sizes_runtime ret;
     ret.num_bf = kernel_gpu_sizes<N>::num_bf;
-    ret.cells_per_dblock = kernel_gpu_sizes<N>::cells_per_dblock;
-    ret.dblock_bf = kernel_gpu_sizes<N>::dblock_bf;
-    ret.dblock_size = kernel_gpu_sizes<N>::dblock_size;
-    ret.parallel_dblocks = kernel_gpu_sizes<N>::parallel_dblocks;
     return ret;
 };
 
@@ -130,3 +107,6 @@ void reshape_dofs(const entity_data_cpu&, const entity_data_gpu&, const vecxd&,
 void
 gpu_compute_field_derivatives(entity_data_gpu& edg,
     gpuTextureObject_t F, double *dF_dx, double* dF_dy, double* dF_dz);
+
+void
+gpu_compute_flux_lifting(entity_data_gpu& edg, gpuTextureObject_t f, double *out);
\ No newline at end of file

@@ -24,3 +24,12 @@ struct entity_params
     int aorder;
 };
+
+constexpr size_t num_dofs_3D(size_t AK)
+{
+    return ((AK+3)*(AK+2)*(AK+1))/6;
+}
+
+constexpr size_t num_dofs_2D(size_t AK)
+{
+    return ((AK+2)*(AK+1))/2;
+}
\ No newline at end of file
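These closed forms are the dimensions of P_k on a tetrahedron and on a triangle, i.e. exactly the num_bf values previously hardcoded in kernel_gpu_sizes. A compile-time editorial check (not part of the commit) confirming a few of them:

/* Editorial sketch: verify the constexpr counts against the values
 * that used to be written out by hand (4, 10, 20, ..., 84). */
static_assert(num_dofs_3D(1) == 4  && num_dofs_2D(1) == 3,  "P1 dof counts");
static_assert(num_dofs_3D(2) == 10 && num_dofs_2D(2) == 6,  "P2 dof counts");
static_assert(num_dofs_3D(3) == 20 && num_dofs_2D(3) == 10, "P3 dof counts");
static_assert(num_dofs_3D(6) == 84 && num_dofs_2D(6) == 28, "P6 dof counts");
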
@@ -22,6 +22,7 @@ entity_data_gpu::entity_data_gpu(const entity_data_cpu& ed)
     element_orientation.copyin(eo.data(), eo.size());
 
     num_bf = ed.num_bf;
+    num_fluxes = ed.num_fluxes;
     dof_base = ed.dof_base;
     flux_base = ed.flux_base;
@@ -39,9 +40,18 @@ entity_data_gpu::entity_data_gpu(const entity_data_cpu& ed)
         dm.block(i*num_bf, 2*num_bf, num_bf, num_bf) =
             ed.differentiation_matrices.block(i*num_bf, 2*num_bf, num_bf, num_bf).transpose();
     }
     differentiation_matrices.init(dm.data(), dm.size());
 
+    auto LMrows = ed.num_bf;
+    auto LMcols = 4*ed.num_fluxes*ed.num_orientations;
+    MatxCM<double> lm = MatxCM<double>::Zero(LMrows, LMcols);
+    for (size_t i = 0; i < ed.num_orientations; i++)
+    {
+        lm.block(0, 4*i*ed.num_fluxes, ed.num_bf, 4*ed.num_fluxes) =
+            ed.lifting_matrices.block(i*ed.num_bf, 0, ed.num_bf, 4*ed.num_fluxes);
+    }
+    lifting_matrices.init(lm.data(), lm.size());
+
     jacobians.copyin(ed.jacobians.data(), ed.jacobians.size());
     cell_determinants.copyin(ed.cell_determinants.data(), ed.cell_determinants.size());

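The repack concatenates the per-orientation num_bf × 4·num_fluxes lifting blocks into one column-major array, which is the layout the GPU kernel's texture indexing assumes. A small editorial helper (hypothetical, not in the commit) makes the linear offset explicit:

/* Sketch: linear texture offset of lifting-matrix entry (row, dof) for
 * orientation iO, matching l_ofs = LM_orient + LM_row + num_bf*dof
 * in gpu_lift_planar below. */
static inline size_t
lm_offset(size_t row, size_t dof, size_t iO, size_t num_bf, size_t num_fluxes)
{
    return 4*num_bf*num_fluxes*iO   /* skip the blocks of previous orientations */
         + num_bf*dof               /* column-major: start of column 'dof' */
         + row;                     /* row within that column */
}
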
@@ -112,3 +112,92 @@ gpu_compute_field_derivatives(entity_data_gpu& edg,
             throw std::invalid_argument("compute_field_derivatives: invalid order");
     }
 }
+
+template<size_t K>
+__global__ void
+gpu_lift_planar(gpuTextureObject_t flux, gpuTextureObject_t LM_tex,
+    const double * __restrict__ dets, double * __restrict__ lifted_flux,
+    int32_t num_all_elems, int32_t* orients, int32_t dof_base, int32_t flux_base)
+{
+    using KS = kernel_gpu_sizes<K>;
+
+    /* One thread per *output* dof */
+    int ofs_in_entity = (blockIdx.x * blockDim.x + threadIdx.x);
+    if (ofs_in_entity >= num_all_elems*KS::num_bf)
+        return;
+
+    int32_t cur_dof_offset = dof_base + ofs_in_entity;
+
+    int32_t iT = ofs_in_entity / KS::num_bf;
+    int32_t elem_flux_base = flux_base + 4*iT*KS::num_fluxes;
+    int32_t iO = orients[iT];
+    int32_t LM_row = ofs_in_entity % KS::num_bf;
+    int32_t LM_orient = 4*KS::num_bf*KS::num_fluxes*iO;
+    double inv_det = 1./dets[iT];
+
+    double acc = 0.0;
+    for (int32_t dof = 0; dof < 4*KS::num_fluxes; dof++)
+    {
+        int32_t l_ofs = LM_orient + LM_row + KS::num_bf*dof;
+        int32_t f_ofs = elem_flux_base + dof;
+        acc += inv_det * fetch_tex(LM_tex, l_ofs) * fetch_tex(flux, f_ofs);
+    }
+
+    lifted_flux[cur_dof_offset] += acc;
+}
+
+template<size_t K>
+void
+launch_lift_kernel(entity_data_gpu& edg, gpuTextureObject_t f, double *out)
+{
+    const auto THREADS_PER_BLOCK = 256;//kernel_gpu_sizes<K>::deriv_threads;
+    auto num_blocks = edg.num_bf*edg.num_all_elems/THREADS_PER_BLOCK;
+    if (edg.num_bf*edg.num_all_elems % THREADS_PER_BLOCK)
+        num_blocks += 1;
+
+    auto Ltex = edg.lifting_matrices.get_texture();
+    int32_t num_elems = edg.num_all_elems;
+    auto orients = edg.element_orientation.data();
+    auto num_orients = edg.element_orientation.size();
+    auto dets = edg.cell_determinants.data();
+
+    if (edg.g_order == 1)
+        gpu_lift_planar<K><<<num_blocks, THREADS_PER_BLOCK>>>(f, Ltex,
+            dets, out, num_elems, orients, edg.dof_base, edg.flux_base);
+    //else
+    //    compute_field_derivatives_kernel_curved<1>(ed, f, df_dx, df_dy, df_dz);
+}
+
+void
+gpu_compute_flux_lifting(entity_data_gpu& edg, gpuTextureObject_t f, double *out)
+{
+    switch (edg.a_order)
+    {
+        case 1:
+            launch_lift_kernel<1>(edg, f, out);
+            break;
+        case 2:
+            launch_lift_kernel<2>(edg, f, out);
+            break;
+        case 3:
+            launch_lift_kernel<3>(edg, f, out);
+            break;
+        case 4:
+            launch_lift_kernel<4>(edg, f, out);
+            break;
+        case 5:
+            launch_lift_kernel<5>(edg, f, out);
+            break;
+        case 6:
+            launch_lift_kernel<6>(edg, f, out);
+            break;
+        default:
+            throw std::invalid_argument("gpu_compute_flux_lifting: invalid order");
+    }
+}
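For clarity, this is what each thread of gpu_lift_planar computes: element iT applies its orientation's num_bf × 4·num_fluxes lifting block to the element's 4·num_fluxes face-flux dofs, scaled by the inverse cell Jacobian determinant. An editorial host-side reference (a sketch, not code from the commit; the dof_base/flux_base offsets are omitted):

#include <cstddef>
#include <cstdint>

/* Scalar reference of gpu_lift_planar's arithmetic. LM is packed as in
 * entity_data_gpu: per orientation, a column-major num_bf x (4*num_fluxes)
 * block. */
void lift_reference(const double *LM, const double *flux, const double *dets,
                    const int32_t *orients, double *lifted_flux,
                    size_t num_elems, size_t num_bf, size_t num_fluxes)
{
    for (size_t iT = 0; iT < num_elems; iT++)
    {
        const double *L = LM + 4*num_bf*num_fluxes*orients[iT]; /* LM_orient */
        const double *F = flux + 4*iT*num_fluxes;               /* elem_flux_base */
        double inv_det = 1./dets[iT];
        for (size_t row = 0; row < num_bf; row++)   /* one GPU thread per row */
        {
            double acc = 0.0;
            for (size_t dof = 0; dof < 4*num_fluxes; dof++)
                acc += inv_det * L[row + num_bf*dof] * F[dof];
            lifted_flux[iT*num_bf + row] += acc;    /* accumulate, as the kernel does */
        }
    }
}
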
#include "kernels_gpu.h" #include "kernels_gpu.h"
#if 0
/* Return how many dblocks are needed to store this entity on GPU. */ /* Return how many dblocks are needed to store this entity on GPU. */
size_t gpu_dblocks(const entity_data_cpu& ed) size_t gpu_dblocks(const entity_data_cpu& ed)
{ {
...@@ -52,3 +53,4 @@ void reshape_dofs(const entity_data_cpu& ed, const entity_data_gpu& edg, ...@@ -52,3 +53,4 @@ void reshape_dofs(const entity_data_cpu& ed, const entity_data_gpu& edg,
} }
} }
} }
#endif
tests/test_lifting.cpp
@@ -39,8 +39,6 @@ make_geometry(int order, double mesh_h)
     gmm::setSize(vp, mesh_h);
 }
 
-#define WRITE_TEST_OUTPUTS
-
 int test_lifting(int geometric_order, int approximation_order)
 {
     std::vector<double> sizes({ 0.32, 0.16, 0.08, 0.04 });
@@ -170,8 +168,10 @@ int test_lifting(int geometric_order, int approximation_order)
     std::vector<double> var_vol( mod.num_cells() );
 #endif
 
-    double err = 0.0;
+    /* Compute (∇∙F,g)_T = (F∙n,g)_∂T - (F,∇g)_T on each element T
+     * to verify that the lifting computation is correct */
+    double err = 0.0;
     for (auto& e : mod)
     {
         for (size_t iT = 0; iT < e.num_cells(); iT++)
@@ -216,16 +216,24 @@ int test_lifting(int geometric_order, int approximation_order)
         }
     }
 
+    errors.push_back( std::sqrt(err) );
+
 #ifdef WRITE_TEST_OUTPUTS
     silodb.write_zonal_variable("expected_div_F", var_ediv_F);
     silodb.write_zonal_variable("div_F", var_div_F);
     silodb.write_zonal_variable("lift", var_lift);
     silodb.write_zonal_variable("vol", var_vol);
 #endif
 
-    std::cout << std::sqrt(err) << std::endl;
+    std::cout << "Error: " << std::sqrt(err) << std::endl;
     }
 
+    double rate = 0.0;
+    std::cout << Byellowfg << "rate" << reset << std::endl;
+    for (size_t i = 1; i < sizes.size(); i++)
+        std::cout << (rate = std::log2(errors[i-1]/errors[i]) ) << std::endl;
+
+    COMPARE_VALUES_ABSOLUTE("lifting", rate, double(approximation_order), 0.2);
+
     return 0;
 }

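An editorial note on the rate check: the mesh widths in `sizes` halve at each refinement (0.32 → 0.16 → 0.08 → 0.04), so if the error scales as e(h) ≈ C·h^p, then log2(e(2h)/e(h)) = log2(2^p) = p. The test therefore compares the last observed rate against the approximation order, with an absolute tolerance of 0.2.
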
tests/test_lifting_gpu.cpp (new file)

#include <iostream>
#include <iomanip>
#include <sstream>
#include <unistd.h>
#include "test.h"
#include "gmsh_io.h"
#include "sgr.hpp"
#include "entity_data.h"
#include "kernels_cpu.h"
#include "timecounter.h"
#include "silo_output.hpp"
#include "kernels_gpu.h"
using namespace sgr;
static void
make_geometry(int order, double mesh_h)
{
gm::add("difftest");
std::vector<std::pair<int,int>> objects;
objects.push_back(
std::make_pair(3, gmo::addBox(0.0, 0.0, 0.0, 1.0, 1.0, 0.5) )
);
objects.push_back(
std::make_pair(3, gmo::addBox(0.0, 0.0, 0.5, 0.5, 0.5, 0.5) )
);
std::vector<std::pair<int, int>> tools;
gmsh::vectorpair odt;
std::vector<gmsh::vectorpair> odtm;
gmo::fragment(objects, tools, odt, odtm);
gmo::synchronize();
gvp_t vp;
gm::getEntities(vp);
gmm::setSize(vp, mesh_h);
}
int test_lifting(int geometric_order, int approximation_order)
{
std::vector<double> sizes({ 0.32, 0.16, 0.08, 0.04 });
std::vector<double> errors;
std::cout << cyanfg << "Testing geometric order " << geometric_order;
std::cout << ", approximation order = " << approximation_order << reset;
std::cout << std::endl;
/* Test field */
auto Fx = [](const point_3d& pt) -> double {
return std::sin(M_PI*pt.x());
};
auto Fy = [](const point_3d& pt) -> double {
return std::sin(M_PI*pt.y());
};
auto Fz = [](const point_3d& pt) -> double {
return std::sin(M_PI*pt.z());
};
auto F = [&](const point_3d& pt) -> vecxd {
vec3d Fret;
Fret(0) = Fx(pt);
Fret(1) = Fy(pt);
Fret(2) = Fz(pt);
return Fret;
};
/* Expected divergence */
auto div_F = [](const point_3d& pt) -> double {
auto dFx_dx = M_PI*std::cos(M_PI*pt.x());
auto dFy_dy = M_PI*std::cos(M_PI*pt.y());
auto dFz_dz = M_PI*std::cos(M_PI*pt.z());
return dFx_dx + dFy_dy + dFz_dz;
};
for (size_t sz = 0; sz < sizes.size(); sz++)
{
double h = sizes[sz];
make_geometry(0,h);
model mod(geometric_order, approximation_order);
#ifdef WRITE_TEST_OUTPUTS
std::stringstream ss;
ss << "lift_go_" << geometric_order << "_ao_" << approximation_order;
ss << "_seq_" << sz << ".silo";
silo silodb;
silodb.create_db(ss.str());
silodb.import_mesh_from_gmsh();
silodb.write_mesh();
#endif
auto model_num_dofs = mod.num_dofs();
auto model_num_fluxes = mod.num_fluxes();
vecxd Pdiv_F = vecxd::Zero(model_num_dofs);
vecxd PFdotn = vecxd::Zero(model_num_fluxes);
vecxd LiftF = vecxd::Zero(model_num_dofs);
std::vector<entity_data_gpu> edgs;
for (auto& e : mod)
{
e.project(div_F, Pdiv_F);
entity_data_cpu ed;
e.populate_entity_data(ed);
vecxd PFx = e.project(Fx);
vecxd PFy = e.project(Fy);
vecxd PFz = e.project(Fz);
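        /* Assemble the data the lifting kernel consumes: at each of the
           4*num_fluxes face dofs of every cell, store the normal trace F∙n
           scaled by the face Jacobian determinant. */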
for (size_t iT = 0; iT < e.num_cells(); iT++)
{
for (size_t iF = 0; iF < 4; iF++)
{
auto face_det = ed.face_determinants[4*iT+iF];
vec3d n = ed.normals.row(4*iT+iF);
for (size_t k = 0; k < ed.num_fluxes; k++)
{
auto base = e.flux_base();
auto ofs = (4*iT+iF)*ed.num_fluxes + k;
auto Fx = PFx( ed.fluxdofs_mine[ofs] );
auto Fy = PFy( ed.fluxdofs_mine[ofs] );
auto Fz = PFz( ed.fluxdofs_mine[ofs] );
PFdotn(base+ofs) = face_det*Fx*n(0) +
face_det*Fy*n(1) +
face_det*Fz*n(2);
}
}
}
entity_data_gpu edg(ed);
edgs.push_back( std::move(edg) );
}
texture_allocator<double> PFdotn_gpu(PFdotn.data(), PFdotn.size());
device_vector<double> LiftF_gpu(LiftF.data(), LiftF.size());
for (auto& edg : edgs)
{
timecounter_gpu tc;
tc.tic();
gpu_compute_flux_lifting(edg, PFdotn_gpu.get_texture(), LiftF_gpu.data());
double time = tc.toc();
auto num_cells = edg.num_all_elems;
if (geometric_order == 1)
{
std::cout << "Kernel runtime: " << time << " seconds. Estimated performance: ";
double flops = 3*(edg.num_bf)*4*edg.num_fluxes*num_cells;
std::cout << flops/(1e9*time) << " GFlops/s" << std::endl;
}
else
{
//std::cout << "Kernel runtime: " << time << " seconds. Estimated performance: ";
//double flops = ((21*edg.num_bf+6)*edg.num_bf*edg.num_qp + 3*(2*edg.num_bf-1)*edg.num_bf)*num_cells;
//std::cout << flops/(1e9*time) << " GFlops/s" << std::endl;
}
}
LiftF_gpu.copyout(LiftF.data());
#ifdef WRITE_TEST_OUTPUTS
std::vector<double> var_ediv_F( mod.num_cells() );
std::vector<double> var_div_F( mod.num_cells() );
std::vector<double> var_lift( mod.num_cells() );
std::vector<double> var_vol( mod.num_cells() );
#endif
/* Compute (∇∙F,g)_T = (F∙n,g)_∂T - (F,∇g)_T on each element T
* to verify that the lifting computation is correct */
double err = 0.0;
for (auto& e : mod)
{
for (size_t iT = 0; iT < e.num_cells(); iT++)
{
auto& pe = e.cell(iT);
auto& re = e.cell_refelem(pe);
auto num_bf = re.num_basis_functions();
matxd mass = matxd::Zero(num_bf, num_bf);
vecxd vol = vecxd::Zero(num_bf);
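            /* mass accumulates the local mass matrix (g,g)_T and vol the
               volume term (F,∇g)_T over the quadrature points; solving
               Pvol = mass⁻¹·vol below recovers the projected (F,∇g)_T part. */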
const auto pqps = pe.integration_points();
for(size_t iQp = 0; iQp < pqps.size(); iQp++)
{
const auto& pqp = pqps[iQp];
const vecxd phi = re.basis_functions(iQp);
const matxd dphi = re.basis_gradients(iQp);
const mat3d J = pe.jacobian(iQp);
mass += pqp.weight() * phi * phi.transpose();
vol += pqp.weight() * (dphi*J.inverse()) * F(pqp.point());
}
auto ofs = e.cell_global_dof_offset(iT);
vecxd expected_div_F = Pdiv_F.segment(ofs, num_bf);
vecxd Plf = LiftF.segment(ofs, num_bf);
vecxd Pvol = mass.ldlt().solve(vol);
vecxd computed_div_F = Plf - Pvol;
vecxd diff = computed_div_F - expected_div_F;
double loc_err = diff.dot(mass*diff);
err += loc_err;
auto bar = pe.barycenter();
vecxd phi_bar = re.basis_functions({1./3., 1./3., 1./3.});
#ifdef WRITE_TEST_OUTPUTS
auto gi = e.cell_global_index_by_gmsh(iT);
var_ediv_F[gi] = div_F(bar);
var_div_F[gi] = computed_div_F.dot(phi_bar);
var_lift[gi] = Plf.dot(phi_bar);
var_vol[gi] = Pvol.dot(phi_bar);
#endif
}
}
errors.push_back( std::sqrt(err) );
#ifdef WRITE_TEST_OUTPUTS
silodb.write_zonal_variable("expected_div_F", var_ediv_F);
silodb.write_zonal_variable("div_F", var_div_F);
silodb.write_zonal_variable("lift", var_lift);
silodb.write_zonal_variable("vol", var_vol);
#endif
std::cout << "Error: " << std::sqrt(err) << std::endl;
}
double rate = 0.0;
std::cout << Byellowfg << "rate" << reset << std::endl;
for (size_t i = 1; i < sizes.size(); i++)
std::cout << (rate = std::log2(errors[i-1]/errors[i]) ) << std::endl;
COMPARE_VALUES_ABSOLUTE("lifting", rate, double(approximation_order), 0.2);
return 0;
}
int main(void)
{
gmsh::initialize();
gmsh::option::setNumber("General.Terminal", 0);
gmsh::option::setNumber("Mesh.Algorithm", 1);
gmsh::option::setNumber("Mesh.Algorithm3D", 1);
int failed_tests = 0;
std::cout << Bmagentafg << " *** TESTING: LIFTING ***" << reset << std::endl;
for (size_t ao = 1; ao < 2; ao++)
test_lifting(1, ao);
gmsh::finalize();
return failed_tests;
}
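Note: main() exercises only geometric order 1 and approximation order 1 for now (the loop bound is ao < 2), and the curved-element branch of launch_lift_kernel is still commented out, so only the planar kernel is tested here.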