// Select Git revision
// DivideAndConquer.cpp
// test_differentiation_gpu.cpp 9.53 KiB
#include <iostream>
#include <iomanip>
#include <unistd.h>
#include "test.h"
#include "gmsh_io.h"
#include "sgr.hpp"
#include "entity_data.h"
#include "kernels_cpu.h"
#include "timecounter.h"
#include "silo_output.hpp"
#include "kernels_gpu.h"
using namespace sgr;
static void
make_geometry(int order, double mesh_h)
{
gm::add("difftest");
std::vector<std::pair<int,int>> objects;
objects.push_back(
std::make_pair(3, gmo::addBox(0.0, 0.0, 0.0, 1.0, 1.0, 0.5) )
);
objects.push_back(
std::make_pair(3, gmo::addBox(0.0, 0.0, 0.5, 0.5, 0.5, 0.5) )
);
std::vector<std::pair<int, int>> tools;
gmsh::vectorpair odt;
std::vector<gmsh::vectorpair> odtm;
gmo::fragment(objects, tools, odt, odtm);
gmo::synchronize();
gvp_t vp;
gm::getEntities(vp);
gmm::setSize(vp, mesh_h);
}
int test_differentiation_convergence(int geometric_order, int approximation_order)
{
std::vector<double> sizes({ 0.32, 0.16, 0.08, 0.04 });
std::vector<double> errors;
std::cout << cyanfg << "Testing geometric order " << geometric_order;
std::cout << ", approximation order = " << approximation_order << nofg;
std::cout << std::endl;
auto f = [](const point_3d& pt) -> double {
return std::sin(M_PI*pt.x())*std::sin(M_PI*pt.y())*std::sin(M_PI*pt.z());
};
auto df_dx = [](const point_3d& pt) -> double {
return M_PI*std::cos(M_PI*pt.x())*std::sin(M_PI*pt.y())*std::sin(M_PI*pt.z());
};
auto df_dy = [](const point_3d& pt) -> double {
return M_PI*std::sin(M_PI*pt.x())*std::cos(M_PI*pt.y())*std::sin(M_PI*pt.z());
};
auto df_dz = [](const point_3d& pt) -> double {
return M_PI*std::sin(M_PI*pt.x())*std::sin(M_PI*pt.y())*std::cos(M_PI*pt.z());
};
std::vector<double> errs_x;
std::vector<double> errs_y;
std::vector<double> errs_z;
for (size_t i = 0; i < sizes.size(); i++)
{
double h = sizes[i];
make_geometry(0,h);
model mod(geometric_order, approximation_order);
mod.build();
#ifdef WRITE_TEST_OUTPUTS
std::stringstream ss;
ss << "diff_go_" << geometric_order << "_ao_" << approximation_order;
ss << "_seq_" << i << ".silo";
silo silodb;
silodb.create_db(ss.str());
silodb.import_mesh_from_gmsh();
silodb.write_mesh();
#endif
auto model_num_dofs = mod.num_dofs();
vecxd Pf = vecxd::Zero(model_num_dofs);
vecxd Pdf_dx = vecxd::Zero(model_num_dofs);
vecxd Pdf_dy = vecxd::Zero(model_num_dofs);
vecxd Pdf_dz = vecxd::Zero(model_num_dofs);
std::vector<entity_data_gpu> edgs;
#ifdef USE_BLOCKED_GPU_KERNELS
std::vector<entity_data_cpu> eds;
size_t model_num_dofs_gpu = 0;
#else
size_t model_num_dofs_gpu = model_num_dofs;
#endif
for (auto& e : mod)
{
e.project(f, Pf);
e.project(df_dx, Pdf_dx);
e.project(df_dy, Pdf_dy);
e.project(df_dz, Pdf_dz);
entity_data_cpu ed;
e.populate_entity_data(ed, mod);
entity_data_gpu edg(ed);
#ifdef USE_BLOCKED_GPU_KERNELS
edg.dof_base = model_num_dofs_gpu;
model_num_dofs_gpu += gpu_dblocks_dofs(ed);
eds.push_back( std::move(ed) );
#endif
edgs.push_back( std::move(edg) );
}
/* Prepare I/O vectors and call kernel */
#ifdef USE_BLOCKED_GPU_KERNELS
assert(eds.size() == edgs.size());
vecxd Pf_reshaped = vecxd::Zero(model_num_dofs_gpu);
for (size_t i = 0; i < eds.size(); i++)
reshape_dofs(eds[i], edgs[i], Pf, Pf_reshaped, true);
texture_allocator<double> Pf_gpu(Pf_reshaped.data(), Pf_reshaped.size());
#else
device_vector<double> Pf_gpu(Pf.data(), Pf.size());
#endif
device_vector<double> df_dx_gpu(model_num_dofs_gpu);
device_vector<double> df_dy_gpu(model_num_dofs_gpu);
device_vector<double> df_dz_gpu(model_num_dofs_gpu);
for (auto& edg : edgs)
{
timecounter_gpu tc;
tc.tic();
gpu_compute_field_derivatives(edg, Pf_gpu.data(),
df_dx_gpu.data(), df_dy_gpu.data(), df_dz_gpu.data(), 1.0);
double time = tc.toc();
auto num_cells = edg.num_all_elems;
if (geometric_order == 1)
{
std::cout << "Kernel runtime: " << time << " seconds. Estimated performance: ";
double flops = (21*edg.num_bf + 3)*edg.num_bf*num_cells;
std::cout << flops/(1e9*time) << " GFlops/s" << std::endl;
}
else
{
std::cout << "Kernel runtime: " << time << " seconds. Estimated performance: ";
double flops = ((21*edg.num_bf+6)*edg.num_bf/**edg.num_qp*/ + 3*(2*edg.num_bf-1)*edg.num_bf)*num_cells;
std::cout << flops/(1e9*time) << " GFlops/s" << std::endl;
}
}
#ifdef USE_BLOCKED_GPU_KERNELS
vecxd Cdf_dx_exp = vecxd::Zero(model_num_dofs_gpu);
vecxd Cdf_dy_exp = vecxd::Zero(model_num_dofs_gpu);
vecxd Cdf_dz_exp = vecxd::Zero(model_num_dofs_gpu);
df_dx_gpu.copyout( Cdf_dx_exp.data() );
df_dy_gpu.copyout( Cdf_dy_exp.data() );
df_dz_gpu.copyout( Cdf_dz_exp.data() );
vecxd Cdf_dx = vecxd::Zero(model_num_dofs);
vecxd Cdf_dy = vecxd::Zero(model_num_dofs);
vecxd Cdf_dz = vecxd::Zero(model_num_dofs);
assert(eds.size() == edgs.size());
Pf = vecxd::Zero(model_num_dofs);
for (size_t i = 0; i < eds.size(); i++)
{
reshape_dofs(eds[i], edgs[i], Cdf_dx_exp, Cdf_dx, false);
reshape_dofs(eds[i], edgs[i], Cdf_dy_exp, Cdf_dy, false);
reshape_dofs(eds[i], edgs[i], Cdf_dz_exp, Cdf_dz, false);
reshape_dofs(eds[i], edgs[i], Pf_reshaped, Pf, false);
}
#else
vecxd Cdf_dx = vecxd::Zero(model_num_dofs_gpu);
vecxd Cdf_dy = vecxd::Zero(model_num_dofs_gpu);
vecxd Cdf_dz = vecxd::Zero(model_num_dofs_gpu);
df_dx_gpu.copyout( Cdf_dx.data() );
df_dy_gpu.copyout( Cdf_dy.data() );
df_dz_gpu.copyout( Cdf_dz.data() );
#endif
double err_x = 0.0;
double err_y = 0.0;
double err_z = 0.0;
#ifdef WRITE_TEST_OUTPUTS
auto model_num_cells = mod.num_cells();
std::vector<double> var_Pf(model_num_cells);
std::vector<double> var_df_dx(model_num_cells);
std::vector<double> var_df_dy(model_num_cells);
std::vector<double> var_df_dz(model_num_cells);
#endif
for (auto& e : mod)
{
for (size_t iT = 0; iT < e.num_cells(); iT++)
{
auto& pe = e.cell(iT);
auto& re = e.cell_refelem(pe);
matxd mass = e.mass_matrix(iT);
auto num_bf = re.num_basis_functions();
auto ofs = e.cell_model_dof_offset(iT);
vecxd diff_x = Pdf_dx.segment(ofs, num_bf) - Cdf_dx.segment(ofs, num_bf);
vecxd diff_y = Pdf_dy.segment(ofs, num_bf) - Cdf_dy.segment(ofs, num_bf);
vecxd diff_z = Pdf_dz.segment(ofs, num_bf) - Cdf_dz.segment(ofs, num_bf);
err_x += diff_x.dot(mass*diff_x);
err_y += diff_y.dot(mass*diff_y);
err_z += diff_z.dot(mass*diff_z);
#ifdef WRITE_TEST_OUTPUTS
auto gi = e.cell_global_index_by_gmsh(iT);
assert(gi < model_num_cells);
vecxd phi_bar = re.basis_functions({1./3., 1./3., 1./3.});
var_Pf[gi] = Pf.segment(ofs, num_bf).dot(phi_bar);
var_df_dx[gi] = Cdf_dx.segment(ofs, num_bf).dot(phi_bar);
var_df_dy[gi] = Cdf_dy.segment(ofs, num_bf).dot(phi_bar);
var_df_dz[gi] = Cdf_dz.segment(ofs, num_bf).dot(phi_bar);
#endif
}
std::cout << "Errors: " << std::sqrt(err_x) << " " << std::sqrt(err_y);
std::cout << " " << std::sqrt(err_z) << std::endl;
}
#ifdef WRITE_TEST_OUTPUTS
silodb.write_zonal_variable("f", var_Pf);
silodb.write_zonal_variable("df_dx", var_df_dx);
silodb.write_zonal_variable("df_dy", var_df_dy);
silodb.write_zonal_variable("df_dz", var_df_dz);
#endif
errs_x.push_back( std::sqrt(err_x) );
errs_y.push_back( std::sqrt(err_y) );
errs_z.push_back( std::sqrt(err_z) );
}
double rate_x = 0.0;
double rate_y = 0.0;
double rate_z = 0.0;
std::cout << Byellowfg << "rate df/dx rate df/dy rate df/dz" << reset << std::endl;
for (size_t i = 1; i < sizes.size(); i++)
{
std::cout << (rate_x = std::log2(errs_x[i-1]/errs_x[i]) ) << " ";
std::cout << (rate_y = std::log2(errs_y[i-1]/errs_y[i]) ) << " ";
std::cout << (rate_z = std::log2(errs_z[i-1]/errs_z[i]) ) << std::endl;
}
COMPARE_VALUES_ABSOLUTE("df/dx", rate_x, double(approximation_order), 0.2);
COMPARE_VALUES_ABSOLUTE("df/dy", rate_y, double(approximation_order), 0.2);
COMPARE_VALUES_ABSOLUTE("df/dz", rate_z, double(approximation_order), 0.2);
return 0;
}
/* Test driver: initializes Gmsh, runs the differentiation convergence
 * test for geometric order 1 and approximation orders 1..6, then shuts
 * Gmsh down.
 *
 * Returns the sum of the values returned by
 * test_differentiation_convergence(), used as the process exit status.
 */
int main(void)
{
    gmsh::initialize();
    gmsh::option::setNumber("General.Terminal", 0);
    gmsh::option::setNumber("Mesh.Algorithm", 1);
    gmsh::option::setNumber("Mesh.Algorithm3D", 1);

    int failed_tests = 0;

    std::cout << Bmagentafg << " *** TESTING: DIFFERENTIATION ***" << reset << std::endl;

    /* int counters: the test takes int orders, so no size_t -> int
     * narrowing happens at the call. */
    for (int go = 1; go < 2; go++)
        for (int ao = 1; ao < 7; ao++)
            failed_tests += test_differentiation_convergence(go, ao);

    /* Pair initialize() with finalize(): the Gmsh API must be finalized
     * once it is no longer used. All models and device data created by
     * the tests are already out of scope here. */
    gmsh::finalize();

    return failed_tests;
}