Newer
Older
#include <iostream>
#include <iomanip>
#include <unistd.h>
#include "test.h"
#include "gmsh_io.h"
#include "sgr.hpp"
#include "entity_data.h"
#include "kernels_cpu.h"
#include "timecounter.h"
#include "silo_output.hpp"
using namespace sgr;
static void
make_geometry(int order, double mesh_h)
{
gm::add("difftest");
std::vector<std::pair<int,int>> objects;
objects.push_back(
std::make_pair(3, gmo::addBox(0.0, 0.0, 0.0, 1.0, 1.0, 0.5) )
);
objects.push_back(
std::make_pair(3, gmo::addBox(0.0, 0.0, 0.5, 0.5, 0.5, 0.5) )
);
std::vector<std::pair<int, int>> tools;
gmsh::vectorpair odt;
std::vector<gmsh::vectorpair> odtm;
gmo::fragment(objects, tools, odt, odtm);
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
gmo::synchronize();
gvp_t vp;
gm::getEntities(vp);
gmm::setSize(vp, mesh_h);
}
int test_differentiation_convergence(int geometric_order, int approximation_order)
{
std::vector<double> sizes({ 0.32, 0.16, 0.08, 0.04 });
std::vector<double> errors;
std::cout << cyanfg << "Testing geometric order " << geometric_order;
std::cout << ", approximation order = " << approximation_order << nofg;
std::cout << std::endl;
auto f = [](const point_3d& pt) -> double {
return std::sin(M_PI*pt.x())*std::sin(M_PI*pt.y())*std::sin(M_PI*pt.z());
};
auto df_dx = [](const point_3d& pt) -> double {
return M_PI*std::cos(M_PI*pt.x())*std::sin(M_PI*pt.y())*std::sin(M_PI*pt.z());
};
auto df_dy = [](const point_3d& pt) -> double {
return M_PI*std::sin(M_PI*pt.x())*std::cos(M_PI*pt.y())*std::sin(M_PI*pt.z());
};
auto df_dz = [](const point_3d& pt) -> double {
return M_PI*std::sin(M_PI*pt.x())*std::sin(M_PI*pt.y())*std::cos(M_PI*pt.z());
};
std::vector<double> errs_x;
std::vector<double> errs_y;
std::vector<double> errs_z;
for (size_t i = 0; i < sizes.size(); i++)
{
double h = sizes[i];
make_geometry(0,h);
model mod(geometric_order, approximation_order);
#ifdef WRITE_TEST_OUTPUTS
std::stringstream ss;
ss << "diff_go_" << geometric_order << "_ao_" << approximation_order;
ss << "_seq_" << i << ".silo";
silo silodb;
silodb.create_db(ss.str());
silodb.import_mesh_from_gmsh();
silodb.write_mesh();
#endif
auto model_num_dofs = mod.num_dofs();
vecxd Pf = vecxd::Zero(model_num_dofs);
vecxd Pdf_dx = vecxd::Zero(model_num_dofs);
vecxd Pdf_dy = vecxd::Zero(model_num_dofs);
vecxd Pdf_dz = vecxd::Zero(model_num_dofs);
std::vector<entity_data_gpu> edgs;
#ifdef USE_BLOCKED_GPU_KERNELS
std::vector<entity_data_cpu> eds;
size_t model_num_dofs_gpu = 0;
#else
size_t model_num_dofs_gpu = model_num_dofs;
#endif
for (auto& e : mod)
e.project(f, Pf);
e.project(df_dx, Pdf_dx);
e.project(df_dy, Pdf_dy);
e.project(df_dz, Pdf_dz);
entity_data_cpu ed;
e.populate_entity_data(ed);
entity_data_gpu edg(ed);
#ifdef USE_BLOCKED_GPU_KERNELS
edg.dof_base = model_num_dofs_gpu;
model_num_dofs_gpu += gpu_dblocks_dofs(ed);
eds.push_back( std::move(ed) );
#endif
edgs.push_back( std::move(edg) );
/* Prepare I/O vectors and call kernel */
#ifdef USE_BLOCKED_GPU_KERNELS
assert(eds.size() == edgs.size());
vecxd Pf_reshaped = vecxd::Zero(model_num_dofs_gpu);
for (size_t i = 0; i < eds.size(); i++)
reshape_dofs(eds[i], edgs[i], Pf, Pf_reshaped, true);
texture_allocator<double> Pf_gpu(Pf_reshaped.data(), Pf_reshaped.size());
#else
texture_allocator<double> Pf_gpu(Pf.data(), Pf.size());
#endif
device_vector<double> df_dx_gpu(model_num_dofs_gpu);
device_vector<double> df_dy_gpu(model_num_dofs_gpu);
device_vector<double> df_dz_gpu(model_num_dofs_gpu);
for (auto& edg : edgs)
timecounter_gpu tc;
tc.tic();
gpu_compute_field_derivatives(edg, Pf_gpu.get_texture(),
df_dx_gpu.data(), df_dy_gpu.data(), df_dz_gpu.data());
double time = tc.toc();
auto num_cells = edg.num_all_elems;
if (geometric_order == 1)
{
std::cout << "Kernel runtime: " << time << " seconds. Estimated performance: ";
double flops = 21*edg.num_bf*edg.num_bf*num_cells;
std::cout << flops/(1e9*time) << " GFlops/s" << std::endl;
}
else
{
std::cout << "Kernel runtime: " << time << " seconds. Estimated performance: ";
double flops = ((21*edg.num_bf+6)*edg.num_bf/**edg.num_qp*/ + 3*(2*edg.num_bf-1)*edg.num_bf)*num_cells;
std::cout << flops/(1e9*time) << " GFlops/s" << std::endl;
}
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
#ifdef USE_BLOCKED_GPU_KERNELS
vecxd Cdf_dx_exp = vecxd::Zero(model_num_dofs_gpu);
vecxd Cdf_dy_exp = vecxd::Zero(model_num_dofs_gpu);
vecxd Cdf_dz_exp = vecxd::Zero(model_num_dofs_gpu);
df_dx_gpu.copyout( Cdf_dx_exp.data() );
df_dy_gpu.copyout( Cdf_dy_exp.data() );
df_dz_gpu.copyout( Cdf_dz_exp.data() );
vecxd Cdf_dx = vecxd::Zero(model_num_dofs);
vecxd Cdf_dy = vecxd::Zero(model_num_dofs);
vecxd Cdf_dz = vecxd::Zero(model_num_dofs);
assert(eds.size() == edgs.size());
Pf = vecxd::Zero(model_num_dofs);
for (size_t i = 0; i < eds.size(); i++)
{
reshape_dofs(eds[i], edgs[i], Cdf_dx_exp, Cdf_dx, false);
reshape_dofs(eds[i], edgs[i], Cdf_dy_exp, Cdf_dy, false);
reshape_dofs(eds[i], edgs[i], Cdf_dz_exp, Cdf_dz, false);
reshape_dofs(eds[i], edgs[i], Pf_reshaped, Pf, false);
}
#else
vecxd Cdf_dx = vecxd::Zero(model_num_dofs_gpu);
vecxd Cdf_dy = vecxd::Zero(model_num_dofs_gpu);
vecxd Cdf_dz = vecxd::Zero(model_num_dofs_gpu);
df_dx_gpu.copyout( Cdf_dx.data() );
df_dy_gpu.copyout( Cdf_dy.data() );
df_dz_gpu.copyout( Cdf_dz.data() );
double err_x = 0.0;
double err_y = 0.0;
double err_z = 0.0;
#ifdef WRITE_TEST_OUTPUTS
auto model_num_cells = mod.num_cells();
std::vector<double> var_Pf(model_num_cells);
std::vector<double> var_df_dx(model_num_cells);
std::vector<double> var_df_dy(model_num_cells);
std::vector<double> var_df_dz(model_num_cells);
#endif
for (auto& e : mod)
for (size_t iT = 0; iT < e.num_cells(); iT++)
{
auto& pe = e.cell(iT);
auto& re = e.cell_refelem(pe);
matxd mass = e.mass_matrix(iT);
auto num_bf = re.num_basis_functions();
auto ofs = e.cell_global_dof_offset(iT);
vecxd diff_x = Pdf_dx.segment(ofs, num_bf) - Cdf_dx.segment(ofs, num_bf);
vecxd diff_y = Pdf_dy.segment(ofs, num_bf) - Cdf_dy.segment(ofs, num_bf);
vecxd diff_z = Pdf_dz.segment(ofs, num_bf) - Cdf_dz.segment(ofs, num_bf);
err_x += diff_x.dot(mass*diff_x);
err_y += diff_y.dot(mass*diff_y);
err_z += diff_z.dot(mass*diff_z);
#ifdef WRITE_TEST_OUTPUTS
auto gi = e.cell_global_index_by_gmsh(iT);
assert(gi < model_num_cells);
vecxd phi_bar = re.basis_functions({1./3., 1./3., 1./3.});
var_Pf[gi] = Pf.segment(ofs, num_bf).dot(phi_bar);
var_df_dx[gi] = Cdf_dx.segment(ofs, num_bf).dot(phi_bar);
var_df_dy[gi] = Cdf_dy.segment(ofs, num_bf).dot(phi_bar);
var_df_dz[gi] = Cdf_dz.segment(ofs, num_bf).dot(phi_bar);
#endif
}
std::cout << "Errors: " << std::sqrt(err_x) << " " << std::sqrt(err_y);
std::cout << " " << std::sqrt(err_z) << std::endl;
#ifdef WRITE_TEST_OUTPUTS
silodb.write_zonal_variable("f", var_Pf);
silodb.write_zonal_variable("df_dx", var_df_dx);
silodb.write_zonal_variable("df_dy", var_df_dy);
silodb.write_zonal_variable("df_dz", var_df_dz);
#endif
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
errs_x.push_back( std::sqrt(err_x) );
errs_y.push_back( std::sqrt(err_y) );
errs_z.push_back( std::sqrt(err_z) );
}
double rate_x = 0.0;
double rate_y = 0.0;
double rate_z = 0.0;
std::cout << Byellowfg << "rate df/dx rate df/dy rate df/dz" << reset << std::endl;
for (size_t i = 1; i < sizes.size(); i++)
{
std::cout << (rate_x = std::log2(errs_x[i-1]/errs_x[i]) ) << " ";
std::cout << (rate_y = std::log2(errs_y[i-1]/errs_y[i]) ) << " ";
std::cout << (rate_z = std::log2(errs_z[i-1]/errs_z[i]) ) << std::endl;
}
COMPARE_VALUES_ABSOLUTE("df/dx", rate_x, double(approximation_order), 0.2);
COMPARE_VALUES_ABSOLUTE("df/dy", rate_y, double(approximation_order), 0.2);
COMPARE_VALUES_ABSOLUTE("df/dz", rate_z, double(approximation_order), 0.2);
return 0;
}
int main(void)
{
gmsh::initialize();
gmsh::option::setNumber("General.Terminal", 0);
gmsh::option::setNumber("Mesh.Algorithm", 1);
gmsh::option::setNumber("Mesh.Algorithm3D", 1);
int failed_tests = 0;
std::cout << Bmagentafg << " *** TESTING: DIFFERENTIATION ***" << reset << std::endl;
for (size_t ao = go; ao < 5; ao++)
failed_tests += test_differentiation_convergence(go, ao);
return failed_tests;
}