diff --git a/include/entity.h b/include/entity.h index fc9219b97b32f734339db41f9ddabe8e1f3ca5c3..ebf3997f849c6670d7c8ff4911b72d725ee77bda 100644 --- a/include/entity.h +++ b/include/entity.h @@ -18,6 +18,10 @@ class entity int g_order; int a_order; + size_t m_dof_base; + size_t m_flux_base; + size_t m_index_base; + entity_ordering cur_elem_ordering; std::vector<reference_element> reference_cells; @@ -56,6 +60,7 @@ public: double measure(void) const; vecxd project(const scalar_function&) const; + void project(const scalar_function&, vecxd&) const; size_t num_cells(void) const; size_t num_cell_orientations(void) const; @@ -65,6 +70,11 @@ public: const reference_element& cell_refelem(size_t) const; const reference_element& cell_refelem(const physical_element&) const; + size_t cell_global_index(size_t) const; + size_t cell_global_index_by_gmsh(size_t) const; + size_t cell_local_dof_offset(size_t) const; + size_t cell_global_dof_offset(size_t) const; + size_t num_faces(void) const; std::vector<size_t> face_tags() const; @@ -73,6 +83,14 @@ public: const reference_element& face_refelem(size_t) const; const reference_element& face_refelem(const physical_element&) const; + size_t num_dofs(void) const; + size_t num_fluxes(void) const; + void base(size_t, size_t, size_t); + + size_t dof_base(void) const; + size_t flux_base(void) const; + size_t index_base(void) const; + matxd mass_matrix(size_t) const; void sort_by_orientation(void); @@ -84,7 +102,7 @@ public: void populate_entity_data(entity_data_cpu&) const; - constexpr size_t num_faces_per_elem(void) const { return 4; } + constexpr size_t num_faces_per_elem(void) const { return 4; } std::vector<physical_element>::const_iterator begin() const { return physical_cells.begin(); } diff --git a/include/entity_data.h b/include/entity_data.h index dd369949228ad52ec399e82028fb0aaafba4b191..6d96b3710ca93cc2cc1e68a76a84c29308e22b71 100644 --- a/include/entity_data.h +++ b/include/entity_data.h @@ -16,21 +16,21 @@ enum class entity_ordering { struct entity_data_cpu { - int g_order; - int a_order; + int g_order; /* Geometric order */ + int a_order; /* Approximation order */ std::vector<size_t> num_elems; /* No. of elements in the entity */ size_t num_orientations; /* No. of bf. orientations */ size_t num_bf; /* No. of dofs per elem */ size_t num_qp; /* No. of integration pts per elem */ - size_t num_fluxes; /* No. of flux dofs per elem */ + size_t num_fluxes; /* No. of fluxes per elem */ size_t num_face_qp; /* No. of integration pts per face */ size_t num_faces; /* No. of faces */ - entity_ordering ordering; + entity_ordering ordering; /* Entity element ordering */ + size_t dof_base; /* Global start position for dofs */ + size_t flux_base; /* Global start position for fluxes */ - size_t dof_base; /* Offset in the global dof vector */ - size_t flux_base; /* Offset in the global flux vector */ - std::vector<size_t> fluxdofs_mine; - std::vector<size_t> fluxdofs_neighbour; + std::vector<size_t> fluxdofs_mine; /* Flux to DOF mapping, on myself */ + std::vector<size_t> fluxdofs_neighbour; /* Flux to DOF mapping, on neighbours */ /* Inverse of mass matrices. Populated only if geometric order > 1. * num_bf*num_elems rows, num_bf columns. Matrix at offset elem*num_bf @@ -76,6 +76,7 @@ struct entity_data_gpu size_t num_bf; size_t dof_base; + size_t flux_base; texture_allocator<double> differentiation_matrices; @@ -84,6 +85,7 @@ struct entity_data_gpu entity_data_gpu(); + entity_data_gpu(entity_data_gpu&&) = default; entity_data_gpu(const entity_data_cpu&); ~entity_data_gpu(); }; diff --git a/include/gmsh_io.h b/include/gmsh_io.h index 190515764b364e82bae4e35a25d73e428d82af91..12d730b222624bec51a4ce607484b8037f8b2dd5 100644 --- a/include/gmsh_io.h +++ b/include/gmsh_io.h @@ -57,6 +57,10 @@ public: return boundary_map.at(which); } + size_t num_dofs() const; + size_t num_fluxes() const; + size_t num_cells() const; + std::vector<entity>::const_iterator begin() const; std::vector<entity>::const_iterator end() const; std::vector<entity>::iterator begin(); diff --git a/src/entity.cpp b/src/entity.cpp index 361ebffbdd5d3bb99af6a675b0b26286ffcc4a3d..71f63439a6ffbfb5fa0fee9537211e6aea05d037 100644 --- a/src/entity.cpp +++ b/src/entity.cpp @@ -9,7 +9,8 @@ entity::entity(const entity_params& ep) : dim(ep.dim), tag(ep.tag), elemType(ep.etype), g_order(ep.gorder), - a_order(ep.aorder), cur_elem_ordering(entity_ordering::GMSH) + a_order(ep.aorder), cur_elem_ordering(entity_ordering::GMSH), + m_dof_base(0), m_flux_base(0), m_index_base(0) { /* Prepare reference elements */ reference_elements_factory ref(ep); @@ -59,6 +60,8 @@ entity::measure(void) const return meas; } +/* Project a function on this entity and return the + * corresponding vector of dofs on this entity. */ vecxd entity::project(const scalar_function& function) const { @@ -92,6 +95,37 @@ entity::project(const scalar_function& function) const return ret; } +/* Project a function on this entity and store the partial result in + * the global vector pf at the offset corresponding to this entity. */ +void +entity::project(const scalar_function& function, vecxd& pf) const +{ + auto num_bf = reference_cells[0].num_basis_functions(); + + for (size_t iT = 0; iT < physical_cells.size(); iT++) + { + const auto& pe = physical_cells[iT]; + assert( pe.orientation() < reference_cells.size() ); + const auto& re = reference_cells[pe.orientation()]; + const auto pqps = pe.integration_points(); + const auto dets = pe.determinants(); + + assert(pqps.size() == dets.size()); + + matxd mass = matxd::Zero(num_bf, num_bf); + vecxd rhs = vecxd::Zero(num_bf); + for (size_t iQp = 0; iQp < pqps.size(); iQp++) + { + const auto& pqp = pqps[iQp]; + const vecxd phi = re.basis_functions(iQp); + mass += pqp.weight() * phi * phi.transpose(); + rhs += pqp.weight() * function(pqp.point()) * phi; + } + + pf.segment(m_dof_base + iT*num_bf, num_bf) = mass.ldlt().solve(rhs); + } +} + const reference_element& entity::cell_refelem(size_t iO) const { @@ -106,6 +140,35 @@ entity::cell_refelem(const physical_element& pe) const return reference_cells[pe.orientation()]; } +size_t +entity::cell_global_index(size_t iT) const +{ + assert(iT < physical_cells.size()); + return m_index_base + iT; +} + +size_t +entity::cell_global_index_by_gmsh(size_t iT) const +{ + assert(iT < physical_cells.size()); + return m_index_base + physical_cells[iT].original_position(); +} + + +size_t +entity::cell_local_dof_offset(size_t iT) const +{ + assert(reference_cells.size() > 0); + return iT * reference_cells[0].num_basis_functions(); +} + +size_t +entity::cell_global_dof_offset(size_t iT) const +{ + assert(reference_cells.size() > 0); + return m_dof_base + iT * reference_cells[0].num_basis_functions(); +} + const reference_element& entity::face_refelem(const physical_element& pe) const { @@ -158,6 +221,45 @@ entity::face(size_t iF) const return physical_faces[iF]; } +size_t +entity::num_dofs(void) const +{ + assert(reference_cells.size() > 0); + return physical_cells.size() * reference_cells[0].num_basis_functions(); +} + +size_t +entity::num_fluxes(void) const +{ + assert(reference_faces.size() > 0); + return physical_faces.size() * reference_faces[0].num_basis_functions(); +} + +void +entity::base(size_t d_base, size_t f_base, size_t i_base) +{ + m_dof_base = d_base; + m_flux_base = f_base; + m_index_base = i_base; +} + +size_t +entity::dof_base(void) const +{ + return m_dof_base; +} + +size_t +entity::flux_base(void) const +{ + return m_flux_base; +} + +size_t +entity::index_base(void) const +{ + return m_index_base; +} matxd entity::mass_matrix(size_t iT) const @@ -200,8 +302,10 @@ entity::sort_by_orientation(void) return false; }; + /* Sort cells */ std::sort(physical_cells.begin(), physical_cells.end(), comp); + /* Sort all the stuff related to faces */ std::vector<physical_element> new_pf; new_pf.resize( physical_faces.size() ); std::vector<size_t> new_ft; @@ -578,8 +682,8 @@ entity::populate_entity_data(entity_data_cpu& ed) const ed.num_fluxes = reference_faces[0].num_basis_functions(); ed.num_qp = (g_order == 1) ? 1 : reference_cells[0].num_integration_points(); ed.num_face_qp = (g_order == 1) ? 1 : reference_faces[0].num_integration_points(); - ed.dof_base = 0; - ed.flux_base = 0; + ed.dof_base = m_dof_base; + ed.flux_base = m_flux_base; populate_differentiation_matrices(ed); populate_jacobians(ed); diff --git a/src/entity_data.cpp b/src/entity_data.cpp index 5fef224fccfe036ef2d5c45aa05687e7a1f6b72e..d89975c296dffd7d2d0a5afa371e30eaf1737cee 100644 --- a/src/entity_data.cpp +++ b/src/entity_data.cpp @@ -22,8 +22,8 @@ entity_data_gpu::entity_data_gpu(const entity_data_cpu& ed) element_orientation.copyin(eo.data(), eo.size()); num_bf = ed.num_bf; - - dof_base = 0; + dof_base = ed.dof_base; + flux_base = ed.flux_base; auto rows = ed.num_bf*ed.num_orientations; auto cols = ed.num_bf*3; diff --git a/src/gmsh_io.cpp b/src/gmsh_io.cpp index 6b13afd9f291dbd5e229b8086311e7b564ae7a3d..7009681dce42cb49332b9db161adfa02e29dfee3 100644 --- a/src/gmsh_io.cpp +++ b/src/gmsh_io.cpp @@ -96,7 +96,7 @@ model::begin() const std::vector<entity>::const_iterator model::end() const { - return entities.begin(); + return entities.end(); } std::vector<entity>::iterator @@ -108,7 +108,7 @@ model::begin() std::vector<entity>::iterator model::end() { - return entities.begin(); + return entities.end(); } entity& @@ -117,6 +117,36 @@ model::entity_at(size_t pos) return entities.at(pos); } +size_t +model::num_dofs(void) const +{ + size_t ret = 0; + for (const auto& e : entities) + ret += e.num_dofs(); + + return ret; +} + +size_t +model::num_fluxes(void) const +{ + size_t ret = 0; + for (const auto& e : entities) + ret += e.num_fluxes(); + + return ret; +} + +size_t +model::num_cells(void) const +{ + size_t ret = 0; + for (const auto& e : entities) + ret += e.num_cells(); + + return ret; +} + void model::update_connectivity(const entity& e, size_t entity_index) { @@ -139,6 +169,9 @@ model::import_gmsh_entities(void) gm::getEntities(gmsh_entities, DIMENSION(3)); size_t entity_index = 0; + size_t dof_base = 0; + size_t flux_base = 0; + size_t index_base = 0; for (auto [dim, tag] : gmsh_entities) { std::vector<int> elemTypes; @@ -154,6 +187,11 @@ model::import_gmsh_entities(void) .gorder = geometric_order, .aorder = approximation_order}); e.sort_by_orientation(); + e.base(dof_base, flux_base, index_base); + dof_base += e.num_dofs(); + flux_base += e.num_fluxes(); + index_base += e.num_cells(); + update_connectivity(e, entity_index); entities.push_back( std::move(e) ); diff --git a/src/kernels_cuda.cu b/src/kernels_cuda.cu index bd95928d3e04bb892db370070550d86944470341..528311bd1466320a96c08eb224f4f1a11a2c8f83 100644 --- a/src/kernels_cuda.cu +++ b/src/kernels_cuda.cu @@ -6,18 +6,18 @@ __global__ void gpu_deriv_planar(const double *F, const double * __restrict__ J, gpuTextureObject_t DM_tex, double * __restrict__ dF_dx, double * __restrict__ dF_dy, double * __restrict__ dF_dz, - int32_t num_all_elems, int32_t* orients) + int32_t num_all_elems, int32_t* orients, int32_t dof_base) { using KS = kernel_gpu_sizes<K>; int ofs_in_entity = (blockIdx.x * blockDim.x + threadIdx.x); - - int32_t cur_dof_offset = /*edg.dof_base + */ofs_in_entity; - if (cur_dof_offset >= num_all_elems*KS::num_bf) + if (ofs_in_entity >= num_all_elems*KS::num_bf) return; - int32_t iT = cur_dof_offset / KS::num_bf; - int32_t elem_dof_base = /*edg.dof_base + */iT*KS::num_bf; + int32_t cur_dof_offset = dof_base + ofs_in_entity; + + int32_t iT = ofs_in_entity / KS::num_bf; + int32_t elem_dof_base = dof_base + iT*KS::num_bf; int32_t iO = orients[iT]; int32_t DM_row = ofs_in_entity % KS::num_bf; int32_t DM_orient = 3*KS::num_bf*KS::num_bf*iO; @@ -71,7 +71,7 @@ launch_deriv_kernel(entity_data_gpu& edg, if (edg.g_order == 1) gpu_deriv_planar<K><<<num_blocks, THREADS_PER_BLOCK>>>(f, J, - Dtex, df_dx, df_dy, df_dz, num_elems, orients); + Dtex, df_dx, df_dy, df_dz, num_elems, orients, edg.dof_base); //else // compute_field_derivatives_kernel_curved<1>(ed, f, df_dx, df_dy, df_dz); } diff --git a/tests/test_differentiation.cpp b/tests/test_differentiation.cpp index 9ffc813b127b0feef2576bdcec5d26925f5e3010..d8b17e041b6fe696f95c0c26f2739a3c83a90a26 100644 --- a/tests/test_differentiation.cpp +++ b/tests/test_differentiation.cpp @@ -9,6 +9,7 @@ #include "entity_data.h" #include "kernels_cpu.h" #include "timecounter.h" +#include "silo_output.hpp" using namespace sgr; @@ -16,7 +17,21 @@ static void make_geometry(int order, double mesh_h) { gm::add("difftest"); - gmo::addBox(0.0, 0.0, 0.0, 1.0, 1.0, 1.0); + + std::vector<std::pair<int,int>> objects; + + objects.push_back( + std::make_pair(3, gmo::addBox(0.0, 0.0, 0.0, 1.0, 1.0, 0.5) ) + ); + objects.push_back( + std::make_pair(3, gmo::addBox(0.0, 0.0, 0.5, 0.5, 0.5, 0.5) ) + ); + + std::vector<std::pair<int, int>> tools; + gmsh::vectorpair odt; + std::vector<gmsh::vectorpair> odtm; + gmo::fragment(objects, tools, odt, odtm); + gmo::synchronize(); gvp_t vp; @@ -56,58 +71,108 @@ int test_differentiation_convergence(int geometric_order, int approximation_orde make_geometry(0,h); model mod(geometric_order, approximation_order); - auto& e0 = mod.entity_at(0); - e0.sort_by_orientation(); - vecxd Pf_e0 = e0.project(f); - vecxd df_dx_e0 = vecxd::Zero( Pf_e0.rows() ); - vecxd df_dy_e0 = vecxd::Zero( Pf_e0.rows() ); - vecxd df_dz_e0 = vecxd::Zero( Pf_e0.rows() ); - - entity_data_cpu ed; - e0.populate_entity_data(ed); - - timecounter tc; - tc.tic(); - compute_field_derivatives(ed, Pf_e0, df_dx_e0, df_dy_e0, df_dz_e0); - double time = tc.toc(); - - if (geometric_order == 1) + std::stringstream ss; + ss << "diff_go_" << geometric_order << "_ao_" << approximation_order; + ss << "_seq_" << i << ".silo"; + +#ifdef WRITE_TEST_OUTPUTS + silo silodb; + silodb.create_db(ss.str()); + silodb.import_mesh_from_gmsh(); + silodb.write_mesh(); +#endif + auto model_num_dofs = mod.num_dofs(); + + vecxd Pf = vecxd::Zero(model_num_dofs); + vecxd Pdf_dx = vecxd::Zero(model_num_dofs); + vecxd Pdf_dy = vecxd::Zero(model_num_dofs); + vecxd Pdf_dz = vecxd::Zero(model_num_dofs); + vecxd Cdf_dx = vecxd::Zero(model_num_dofs); + vecxd Cdf_dy = vecxd::Zero(model_num_dofs); + vecxd Cdf_dz = vecxd::Zero(model_num_dofs); + + std::vector<entity_data_cpu> eds; + + for (auto& e : mod) { - std::cout << "Kernel runtime: " << time << " seconds. Estimated performance: "; - double flops = 21*ed.num_bf*ed.num_bf*e0.num_cells(); - std::cout << flops/(1e9*time) << " GFlops/s" << std::endl; + e.project(f, Pf); + e.project(df_dx, Pdf_dx); + e.project(df_dy, Pdf_dy); + e.project(df_dz, Pdf_dz); + entity_data_cpu ed; + e.populate_entity_data(ed); + eds.push_back( std::move(ed) ); } - else + + for (auto& ed : eds) { - std::cout << "Kernel runtime: " << time << " seconds. Estimated performance: "; - double flops = ((21*ed.num_bf+6)*ed.num_bf*ed.num_qp + 3*(2*ed.num_bf-1)*ed.num_bf)*e0.num_cells(); - std::cout << flops/(1e9*time) << " GFlops/s" << std::endl; + timecounter tc; + tc.tic(); + compute_field_derivatives(ed, Pf, Cdf_dx, Cdf_dy, Cdf_dz); + double time = tc.toc(); + + auto num_cells = num_elems_all_orientations(ed); + if (geometric_order == 1) + { + std::cout << "Kernel runtime: " << time << " seconds. Estimated performance: "; + double flops = 21*ed.num_bf*ed.num_bf*num_cells; + std::cout << flops/(1e9*time) << " GFlops/s" << std::endl; + } + else + { + std::cout << "Kernel runtime: " << time << " seconds. Estimated performance: "; + double flops = ((21*ed.num_bf+6)*ed.num_bf*ed.num_qp + 3*(2*ed.num_bf-1)*ed.num_bf)*num_cells; + std::cout << flops/(1e9*time) << " GFlops/s" << std::endl; + } } - - vecxd Pdf_dx_e0 = e0.project(df_dx); - vecxd Pdf_dy_e0 = e0.project(df_dy); - vecxd Pdf_dz_e0 = e0.project(df_dz); - + double err_x = 0.0; double err_y = 0.0; double err_z = 0.0; - for (size_t iT = 0; iT < e0.num_cells(); iT++) +#ifdef WRITE_TEST_OUTPUTS + auto model_num_cells = mod.num_cells(); + std::vector<double> var_Pf(model_num_cells); + std::vector<double> var_df_dx(model_num_cells); + std::vector<double> var_df_dy(model_num_cells); + std::vector<double> var_df_dz(model_num_cells); +#endif + + for (auto& e : mod) { - auto& pe = e0.cell(iT); - auto& re = e0.cell_refelem(pe); - matxd mass = e0.mass_matrix(iT); - auto num_bf = re.num_basis_functions(); - vecxd diff_x = Pdf_dx_e0.segment(iT*num_bf, num_bf) - df_dx_e0.segment(iT*num_bf, num_bf); - vecxd diff_y = Pdf_dy_e0.segment(iT*num_bf, num_bf) - df_dy_e0.segment(iT*num_bf, num_bf); - vecxd diff_z = Pdf_dz_e0.segment(iT*num_bf, num_bf) - df_dz_e0.segment(iT*num_bf, num_bf); - - err_x += diff_x.dot(mass*diff_x); - err_y += diff_y.dot(mass*diff_y); - err_z += diff_z.dot(mass*diff_z); + for (size_t iT = 0; iT < e.num_cells(); iT++) + { + auto& pe = e.cell(iT); + auto& re = e.cell_refelem(pe); + matxd mass = e.mass_matrix(iT); + auto num_bf = re.num_basis_functions(); + auto ofs = e.cell_global_dof_offset(iT); + vecxd diff_x = Pdf_dx.segment(ofs, num_bf) - Cdf_dx.segment(ofs, num_bf); + vecxd diff_y = Pdf_dy.segment(ofs, num_bf) - Cdf_dy.segment(ofs, num_bf); + vecxd diff_z = Pdf_dz.segment(ofs, num_bf) - Cdf_dz.segment(ofs, num_bf); + + err_x += diff_x.dot(mass*diff_x); + err_y += diff_y.dot(mass*diff_y); + err_z += diff_z.dot(mass*diff_z); + +#ifdef WRITE_TEST_OUTPUTS + auto gi = e.cell_global_index_by_gmsh(iT); + assert(gi < model_num_cells); + vecxd phi_bar = re.basis_functions({1./3., 1./3., 1./3.}); + var_df_dx[gi] = Cdf_dx.segment(ofs, num_bf).dot(phi_bar); + var_df_dy[gi] = Cdf_dy.segment(ofs, num_bf).dot(phi_bar); + var_df_dz[gi] = Cdf_dz.segment(ofs, num_bf).dot(phi_bar); +#endif + } + std::cout << "Errors: " << std::sqrt(err_x) << " " << std::sqrt(err_y); + std::cout << " " << std::sqrt(err_z) << std::endl; } - std::cout << "Errors: " << std::sqrt(err_x) << " " << std::sqrt(err_y); - std::cout << " " << std::sqrt(err_z) << std::endl; + +#ifdef WRITE_TEST_OUTPUTS + silodb.write_zonal_variable("df_dx", var_df_dx); + silodb.write_zonal_variable("df_dy", var_df_dy); + silodb.write_zonal_variable("df_dz", var_df_dz); +#endif errs_x.push_back( std::sqrt(err_x) ); errs_y.push_back( std::sqrt(err_y) ); diff --git a/tests/test_differentiation_gpu.cpp b/tests/test_differentiation_gpu.cpp index 46dbe740d28cf60022d71c484a629d10b22de299..4f77055272077170bd8f0e2c75374b195d29c73c 100644 --- a/tests/test_differentiation_gpu.cpp +++ b/tests/test_differentiation_gpu.cpp @@ -9,6 +9,7 @@ #include "entity_data.h" #include "kernels_cpu.h" #include "timecounter.h" +#include "silo_output.hpp" #include "kernels_gpu.h" @@ -18,7 +19,21 @@ static void make_geometry(int order, double mesh_h) { gm::add("difftest"); - gmo::addBox(0.0, 0.0, 0.0, 1.0, 1.0, 1.0); + + std::vector<std::pair<int,int>> objects; + + objects.push_back( + std::make_pair(3, gmo::addBox(0.0, 0.0, 0.0, 1.0, 1.0, 0.5) ) + ); + objects.push_back( + std::make_pair(3, gmo::addBox(0.0, 0.0, 0.5, 0.5, 0.5, 0.5) ) + ); + + std::vector<std::pair<int, int>> tools; + gmsh::vectorpair odt; + std::vector<gmsh::vectorpair> odtm; + gmo::fragment(objects, tools, odt, odtm); + gmo::synchronize(); gvp_t vp; @@ -26,6 +41,8 @@ make_geometry(int order, double mesh_h) gmm::setSize(vp, mesh_h); } +#define WRITE_TEST_OUTPUTS + int test_differentiation_convergence(int geometric_order, int approximation_order) { std::vector<double> sizes({ 0.32, 0.16, 0.08, 0.04 }); @@ -58,73 +75,120 @@ int test_differentiation_convergence(int geometric_order, int approximation_orde make_geometry(0,h); model mod(geometric_order, approximation_order); - auto& e0 = mod.entity_at(0); - e0.sort_by_orientation(); - vecxd Pf_cpu = e0.project(f); - - /* Make CPU entity data */ - entity_data_cpu ed; - e0.populate_entity_data(ed); - /* Derive GPU entity data */ - entity_data_gpu edg(ed); - - /* Prepare I/O vectors and call kernel */ - device_vector<double> Pf_gpu(Pf_cpu.data(), Pf_cpu.size()); - device_vector<double> df_dx_gpu(Pf_cpu.size()); - device_vector<double> df_dy_gpu(Pf_cpu.size()); - device_vector<double> df_dz_gpu(Pf_cpu.size()); - - timecounter_gpu tc; - tc.tic(); - gpu_compute_field_derivatives(edg, Pf_gpu.data(), df_dx_gpu.data(), - df_dy_gpu.data(), df_dz_gpu.data()); - double time = tc.toc(); - - vecxd df_dx_cpu = vecxd::Zero( Pf_cpu.size() ); - vecxd df_dy_cpu = vecxd::Zero( Pf_cpu.size() ); - vecxd df_dz_cpu = vecxd::Zero( Pf_cpu.size() ); - - df_dx_gpu.copyout( df_dx_cpu.data() ); - df_dy_gpu.copyout( df_dy_cpu.data() ); - df_dz_gpu.copyout( df_dz_cpu.data() ); - - if (geometric_order == 1) + std::stringstream ss; + ss << "diff_go_" << geometric_order << "_ao_" << approximation_order; + ss << "_seq_" << i << ".silo"; + +#ifdef WRITE_TEST_OUTPUTS + silo silodb; + silodb.create_db(ss.str()); + silodb.import_mesh_from_gmsh(); + silodb.write_mesh(); +#endif + auto model_num_dofs = mod.num_dofs(); + + vecxd Pf = vecxd::Zero(model_num_dofs); + vecxd Pdf_dx = vecxd::Zero(model_num_dofs); + vecxd Pdf_dy = vecxd::Zero(model_num_dofs); + vecxd Pdf_dz = vecxd::Zero(model_num_dofs); + vecxd Cdf_dx = vecxd::Zero(model_num_dofs); + vecxd Cdf_dy = vecxd::Zero(model_num_dofs); + vecxd Cdf_dz = vecxd::Zero(model_num_dofs); + + std::vector<entity_data_gpu> edgs; + + for (auto& e : mod) { - std::cout << "Kernel runtime: " << time << " seconds. Estimated performance: "; - double flops = 21*ed.num_bf*ed.num_bf*e0.num_cells(); - std::cout << flops/(1e9*time) << " GFlops/s" << std::endl; + e.project(f, Pf); + e.project(df_dx, Pdf_dx); + e.project(df_dy, Pdf_dy); + e.project(df_dz, Pdf_dz); + entity_data_cpu ed; + e.populate_entity_data(ed); + entity_data_gpu edg(ed); + edgs.push_back( std::move(edg) ); } - else + + /* Prepare I/O vectors and call kernel */ + device_vector<double> Pf_gpu(Pf.data(), Pf.size()); + device_vector<double> df_dx_gpu(model_num_dofs); + device_vector<double> df_dy_gpu(model_num_dofs); + device_vector<double> df_dz_gpu(model_num_dofs); + + for (auto& edg : edgs) { - std::cout << "Kernel runtime: " << time << " seconds. Estimated performance: "; - double flops = ((21*ed.num_bf+6)*ed.num_bf*ed.num_qp + 3*(2*ed.num_bf-1)*ed.num_bf)*e0.num_cells(); - std::cout << flops/(1e9*time) << " GFlops/s" << std::endl; + timecounter_gpu tc; + tc.tic(); + gpu_compute_field_derivatives(edg, Pf_gpu.data(), df_dx_gpu.data(), + df_dy_gpu.data(), df_dz_gpu.data()); + double time = tc.toc(); + + auto num_cells = edg.num_all_elems; + if (geometric_order == 1) + { + std::cout << "Kernel runtime: " << time << " seconds. Estimated performance: "; + double flops = 21*edg.num_bf*edg.num_bf*num_cells; + std::cout << flops/(1e9*time) << " GFlops/s" << std::endl; + } + else + { + std::cout << "Kernel runtime: " << time << " seconds. Estimated performance: "; + double flops = ((21*edg.num_bf+6)*edg.num_bf/**edg.num_qp*/ + 3*(2*edg.num_bf-1)*edg.num_bf)*num_cells; + std::cout << flops/(1e9*time) << " GFlops/s" << std::endl; + } } - vecxd Pdf_dx_e0 = e0.project(df_dx); - vecxd Pdf_dy_e0 = e0.project(df_dy); - vecxd Pdf_dz_e0 = e0.project(df_dz); - + df_dx_gpu.copyout( Cdf_dx.data() ); + df_dy_gpu.copyout( Cdf_dy.data() ); + df_dz_gpu.copyout( Cdf_dz.data() ); + double err_x = 0.0; double err_y = 0.0; double err_z = 0.0; - for (size_t iT = 0; iT < e0.num_cells(); iT++) +#ifdef WRITE_TEST_OUTPUTS + auto model_num_cells = mod.num_cells(); + std::vector<double> var_Pf(model_num_cells); + std::vector<double> var_df_dx(model_num_cells); + std::vector<double> var_df_dy(model_num_cells); + std::vector<double> var_df_dz(model_num_cells); +#endif + + for (auto& e : mod) { - auto& pe = e0.cell(iT); - auto& re = e0.cell_refelem(pe); - matxd mass = e0.mass_matrix(iT); - auto num_bf = re.num_basis_functions(); - vecxd diff_x = Pdf_dx_e0.segment(iT*num_bf, num_bf) - df_dx_cpu.segment(iT*num_bf, num_bf); - vecxd diff_y = Pdf_dy_e0.segment(iT*num_bf, num_bf) - df_dy_cpu.segment(iT*num_bf, num_bf); - vecxd diff_z = Pdf_dz_e0.segment(iT*num_bf, num_bf) - df_dz_cpu.segment(iT*num_bf, num_bf); - - err_x += diff_x.dot(mass*diff_x); - err_y += diff_y.dot(mass*diff_y); - err_z += diff_z.dot(mass*diff_z); + for (size_t iT = 0; iT < e.num_cells(); iT++) + { + auto& pe = e.cell(iT); + auto& re = e.cell_refelem(pe); + matxd mass = e.mass_matrix(iT); + auto num_bf = re.num_basis_functions(); + auto ofs = e.cell_global_dof_offset(iT); + vecxd diff_x = Pdf_dx.segment(ofs, num_bf) - Cdf_dx.segment(ofs, num_bf); + vecxd diff_y = Pdf_dy.segment(ofs, num_bf) - Cdf_dy.segment(ofs, num_bf); + vecxd diff_z = Pdf_dz.segment(ofs, num_bf) - Cdf_dz.segment(ofs, num_bf); + + err_x += diff_x.dot(mass*diff_x); + err_y += diff_y.dot(mass*diff_y); + err_z += diff_z.dot(mass*diff_z); + +#ifdef WRITE_TEST_OUTPUTS + auto gi = e.cell_global_index_by_gmsh(iT); + assert(gi < model_num_cells); + vecxd phi_bar = re.basis_functions({1./3., 1./3., 1./3.}); + var_df_dx[gi] = Cdf_dx.segment(ofs, num_bf).dot(phi_bar); + var_df_dy[gi] = Cdf_dy.segment(ofs, num_bf).dot(phi_bar); + var_df_dz[gi] = Cdf_dz.segment(ofs, num_bf).dot(phi_bar); +#endif + } + std::cout << "Errors: " << std::sqrt(err_x) << " " << std::sqrt(err_y); + std::cout << " " << std::sqrt(err_z) << std::endl; } - std::cout << "Errors: " << std::sqrt(err_x) << " " << std::sqrt(err_y); - std::cout << " " << std::sqrt(err_z) << std::endl; + +#ifdef WRITE_TEST_OUTPUTS + silodb.write_zonal_variable("df_dx", var_df_dx); + silodb.write_zonal_variable("df_dy", var_df_dy); + silodb.write_zonal_variable("df_dz", var_df_dz); +#endif errs_x.push_back( std::sqrt(err_x) ); errs_y.push_back( std::sqrt(err_y) ); @@ -161,8 +225,8 @@ int main(void) int failed_tests = 0; std::cout << Bmagentafg << " *** TESTING: DIFFERENTIATION ***" << reset << std::endl; - for (size_t go = 1; go < 2; go++) - for (size_t ao = go; ao < 7; ao++) + for (size_t go = 1; go < 5; go++) + for (size_t ao = go; ao < 5; ao++) failed_tests += test_differentiation_convergence(go, ao); return failed_tests;