diff --git a/include/kernels_cpu.h b/include/kernels_cpu.h index b71bd44a1aded5bf83254e9b84c73d89af6a9325..575b1489d32857ca45791668b2f55ea9a87ac486 100644 --- a/include/kernels_cpu.h +++ b/include/kernels_cpu.h @@ -201,18 +201,8 @@ void compute_lifting_kernel_planar(const entity_data_cpu& ed, size_t dof_ofs = dof_base + iT*ed.num_bf; size_t flux_ofs = flux_base + 4*iT*KS::num_fluxes; auto inv_det = 1./ed.cell_determinants[orient_elem_base+iT]; - - for (size_t i = 0; i < KS::num_bf; i++) - { - auto facc = field(dof_ofs+i); - for (size_t iF = 0; iF < 4; iF++) - { - auto face_det = ed.face_determinants[4*(orient_elem_base+iT)+iF]; - for (size_t k = 0; k < KS::num_fluxes; k++) - facc += face_det * inv_det * ed.lifting_matrices(iO*KS::num_bf+i, KS::num_fluxes*iF + k) * flux(flux_ofs+KS::num_fluxes*iF + k); - } - field(dof_ofs+i) = facc; - } + + field.segment<KS::num_bf>(dof_ofs) += (inv_det) * ed.lifting_matrices.block<KS::num_bf, 4*KS::num_fluxes>(iO*KS::num_bf, 0) * flux.segment<4*KS::num_fluxes>(flux_ofs); } orient_elem_base += num_elems; diff --git a/include/silo_output.hpp b/include/silo_output.hpp index cfa006ee0799d8596144ffd7524c4d52a262eb52..52fac0cb7db4497ab51a09240b53a3eed0d9f139 100644 --- a/include/silo_output.hpp +++ b/include/silo_output.hpp @@ -97,7 +97,6 @@ class silo num_cells += elemTags.size(); } } - std::cout << nodelist.size() << " " << num_cells << std::endl; assert(nodelist.size() == 4*num_cells); } diff --git a/tests/test_differentiation_gpu.cpp b/tests/test_differentiation_gpu.cpp index f074f0d7e51e4fa34bbe86e7c4505de995ec1c82..a252212b6fa315d163d7544635e1ecb81b549dd9 100644 --- a/tests/test_differentiation_gpu.cpp +++ b/tests/test_differentiation_gpu.cpp @@ -73,11 +73,11 @@ int test_differentiation_convergence(int geometric_order, int approximation_orde make_geometry(0,h); model mod(geometric_order, approximation_order); +#ifdef WRITE_TEST_OUTPUTS std::stringstream ss; ss << "diff_go_" << geometric_order << "_ao_" << approximation_order; ss << "_seq_" << i << ".silo"; -#ifdef WRITE_TEST_OUTPUTS silo silodb; silodb.create_db(ss.str()); silodb.import_mesh_from_gmsh(); diff --git a/tests/test_lifting.cpp b/tests/test_lifting.cpp index d6bd47cc96eb9a68813e049d8ea36d5789c9996b..1a93cb992173f492ad0b8084fdf5969378d05c13 100644 --- a/tests/test_lifting.cpp +++ b/tests/test_lifting.cpp @@ -17,7 +17,21 @@ static void make_geometry(int order, double mesh_h) { gm::add("difftest"); - gmo::addBox(0.0, 0.0, 0.0, 1.0, 1.0, 1.0); + + std::vector<std::pair<int,int>> objects; + + objects.push_back( + std::make_pair(3, gmo::addBox(0.0, 0.0, 0.0, 1.0, 1.0, 0.5) ) + ); + objects.push_back( + std::make_pair(3, gmo::addBox(0.0, 0.0, 0.5, 0.5, 0.5, 0.5) ) + ); + + std::vector<std::pair<int, int>> tools; + gmsh::vectorpair odt; + std::vector<gmsh::vectorpair> odtm; + gmo::fragment(objects, tools, odt, odtm); + gmo::synchronize(); gvp_t vp; @@ -25,39 +39,7 @@ make_geometry(int order, double mesh_h) gmm::setSize(vp, mesh_h); } - -int test_lifting_xx(int geometric_order, int approximation_order) -{ - make_geometry(0, 0.25); - model mod(geometric_order, approximation_order); - - auto& e0 = mod.entity_at(0); - - entity_data_cpu ed; - e0.populate_entity_data(ed); - - for (size_t iO = 0; iO < e0.num_cell_orientations(); iO++) - { - auto& re = e0.cell_refelem(iO); - auto num_bf = re.num_basis_functions(); - - auto lift_cols = ed.lifting_matrices.cols(); - - matxd mass = re.mass_matrix(); - matxd curlift = mass * ed.lifting_matrices.block(iO*num_bf, 0, num_bf, lift_cols); - - vecxd ones_cell = vecxd::Ones(num_bf); - vecxd ones_face = vecxd::Ones(lift_cols/4); - - for (size_t iF = 0; iF < 4; iF++) - { - matxd liftblock = curlift.block(0,iF*lift_cols/4,num_bf,lift_cols/4); - std::cout << ones_cell.dot(liftblock*ones_face) << std::endl; - } - } - - return 0; -} +#define WRITE_TEST_OUTPUTS int test_lifting(int geometric_order, int approximation_order) { @@ -65,7 +47,7 @@ int test_lifting(int geometric_order, int approximation_order) std::vector<double> errors; std::cout << cyanfg << "Testing geometric order " << geometric_order; - std::cout << ", approximation order = " << approximation_order << nofg; + std::cout << ", approximation order = " << approximation_order << reset; std::cout << std::endl; /* Test field */ @@ -91,7 +73,6 @@ int test_lifting(int geometric_order, int approximation_order) /* Expected divergence */ auto div_F = [](const point_3d& pt) -> double { - //return 0; auto dFx_dx = M_PI*std::cos(M_PI*pt.x()); auto dFy_dy = M_PI*std::cos(M_PI*pt.y()); auto dFz_dz = M_PI*std::cos(M_PI*pt.z()); @@ -104,162 +85,146 @@ int test_lifting(int geometric_order, int approximation_order) make_geometry(0,h); model mod(geometric_order, approximation_order); +#ifdef WRITE_TEST_OUTPUTS std::stringstream ss; - ss << "lifting_test_" << sz << ".silo"; - - silo s; - s.create_db( ss.str() ); - s.import_mesh_from_gmsh(); - s.write_mesh(); + ss << "lift_go_" << geometric_order << "_ao_" << approximation_order; + ss << "_seq_" << sz << ".silo"; - auto& e0 = mod.entity_at(0); + silo silodb; + silodb.create_db(ss.str()); + silodb.import_mesh_from_gmsh(); + silodb.write_mesh(); +#endif - entity_data_cpu ed; - e0.populate_entity_data(ed); + auto model_num_dofs = mod.num_dofs(); + auto model_num_fluxes = mod.num_fluxes(); - vecxd exp_field = e0.project(div_F); - vecxd Fx_proj = e0.project(Fx); - vecxd Fy_proj = e0.project(Fy); - vecxd Fz_proj = e0.project(Fz); + vecxd Pdiv_F = vecxd::Zero(model_num_dofs); + vecxd PFdotn = vecxd::Zero(model_num_fluxes); + vecxd LiftF = vecxd::Zero(model_num_dofs); - auto flx_size = e0.num_cells() * 4*ed.num_fluxes; - vecxd Fx_flux = vecxd::Zero( flx_size ); - vecxd Fy_flux = vecxd::Zero( flx_size ); - vecxd Fz_flux = vecxd::Zero( flx_size ); + std::vector<entity_data_cpu> eds; - for (size_t i = 0; i < flx_size; i++) + for (auto& e : mod) { - Fx_flux(i) = Fx_proj( ed.fluxdofs_mine[i] ); - Fy_flux(i) = Fy_proj( ed.fluxdofs_mine[i] ); - Fz_flux(i) = Fz_proj( ed.fluxdofs_mine[i] ); - } + e.project(div_F, Pdiv_F); + entity_data_cpu ed; + e.populate_entity_data(ed); - vecxd flux = vecxd::Zero( flx_size ); + vecxd PFx = e.project(Fx); + vecxd PFy = e.project(Fy); + vecxd PFz = e.project(Fz); - for (size_t iT = 0; iT < e0.num_cells(); iT++) - { - for (size_t iF = 0; iF < 4; iF++) + for (size_t iT = 0; iT < e.num_cells(); iT++) { - vec3d n = ed.normals.row(4*iT+iF); - for (size_t k = 0; k < ed.num_fluxes; k++) + for (size_t iF = 0; iF < 4; iF++) { - auto dof_ofs = (4*iT+iF)*ed.num_fluxes + k; - flux(dof_ofs) = Fx_flux(dof_ofs)*n(0) + - Fy_flux(dof_ofs)*n(1) + - Fz_flux(dof_ofs)*n(2); + auto face_det = ed.face_determinants[4*iT+iF]; + vec3d n = ed.normals.row(4*iT+iF); + for (size_t k = 0; k < ed.num_fluxes; k++) + { + auto base = e.flux_base(); + auto ofs = (4*iT+iF)*ed.num_fluxes + k; + auto Fx = PFx( ed.fluxdofs_mine[ofs] ); + auto Fy = PFy( ed.fluxdofs_mine[ofs] ); + auto Fz = PFz( ed.fluxdofs_mine[ofs] ); + PFdotn(base+ofs) = face_det*Fx*n(0) + + face_det*Fy*n(1) + + face_det*Fz*n(2); + } } } + + eds.push_back( std::move(ed) ); } - vecxd lift_field = vecxd::Zero( e0.num_cells() * ed.num_bf ); -#if 0 - for (size_t iT = 0; iT < e0.num_cells(); iT++) + + + for (auto& ed : eds) { - for (size_t iF = 0; iF < 4; iF++) - { - auto& pf = e0.face(4*iT+iF); - auto& rf = e0.face_refelem(pf); - auto num_fluxes = rf.num_basis_functions(); - const auto pqps = pf.integration_points(); - - matxd mass = matxd::Zero(num_fluxes, num_fluxes); - vecxd rhs = vecxd::Zero(num_fluxes); - for (size_t iQp = 0; iQp < pqps.size(); iQp++) - { - vec3d n; - if (ed.g_order == 1) - n = ed.normals.row(4*iT+iF); - else - n = ed.normals.row( (4*iT+iF)*pqps.size() + iQp ); + timecounter tc; + tc.tic(); + compute_flux_lifting(ed, PFdotn, LiftF); + double time = tc.toc(); - const auto& pqp = pqps[iQp]; - const vecxd phi = rf.basis_functions(iQp); - mass += pqp.weight() * phi * phi.transpose(); - rhs += pqp.weight() * F(pqp.point()).dot(n) * phi; - } - auto fluxofs = (4*iT+iF)*num_fluxes; - flux.segment(fluxofs, num_fluxes) = mass.ldlt().solve(rhs); + auto num_cells = num_elems_all_orientations(ed); + if (geometric_order == 1) + { + std::cout << "Kernel runtime: " << time << " seconds. Estimated performance: "; + double flops = 3*(ed.num_bf)*4*ed.num_fluxes*num_cells; + std::cout << flops/(1e9*time) << " GFlops/s" << std::endl; + } + else + { + std::cout << "Kernel runtime: " << time << " seconds. Estimated performance: "; + double flops = ((21*ed.num_bf+6)*ed.num_bf*ed.num_qp + 3*(2*ed.num_bf-1)*ed.num_bf)*num_cells; + std::cout << flops/(1e9*time) << " GFlops/s" << std::endl; } } -#endif - timecounter tc; - tc.tic(); - compute_flux_lifting(ed, flux, lift_field); - double time = tc.toc(); - - std::vector<double> var_div_F( e0.num_cells() ); - std::vector<double> var_lift( e0.num_cells() ); - std::vector<double> var_volp( e0.num_cells() ); - std::vector<double> var_elift( e0.num_cells() ); - std::vector<double> var_err( e0.num_cells() ); - std::vector<double> var_orient( e0.num_cells() ); + + +#ifdef WRITE_TEST_OUTPUTS + std::vector<double> var_ediv_F( mod.num_cells() ); + std::vector<double> var_div_F( mod.num_cells() ); + std::vector<double> var_lift( mod.num_cells() ); + std::vector<double> var_vol( mod.num_cells() ); +#endif double err = 0.0; - for (size_t iT = 0; iT < e0.num_cells(); iT++) + + for (auto& e : mod) { - auto& pe = e0.cell(iT); - auto& re = e0.cell_refelem(pe); - auto num_bf = re.num_basis_functions(); - matxd mass = matxd::Zero(num_bf, num_bf); - vecxd vol = vecxd::Zero(num_bf); - - const auto pqps = pe.integration_points(); - for(size_t iQp = 0; iQp < pqps.size(); iQp++) + for (size_t iT = 0; iT < e.num_cells(); iT++) { - const auto& pqp = pqps[iQp]; - const vecxd phi = re.basis_functions(iQp); - const matxd dphi = re.basis_gradients(iQp); - const mat3d J = pe.jacobian(iQp); - mass += pqp.weight() * phi * phi.transpose(); - vol += pqp.weight() * (dphi*J.inverse()) * F(pqp.point()); - } + auto& pe = e.cell(iT); + auto& re = e.cell_refelem(pe); + auto num_bf = re.num_basis_functions(); + matxd mass = matxd::Zero(num_bf, num_bf); + vecxd vol = vecxd::Zero(num_bf); + + const auto pqps = pe.integration_points(); + for(size_t iQp = 0; iQp < pqps.size(); iQp++) + { + const auto& pqp = pqps[iQp]; + const vecxd phi = re.basis_functions(iQp); + const matxd dphi = re.basis_gradients(iQp); + const mat3d J = pe.jacobian(iQp); + mass += pqp.weight() * phi * phi.transpose(); + vol += pqp.weight() * (dphi*J.inverse()) * F(pqp.point()); + } - vecxd expected = exp_field.segment(iT*num_bf, num_bf); - vecxd lf = lift_field.segment(iT*num_bf, num_bf); - vecxd lfp = mass*lf; - vecxd volp = mass.ldlt().solve(vol); - - //std::cout << "***" << std::endl; - //std::cout << lfp.transpose() << std::endl; - //std::cout << vol.transpose() << std::endl; - - vecxd comp_field = lf - volp; - vecxd diff = comp_field - expected; - double loc_err = diff.dot(mass*diff); - err += loc_err; - - auto bar = pe.barycenter(); - vecxd phi_bar = re.basis_functions({1./3., 1./3., 1./3.}); - - var_div_F.at( pe.original_position() ) = expected.dot(phi_bar); - var_lift.at( pe.original_position() ) = lf.dot(phi_bar); - var_volp.at( pe.original_position() ) = volp.dot(phi_bar); - var_elift.at( pe.original_position() ) = (volp + expected).dot(phi_bar); - var_err.at( pe.original_position() ) = loc_err; - var_orient.at( pe.original_position() ) = pe.orientation(); + auto ofs = e.cell_global_dof_offset(iT); + vecxd excpected_div_F = Pdiv_F.segment(ofs, num_bf); + vecxd Plf = LiftF.segment(ofs, num_bf); + vecxd Pvol = mass.ldlt().solve(vol); + + + vecxd computed_div_F = Plf - Pvol; + vecxd diff = computed_div_F - excpected_div_F; + double loc_err = diff.dot(mass*diff); + err += loc_err; + + auto bar = pe.barycenter(); + vecxd phi_bar = re.basis_functions({1./3., 1./3., 1./3.}); +#ifdef WRITE_TEST_OUTPUTS + auto gi = e.cell_global_index_by_gmsh(iT); + var_ediv_F[gi] = div_F(bar); + var_div_F[gi] = computed_div_F.dot(phi_bar); + var_lift[gi] = Plf.dot(phi_bar); + var_vol[gi] = Pvol.dot(phi_bar); +#endif + } } - s.write_zonal_variable("div_F", var_div_F); - s.write_zonal_variable("lift", var_lift); - s.write_zonal_variable("vol", var_volp); - s.write_zonal_variable("exp_lift", var_elift); - s.write_zonal_variable("err", var_err); - s.write_zonal_variable("orient", var_orient); - +#ifdef WRITE_TEST_OUTPUTS + silodb.write_zonal_variable("expected_div_F", var_ediv_F); + silodb.write_zonal_variable("div_F", var_div_F); + silodb.write_zonal_variable("lift", var_lift); + silodb.write_zonal_variable("vol", var_vol); +#endif std::cout << std::sqrt(err) << std::endl; - if (geometric_order == 1) - { - std::cout << "Kernel runtime: " << time << " seconds. Estimated performance: "; - double flops = 3*(ed.num_bf)*4*ed.num_fluxes*e0.num_cells(); - std::cout << flops/(1e9*time) << " GFlops/s" << std::endl; - } - else - { - std::cout << "Kernel runtime: " << time << " seconds. Estimated performance: "; - double flops = ((21*ed.num_bf+6)*ed.num_bf*ed.num_qp + 3*(2*ed.num_bf-1)*ed.num_bf)*e0.num_cells(); - std::cout << flops/(1e9*time) << " GFlops/s" << std::endl; - } + } return 0;