Skip to content
Snippets Groups Projects
Select Git revision
  • 3b86cf6119b6abf3ae983fa9569d6a0e65815a35
  • master default protected
  • overlaps_tags_and_distributed_export
  • overlaps_tags_and_distributed_export_rebased
  • relaying
  • alphashapes
  • patches-4.14
  • steplayer
  • bl
  • pluginMeshQuality
  • fixBugsAmaury
  • hierarchical-basis
  • new_export_boris
  • oras_vs_osm
  • reassign_partitions
  • distributed_fwi
  • rename-classes
  • fix/fortran-api-example-t4
  • robust_partitions
  • reducing_files
  • fix_overlaps
  • gmsh_4_14_0
  • gmsh_4_13_1
  • gmsh_4_13_0
  • gmsh_4_12_2
  • gmsh_4_12_1
  • gmsh_4_12_0
  • gmsh_4_11_1
  • gmsh_4_11_0
  • gmsh_4_10_5
  • gmsh_4_10_4
  • gmsh_4_10_3
  • gmsh_4_10_2
  • gmsh_4_10_1
  • gmsh_4_10_0
  • gmsh_4_9_5
  • gmsh_4_9_4
  • gmsh_4_9_3
  • gmsh_4_9_2
  • gmsh_4_9_1
  • gmsh_4_9_0
41 results

DivideAndConquer.cpp

  • test_differentiation_gpu.cpp 9.53 KiB
    #include <iostream>
    #include <iomanip>
    
    #include <unistd.h>
    
    #include "test.h"
    #include "gmsh_io.h"
    #include "sgr.hpp"
    #include "entity_data.h"
    #include "kernels_cpu.h"
    #include "timecounter.h"
    #include "silo_output.hpp"
    
    #include "kernels_gpu.h"
    
    using namespace sgr;
    
    static void
    make_geometry(int order, double mesh_h)
    {
        gm::add("difftest");
    
        std::vector<std::pair<int,int>> objects;
    
        objects.push_back(
            std::make_pair(3, gmo::addBox(0.0, 0.0, 0.0, 1.0, 1.0, 0.5) )
            );
        objects.push_back(
            std::make_pair(3, gmo::addBox(0.0, 0.0, 0.5, 0.5, 0.5, 0.5) )
            );
    
        std::vector<std::pair<int, int>> tools;
        gmsh::vectorpair odt;
        std::vector<gmsh::vectorpair> odtm;
        gmo::fragment(objects, tools, odt, odtm);
    
        gmo::synchronize();
    
        gvp_t vp;
        gm::getEntities(vp);
        gmm::setSize(vp, mesh_h);
    }
    
    int test_differentiation_convergence(int geometric_order, int approximation_order)
    {
        std::vector<double> sizes({ 0.32, 0.16, 0.08, 0.04 });
        std::vector<double> errors;
    
        std::cout << cyanfg << "Testing geometric order " << geometric_order;
        std::cout << ", approximation order = " << approximation_order << nofg;
        std::cout << std::endl;
    
        auto f = [](const point_3d& pt) -> double {
            return std::sin(M_PI*pt.x())*std::sin(M_PI*pt.y())*std::sin(M_PI*pt.z()); 
        };
        auto df_dx = [](const point_3d& pt) -> double {
            return M_PI*std::cos(M_PI*pt.x())*std::sin(M_PI*pt.y())*std::sin(M_PI*pt.z()); 
        };
        auto df_dy = [](const point_3d& pt) -> double {
            return M_PI*std::sin(M_PI*pt.x())*std::cos(M_PI*pt.y())*std::sin(M_PI*pt.z()); 
        };
        auto df_dz = [](const point_3d& pt) -> double {
            return M_PI*std::sin(M_PI*pt.x())*std::sin(M_PI*pt.y())*std::cos(M_PI*pt.z()); 
        };
    
        std::vector<double> errs_x;
        std::vector<double> errs_y;
        std::vector<double> errs_z;
    
        for (size_t i = 0; i < sizes.size(); i++)
        {
            double h = sizes[i];
            make_geometry(0,h);
            model mod(geometric_order, approximation_order);
            mod.build();
    
    #ifdef WRITE_TEST_OUTPUTS
            std::stringstream ss;
            ss << "diff_go_" << geometric_order << "_ao_" << approximation_order;
            ss << "_seq_" << i << ".silo";
    
            silo silodb;
            silodb.create_db(ss.str());
            silodb.import_mesh_from_gmsh();
            silodb.write_mesh();
    #endif
            auto model_num_dofs = mod.num_dofs();
    
            vecxd Pf        = vecxd::Zero(model_num_dofs);
            vecxd Pdf_dx    = vecxd::Zero(model_num_dofs);
            vecxd Pdf_dy    = vecxd::Zero(model_num_dofs);
            vecxd Pdf_dz    = vecxd::Zero(model_num_dofs);
    
            std::vector<entity_data_gpu> edgs;
    
    #ifdef USE_BLOCKED_GPU_KERNELS
            std::vector<entity_data_cpu> eds;
            size_t model_num_dofs_gpu = 0;
    #else
            size_t model_num_dofs_gpu = model_num_dofs;
    #endif
            for (auto& e : mod)
            {
                e.project(f, Pf);
                e.project(df_dx, Pdf_dx);
                e.project(df_dy, Pdf_dy);
                e.project(df_dz, Pdf_dz);
                entity_data_cpu ed;
                e.populate_entity_data(ed, mod);
                entity_data_gpu edg(ed);
    #ifdef USE_BLOCKED_GPU_KERNELS
                edg.dof_base = model_num_dofs_gpu;
                model_num_dofs_gpu += gpu_dblocks_dofs(ed);
                eds.push_back( std::move(ed) );
    #endif
                edgs.push_back( std::move(edg) );
            }
    
             /* Prepare I/O vectors and call kernel */
    #ifdef USE_BLOCKED_GPU_KERNELS
            assert(eds.size() == edgs.size());
            vecxd Pf_reshaped = vecxd::Zero(model_num_dofs_gpu);
            for (size_t i = 0; i < eds.size(); i++)
                reshape_dofs(eds[i], edgs[i], Pf, Pf_reshaped, true);
            texture_allocator<double> Pf_gpu(Pf_reshaped.data(), Pf_reshaped.size());
    #else
            device_vector<double> Pf_gpu(Pf.data(), Pf.size());
    #endif
            device_vector<double> df_dx_gpu(model_num_dofs_gpu);
            device_vector<double> df_dy_gpu(model_num_dofs_gpu);
            device_vector<double> df_dz_gpu(model_num_dofs_gpu);
    
            for (auto& edg : edgs)
            {
                timecounter_gpu tc;
                tc.tic();
                gpu_compute_field_derivatives(edg, Pf_gpu.data(),
                    df_dx_gpu.data(), df_dy_gpu.data(), df_dz_gpu.data(), 1.0);
                double time = tc.toc();
    
                auto num_cells = edg.num_all_elems;
                if (geometric_order == 1)
                {
                    std::cout << "Kernel runtime: " << time << " seconds. Estimated performance: ";
                    double flops = (21*edg.num_bf + 3)*edg.num_bf*num_cells;
                    std::cout << flops/(1e9*time) << " GFlops/s" << std::endl;
                }
                else
                {
                    std::cout << "Kernel runtime: " << time << " seconds. Estimated performance: ";
                    double flops = ((21*edg.num_bf+6)*edg.num_bf/**edg.num_qp*/ + 3*(2*edg.num_bf-1)*edg.num_bf)*num_cells;
                    std::cout << flops/(1e9*time) << " GFlops/s" << std::endl;
                }
            }
    
    #ifdef USE_BLOCKED_GPU_KERNELS
            vecxd Cdf_dx_exp = vecxd::Zero(model_num_dofs_gpu);
            vecxd Cdf_dy_exp = vecxd::Zero(model_num_dofs_gpu);
            vecxd Cdf_dz_exp = vecxd::Zero(model_num_dofs_gpu);
    
            df_dx_gpu.copyout( Cdf_dx_exp.data() );
            df_dy_gpu.copyout( Cdf_dy_exp.data() );
            df_dz_gpu.copyout( Cdf_dz_exp.data() );
    
            vecxd Cdf_dx = vecxd::Zero(model_num_dofs);
            vecxd Cdf_dy = vecxd::Zero(model_num_dofs);
            vecxd Cdf_dz = vecxd::Zero(model_num_dofs);
    
            assert(eds.size() == edgs.size());
            Pf = vecxd::Zero(model_num_dofs);
            for (size_t i = 0; i < eds.size(); i++)
            {
                reshape_dofs(eds[i], edgs[i], Cdf_dx_exp, Cdf_dx, false);
                reshape_dofs(eds[i], edgs[i], Cdf_dy_exp, Cdf_dy, false);
                reshape_dofs(eds[i], edgs[i], Cdf_dz_exp, Cdf_dz, false);
                reshape_dofs(eds[i], edgs[i], Pf_reshaped, Pf, false);
            }
    #else
            vecxd Cdf_dx = vecxd::Zero(model_num_dofs_gpu);
            vecxd Cdf_dy = vecxd::Zero(model_num_dofs_gpu);
            vecxd Cdf_dz = vecxd::Zero(model_num_dofs_gpu);
    
            df_dx_gpu.copyout( Cdf_dx.data() );
            df_dy_gpu.copyout( Cdf_dy.data() );
            df_dz_gpu.copyout( Cdf_dz.data() );
    #endif
            
            double err_x = 0.0;
            double err_y = 0.0;
            double err_z = 0.0;
    
    #ifdef WRITE_TEST_OUTPUTS
            auto model_num_cells = mod.num_cells();
            std::vector<double> var_Pf(model_num_cells);
            std::vector<double> var_df_dx(model_num_cells);
            std::vector<double> var_df_dy(model_num_cells);
            std::vector<double> var_df_dz(model_num_cells);
    #endif
    
            for (auto& e : mod)
            {
                for (size_t iT = 0; iT < e.num_cells(); iT++)
                {
                    auto& pe = e.cell(iT);
                    auto& re = e.cell_refelem(pe);
                    matxd mass = e.mass_matrix(iT);
                    auto num_bf = re.num_basis_functions();
                    auto ofs = e.cell_model_dof_offset(iT);
                    vecxd diff_x = Pdf_dx.segment(ofs, num_bf) - Cdf_dx.segment(ofs, num_bf); 
                    vecxd diff_y = Pdf_dy.segment(ofs, num_bf) - Cdf_dy.segment(ofs, num_bf); 
                    vecxd diff_z = Pdf_dz.segment(ofs, num_bf) - Cdf_dz.segment(ofs, num_bf); 
                
                    err_x += diff_x.dot(mass*diff_x);
                    err_y += diff_y.dot(mass*diff_y);
                    err_z += diff_z.dot(mass*diff_z);
    
    #ifdef WRITE_TEST_OUTPUTS
                    auto gi = e.cell_global_index_by_gmsh(iT);
                    assert(gi < model_num_cells);
                    vecxd phi_bar = re.basis_functions({1./3., 1./3., 1./3.});
                    var_Pf[gi] = Pf.segment(ofs, num_bf).dot(phi_bar);
                    var_df_dx[gi] = Cdf_dx.segment(ofs, num_bf).dot(phi_bar);
                    var_df_dy[gi] = Cdf_dy.segment(ofs, num_bf).dot(phi_bar);
                    var_df_dz[gi] = Cdf_dz.segment(ofs, num_bf).dot(phi_bar);
    #endif
                }
                std::cout << "Errors: " << std::sqrt(err_x) << " " << std::sqrt(err_y);
                std::cout << " " << std::sqrt(err_z) << std::endl;
            }
    
    #ifdef WRITE_TEST_OUTPUTS
            silodb.write_zonal_variable("f", var_Pf);
            silodb.write_zonal_variable("df_dx", var_df_dx);
            silodb.write_zonal_variable("df_dy", var_df_dy);
            silodb.write_zonal_variable("df_dz", var_df_dz);
    #endif
        
            errs_x.push_back( std::sqrt(err_x) );
            errs_y.push_back( std::sqrt(err_y) );
            errs_z.push_back( std::sqrt(err_z) );
        }
    
        double rate_x = 0.0;
        double rate_y = 0.0;
        double rate_z = 0.0;
    
        std::cout << Byellowfg << "rate df/dx  rate df/dy  rate df/dz" << reset << std::endl;
        for (size_t i = 1; i < sizes.size(); i++)
        {
            std::cout << (rate_x = std::log2(errs_x[i-1]/errs_x[i]) ) << " ";
            std::cout << (rate_y = std::log2(errs_y[i-1]/errs_y[i]) ) << " ";
            std::cout << (rate_z = std::log2(errs_z[i-1]/errs_z[i]) ) << std::endl;
        }
    
        COMPARE_VALUES_ABSOLUTE("df/dx", rate_x, double(approximation_order), 0.2);
        COMPARE_VALUES_ABSOLUTE("df/dy", rate_y, double(approximation_order), 0.2);
        COMPARE_VALUES_ABSOLUTE("df/dz", rate_z, double(approximation_order), 0.2);
    
        return 0;
    }
    
    int main(void)
    {
        gmsh::initialize();
        gmsh::option::setNumber("General.Terminal", 0);
        gmsh::option::setNumber("Mesh.Algorithm", 1);
        gmsh::option::setNumber("Mesh.Algorithm3D", 1);
    
    
        int failed_tests = 0;
    
        std::cout << Bmagentafg << " *** TESTING: DIFFERENTIATION ***" << reset << std::endl;
        for (size_t go = 1; go < 2; go++)
            for (size_t ao = 1; ao < 7; ao++)
                failed_tests += test_differentiation_convergence(go, ao);
    
        return failed_tests;
    }