test_differentiation_gpu.cpp

#include <iostream>
#include <iomanip>

#include <unistd.h>

#include "test.h"
#include "gmsh_io.h"
#include "sgr.hpp"
#include "entity_data.h"
#include "kernels_cpu.h"
#include "timecounter.h"
#include "silo_output.hpp"

#include "kernels_gpu.h"

using namespace sgr;

static void
make_geometry(int order, double mesh_h)
{
    gm::add("difftest");

    std::vector<std::pair<int,int>> objects;

    objects.push_back(
        std::make_pair(3, gmo::addBox(0.0, 0.0, 0.0, 1.0, 1.0, 0.5) )
        );
    objects.push_back(
        std::make_pair(3, gmo::addBox(0.0, 0.0, 0.5, 0.5, 0.5, 0.5) )
        );

    std::vector<std::pair<int, int>> tools;
    gmsh::vectorpair odt;
    std::vector<gmsh::vectorpair> odtm;
    gmo::fragment(objects, tools, odt, odtm);

    gmo::synchronize();

    gvp_t vp;
    gm::getEntities(vp);
    gmm::setSize(vp, mesh_h);
}

int test_differentiation_convergence(int geometric_order, int approximation_order)
{
    std::vector<double> sizes({ 0.32, 0.16, 0.08, 0.04 });
    std::vector<double> errors;

    std::cout << cyanfg << "Testing geometric order " << geometric_order;
    std::cout << ", approximation order = " << approximation_order << nofg;
    std::cout << std::endl;

    auto f = [](const point_3d& pt) -> double {
        return std::sin(M_PI*pt.x())*std::sin(M_PI*pt.y())*std::sin(M_PI*pt.z()); 
    };
    auto df_dx = [](const point_3d& pt) -> double {
        return M_PI*std::cos(M_PI*pt.x())*std::sin(M_PI*pt.y())*std::sin(M_PI*pt.z()); 
    };
    auto df_dy = [](const point_3d& pt) -> double {
        return M_PI*std::sin(M_PI*pt.x())*std::cos(M_PI*pt.y())*std::sin(M_PI*pt.z()); 
    };
    auto df_dz = [](const point_3d& pt) -> double {
        return M_PI*std::sin(M_PI*pt.x())*std::sin(M_PI*pt.y())*std::cos(M_PI*pt.z()); 
    };

    std::vector<double> errs_x;
    std::vector<double> errs_y;
    std::vector<double> errs_z;

    for (size_t i = 0; i < sizes.size(); i++)
    {
        double h = sizes[i];
        make_geometry(0,h);
        model mod(geometric_order, approximation_order);
        mod.build();

#ifdef WRITE_TEST_OUTPUTS
        std::stringstream ss;
        ss << "diff_go_" << geometric_order << "_ao_" << approximation_order;
        ss << "_seq_" << i << ".silo";

        silo silodb;
        silodb.create_db(ss.str());
        silodb.import_mesh_from_gmsh();
        silodb.write_mesh();
#endif
        auto model_num_dofs = mod.num_dofs();

        vecxd Pf        = vecxd::Zero(model_num_dofs);
        vecxd Pdf_dx    = vecxd::Zero(model_num_dofs);
        vecxd Pdf_dy    = vecxd::Zero(model_num_dofs);
        vecxd Pdf_dz    = vecxd::Zero(model_num_dofs);

        std::vector<entity_data_gpu> edgs;

#ifdef USE_BLOCKED_GPU_KERNELS
        std::vector<entity_data_cpu> eds;
        size_t model_num_dofs_gpu = 0;
#else
        size_t model_num_dofs_gpu = model_num_dofs;
#endif
        for (auto& e : mod)
        {
            e.project(f, Pf);
            e.project(df_dx, Pdf_dx);
            e.project(df_dy, Pdf_dy);
            e.project(df_dz, Pdf_dz);
            entity_data_cpu ed;
            e.populate_entity_data(ed, mod);
            entity_data_gpu edg(ed);
#ifdef USE_BLOCKED_GPU_KERNELS
            edg.dof_base = model_num_dofs_gpu;
            model_num_dofs_gpu += gpu_dblocks_dofs(ed);
            eds.push_back( std::move(ed) );
#endif
            edgs.push_back( std::move(edg) );
        }

         /* Prepare I/O vectors and call kernel */
#ifdef USE_BLOCKED_GPU_KERNELS
        assert(eds.size() == edgs.size());
        vecxd Pf_reshaped = vecxd::Zero(model_num_dofs_gpu);
        for (size_t i = 0; i < eds.size(); i++)
            reshape_dofs(eds[i], edgs[i], Pf, Pf_reshaped, true);
        texture_allocator<double> Pf_gpu(Pf_reshaped.data(), Pf_reshaped.size());
#else
        device_vector<double> Pf_gpu(Pf.data(), Pf.size());
#endif
        device_vector<double> df_dx_gpu(model_num_dofs_gpu);
        device_vector<double> df_dy_gpu(model_num_dofs_gpu);
        device_vector<double> df_dz_gpu(model_num_dofs_gpu);

        for (auto& edg : edgs)
        {
            timecounter_gpu tc;
            tc.tic();
            gpu_compute_field_derivatives(edg, Pf_gpu.data(),
                df_dx_gpu.data(), df_dy_gpu.data(), df_dz_gpu.data(), 1.0);
            double time = tc.toc();
            auto num_cells = edg.num_all_elems;
            if (geometric_order == 1)
            {
                std::cout << "Kernel runtime: " << time << " seconds. Estimated performance: ";
                double flops = (21*edg.num_bf + 3)*edg.num_bf*num_cells;
                std::cout << flops/(1e9*time) << " GFlops/s" << std::endl;
            }
            else
            {
                std::cout << "Kernel runtime: " << time << " seconds. Estimated performance: ";
                double flops = ((21*edg.num_bf+6)*edg.num_bf/**edg.num_qp*/ + 3*(2*edg.num_bf-1)*edg.num_bf)*num_cells;
                std::cout << flops/(1e9*time) << " GFlops/s" << std::endl;
            }
        }

#ifdef USE_BLOCKED_GPU_KERNELS
        vecxd Cdf_dx_exp = vecxd::Zero(model_num_dofs_gpu);
        vecxd Cdf_dy_exp = vecxd::Zero(model_num_dofs_gpu);
        vecxd Cdf_dz_exp = vecxd::Zero(model_num_dofs_gpu);

        df_dx_gpu.copyout( Cdf_dx_exp.data() );
        df_dy_gpu.copyout( Cdf_dy_exp.data() );
        df_dz_gpu.copyout( Cdf_dz_exp.data() );

        vecxd Cdf_dx = vecxd::Zero(model_num_dofs);
        vecxd Cdf_dy = vecxd::Zero(model_num_dofs);
        vecxd Cdf_dz = vecxd::Zero(model_num_dofs);

        assert(eds.size() == edgs.size());
        Pf = vecxd::Zero(model_num_dofs);
        for (size_t i = 0; i < eds.size(); i++)
        {
            reshape_dofs(eds[i], edgs[i], Cdf_dx_exp, Cdf_dx, false);
            reshape_dofs(eds[i], edgs[i], Cdf_dy_exp, Cdf_dy, false);
            reshape_dofs(eds[i], edgs[i], Cdf_dz_exp, Cdf_dz, false);
            reshape_dofs(eds[i], edgs[i], Pf_reshaped, Pf, false);
        }
#else
        vecxd Cdf_dx = vecxd::Zero(model_num_dofs_gpu);
        vecxd Cdf_dy = vecxd::Zero(model_num_dofs_gpu);
        vecxd Cdf_dz = vecxd::Zero(model_num_dofs_gpu);

        df_dx_gpu.copyout( Cdf_dx.data() );
        df_dy_gpu.copyout( Cdf_dy.data() );
        df_dz_gpu.copyout( Cdf_dz.data() );
#endif
        
        double err_x = 0.0;
        double err_y = 0.0;
        double err_z = 0.0;

#ifdef WRITE_TEST_OUTPUTS
        auto model_num_cells = mod.num_cells();
        std::vector<double> var_Pf(model_num_cells);
        std::vector<double> var_df_dx(model_num_cells);
        std::vector<double> var_df_dy(model_num_cells);
        std::vector<double> var_df_dz(model_num_cells);
#endif

        for (auto& e : mod)
        {
            for (size_t iT = 0; iT < e.num_cells(); iT++)
            {
                auto& pe = e.cell(iT);
                auto& re = e.cell_refelem(pe);
                matxd mass = e.mass_matrix(iT);
                auto num_bf = re.num_basis_functions();
                auto ofs = e.cell_model_dof_offset(iT);
                vecxd diff_x = Pdf_dx.segment(ofs, num_bf) - Cdf_dx.segment(ofs, num_bf); 
                vecxd diff_y = Pdf_dy.segment(ofs, num_bf) - Cdf_dy.segment(ofs, num_bf);
                vecxd diff_z = Pdf_dz.segment(ofs, num_bf) - Cdf_dz.segment(ofs, num_bf); 
            
                err_x += diff_x.dot(mass*diff_x);
                err_y += diff_y.dot(mass*diff_y);
                err_z += diff_z.dot(mass*diff_z);

#ifdef WRITE_TEST_OUTPUTS
                auto gi = e.cell_global_index_by_gmsh(iT);
                assert(gi < model_num_cells);
                vecxd phi_bar = re.basis_functions({1./3., 1./3., 1./3.});
                var_Pf[gi] = Pf.segment(ofs, num_bf).dot(phi_bar);
                var_df_dx[gi] = Cdf_dx.segment(ofs, num_bf).dot(phi_bar);
                var_df_dy[gi] = Cdf_dy.segment(ofs, num_bf).dot(phi_bar);
                var_df_dz[gi] = Cdf_dz.segment(ofs, num_bf).dot(phi_bar);
#endif
            }
            std::cout << "Errors: " << std::sqrt(err_x) << " " << std::sqrt(err_y);
            std::cout << " " << std::sqrt(err_z) << std::endl;
        }

#ifdef WRITE_TEST_OUTPUTS
        silodb.write_zonal_variable("f", var_Pf);
        silodb.write_zonal_variable("df_dx", var_df_dx);
        silodb.write_zonal_variable("df_dy", var_df_dy);
        silodb.write_zonal_variable("df_dz", var_df_dz);
#endif
    
        errs_x.push_back( std::sqrt(err_x) );
        errs_y.push_back( std::sqrt(err_y) );
        errs_z.push_back( std::sqrt(err_z) );
    }

    double rate_x = 0.0;
    double rate_y = 0.0;
    double rate_z = 0.0;

    std::cout << Byellowfg << "rate df/dx  rate df/dy  rate df/dz" << reset << std::endl;
    for (size_t i = 1; i < sizes.size(); i++)
    {
        std::cout << (rate_x = std::log2(errs_x[i-1]/errs_x[i]) ) << " ";
        std::cout << (rate_y = std::log2(errs_y[i-1]/errs_y[i]) ) << " ";
        std::cout << (rate_z = std::log2(errs_z[i-1]/errs_z[i]) ) << std::endl;
    }

    COMPARE_VALUES_ABSOLUTE("df/dx", rate_x, double(approximation_order), 0.2);
    COMPARE_VALUES_ABSOLUTE("df/dy", rate_y, double(approximation_order), 0.2);
    COMPARE_VALUES_ABSOLUTE("df/dz", rate_z, double(approximation_order), 0.2);

    return 0;
}

int main(void)
{
    gmsh::initialize();
    gmsh::option::setNumber("General.Terminal", 0);
    gmsh::option::setNumber("Mesh.Algorithm", 1);
    gmsh::option::setNumber("Mesh.Algorithm3D", 1);


    int failed_tests = 0;

    std::cout << Bmagentafg << " *** TESTING: DIFFERENTIATION ***" << reset << std::endl;
    for (size_t go = 1; go < 2; go++)
        for (size_t ao = 1; ao < 7; ao++)
            failed_tests += test_differentiation_convergence(go, ao);

    return failed_tests;
}