/* This is GMSH/DG, a GPU-Accelerated Nodal Discontinuous Galerkin
* solver for Conservation Laws.
*
* Copyright (C) 2020-2022 Matteo Cicuttin - University of Liège
*
* This code is released under GNU AGPLv3 license, see LICENSE.txt for details.
*/
#include <iostream>
#include <vector>
#include <regex>
#include <cassert>
#include <cstdio>

#ifdef DONT_USE_STD_FILESYSTEM
#include <sys/stat.h>
#include <sys/types.h>
#else
#include <filesystem>
#endif /* DONT_USE_STD_FILESYSTEM */

#include <sys/time.h>
#include <sys/resource.h>

#ifdef USE_MPI
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"
#include <mpi.h>
#pragma clang diagnostic pop
#endif /* USE_MPI */
#include "libgmshdg/silo_output.hpp"
#include "libgmshdg/param_loader.h"
#include "maxwell/maxwell_interface.h"
#include "maxwell/maxwell_common.h"
#include "sgr.hpp"
using namespace sgr;

/* Shorthand for the GMSH mesh API (assumed alias; gmm::generate() and
 * gmm::affineTransform() are used below). */
namespace gmm = gmsh::model::mesh;
template<typename State>
void initialize_solver(const model& mod, State& state, const maxwell::parameter_loader& mpl)
{
maxwell::init_from_model(mod, state, mpl);
if ( mpl.initial_Efield_defined() )
{
auto E = [&](const point_3d& pt) -> vec3d { return mpl.initial_Efield(pt); };
maxwell::init_E_field(mod, state, E);
}
if ( mpl.initial_Hfield_defined() )
{
auto H = [&](const point_3d& pt) -> vec3d { return mpl.initial_Hfield(pt); };
maxwell::init_H_field(mod, state, H);
}
}
#ifdef ENABLE_PROFILING
#ifdef ENABLE_GPU_SOLVER
#define TYPE profiler::type::GPU
#else
#define TYPE profiler::type::CPU
#endif /* ENABLE_GPU_SOLVER */
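/* Register per-kernel profilers with estimated flop counts and memory traffic,
 * so the logger can report GFlops and GB figures for every stage of the DG
 * update (curls, field jumps, fluxes, lifting). */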
template<typename State>
void initialize_profiling(const model& mod, State& state, time_integrator_type ti)
{
auto num_entities = mod.num_entities();
auto num_cells = mod.num_cells_world();
auto num_all_faces = mod.num_faces();
auto cell_dofs = num_dofs_3D(mod.approximation_order);
auto cell_fluxes = num_dofs_2D(mod.approximation_order);
auto num_all_dofs = mod.num_dofs();
auto num_all_fluxes = mod.num_fluxes();
auto num_all_orientations = mod.num_orientations();
std::cout << "num_entities: " << num_entities << std::endl
<< "num_cells: " << num_cells << std::endl
<< "num_all_faces: " << num_all_faces << std::endl
<< "cell_dofs: " << cell_dofs << std::endl
<< "cell_fluxes: " << cell_fluxes << std::endl
<< "num_all_dofs: " << num_all_dofs << std::endl
<< "num_all_fluxes: " << num_all_fluxes << std::endl;
state.logger.register_profiler("solver", TYPE)
auto jacobians_read = 3*3*num_cells;
auto dm_read = 3*cell_dofs*cell_dofs;
auto curl_readwrite = 6*num_all_dofs;
auto sources_application = 3*num_all_dofs;
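/* The leapfrog integrator updates H and E in separate substeps, so the curl
 * kernels are profiled per field; the other integrators advance both fields
 * together and use a single set of profilers. */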
if (ti == time_integrator_type::LEAPFROG)
{
#ifdef TM_NEW_CURLS
state.logger.register_profiler("compute_curls_H", TYPE)
.set_quantity("GFlops", ((45*cell_dofs+9)*num_all_dofs)/1e9)
.set_quantity("GB", (jacobians_read+dm_read+curl_readwrite+sources_application)*8/1e9);
state.logger.register_profiler("compute_curls_E", TYPE)
.set_quantity("GFlops", ((45*cell_dofs+6)*num_all_dofs)/1e9)
.set_quantity("GB", (jacobians_read+dm_read+curl_readwrite)*8/1e9);
#else /* TM_NEW_CURLS */
auto derivatives = jacobians_read+dm_read+4*num_all_dofs;
auto curls_h = 4*num_all_dofs;
auto curls_e = 3*num_all_dofs;
state.logger.register_profiler("compute_curls_H", TYPE)
.set_quantity("GFlops", ((21*cell_dofs*num_all_dofs)*3 + 6*num_all_dofs)/1e9)
.set_quantity("GB", (derivatives*3+curls_h)*8/1e9);
state.logger.register_profiler("compute_curls_E", TYPE)
.set_quantity("GFlops", ((21*cell_dofs*num_all_dofs)*3 + 3*num_all_dofs)/1e9)
.set_quantity("GB", (derivatives*3+curls_e)*8/1e9);
#endif /* TM_NEW_CURLS */
state.logger.register_profiler("compute_field_jumps", TYPE)
.set_quantity("GFlops", 3*num_all_fluxes/1e9)
.set_quantity("GB", 9*num_all_fluxes*8/1e9);
auto normals_read = num_all_faces;
auto dets_read = num_all_faces;
auto sources_imposition = 18*num_all_fluxes;
auto matparams_readwrite = 4*num_all_fluxes;
auto fluxes_write = 3*num_all_fluxes;
state.logger.register_profiler("compute_fluxes_planar", TYPE)
.set_quantity("GFlops", 37*num_all_fluxes/1e9)
.set_quantity("GB", (normals_read+dets_read+sources_imposition+matparams_readwrite+fluxes_write)*8/1e9);
dets_read = num_cells;
auto lm_read = num_all_orientations*cell_dofs*4*cell_fluxes;
auto lifting_readwrite = num_all_fluxes+num_all_dofs;
state.logger.register_profiler("compute_flux_lifting", TYPE)
.set_quantity("GFlops", 21*cell_dofs*num_all_dofs*3/1e9)
.set_quantity("GB", 3*(dets_read+lm_read+lifting_readwrite)*8/1e9);
}
else
{
#ifdef TM_NEW_CURLS
state.logger.register_profiler("compute_curls", TYPE)
.set_quantity("GFlops", ((45*cell_dofs+6)*num_all_dofs + (45*cell_dofs+9)*num_all_dofs)/1e9)
.set_quantity("GB", (2*(jacobians_read+dm_read+curl_readwrite)+sources_application)*8/1e9);
#else /* TM_NEW_CURLS */
auto derivatives = jacobians_read+dm_read+4*num_all_dofs;
auto curls = 4*num_all_dofs + 3*num_all_dofs;
state.logger.register_profiler("compute_curls", TYPE)
.set_quantity("GFlops", ((21*cell_dofs*num_all_dofs)*6 + 9*num_all_dofs)/1e9)
.set_quantity("GB", (derivatives*6+curls)*8/1e9);
#endif /* TM_NEW_CURLS */
#ifdef USE_MPI
state.logger.register_profiler("compute_field_jumps", TYPE)
.set_quantity("GFlops", 6*num_all_fluxes/1e9)
.set_quantity("GB", 18*num_all_fluxes*8/1e9);
auto normals_read = num_all_faces;
auto dets_read = num_all_faces;
auto sources_imposition = 18*num_all_fluxes;
auto matparams_readwrite = 8*num_all_fluxes;
auto fluxes_write = 6*num_all_fluxes;
state.logger.register_profiler("compute_fluxes_planar", TYPE)
.set_quantity("GFlops", 68*num_all_fluxes/1e9)
.set_quantity("GB", (normals_read+dets_read+sources_imposition+matparams_readwrite+fluxes_write)*8/1e9);
#else /* USE_MPI */
auto normals_read = num_all_faces;
auto dets_read = num_all_faces;
auto jumps_and_sources = 24*num_all_fluxes;
auto matparams_readwrite = 8*num_all_fluxes;
auto fluxes_write = 6*num_all_fluxes;
state.logger.register_profiler("compute_field_jumps_and_fluxes", TYPE)
.set_quantity("GFlops", 72*num_all_fluxes/1e9)
.set_quantity("GB", (normals_read+dets_read+jumps_and_sources+matparams_readwrite+fluxes_write)*8/1e9);
dets_read = num_cells;
auto lm_read = num_all_orientations*cell_dofs*4*cell_fluxes;
auto lifting_readwrite = num_all_fluxes+num_all_dofs;
state.logger.register_profiler("compute_flux_lifting", TYPE)
.set_quantity("GFlops", 21*cell_dofs*num_all_dofs*6/1e9)
.set_quantity("GB", 6*(dets_read+lm_read+lifting_readwrite)*8/1e9);
}
}
#endif /* ENABLE_PROFILING */
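/* A straight integration line between two points: the segment is sampled at
 * 'samples' midpoints, and each sampling point is located in the mesh with
 * gmsh::model::mesh::getElementByCoordinates(), which also returns its
 * reference coordinates inside the containing element. */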
struct integration_line
{
std::vector<size_t> element_tags;
std::vector<point_3d> sampling_points_phys;
std::vector<point_3d> sampling_points_ref;
point_3d sampling_increment;
integration_line(const point_3d& start,
const point_3d& end, size_t samples)
{
sampling_increment = (end - start)/samples;
for (size_t i = 0; i < samples; i++)
{
point_3d sp = start + sampling_increment*(i+0.5);
sampling_points_phys.push_back(sp);
size_t etag;
int etype;
std::vector<size_t> ntags;
double u, v, w;
gmsh::model::mesh::getElementByCoordinates(sp.x(), sp.y(), sp.z(),
etag, etype, ntags, u, v, w, 3, true);
element_tags.push_back(etag);
sampling_points_ref.push_back( point_3d(u,v,w) );
}
}
size_t sampling_points() const
{
assert(element_tags.size() == sampling_points_phys.size());
assert(element_tags.size() == sampling_points_ref.size());
return element_tags.size();
}
std::tuple<size_t, point_3d, point_3d>
operator[](size_t pos)
{
assert(element_tags.size() == sampling_points_phys.size());
assert(element_tags.size() == sampling_points_ref.size());
assert(pos < element_tags.size());
return std::make_tuple(
element_tags[pos],
sampling_points_phys[pos],
sampling_points_ref[pos]
);
}
point_3d increment() const
{
return sampling_increment;
}
};
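/* Approximate the line integral of the electric field along an
 * integration_line with the midpoint rule: at each sampling point the local
 * field is reconstructed from the DG basis and dotted with the sampling
 * increment. */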
double
integrate_electric_field(const model& mod, const maxwell::solver_state& state,
integration_line& iline)
{
double integral_val = 0.0;
auto si = iline.increment();
for (size_t i = 0; i < iline.sampling_points(); i++)
{
auto [tag, physp, refp] = iline[i];
auto [entnum, ofs] = mod.lookup_tag(tag);
auto& e = mod.entity_at(entnum);
auto& pe = e.cell(ofs);
auto& re = e.cell_refelem(pe);
auto dof_ofs = e.cell_model_dof_offset(ofs);
auto dof_num = re.num_basis_functions();
vecxd phi = re.basis_functions( refp );
vecxd locEx = state.emf_curr.Ex.segment(dof_ofs, dof_num);
vecxd locEy = state.emf_curr.Ey.segment(dof_ofs, dof_num);
vecxd locEz = state.emf_curr.Ez.segment(dof_ofs, dof_num);
integral_val += locEx.dot(phi) * si.x() +
locEy.dot(phi) * si.y() +
locEz.dot(phi) * si.z();
}
return integral_val;
}

#ifdef ENABLE_GPU_SOLVER
double
integrate(const model& mod, const maxwell::solver_state_gpu& state, integration_line& lint)
{
return 0.0;
}
#endif /* ENABLE_GPU_SOLVER */
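/* Re-evaluate the conductivity-dependent material parameters at the cell
 * barycenters. This is exposed to Lua (see the 'relmat' callback below) so the
 * configuration script can switch materials while the simulation runs. */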
void
reinit_materials(const model& mod, maxwell::solver_state& state, maxwell::parameter_loader& mpl)
{
for (auto& e : mod)
{
auto tag = e.gmsh_tag();
for (size_t iT = 0; iT < e.num_cells(); iT++)
{
auto& pe = e.cell(iT);
auto& re = e.cell_refelem(pe);
auto bar = pe.barycenter();
auto epsilon = mpl.epsilon(tag, bar);
//auto mu = mpl.mu(tag, bar);
auto sigma = mpl.sigma(tag, bar);
auto ofs = e.cell_model_dof_offset(iT);
for (size_t iD = 0; iD < re.num_basis_functions(); iD++)
{
//state.matparams.inv_epsilon(ofs+iD) = 1./epsilon;
//state.matparams.inv_mu(ofs+iD) = 1./mu;
state.matparams.sigma(ofs+iD) = sigma;
state.matparams.sigma_over_epsilon(ofs+iD) = sigma/epsilon;
}
}
}
}
/****************************************************************************/
/* Solver driver
*/
template<typename State>
void solver_mainloop(const model& mod, State& state, maxwell::parameter_loader& mpl)
{
#ifdef USE_MPI
int proc_rank;
MPI_Comm_rank(MPI_COMM_WORLD, &proc_rank);
#endif /* USE_MPI */
initialize_solver(mod, state, mpl);
state.delta_t = mpl.sim_dt();
auto num_timesteps = mpl.sim_timesteps();
auto silo_output_rate = mpl.postpro_siloOutputRate();
auto cycle_print_rate = mpl.postpro_cyclePrintRate();
auto ti = mpl.sim_timeIntegrator();
#ifdef ENABLE_PROFILING
initialize_profiling(mod, state, ti);
#endif /* ENABLE_PROFILING */
#ifdef USE_MPI
if (proc_rank == 0) {
#endif /* USE_MPI */
std::cout << " BEGINNING SIMULATION" << std::endl;
std::cout << "I will do " << num_timesteps << " timesteps of " << state.delta_t;
std::cout << "s each." << std::endl;
std::cout << "Time integrator: " << mpl.sim_timeIntegratorName() << std::endl;
#ifdef USE_MPI
}
#endif /* USE_MPI */
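/* Report the total number of unknowns: 6 field components per scalar DoF,
 * summed over all ranks when running under MPI. */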
#ifdef USE_MPI
size_t my_num_dofs = 6*mod.num_dofs();
size_t num_dofs;
MPI_Reduce(&my_num_dofs, &num_dofs, 1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD);
if (proc_rank == 0)
std::cout << "Model has " << num_dofs << " DoFs" << std::endl;
#else /* USE_MPI */
size_t num_dofs = 6*mod.num_dofs();
std::cout << "Model has " << num_dofs << " DoFs" << std::endl;
#endif /* USE_MPI */
#ifdef USE_MPI
#error "The geometric diode code is not compatible with MPI yet."
#endif
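/* Expose material re-initialization to the Lua configuration script. */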
auto relmat = [&](){ reinit_materials(mod, state, mpl); };
sol::state& lua = mpl.lua_state();
lua["relmat"] = relmat;
double total_iteration_time = 0.;
#ifdef ENABLE_PROFILING
auto& solver_prof = state.logger.get_profiler("solver");
#endif /* ENABLE_PROFILING */
for (size_t i = 0; i < num_timesteps; i++)
{
timecounter tc; /* tic()/toc() timer, assumed from the project's timing utilities */
tc.tic();
/* The actual timestep (field update and source application) goes here;
 * the call is omitted in this listing. */
double time = tc.toc();
total_iteration_time += time;
if ( (silo_output_rate != 0) and (i%silo_output_rate == 0) )
export_fields_to_silo(mod, state, mpl, "");
if ( (cycle_print_rate != 0) and (i%cycle_print_rate == 0) )
{
double dofs_s_proc = (mod.num_dofs()*6)/time;
#ifdef USE_MPI
double dofs_s_total;
MPI_Reduce(&dofs_s_proc, &dofs_s_total, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
if (proc_rank == 0)
{
std::cout << cr << clrline << "Cycle " << i << ": t = ";
std::cout << state.curr_time << " s" << ", DOFs/s: ";
std::cout << dofs_s_total << std::flush;
}
#else /* USE_MPI */
std::cout << cr << clrline << "Cycle " << i << ": t = ";
std::cout << state.curr_time << " s" << ", DOFs/s: ";
std::cout << dofs_s_proc << ", time: " << time << " s\n";
#endif /* USE_MPI */
#ifdef ENABLE_PROFILING
state.logger.print_current();
#endif /* ENABLE_PROFILING */
}
}
std::cout << std::endl;
mpl.call_finalization_callback();
#ifdef USE_MPI
if (proc_rank == 0)
#endif
{
std::cout << Bon << greenfg;
std::cout << "Total iteration time = " << total_iteration_time << " s" << std::endl;
std::cout << "Avg iteration time = " << total_iteration_time/num_timesteps << " s" << std::endl;
#ifdef USE_MPI
std::cout << "Avg dofs/s = " << num_timesteps*(mod.num_dofs_world()*6)/total_iteration_time << reset << std::endl;
#else
std::cout << "Avg dofs/s = " << num_timesteps*(mod.num_dofs()*6)/total_iteration_time << reset << std::endl;
#ifdef ENABLE_PROFILING
state.logger.print_average();
#endif /* ENABLE_PROFILING */
#endif
}
}
/****************************************************************************/
/* Register Lua usertypes valid for both CPU and GPU
*/
static void
register_lua_usertypes(maxwell::parameter_loader& mpl)
{
sol::state& lua = mpl.lua_state();
sol::usertype<field_values> field_values_ut =
lua.new_usertype<field_values>("field_values",
sol::constructors<field_values()>()
);
field_values_ut["Ex"] = &field_values::Ex;
field_values_ut["Ey"] = &field_values::Ey;
field_values_ut["Ez"] = &field_values::Ez;
field_values_ut["Hx"] = &field_values::Hx;
field_values_ut["Hy"] = &field_values::Hy;
field_values_ut["Hz"] = &field_values::Hz;
sol::usertype<point_3d> point_3d_ut = lua.new_usertype<point_3d>("point",
sol::constructors<point_3d(), point_3d(double, double, double)>()
);
sol::usertype<integration_line> line_integrator_ut =
lua.new_usertype<integration_line>("integration_line",
sol::constructors<integration_line(const point_3d&, const point_3d&, size_t)>()
);
}
/****************************************************************************/
/* Register GMSH API stuff
*/
static void
register_lua_callbacks(maxwell::parameter_loader& mpl, model& mod)
{
sol::state& lua = mpl.lua_state();
auto volume_physical_group_entities = [&](int tag) -> auto {
return sol::as_table( mod.lookup_physical_group_entities(3, tag) );
};
lua["volume_physical_group_entities"] = volume_physical_group_entities;
auto surface_physical_group_entities = [&](int tag) -> auto {
return sol::as_table( mod.lookup_physical_group_entities(2, tag) );
};
lua["surface_physical_group_entities"] = surface_physical_group_entities;
}
/****************************************************************************/
/* Register Lua usertypes valid only for CPU
*/
static void
register_lua_usertypes_bystate(maxwell::parameter_loader& mpl,
model& mod, maxwell::solver_state& state)
{
sol::state& lua = mpl.lua_state();
lua["internal"] = lua.create_table();
lua["internal"]["model"] = &mod;
lua["internal"]["state"] = &state;
auto compute_energy_lambda = [&]() {
return compute_energy(mod, state, mpl);
};
lua["compute_energy"] = compute_energy_lambda;
auto compute_error_lambda = [&]() {
return compute_error(mod, state, mpl);
};
lua["compute_error"] = compute_error_lambda;
auto compare_at_gauss_points_lambda = [&]() {
compare_at_gauss_points(mod, state, mpl);
};
lua["compare_at_gauss_points"] = compare_at_gauss_points_lambda;
lua["integrate_E"] = integrate_electric_field;
#ifdef ENABLE_GPU_SOLVER
/****************************************************************************/
/* Register Lua usertypes valid only for GPU
*/
static void
register_lua_usertypes_bystate(maxwell::parameter_loader& mpl,
model& mod, maxwell::solver_state_gpu& state)
{}
#endif /* ENABLE_GPU_SOLVER */
#ifdef ENABLE_GPU_SOLVER
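/* Bind each MPI rank to one GPU: ranks are grouped by shared-memory node via
 * MPI_Comm_split_type(), and the node-local rank is used as the device index. */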
static void
select_gpu(void)
{
int comm_rank = -1, comm_size = -1;
MPI_Comm_rank(MPI_COMM_WORLD, &comm_rank);
MPI_Comm_size(MPI_COMM_WORLD, &comm_size);
MPI_Comm local_comm;
MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, comm_rank, MPI_INFO_NULL, &local_comm);
int local_comm_rank = -1, local_comm_size = -1;
MPI_Comm_rank(local_comm, &local_comm_rank);
MPI_Comm_size(local_comm, &local_comm_size);
printf("CR: %d, CS: %d, LCR: %d, LCS: %d\n", comm_rank, comm_size, local_comm_rank, local_comm_size);
int num_devices = 0;
gpuGetDeviceCount(&num_devices);
if (num_devices < local_comm_size)
{
printf("Insufficient GPUs (%d detected, %d required)\n", num_devices, local_comm_size);
MPI_Abort(MPI_COMM_WORLD, -1);
}
gpuSetDevice(local_comm_rank);
}
#endif /* ENABLE_GPU_SOLVER */
/****************************************************************************/
/* Main
*/
int main(int argc, char *argv[])
{
struct rusage ru_start, ru_end;
getrusage(RUSAGE_SELF, &ru_start);
/* Default to a single-process run; MPI overwrites these below. */
int comm_rank = 0, comm_size = 1;
#ifdef USE_MPI
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &comm_rank);
MPI_Comm_size(MPI_COMM_WORLD, &comm_size);
#endif /* USE_MPI */
if (argc != 2)
{
std::cout << "Please specify configuration file" << std::endl;
return 1;
}
maxwell::parameter_loader mpl;
if ( not mpl.load_file( argv[1] ) )
{
std::cout << "Configuration problem, exiting" << std::endl;
return 1;
}
#ifdef DONT_USE_STD_FILESYSTEM
mkdir(mpl.sim_name().c_str(), 0755);
#else
std::filesystem::create_directory( mpl.sim_name() );
#endif /* DONT_USE_STD_FILESYSTEM */
#ifdef USE_MPI
if (comm_rank == 0) {
#endif /* USE_MPI */
gmsh::initialize();
gmsh::option::setNumber("General.Terminal", 0);
gmsh::open( mpl.sim_gmshmodel() );
bool generate = std::regex_match(mpl.sim_gmshmodel(), std::regex(".*\\.geo$") );
if (generate)
{
std::cout << "Generating mesh..." << std::flush;
gmm::generate( DIMENSION(3) );
std::cout << "done" << std::endl;
}
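/* Optionally rescale the mesh: the 3x4 row-major affine matrix below applies
 * a uniform scaling by sf to all node coordinates. */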
auto [scaled, sf] = mpl.mesh_scalefactor();
if (scaled)
{
std::vector<double> tr = {sf, 0, 0, 0,
0, sf, 0, 0,
0, 0, sf, 0 };
gmm::affineTransform(tr);
}
#ifdef USE_MPI
}
#endif /* USE_MPI */
model mod( mpl.sim_geomorder(), mpl.sim_approxorder() );
if (comm_size > 1)
{
mod.partition_mesh(comm_size);
std::cout << "Distributing mesh partitions..." << std::flush;
std::cout << "done" << std::endl;
}
else
{
mod.populate_from_gmsh();
}
sol::state& lua = mpl.lua_state();
auto on_mesh_loaded = lua["on_mesh_loaded"];
if (on_mesh_loaded.valid())
on_mesh_loaded();
if ( not mpl.validate_model_params(mod) )
{
std::cout << "Configuration problem, exiting" << std::endl;
return 1;
}
if ( mpl.sim_usegpu() )
{
#ifdef ENABLE_GPU_SOLVER
#ifdef USE_MPI
if (comm_rank == 0)
{
std::cout << " *** MPI not supported yet on GPU. ***" << std::endl;
std::cout << "Running on " << greenfg << "GPU " << nofg << "with ";
std::cout << greenfg << comm_size << nofg << " MPI processes";
std::cout << std::endl;
}
select_gpu();
#else /* USE_MPI */
std::cout << "Running on " << greenfg << "GPU" << nofg << std::endl;
#endif /* USE_MPI */
maxwell::solver_state_gpu state_g;
register_lua_usertypes_bystate(mpl, mod, state_g);
solver_mainloop(mod, state_g, mpl);
#else /* ENABLE_GPU_SOLVER */
std::cout << "GPU solver not compiled. Exiting." << std::endl;
#endif /* ENABLE_GPU_SOLVER */
}
else
{
#ifdef USE_MPI
if (comm_rank == 0) {
std::cout << "Running on " << yellowfg << "CPU " << nofg << "with ";
std::cout << yellowfg << comm_size << nofg << " MPI processes";
std::cout << std::endl;
}
#else /* USE_MPI */
std::cout << "Running on " << yellowfg << "CPU" << nofg << std::endl;
#endif /* USE_MPI */
maxwell::solver_state state_c;
register_lua_usertypes_bystate(mpl, mod, state_c);
solver_mainloop(mod, state_c, mpl);
}
#ifdef USE_MPI
MPI_Finalize();
#endif
getrusage(RUSAGE_SELF, &ru_end);
std::cout << "Max RSS: " << (ru_end.ru_maxrss - ru_start.ru_maxrss) << std::endl;