diff --git a/include/connectivity.h b/include/connectivity.h index 23a4fa2c569552a5bd5022dc8ca4e3fb202e47ec..83ba43508e54f3da2ffc9ebcf9d5568141973bce 100644 --- a/include/connectivity.h +++ b/include/connectivity.h @@ -330,4 +330,5 @@ public: } }; -#endif /* USE_MPI */ \ No newline at end of file +#endif /* USE_MPI */ + diff --git a/include/gmsh_io.h b/include/gmsh_io.h index 10a780fee37b1f68e56374781ffcc8db117159bb..431e135048d3d1352f970352806f541023164952 100644 --- a/include/gmsh_io.h +++ b/include/gmsh_io.h @@ -77,7 +77,26 @@ struct boundary_descriptor } }; +#ifdef USE_MPI +struct comm_descriptor +{ + int boundary_tag; + int partition_mine; + int partition_other; + int rank_mine; + int rank_other; + std::vector<size_t> dof_mapping; + std::vector<element_key> fks; +}; +inline +std::ostream& operator<<(std::ostream& os, const comm_descriptor& cd) +{ + os << "tag: " << cd.boundary_tag << ", PM: " << cd.partition_mine << ", PO: " << cd.partition_other; + os << ", RM: " << cd.rank_mine << ", RO: " << cd.rank_other << ", map size: " << cd.dof_mapping.size(); + return os; +} +#endif /* USE_MPI */ #define IMPLIES(a, b) ((not(a)) or (b)) #define IFF(a,b) (IMPLIES(a,b) and IMPLIES(b,a)) @@ -104,7 +123,7 @@ class model #endif /* USE_MPI */ /* Map from boundary tag to all the faces of the entity with that tag */ - std::map<int, std::vector<element_key>> boundary_map; + std::map<int, std::vector<element_key>> boundary_map; std::vector<entity> entities; std::vector<boundary_descriptor> bnd_descriptors; @@ -130,6 +149,8 @@ class model /* RANK 0 only: copy of remote entities for postpro */ std::map<int, std::vector<entity>> remote_entities; + + std::map<int, comm_descriptor> ipc_boundary_comm_table; #endif /* USE_MPI */ using entofs_pair = std::pair<size_t, size_t>; @@ -147,7 +168,8 @@ class model void import_gmsh_entities_rankN(void); int my_partition(void) const; void make_partition_to_entities_map(void); - void map_interprocess_boundaries(); + void map_interprocess_boundaries(void); + void make_comm_descriptors(void); #endif /* USE_MPI */ void make_boundary_to_faces_map(void); @@ -168,6 +190,10 @@ public: void populate_from_gmsh(void); #ifdef USE_MPI + const std::map<int, comm_descriptor>& comm_descriptors(void) const { + return ipc_boundary_comm_table; + } + const std::vector<int>& world_entities_tags(void) const { return all_entities_tags; diff --git a/include/gmsh_mpi.h b/include/gmsh_mpi.h index b9a0ef9ee94492f738c19493b04f357846573015..f05a6be1227f604706d6db112b32450721ff5bc3 100644 --- a/include/gmsh_mpi.h +++ b/include/gmsh_mpi.h @@ -3,7 +3,10 @@ #include <vector> #include <map> #include <array> -#include <mpi/mpi.h> +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Weverything" +#include <mpi.h> +#pragma clang diagnostic pop #include "eigen.h" @@ -245,4 +248,6 @@ priv_MPI_Bcast(std::map<T1, T2>& map, int root, MPI_Comm comm) for (size_t i = 0; i < left.size(); i++) map[ left[i] ] = right[i]; } -} \ No newline at end of file +} + + diff --git a/include/maxwell_interface.h b/include/maxwell_interface.h index 2c344cc66e3fd51646feccfb305efb27b6380aa5..46ec674c15549f0bf60e7628e77b410f8ec98967 100644 --- a/include/maxwell_interface.h +++ b/include/maxwell_interface.h @@ -19,7 +19,6 @@ #endif #ifdef USE_MPI -#include <mpi/mpi.h> #include "gmsh_mpi.h" #endif /* USE_MPI */ @@ -268,6 +267,10 @@ struct solver_state vecxd Jy_src; vecxd Jz_src; field bndsrcs; + +#ifdef USE_MPI + field ipc; +#endif /* USE_MPI */ }; #ifdef ENABLE_GPU_SOLVER @@ -326,7 +329,7 @@ void init_from_model(const model&, solver_state&); void init_matparams(const model&, solver_state&, const parameter_loader&); //void apply_operator(solver_state&, const field&, field&); void export_fields_to_silo(const model&, solver_state&, const parameter_loader&); -void timestep(solver_state&, const parameter_loader&, time_integrator_type); +void timestep(const model&, solver_state&, const parameter_loader&, time_integrator_type); void prepare_sources(const model&, maxwell::solver_state&, maxwell::parameter_loader&); void do_sources(const model&, maxwell::solver_state&, maxwell::parameter_loader&); void swap(maxwell::solver_state&, const parameter_loader&); @@ -339,7 +342,7 @@ void init_from_model(const model&, solver_state_gpu&); void init_matparams(const model&, solver_state_gpu&, const parameter_loader&); //void apply_operator(solver_state_gpu&, const field_gpu&, field_gpu&); void export_fields_to_silo(const model&, solver_state_gpu&, const parameter_loader&); -void timestep(solver_state_gpu&, const parameter_loader&, time_integrator_type); +void timestep(const model&, solver_state_gpu&, const parameter_loader&, time_integrator_type); void prepare_sources(const model&, maxwell::solver_state_gpu&, maxwell::parameter_loader&); void do_sources(const model&, maxwell::solver_state_gpu&, maxwell::parameter_loader&); void swap(maxwell::solver_state_gpu&, const parameter_loader&); diff --git a/include/silo_output.hpp b/include/silo_output.hpp index ee7d1521b63f5838b87e813221439e1fade8ddc2..c5ac5c9c56870392a88bf302b0cde6a3756cf50d 100644 --- a/include/silo_output.hpp +++ b/include/silo_output.hpp @@ -1,7 +1,11 @@ #pragma once #include <vector> + +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Weverything" #include <silo.h> +#pragma clang diagnostic pop class silo { diff --git a/src/gmsh_io.cpp b/src/gmsh_io.cpp index acc18cc645b32f2e44d640f919191926c449ccf9..8753340db0f204e45e48811736a62d59454e9f23 100644 --- a/src/gmsh_io.cpp +++ b/src/gmsh_io.cpp @@ -1,12 +1,13 @@ #include <iostream> #include <sstream> +#include <fstream> #include <cassert> #include "gmsh_io.h" #include "entity.h" #ifdef USE_MPI -#include <mpi/mpi.h> +#include <mpi.h> #endif /* USE_MPI */ std::string @@ -531,6 +532,111 @@ model::map_interprocess_boundaries(void) connect(e, part); } } + +void +model::make_comm_descriptors(void) +{ + /* Make a vector mapping element_key to entity tag */ + using bfk_t = std::pair<element_key, int>; + std::vector<bfk_t> bfk; + for (auto& [tag, keys] : boundary_map) + { + if ( not is_interprocess_boundary(tag) ) + continue; + + for (auto& k : keys) + bfk.push_back( std::make_pair(k, tag) ); + } + /* Sort it for lookups */ + auto comp = [](const bfk_t& a, const bfk_t& b) -> bool { + return a.first < b.first; + }; + std::sort(bfk.begin(), bfk.end(), comp); + + auto lbcomp = [](const bfk_t& a, const element_key& b) -> bool { + return a.first < b; + }; + + size_t flux_base = 0; + /* For each entity */ + + struct lazy_hack { + element_key fk; + size_t fbs; + std::vector<size_t> offsets; + size_t ibtag; + }; + + std::vector<lazy_hack> lhs; + + for (auto& e : entities) + { + /* and for each face */ + for (size_t iF = 0; iF < e.num_faces(); iF++) + { + /* get the element key */ + auto& pf = e.face(iF); + auto& rf = e.face_refelem(pf); + auto fk = element_key(pf); + + /* and lookup it in the vector we created before. */ + auto itor = std::lower_bound(bfk.begin(), bfk.end(), fk, lbcomp); + if ( (itor == bfk.end()) or not ((*itor).first == fk) ) + continue; + auto ibtag = (*itor).second; + auto fbs = rf.num_basis_functions(); + /* + auto& cd = ipc_boundary_comm_table[ibtag]; + for (size_t i = 0; i < fbs; i++) + cd.dof_mapping.push_back(flux_base + fbs*iF + i); + cd.fks.push_back(fk); + */ + lazy_hack lh; + lh.fk = fk; + lh.fbs = fbs; + for (size_t i = 0; i < fbs; i++) + lh.offsets.push_back(flux_base + fbs*iF + i); + lh.ibtag = ibtag; + lhs.push_back( std::move(lh) ); + } + + flux_base += e.num_fluxes(); + } + + auto lhcomp = [](const lazy_hack& a, const lazy_hack& b) -> bool { + return a.fk < b.fk; + }; + std::sort(lhs.begin(), lhs.end(), lhcomp); + for (auto& lh : lhs) + { + auto& cd = ipc_boundary_comm_table[lh.ibtag]; + for (size_t i = 0; i < lh.fbs; i++) + cd.dof_mapping.push_back( lh.offsets[i] ); + } + + + for (auto& [tag, cd] : ipc_boundary_comm_table) + { + cd.boundary_tag = tag; + auto parts = comm_map.at(tag); + assert(parts.size() == 2); + if (parts[0] == my_partition()) + { + cd.partition_mine = parts[0]; + cd.partition_other = parts[1]; + cd.rank_mine = parts[0]-1; + cd.rank_other = parts[1]-1; + } + else + { + cd.partition_mine = parts[1]; + cd.partition_other = parts[0]; + cd.rank_mine = parts[1]-1; + cd.rank_other = parts[0]-1; + } + } +} + #endif void @@ -581,6 +687,7 @@ model::populate_from_gmsh_rank0() #ifdef USE_MPI map_interprocess_boundaries(); ipconn.mpi_bcast(0, MPI_COMM_WORLD); + make_comm_descriptors(); #endif /* USE_MPI */ } @@ -610,6 +717,7 @@ model::populate_from_gmsh_rankN() import_gmsh_entities_rankN(); map_boundaries(); ipconn.mpi_bcast(0, MPI_COMM_WORLD); + make_comm_descriptors(); } #endif /* USE_MPI */ diff --git a/src/maxwell_common.cpp b/src/maxwell_common.cpp index e6a0febad004b97ea4e765622e2bf5bef972347f..c8f83c026a65c3b2f94473159492049dd8ed5bf2 100644 --- a/src/maxwell_common.cpp +++ b/src/maxwell_common.cpp @@ -4,7 +4,9 @@ #define DIRICHLET 2.0 #define ROBIN 1.0 +#ifdef USE_MPI #define INTERPROCESS 1.0 +#endif /* USE_MPI */ #define NEUMANN 0.0 namespace maxwell { @@ -30,12 +32,14 @@ init_boundary_conditions(const model& mod, const parameter_loader& mpl, continue; double bc_coeff = DIRICHLET; +#ifdef USE_MPI if (bd.type == face_type::INTERPROCESS_BOUNDARY) { bc_coeff = INTERPROCESS; } else { +#endif /* USE_MPI */ switch ( mpl.boundary_type(bd.material_tag()) ) { case boundary_condition::UNSPECIFIED: @@ -56,8 +60,9 @@ init_boundary_conditions(const model& mod, const parameter_loader& mpl, bc_coeff = NEUMANN; break; } +#ifdef USE_MPI } - +#endif /* USE_MPI */ auto& pf = e.face(iF); auto& rf = e.face_refelem(pf); auto num_bf = rf.num_basis_functions(); diff --git a/src/maxwell_cpu.cpp b/src/maxwell_cpu.cpp index 8ebd00e3651def056241cad3805d4777525b2e3e..d0d07a4508e3ce942a94621c1ce9f8f2895e74dd 100644 --- a/src/maxwell_cpu.cpp +++ b/src/maxwell_cpu.cpp @@ -83,6 +83,15 @@ init_from_model(const model& mod, solver_state& state) state.eds.push_back( std::move(ed) ); } + auto& cds = mod.comm_descriptors(); + + size_t dofs = 0; + for (auto& [tag, cd] : cds) + if (cd.dof_mapping.size() > dofs) + dofs = cd.dof_mapping.size(); + + state.ipc.resize(dofs); + state.curr_time = 0.0; state.curr_timestep = 0; } @@ -176,6 +185,56 @@ compute_field_jumps(solver_state& state, const field& in) } } +static void +exchange_boundary_data(const model& mod, solver_state& state) +{ + auto& cds = mod.comm_descriptors(); + for (auto& [tag, cd] : cds) + { + if (cd.rank_mine < cd.rank_other) + { + for (size_t i = 0; i < cd.dof_mapping.size(); i++) + { + auto srci = cd.dof_mapping[i]; + state.ipc.Ex[i] = state.jumps.Ex[srci]; + state.ipc.Ey[i] = state.jumps.Ey[srci]; + state.ipc.Ez[i] = state.jumps.Ez[srci]; + state.ipc.Hx[i] = state.jumps.Hx[srci]; + state.ipc.Hy[i] = state.jumps.Hy[srci]; + state.ipc.Hz[i] = state.jumps.Hz[srci]; + } + send_field(state.ipc, cd.rank_other, tag, 0, cd.dof_mapping.size(), MPI_COMM_WORLD); + receive_field(state.ipc, cd.rank_other, tag, 0, cd.dof_mapping.size(), MPI_COMM_WORLD); + for (size_t i = 0; i < cd.dof_mapping.size(); i++) + { + auto dsti = cd.dof_mapping[i]; + state.jumps.Ex[dsti] -= state.ipc.Ex[i]; + state.jumps.Ey[dsti] -= state.ipc.Ey[i]; + state.jumps.Ez[dsti] -= state.ipc.Ez[i]; + state.jumps.Hx[dsti] -= state.ipc.Hx[i]; + state.jumps.Hy[dsti] -= state.ipc.Hy[i]; + state.jumps.Hz[dsti] -= state.ipc.Hz[i]; + } + } + else + { + receive_field(state.ipc, cd.rank_other, tag, 0, cd.dof_mapping.size(), MPI_COMM_WORLD); + for (size_t i = 0; i < cd.dof_mapping.size(); i++) + { + auto dsti = cd.dof_mapping[i]; + double tmp; + tmp = state.jumps.Ex[dsti]; state.jumps.Ex[dsti] -= state.ipc.Ex[i]; state.ipc.Ex[i] = tmp; + tmp = state.jumps.Ey[dsti]; state.jumps.Ey[dsti] -= state.ipc.Ey[i]; state.ipc.Ey[i] = tmp; + tmp = state.jumps.Ez[dsti]; state.jumps.Ez[dsti] -= state.ipc.Ez[i]; state.ipc.Ez[i] = tmp; + tmp = state.jumps.Hx[dsti]; state.jumps.Hx[dsti] -= state.ipc.Hx[i]; state.ipc.Hx[i] = tmp; + tmp = state.jumps.Hy[dsti]; state.jumps.Hy[dsti] -= state.ipc.Hy[i]; state.ipc.Hy[i] = tmp; + tmp = state.jumps.Hz[dsti]; state.jumps.Hz[dsti] -= state.ipc.Hz[i]; state.ipc.Hz[i] = tmp; + } + send_field(state.ipc, cd.rank_other, tag, 0, cd.dof_mapping.size(), MPI_COMM_WORLD); + } + } +} + static void compute_field_jumps_E(solver_state& state, const field& in) { @@ -371,9 +430,14 @@ compute_fluxes_planar_H(solver_state& state) } static void -compute_fluxes(solver_state& state, const field& in, field& out) +compute_fluxes(const model& mod, solver_state& state, const field& in, field& out) { compute_field_jumps(state, in); + +#ifdef USE_MPI + exchange_boundary_data(mod, state); +#endif /* USE_MPI */ + compute_fluxes_planar(state); for (auto& ed : state.eds) @@ -458,10 +522,10 @@ compute_rk4_weighted_sum(solver_state& state, const field& in, double dt, field& } static void -apply_operator(solver_state& state, const field& curr, field& next) +apply_operator(const model& mod, solver_state& state, const field& curr, field& next) { compute_curls(state, curr, next); - compute_fluxes(state, curr, next); + compute_fluxes(mod, state, curr, next); } static void @@ -493,26 +557,26 @@ leapfrog(solver_state& state) } void -timestep(solver_state& state, const parameter_loader&, time_integrator_type ti) +timestep(const model& mod, solver_state& state, const parameter_loader&, time_integrator_type ti) { if (ti == time_integrator_type::EULER) { - apply_operator(state, state.emf_curr, state.tmp); + apply_operator(mod, state, state.emf_curr, state.tmp); compute_euler_update(state, state.emf_curr, state.tmp, state.delta_t, state.emf_next); } if (ti == time_integrator_type::RK4) { - apply_operator(state, state.emf_curr, state.k1); + apply_operator(mod, state, state.emf_curr, state.k1); compute_euler_update(state, state.emf_curr, state.k1, 0.5*state.delta_t, state.tmp); - apply_operator(state, state.tmp, state.k2); + apply_operator(mod, state, state.tmp, state.k2); compute_euler_update(state, state.emf_curr, state.k2, 0.5*state.delta_t, state.tmp); - apply_operator(state, state.tmp, state.k3); + apply_operator(mod, state, state.tmp, state.k3); compute_euler_update(state, state.emf_curr, state.k3, state.delta_t, state.tmp); - apply_operator(state, state.tmp, state.k4); + apply_operator(mod, state, state.tmp, state.k4); compute_rk4_weighted_sum(state, state.emf_curr, state.delta_t, state.emf_next); } diff --git a/src/maxwell_gpu.cpp b/src/maxwell_gpu.cpp index 2314859e3ff2b17aec4ba9dbbdfd4b23799ebfa6..6756b64bbe831f2cb9c5d12a7dd456aa420c8357 100644 --- a/src/maxwell_gpu.cpp +++ b/src/maxwell_gpu.cpp @@ -413,7 +413,7 @@ compute_rk4_weighted_sum(solver_state_gpu& state, const field_gpu& in, } void -timestep(solver_state_gpu& state, const parameter_loader& mpl, time_integrator_type ti) +timestep(const model& mod, solver_state_gpu& state, const parameter_loader& mpl, time_integrator_type ti) { //timecounter tc; //tc.tic(); diff --git a/src/maxwell_solver.cpp b/src/maxwell_solver.cpp index ebe62490f11b6ecf72fa1b456a7ce32d70877cff..27fab666e1d169a3d9035ac05d3bd06027567c37 100644 --- a/src/maxwell_solver.cpp +++ b/src/maxwell_solver.cpp @@ -6,7 +6,10 @@ #endif #ifdef USE_MPI -#include <mpi/mpi.h> +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Weverything" +#include <mpi.h> +#pragma clang diagnostic pop #endif #include <fstream> @@ -219,7 +222,7 @@ void test_it(const model& mod, State& state, maxwell::parameter_loader& mpl) for(size_t i = 0; i < num_timesteps; i++) { mpl.call_timestep_callback(i); - timestep(state, mpl, ti); + timestep(mod, state, mpl, ti); do_sources(mod, state, mpl); if ( (silo_output_rate != 0) and (i%silo_output_rate == 0) ) @@ -237,7 +240,7 @@ void test_it(const model& mod, State& state, maxwell::parameter_loader& mpl) } } -void +static void register_lua_usertypes(maxwell::parameter_loader& mpl) { #ifdef ENABLE_EXP_GEOMETRIC_DIODE @@ -253,7 +256,7 @@ register_lua_usertypes(maxwell::parameter_loader& mpl) #endif /* ENABLE_EXP_GEOMETRIC_DIODE */ } -void +static void register_lua_usertypes_bystate(maxwell::parameter_loader& mpl, model& mod, maxwell::solver_state& state) { @@ -268,7 +271,7 @@ register_lua_usertypes_bystate(maxwell::parameter_loader& mpl, } #ifdef ENABLE_GPU_SOLVER -void +static void register_lua_usertypes_bystate(maxwell::parameter_loader& mpl, model& mod, maxwell::solver_state_gpu& state) {}