diff --git a/CMakeLists.txt b/CMakeLists.txt
index b35c03eb94dd6c9b22c529c143d09655fe38a3be..81dbb896bc809258861bfb0131f0484c51b785f6 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -178,7 +178,7 @@ else()
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wno-sign-compare -Wshadow \
 -Wno-unknown-pragmas -Wno-unused-parameter")
 endif()
-set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g -fpermissive")
+set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g -fpermissive -fsanitize=address")
 set(CMAKE_CXX_FLAGS_RELEASE "-O3 -g -DNDEBUG -fpermissive")
 set(CMAKE_CXX_FLAGS_RELEASEASSERT "-O3 -g -fpermissive")
diff --git a/include/entity.h b/include/entity.h
index 9779db4c210210e738def4c442aa4af60ffa60f1..8c052f1fcdbbfc1ee52796baf23e0e41c4b7f1da 100644
--- a/include/entity.h
+++ b/include/entity.h
@@ -83,6 +83,7 @@ public:
     const reference_element& cell_refelem(size_t) const;
     const reference_element& cell_refelem(const physical_element&) const;
     size_t cell_global_index(size_t) const;
+    size_t cell_local_index_by_gmsh(size_t) const;
     size_t cell_global_index_by_gmsh(size_t) const;
     size_t cell_local_dof_offset(size_t) const;
     size_t cell_global_dof_offset(size_t) const;
@@ -107,6 +108,7 @@ public:
     int material_tag(void) const;
     int gmsh_tag(void) const;
+    int gmsh_dim(void) const;
     int gmsh_elem_type(void) const;
     entity_ordering current_elem_ordering(void) const;
     constexpr size_t num_faces_per_elem(void) const { return 4; }
diff --git a/include/gmsh_io.h b/include/gmsh_io.h
index 2b340140ab8e3bdd5100bbf099bc01d659656e2b..51be9f9f9245737f9fb5e2bd4fb2865cfcd3be7b 100644
--- a/include/gmsh_io.h
+++ b/include/gmsh_io.h
@@ -85,16 +85,31 @@ struct boundary_descriptor
 #ifdef USE_MPI
 struct dof_range
 {
-    int tag;
-    int dim;
-    size_t base;
-    size_t length;
+    int                 tag;
+    int                 dim;
+    size_t              base;
+    std::vector<size_t> dof_offsets;
+
+    /* Build the per-DOF global offset table of entity e, whose DOFs
+     * start at global offset b. */
+    dof_range(const entity& e, size_t b)
+    {
+        tag  = e.gmsh_tag();
+        dim  = e.gmsh_dim();
+        base = b;
+        dof_offsets.resize( e.num_dofs() );
+        auto dofs_per_cell = e.num_dofs()/e.num_cells();
+        assert( dofs_per_cell*e.num_cells() == e.num_dofs() );
+        for (size_t iT = 0; iT < e.num_cells(); iT++)
+            for (size_t iD = 0; iD < dofs_per_cell; iD++)
+                dof_offsets[iT*dofs_per_cell+iD] = e.cell_global_dof_offset(iT)+iD;
+    }
 };
 
 inline std::ostream& operator<<(std::ostream& os, const dof_range& dr)
 {
     os << "Tag " << dr.tag << ", dim " << dr.dim << ": (";
-    os << dr.base << ", " << dr.length << ")";
+    os << dr.base << ", " << dr.dof_offsets.size() << ")";
     return os;
 }
 #endif
@@ -126,9 +141,12 @@ class model
 
     /* Map from 3D entity tag to partition */
     std::map<int, int> partition_inverse_map;
-
-    /* Global dof range, only rank0 has this */
+public:
+    /* RANK 0 only: global (process-wide) DOF numbering and placement */
     std::map<int, dof_range> global_dof_ranges;
+private:
+    /* RANK 0 only: copies of the remote entities, kept for postprocessing */
+    std::map<int, std::vector<entity>> remote_entities;
 #endif /* USE_MPI */
 
     using entofs_pair = std::pair<size_t, size_t>;
@@ -165,6 +183,43 @@ public:
     void partition_mesh(int);
     void populate_from_gmsh(void);
 
+#ifdef USE_MPI
+    const std::map<int, dof_range>& dof_ranges(void) const
+    {
+        return global_dof_ranges;
+    }
+
+    size_t global_num_dofs() const
+    {
+        size_t ret = 0;
+        for (auto& dr : global_dof_ranges)
+            ret += dr.second.dof_offsets.size();
+        return ret;
+    }
+
+    size_t global_num_cells() const
+    {
+        size_t ret = 0;
+        for (auto& e : entities)
+            ret += e.num_cells();
+
+        for (auto& re : remote_entities)
+            for (auto& e : re.second)
+                ret += e.num_cells();
+
+        return ret;
+    }
+
+    /* Convert a gmsh partition tag (1-based) to an MPI rank (0-based). */
+    int entity_rank(int tag) const
+    {
+        return partition_inverse_map.at(tag)-1;
+    }
+
+    entity& entity_global_lookup(int);
+    const entity& entity_global_lookup(int) const;
+#endif /* USE_MPI */
+
     const element_connectivity<element_key>& connectivity() const
     {
         return conn;
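
The dof_range above replaces the flat (base, length) pair with an explicit per-DOF offset table. The patch itself only uses base and dof_offsets.size() so far, but the table records where every single DOF lands in the global numbering. A minimal standalone sketch of the cell-major layout the constructor produces (all sizes and offsets below are made up for illustration):

    #include <cassert>
    #include <cstddef>
    #include <vector>

    int main()
    {
        // Pretend entity: 2 cells, 4 DOFs per cell, whose cells start at
        // global DOF offsets 100 and 112 (stand-ins for cell_global_dof_offset).
        std::vector<std::size_t> cell_global_dof_offset = { 100, 112 };
        std::size_t dofs_per_cell = 4;

        // Same fill loop as the dof_range constructor.
        std::vector<std::size_t> dof_offsets(2*dofs_per_cell);
        for (std::size_t iT = 0; iT < 2; iT++)
            for (std::size_t iD = 0; iD < dofs_per_cell; iD++)
                dof_offsets[iT*dofs_per_cell+iD] = cell_global_dof_offset[iT]+iD;

        assert(dof_offsets[3] == 103);  // cell 0, DOF 3
        assert(dof_offsets[4] == 112);  // cell 1, DOF 0
        return 0;
    }
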
diff --git a/include/maxwell_interface.h b/include/maxwell_interface.h
index 2e7af8af5c06e062ac36944f1bf8dc5415840fc8..fc686a3006477fbac9f165f947d2e853fd1b1fdd 100644
--- a/include/maxwell_interface.h
+++ b/include/maxwell_interface.h
@@ -18,6 +18,11 @@
 #include "param_loader.h"
 #endif
 
+#ifdef USE_MPI
+#include <mpi.h>
+#include "gmsh_mpi.h"
+#endif /* USE_MPI */
+
 namespace maxwell {
 
 #ifndef HIDE_THIS_FROM_NVCC
@@ -94,6 +99,13 @@ struct field
     void resize(size_t);
 };
 
+#ifdef USE_MPI
+void send_field(field& f, int dst, int tag, MPI_Comm comm);
+void send_field(field& f, int dst, int tag, size_t offset, size_t length, MPI_Comm comm);
+void receive_field(field& f, int src, int tag, MPI_Comm comm);
+void receive_field(field& f, int src, int tag, size_t offset, size_t length, MPI_Comm comm);
+#endif /* USE_MPI */
+
 #ifdef ENABLE_GPU_SOLVER
 struct pinned_field
 {
@@ -314,20 +326,28 @@ void init_from_model(const model&, solver_state&);
 void init_matparams(const model&, solver_state&, const parameter_loader&);
 //void apply_operator(solver_state&, const field&, field&);
 void export_fields_to_silo(const model&, const solver_state&, const parameter_loader&);
 void timestep(solver_state&, const parameter_loader&, time_integrator_type);
 void prepare_sources(const model&, maxwell::solver_state&, maxwell::parameter_loader&);
 void do_sources(const model&, maxwell::solver_state&, maxwell::parameter_loader&);
 void swap(maxwell::solver_state&, const parameter_loader&);
+#ifdef USE_MPI
+void export_fields_to_silo_2(const model&, solver_state&, const parameter_loader&);
+void gather_field(const model&, maxwell::field&, maxwell::field&, int, MPI_Comm);
+#endif /* USE_MPI */
 
 #ifdef ENABLE_GPU_SOLVER
 void init_from_model(const model&, solver_state_gpu&);
 void init_matparams(const model&, solver_state_gpu&, const parameter_loader&);
 //void apply_operator(solver_state_gpu&, const field_gpu&, field_gpu&);
 void export_fields_to_silo(const model&, const solver_state_gpu&, const parameter_loader&);
 void timestep(solver_state_gpu&, const parameter_loader&, time_integrator_type);
 void prepare_sources(const model&, maxwell::solver_state_gpu&, maxwell::parameter_loader&);
 void do_sources(const model&, maxwell::solver_state_gpu&, maxwell::parameter_loader&);
 void swap(maxwell::solver_state_gpu&, const parameter_loader&);
+#ifdef USE_MPI
+void export_fields_to_silo_2(const model&, solver_state_gpu&, const parameter_loader&);
+void gather_field(const model&, const maxwell::field_gpu&, maxwell::field_gpu&, int, MPI_Comm);
+#endif /* USE_MPI */
 #endif /* ENABLE_GPU_SOLVER */
 
 void init_boundary_conditions(const model&, const parameter_loader&, vecxd&);
diff --git a/src/entity.cpp b/src/entity.cpp
index 0fd29eba6270af2b757b324123a114883492c200..8e1d58f48f4e3851664572c89e8efeb3c6d95dd4 100644
--- a/src/entity.cpp
+++ b/src/entity.cpp
@@ -273,6 +273,15 @@ entity::cell_global_index(size_t iT) const
     return m_index_base + iT;
 }
 
+/* Map a cell index in this entity's current ordering back to the
+ * cell's original position in the gmsh ordering. */
+size_t
+entity::cell_local_index_by_gmsh(size_t iT) const
+{
+    assert(iT < physical_cells.size());
+    return physical_cells[iT].original_position();
+}
+
 size_t
 entity::cell_global_index_by_gmsh(size_t iT) const
 {
@@ -517,6 +526,12 @@ entity::gmsh_tag(void) const
     return tag;
 }
 
+int
+entity::gmsh_dim(void) const
+{
+    return dim;
+}
+
 int
 entity::gmsh_elem_type(void) const
 {
diff --git a/src/gmsh_io.cpp b/src/gmsh_io.cpp
index 6915ece09ee4dffc8ceb67482cfdaa526cf44da7..358ea89d1770589e8dbd8e602551ba9f412f9125 100644
--- a/src/gmsh_io.cpp
+++ b/src/gmsh_io.cpp
@@ -235,22 +235,26 @@ model::import_gmsh_entities_rank0(void)
 #ifdef USE_MPI
         if (num_partitions > 1)
         {
+            /* With multiple partitions, determine the partition to which
+             * this entity belongs. If the partition is remote, send the
+             * entity to the owning process; a copy is also kept locally,
+             * so that rank 0 has all the data needed for postprocessing. */
            auto mp = my_partition();
            auto tag_partition = partition_inverse_map.at(tag);
            bool entity_is_remote = ( tag_partition != mp );
            if (entity_is_remote)
            {
-                dof_range dr;
-                dr.dim = dim; dr.tag = tag;
-                dr.base = global_dof_base; dr.length = e.num_dofs();
-                global_dof_ranges[tag] = dr;
+                dof_range dr(e, global_dof_base);
+                global_dof_ranges.emplace(tag, std::move(dr));
                global_dof_base += e.num_dofs();
                assert(tag_partition > 0);
                int rank = tag_partition - 1;
                e.mpi_send(rank, MPI_COMM_WORLD);
-                std::cout << "Sending entity " << tag << " to " << rank << std::endl;
+                std::cout << "Sending entity " << tag << " to rank " << rank << " (" << e.num_dofs() << " DOFs)" << std::endl;
+                /* Keep a local copy of the entity */
+                remote_entities[tag_partition].push_back( std::move(e) );
                continue;
            }
        }
@@ -273,19 +277,38 @@ model::import_gmsh_entities_rank0(void)
         index_base += e.num_cells();
 
 #ifdef USE_MPI
-        dof_range dr;
-        dr.dim = dim; dr.tag = tag;
-        dr.base = global_dof_base; dr.length = e.num_dofs();
-        global_dof_ranges[tag] = dr;
+        dof_range dr(e, global_dof_base);
+        global_dof_ranges.emplace(tag, std::move(dr));
        global_dof_base += e.num_dofs();
 #endif /* USE_MPI */
 
        update_connectivity(e, entity_index);
-
        entities.push_back( std::move(e) );
        entity_index++;
    }
 
+#ifdef USE_MPI
+    /* Recompute the DOF, flux and cell index bases of the remote
+     * entities saved locally, partition by partition. */
+    assert( IMPLIES(num_partitions == 1, remote_entities.size() == 0) );
+    for (auto& re : remote_entities)
+    {
+        entity_index = 0;
+        dof_base = 0;
+        flux_base = 0;
+        index_base = 0;
+        for (auto& e : re.second)
+        {
+            e.base(dof_base, flux_base, index_base);
+            e.number(entity_index);
+            dof_base += e.num_dofs();
+            flux_base += e.num_fluxes();
+            index_base += e.num_cells();
+            entity_index++;
+        }
+    }
+#endif
 }
 
 #ifdef USE_MPI
@@ -301,9 +324,9 @@ model::import_gmsh_entities_rankN(void)
 
     for (auto& rtag : partition_map.at(mp))
     {
-        std::cout << "Receiving " << rtag << ", rank " << mp-1 << std::endl;
        entity e;
        e.mpi_recv(0, MPI_COMM_WORLD);
+        std::cout << "Receiving entity " << rtag << " on rank " << mp-1 << " (" << e.num_dofs() << " DOFs)" << std::endl;
 
        for (size_t i = 0; i < e.num_cells(); i++)
        {
@@ -561,6 +584,40 @@ model::partition_mesh(int parts)
     gmm::partition(parts);
    num_partitions = parts;
 }
+
+entity&
+model::entity_global_lookup(int tag)
+{
+    /* Look for the tag among the local entities... */
+    for (auto& e : entities)
+        if (e.gmsh_tag() == tag)
+            return e;
+
+    /* ...then among the locally saved remote entities. */
+    int partition = partition_inverse_map.at(tag);
+    for (auto& e : remote_entities.at(partition))
+        if (e.gmsh_tag() == tag)
+            return e;
+
+    throw std::invalid_argument("tag not found");
+}
+
+const entity&
+model::entity_global_lookup(int tag) const
+{
+    /* Look for the tag among the local entities... */
+    for (const auto& e : entities)
+        if (e.gmsh_tag() == tag)
+            return e;
+
+    /* ...then among the locally saved remote entities. */
+    int partition = partition_inverse_map.at(tag);
+    for (const auto& e : remote_entities.at(partition))
+        if (e.gmsh_tag() == tag)
+            return e;
+
+    throw std::invalid_argument("tag not found");
+}
 #endif /* USE_MPI */
 
 std::vector<entity>::const_iterator
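
Worth keeping in mind when reading the code above: gmsh partition tags are 1-based while MPI ranks are 0-based, which is what the `- 1` in entity_rank() accounts for. A standalone sketch of the convention (map contents made up):

    #include <cassert>
    #include <map>

    int main()
    {
        // Hypothetical contents of partition_inverse_map: entity tag -> partition.
        std::map<int, int> partition_inverse_map = { {7, 1}, {9, 2} };

        // Same expression as model::entity_rank().
        auto entity_rank = [&](int tag) {
            return partition_inverse_map.at(tag) - 1;
        };

        assert(entity_rank(7) == 0);    // partition 1 lives on rank 0
        assert(entity_rank(9) == 1);    // partition 2 lives on rank 1
        return 0;
    }
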
diff --git a/src/maxwell_cpu.cpp b/src/maxwell_cpu.cpp
index e1e167a5e8d60d1ceed2e527b05f83d02a84c43b..d0b49e4db42205ef33cdf8896a22ba21bda2fff4 100644
--- a/src/maxwell_cpu.cpp
+++ b/src/maxwell_cpu.cpp
@@ -590,6 +590,112 @@ swap(maxwell::solver_state& state, const parameter_loader&)
     std::swap(state.emf_curr, state.emf_next);
 }
 
+#ifdef USE_MPI
+/* Gather the distributed field 'in' into the field 'out' on rank 'root'.
+ * Non-root ranks send the segments of every entity they own; the root
+ * places each segment at the global offset recorded in the dof_ranges. */
+void
+gather_field(const model& mod, maxwell::field& in, maxwell::field& out,
+    int root, MPI_Comm comm)
+{
+    int rank;
+    MPI_Comm_rank(comm, &rank);
+
+    if (rank == root)
+    {
+        auto& gdr = mod.dof_ranges();
+        for (auto& dr : gdr)
+        {
+            auto tag = dr.second.tag;
+            if (mod.entity_rank(tag) == root)
+                continue;
+
+            auto base = dr.second.base;
+            auto len = dr.second.dof_offsets.size();
+            receive_field(out, mod.entity_rank(tag), tag, base, len, comm);
+        }
+
+        for (auto& e : mod)
+        {
+            auto in_base = e.dof_base();
+            auto in_len = e.num_dofs();
+            auto out_base = gdr.at(e.gmsh_tag()).base;
+            auto out_len = gdr.at(e.gmsh_tag()).dof_offsets.size();
+            assert(in_len == out_len);
+            out.Ex.segment(out_base, out_len) = in.Ex.segment(in_base, in_len);
+            out.Ey.segment(out_base, out_len) = in.Ey.segment(in_base, in_len);
+            out.Ez.segment(out_base, out_len) = in.Ez.segment(in_base, in_len);
+            out.Hx.segment(out_base, out_len) = in.Hx.segment(in_base, in_len);
+            out.Hy.segment(out_base, out_len) = in.Hy.segment(in_base, in_len);
+            out.Hz.segment(out_base, out_len) = in.Hz.segment(in_base, in_len);
+        }
+    }
+    else
+    {
+        for (auto& e : mod)
+            send_field(in, root, e.gmsh_tag(), e.dof_base(), e.num_dofs(), comm);
+    }
+}
+
+void
+export_fields_to_silo_2(const model& mod, maxwell::solver_state& state,
+    const maxwell::parameter_loader& mpl)
+{
+    int rank;
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+
+    if (rank == 0)
+    {
+        size_t gdnum = mod.global_num_dofs();
+        maxwell::field f;
+        f.resize(gdnum);
+        gather_field(mod, state.emf_next, f, 0, MPI_COMM_WORLD);
+
+        std::stringstream ss;
+        ss << mpl.sim_name() << "/timestep_" << state.curr_timestep << ".silo";
+
+        silo sdb;
+        sdb.create_db(ss.str());
+        sdb.import_mesh_from_gmsh();
+        sdb.write_mesh(state.curr_time, state.curr_timestep);
+
+        std::vector<double> out_Ex( mod.global_num_cells() );
+        std::vector<double> out_Ey( mod.global_num_cells() );
+        std::vector<double> out_Ez( mod.global_num_cells() );
+
+        size_t nc = 0;
+        for (auto& dr : mod.dof_ranges())
+        {
+            auto etag = dr.first;
+            auto& e = mod.entity_global_lookup(etag);
+            for (size_t iT = 0; iT < e.num_cells(); iT++)
+            {
+                auto& pe = e.cell(iT);
+                auto& re = e.cell_refelem(pe);
+                vecxd phi_bar = re.basis_functions({1./3., 1./3., 1./3.});
+                auto num_bf = re.num_basis_functions();
+                auto ofs = dr.second.base + num_bf*iT;
+                auto gi = nc + e.cell_local_index_by_gmsh(iT);
+                out_Ex[gi] = f.Ex.segment(ofs, num_bf).dot(phi_bar);
+                out_Ey[gi] = f.Ey.segment(ofs, num_bf).dot(phi_bar);
+                out_Ez[gi] = f.Ez.segment(ofs, num_bf).dot(phi_bar);
+            }
+            nc += e.num_cells();
+        }
+
+        sdb.write_zonal_variable("Ex", out_Ex);
+        sdb.write_zonal_variable("Ey", out_Ey);
+        sdb.write_zonal_variable("Ez", out_Ez);
+        sdb.write_vector_expression("E", "{Ex, Ey, Ez}");
+    }
+    else
+    {
+        /* Non-root ranks only participate in the gather. */
+        maxwell::field dummy;
+        gather_field(mod, state.emf_next, dummy, 0, MPI_COMM_WORLD);
+    }
+}
+#endif /* USE_MPI */
+
 void
 export_fields_to_silo(const model& mod, const maxwell::solver_state& state,
     const maxwell::parameter_loader& mpl)
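
The control flow of gather_field() reduces to the classic point-to-point gather below — a runnable sketch with uniform, made-up segment sizes in place of the per-entity dof_ranges bookkeeping:

    #include <mpi.h>
    #include <algorithm>
    #include <vector>

    int main(int argc, char **argv)
    {
        MPI_Init(&argc, &argv);
        int rank, size;
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        MPI_Comm_size(MPI_COMM_WORLD, &size);

        const int len = 4;                              // DOFs per rank (made up)
        std::vector<double> local(len, double(rank));   // this rank's segment

        if (rank == 0)
        {
            std::vector<double> global(len*size);       // gathered field
            // Root copies its own segment, then receives the remote ones,
            // placing each at that sender's base offset.
            std::copy(local.begin(), local.end(), global.begin());
            for (int src = 1; src < size; src++)
                MPI_Recv(global.data() + len*src, len, MPI_DOUBLE,
                         src, /*tag=*/src, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        }
        else
        {
            MPI_Send(local.data(), len, MPI_DOUBLE, 0, /*tag=*/rank, MPI_COMM_WORLD);
        }

        MPI_Finalize();
        return 0;
    }

In the real code the destination offset of each segment comes from dof_range::base and the message tag is the gmsh entity tag, so segments can arrive in any order.
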
diff --git a/src/maxwell_gpu.cpp b/src/maxwell_gpu.cpp
index 25e0d6b17cdc13a31c6e63027816cfb3ba25d2b6..00ff575ae74f63a80e85e321e7a3b3e51b1378e7 100644
--- a/src/maxwell_gpu.cpp
+++ b/src/maxwell_gpu.cpp
@@ -555,6 +555,20 @@ swap(solver_state_gpu& state, const parameter_loader& mpl)
     std::swap(state.emf_curr, state.emf_next);
 }
 
+#ifdef USE_MPI
+/* Field gathering and the gathering SILO export are not implemented
+ * on the GPU side yet: these are placeholder stubs. */
+void
+gather_field(const model& mod, const maxwell::field_gpu& in, maxwell::field_gpu& out,
+    int root, MPI_Comm comm)
+{}
+
+void
+export_fields_to_silo_2(const model& mod, maxwell::solver_state_gpu& state,
+    const maxwell::parameter_loader& mpl)
+{}
+#endif /* USE_MPI */
+
 void
 export_fields_to_silo(const model& mod, const maxwell::solver_state_gpu& state,
     const maxwell::parameter_loader& mpl)
diff --git a/src/maxwell_interface.cpp b/src/maxwell_interface.cpp
index 31727dc1f1744969f8852313c94d7f6c517ec07e..9747e74fc6973c3da97798ac1dcbe7ef76a46c26 100644
--- a/src/maxwell_interface.cpp
+++ b/src/maxwell_interface.cpp
@@ -25,6 +25,70 @@ field::resize(size_t p_num_dofs)
     num_dofs = p_num_dofs;
 }
 
+#ifdef USE_MPI
+/* Send a whole field: the DOF count travels first, so the receiver
+ * can resize its buffers before the payload arrives. */
+void send_field(field& f, int dst, int tag, MPI_Comm comm)
+{
+    MPI_Send(&f.num_dofs, 1, MPI_UNSIGNED_LONG_LONG, dst, tag, comm);
+    MPI_Send(f.Ex.data(), f.num_dofs, MPI_DOUBLE, dst, tag, comm);
+    MPI_Send(f.Ey.data(), f.num_dofs, MPI_DOUBLE, dst, tag, comm);
+    MPI_Send(f.Ez.data(), f.num_dofs, MPI_DOUBLE, dst, tag, comm);
+    MPI_Send(f.Hx.data(), f.num_dofs, MPI_DOUBLE, dst, tag, comm);
+    MPI_Send(f.Hy.data(), f.num_dofs, MPI_DOUBLE, dst, tag, comm);
+    MPI_Send(f.Hz.data(), f.num_dofs, MPI_DOUBLE, dst, tag, comm);
+}
+
+/* Send only the segment [offset, offset+length) of each component. */
+void send_field(field& f, int dst, int tag, size_t offset, size_t length, MPI_Comm comm)
+{
+    assert(offset+length <= f.num_dofs);
+    MPI_Send(&length, 1, MPI_UNSIGNED_LONG_LONG, dst, tag, comm);
+    MPI_Send(f.Ex.data()+offset, length, MPI_DOUBLE, dst, tag, comm);
+    MPI_Send(f.Ey.data()+offset, length, MPI_DOUBLE, dst, tag, comm);
+    MPI_Send(f.Ez.data()+offset, length, MPI_DOUBLE, dst, tag, comm);
+    MPI_Send(f.Hx.data()+offset, length, MPI_DOUBLE, dst, tag, comm);
+    MPI_Send(f.Hy.data()+offset, length, MPI_DOUBLE, dst, tag, comm);
+    MPI_Send(f.Hz.data()+offset, length, MPI_DOUBLE, dst, tag, comm);
+}
+
+void receive_field(field& f, int src, int tag, MPI_Comm comm)
+{
+    MPI_Status status;
+    size_t ndofs;
+    MPI_Recv(&ndofs, 1, MPI_UNSIGNED_LONG_LONG, src, tag, comm, &status);
+    if (ndofs != f.num_dofs)
+        f.resize(ndofs);
+
+    MPI_Recv(f.Ex.data(), ndofs, MPI_DOUBLE, src, tag, comm, &status);
+    MPI_Recv(f.Ey.data(), ndofs, MPI_DOUBLE, src, tag, comm, &status);
+    MPI_Recv(f.Ez.data(), ndofs, MPI_DOUBLE, src, tag, comm, &status);
+    MPI_Recv(f.Hx.data(), ndofs, MPI_DOUBLE, src, tag, comm, &status);
+    MPI_Recv(f.Hy.data(), ndofs, MPI_DOUBLE, src, tag, comm, &status);
+    MPI_Recv(f.Hz.data(), ndofs, MPI_DOUBLE, src, tag, comm, &status);
+}
+
+/* Receive a segment: the announced length must match the expected one. */
+void receive_field(field& f, int src, int tag, size_t offset, size_t length, MPI_Comm comm)
+{
+    MPI_Status status;
+    size_t ndofs;
+    MPI_Recv(&ndofs, 1, MPI_UNSIGNED_LONG_LONG, src, tag, comm, &status);
+    if ( (ndofs != length) or (offset+length > f.num_dofs) )
+    {
+        std::cout << "receive_field(): unexpected segment size" << std::endl;
+        MPI_Abort(MPI_COMM_WORLD, 1);
+    }
+
+    MPI_Recv(f.Ex.data()+offset, length, MPI_DOUBLE, src, tag, comm, &status);
+    MPI_Recv(f.Ey.data()+offset, length, MPI_DOUBLE, src, tag, comm, &status);
+    MPI_Recv(f.Ez.data()+offset, length, MPI_DOUBLE, src, tag, comm, &status);
+    MPI_Recv(f.Hx.data()+offset, length, MPI_DOUBLE, src, tag, comm, &status);
+    MPI_Recv(f.Hy.data()+offset, length, MPI_DOUBLE, src, tag, comm, &status);
+    MPI_Recv(f.Hz.data()+offset, length, MPI_DOUBLE, src, tag, comm, &status);
+}
+#endif /* USE_MPI */
+
 #ifdef ENABLE_GPU_SOLVER
 
 pinned_field::pinned_field()
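
The send_field()/receive_field() pairs rely on a small handshake: the segment length travels first and the receiver aborts on a mismatch. Stripped of the field type, the protocol looks like this (two-rank sketch, sizes made up):

    #include <mpi.h>
    #include <cstddef>
    #include <cstdio>
    #include <vector>

    int main(int argc, char **argv)
    {
        MPI_Init(&argc, &argv);
        int rank;
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);

        const std::size_t length = 8;                // made-up segment size
        std::vector<double> buf(length, 1.0);

        if (rank == 1)
        {
            // Announce the length, then ship the payload.
            MPI_Send(&length, 1, MPI_UNSIGNED_LONG_LONG, 0, 0, MPI_COMM_WORLD);
            MPI_Send(buf.data(), length, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
        }
        else if (rank == 0)
        {
            std::size_t announced;
            MPI_Recv(&announced, 1, MPI_UNSIGNED_LONG_LONG, 1, 0,
                     MPI_COMM_WORLD, MPI_STATUS_IGNORE);
            if (announced != length)                 // same check as receive_field()
            {
                std::fprintf(stderr, "unexpected segment size\n");
                MPI_Abort(MPI_COMM_WORLD, 1);
            }
            MPI_Recv(buf.data(), announced, MPI_DOUBLE, 1, 0,
                     MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        }

        MPI_Finalize();
        return 0;
    }
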
diff --git a/src/maxwell_solver.cpp b/src/maxwell_solver.cpp
index f917513b93cb2397780b97c39d5a4016151978bf..7c705643b533b198631008d69e115958b0af8632 100644
--- a/src/maxwell_solver.cpp
+++ b/src/maxwell_solver.cpp
@@ -221,11 +221,11 @@ void test_it(const model& mod, State& state, maxwell::parameter_loader& mpl)
     do_sources(mod, state, mpl);
 
-#ifdef USE_MPI
-    int rank;
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-    if (rank == 0)
-#endif /* USE_MPI */
     if ( (silo_output_rate != 0) and (i%silo_output_rate == 0) )
+#ifdef USE_MPI
+        /* All ranks call this: fields are gathered on rank 0 internally. */
+        export_fields_to_silo_2(mod, state, mpl);
+#else
         export_fields_to_silo(mod, state, mpl);
+#endif /* USE_MPI */
 
     swap(state, mpl);