diff --git a/src/field/DistributedField.cpp b/src/field/DistributedField.cpp
index 93210c793e293bd82ba09b224438e0de3fbea422..15b8c38105f9ac194e1ae6762b6d206fe1ee22e5 100644
--- a/src/field/DistributedField.cpp
+++ b/src/field/DistributedField.cpp
@@ -217,9 +217,6 @@ namespace gmshfem::field
     // Free the type
     MPI_Type_free(&mpi_struct_type);
 
-    if (rank == 0)
-      msg::info << "Synchronizing Interface Ownership : " << global.size() << " interface DOFs" << msg::endl;
-
     /**
      * global is now a list of pair (dof, rank). We can locally decide for each dof if we own it, using the min policy
      */
@@ -261,7 +258,7 @@ namespace gmshfem::field
   void DistributedField< T_Scalar, T_Form >::_computeToSend(const std::optional<std::vector<int>>& neighborRanks)
   {
 #ifdef HAVE_MPI
-
+    auto rank = gmshfem::common::GmshFem::getMPIRank();
     // What I have to send is what others have to read
 
     // Create temporary MPI Type for the struct
@@ -286,24 +283,57 @@ namespace gmshfem::field
       local.push_back({dof->numType(), dof->entity()});
     }
 
-    // Get the sizes and offset of the global array
-    int total_size = 0;
-    std::vector< int > sizes(commSize), displs(commSize);
-    int loc_size = local.size();
-    MPI_Allgather(&loc_size, 1, MPI_INT, sizes.data(), 1, MPI_INT, MPI_COMM_WORLD);
+    // Without topology, global sync
+    if (!neighborRanks)
+    {
+      // Get the sizes and offset of the global array
+      int total_size = 0;
+      std::vector< int > sizes(commSize), displs(commSize);
+      int loc_size = local.size();
+      MPI_Allgather(&loc_size, 1, MPI_INT, sizes.data(), 1, MPI_INT, MPI_COMM_WORLD);
 
-    for(unsigned i = 0; i < commSize; ++i) {
-      displs[i] = total_size;
-      total_size += sizes[i];
-    }
+      for(unsigned i = 0; i < commSize; ++i) {
+        displs[i] = total_size;
+        total_size += sizes[i];
+      }
 
-    // Synchronize the full array
-    global.resize(total_size);
+      // Synchronize the full array
+      global.resize(total_size);
 
-    MPI_Allgatherv(local.data(), local.size(), mpi_struct_type,
-                   global.data(), sizes.data(), displs.data(), mpi_struct_type,
-                   MPI_COMM_WORLD);
+      MPI_Allgatherv(local.data(), local.size(), mpi_struct_type,
+                     global.data(), sizes.data(), displs.data(), mpi_struct_type,
+                     MPI_COMM_WORLD);
+    }
+    else {
+      const auto &ranks = *neighborRanks;
+      std::vector< unsigned long long > recvSizes(ranks.size());
+      unsigned long long toSendSize = local.size();
+      // 1) Send my local size to all my neighbors
+      std::vector<MPI_Request> sendRequests(ranks.size()), receiveRequests(ranks.size());
+      for (size_t k = 0; k < ranks.size(); ++k) {
+        MPI_Isend(&toSendSize, 1, MPI_UNSIGNED_LONG_LONG, ranks[k], rank, MPI_COMM_WORLD, &sendRequests[k]);
+        MPI_Irecv(&recvSizes[k], 1, MPI_UNSIGNED_LONG_LONG, ranks[k], ranks[k], MPI_COMM_WORLD, &receiveRequests[k]);
+      }
+      MPI_Waitall(sendRequests.size(), sendRequests.data(), MPI_STATUSES_IGNORE);
+      MPI_Waitall(receiveRequests.size(), receiveRequests.data(), MPI_STATUSES_IGNORE);
+      size_t total_size = std::reduce(recvSizes.begin(), recvSizes.end());
+      global.resize(total_size);
+
+      sendRequests.clear(); sendRequests.resize(ranks.size());
+      receiveRequests.clear(); receiveRequests.resize(ranks.size());
+      size_t currentOffset = 0;
+      for (size_t k = 0; k < ranks.size(); ++k) {
+        MPI_Isend(local.data(), local.size(), mpi_struct_type, ranks[k], rank, MPI_COMM_WORLD, &sendRequests[k]);
+        MPI_Irecv(global.data() + currentOffset, recvSizes[k], mpi_struct_type, ranks[k], ranks[k], MPI_COMM_WORLD, &receiveRequests[k]);
+        currentOffset += recvSizes[k];
+      }
+      MPI_Waitall(sendRequests.size(), sendRequests.data(), MPI_STATUSES_IGNORE);
+      MPI_Waitall(receiveRequests.size(), receiveRequests.data(), MPI_STATUSES_IGNORE);
+      MPI_Barrier(MPI_COMM_WORLD);
+      if (rank == 0)
+        msg::info << "Synchronizing Interface Ownership with peer-to-peer comms." << msg::endl;
+    }
 
     _toSend.clear();
 
@@ -320,7 +350,7 @@ namespace gmshfem::field
       }
     }
 
-
+    // Free the type
     MPI_Type_free(&mpi_struct_type);
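
For context, below is a minimal, self-contained sketch of the two-phase neighbor exchange that the new else branch implements: each rank first exchanges message counts with its neighbors, then exchanges the payloads at the offsets implied by those counts. This is not part of the patch; the function name exchangeWithNeighbors, the plain int payload (standing in for the DOF struct and its custom MPI type), and the toy neighbor list in main are illustrative only.

// Sketch only: two-phase neighbor exchange (counts, then payloads), mirroring the
// pattern of the else branch above. Names and the int payload are illustrative.
#include <mpi.h>
#include <cstddef>
#include <numeric>
#include <vector>

std::vector< int > exchangeWithNeighbors(std::vector< int > &local,
                                         const std::vector< int > &neighbors, int rank)
{
  const int n = static_cast< int >(neighbors.size());
  std::vector< unsigned long long > recvSizes(neighbors.size());
  unsigned long long sendSize = local.size();
  std::vector< MPI_Request > sendReq(neighbors.size()), recvReq(neighbors.size());

  // Phase 1: tell each neighbor how many entries to expect, learn how many they will send.
  for(std::size_t k = 0; k < neighbors.size(); ++k) {
    MPI_Isend(&sendSize, 1, MPI_UNSIGNED_LONG_LONG, neighbors[k], rank, MPI_COMM_WORLD, &sendReq[k]);
    MPI_Irecv(&recvSizes[k], 1, MPI_UNSIGNED_LONG_LONG, neighbors[k], neighbors[k], MPI_COMM_WORLD, &recvReq[k]);
  }
  MPI_Waitall(n, sendReq.data(), MPI_STATUSES_IGNORE);
  MPI_Waitall(n, recvReq.data(), MPI_STATUSES_IGNORE);

  // Phase 2: exchange the payloads; each neighbor's block lands at its own offset.
  std::vector< int > global(std::reduce(recvSizes.begin(), recvSizes.end(), 0ULL));
  std::size_t offset = 0;
  for(std::size_t k = 0; k < neighbors.size(); ++k) {
    MPI_Isend(local.data(), static_cast< int >(local.size()), MPI_INT, neighbors[k], rank, MPI_COMM_WORLD, &sendReq[k]);
    MPI_Irecv(global.data() + offset, static_cast< int >(recvSizes[k]), MPI_INT, neighbors[k], neighbors[k], MPI_COMM_WORLD, &recvReq[k]);
    offset += recvSizes[k];
  }
  MPI_Waitall(n, sendReq.data(), MPI_STATUSES_IGNORE);
  MPI_Waitall(n, recvReq.data(), MPI_STATUSES_IGNORE);
  return global;
}

int main(int argc, char **argv)
{
  MPI_Init(&argc, &argv);
  int rank, size;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  // Toy topology for the sketch: every other rank is a neighbor of this one.
  std::vector< int > neighbors;
  for(int r = 0; r < size; ++r)
    if(r != rank) neighbors.push_back(r);

  std::vector< int > local(rank + 1, rank); // rank r contributes r+1 copies of its id
  std::vector< int > global = exchangeWithNeighbors(local, neighbors, rank);

  MPI_Finalize();
  return 0;
}

Tagging each send with the sender's rank and matching each receive against the neighbor's rank keeps the two phases unambiguous, and exchanging counts first avoids probing and lets the receive buffer be sized exactly once, which is the same trade-off the patch makes when a neighbor topology is supplied.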