diff --git a/src/field/DistributedField.cpp b/src/field/DistributedField.cpp
index dbbcba9692b6660cbbad4cbd61ebf9681fdeed7d..6493c17b42fa9703dfc4cd33399435ef2fca6abb 100644
--- a/src/field/DistributedField.cpp
+++ b/src/field/DistributedField.cpp
@@ -429,29 +429,58 @@ namespace gmshfem::field
         myOwnedDofs.push_back({dof->numType(), dof->entity(), dof->numGlobalDof()});
       }
 
-      // Get the sizes and offset of the global array
-      int total_size = 0;
-      std::vector< int > sizes(commSize), displs(commSize);
-      int loc_size = myOwnedDofs.size();
-      MPI_Allgather(&loc_size, 1, MPI_INT, sizes.data(), 1, MPI_INT, MPI_COMM_WORLD);
-
-      for(unsigned i = 0; i < commSize; ++i) {
-        displs[i] = total_size;
-        total_size += sizes[i];
-      }
-
-      // Synchronize the full array
-      allSharedDofsArray.resize(total_size);
+      if(!neighboringRanks) {
+        // Get the sizes and offset of the global array
+        int total_size = 0;
+        std::vector< int > sizes(commSize), displs(commSize);
+        int loc_size = myOwnedDofs.size();
+        MPI_Allgather(&loc_size, 1, MPI_INT, sizes.data(), 1, MPI_INT, MPI_COMM_WORLD);
+        for(unsigned i = 0; i < commSize; ++i) {
+          displs[i] = total_size;
+          total_size += sizes[i];
+        }
+        // Synchronize the full array
+        allSharedDofsArray.resize(total_size);
 
-      MPI_Allgatherv(myOwnedDofs.data(), myOwnedDofs.size(), mpi_struct_type,
-                     allSharedDofsArray.data(), sizes.data(), displs.data(), mpi_struct_type,
-                     MPI_COMM_WORLD);
+        MPI_Allgatherv(myOwnedDofs.data(), myOwnedDofs.size(), mpi_struct_type,
+                       allSharedDofsArray.data(), sizes.data(), displs.data(), mpi_struct_type,
+                       MPI_COMM_WORLD);
 
-      if(rank == 0) {
-        msg::info << "Gathered allSharedDofsArray. Total size is " << allSharedDofsArray.size() << msg::endl;
+        if(rank == 0) {
+          msg::info << "Gathered allSharedDofsArray. Total size is " << allSharedDofsArray.size() << msg::endl;
+        }
+        MPI_Barrier(MPI_COMM_WORLD);
       }
-      MPI_Barrier(MPI_COMM_WORLD);
-
+      else {
+        const auto& ranks = *neighboringRanks;
+        std::vector< unsigned long long > recvSizes(ranks.size());
+        unsigned long long toSendSize = myOwnedDofs.size();
+        // 1) Send my local size to all my neighbors
+        std::vector< MPI_Request > sendRequests(ranks.size()), receiveRequests(ranks.size());
+        for(size_t k = 0; k < ranks.size(); ++k) {
+          MPI_Isend(&toSendSize, 1, MPI_UNSIGNED_LONG_LONG, ranks[k], rank, MPI_COMM_WORLD, &sendRequests[k]);
+          MPI_Irecv(&recvSizes[k], 1, MPI_UNSIGNED_LONG_LONG, ranks[k], ranks[k], MPI_COMM_WORLD, &receiveRequests[k]);
+        }
+        MPI_Waitall(sendRequests.size(), sendRequests.data(), MPI_STATUSES_IGNORE);
+        MPI_Waitall(receiveRequests.size(), receiveRequests.data(), MPI_STATUSES_IGNORE);
+        size_t total_size = std::reduce(recvSizes.begin(), recvSizes.end());
+        allSharedDofsArray.resize(total_size);
+        sendRequests.clear(); sendRequests.resize(ranks.size());
+        receiveRequests.clear(); receiveRequests.resize(ranks.size());
+        size_t currentOffset = 0;
+        for(size_t k = 0; k < ranks.size(); ++k) {
+          MPI_Isend(myOwnedDofs.data(), myOwnedDofs.size(), mpi_struct_type, ranks[k], rank, MPI_COMM_WORLD, &sendRequests[k]);
+          MPI_Irecv(allSharedDofsArray.data() + currentOffset, recvSizes[k], mpi_struct_type, ranks[k], ranks[k], MPI_COMM_WORLD, &receiveRequests[k]);
+          currentOffset += recvSizes[k];
+        }
+        MPI_Waitall(sendRequests.size(), sendRequests.data(), MPI_STATUSES_IGNORE);
+        MPI_Waitall(receiveRequests.size(), receiveRequests.data(), MPI_STATUSES_IGNORE);
+        MPI_Barrier(MPI_COMM_WORLD);
+        if(rank == 0) {
+          msg::info << "Gathered allSharedDofsArray with peer-to-peer comms." << msg::endl;
+        }
+      }
+      std::unordered_map< DofIndex, unsigned long long, HashBySecond, std::equal_to<DofIndex> > allSharedDofs; // Put in hashtable to check for duplicates.
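
Reviewer note: the if(!neighboringRanks) branch keeps the original collective gather. For reference, a minimal standalone sketch of that pattern (MPI_Allgather of the per-rank sizes, prefix sum into displacements, then MPI_Allgatherv), using a plain int payload and a hypothetical gatherAll helper instead of the field's mpi_struct_type:

// Illustrative sketch only (not part of the patch): variable-size all-gather
// with MPI_Allgather + MPI_Allgatherv. The helper name and the int payload
// are hypothetical stand-ins for the dof struct used in the field code.
#include <mpi.h>
#include <vector>

std::vector< int > gatherAll(const std::vector< int > &mine, MPI_Comm comm)
{
  int commSize = 0;
  MPI_Comm_size(comm, &commSize);

  // 1) Every rank learns how many items every other rank contributes.
  int locSize = static_cast< int >(mine.size());
  std::vector< int > sizes(commSize), displs(commSize);
  MPI_Allgather(&locSize, 1, MPI_INT, sizes.data(), 1, MPI_INT, comm);

  // 2) Prefix sum of the sizes gives each rank's offset in the global array.
  int total = 0;
  for(int i = 0; i < commSize; ++i) {
    displs[i] = total;
    total += sizes[i];
  }

  // 3) Variable-count all-gather into one flat array.
  std::vector< int > all(total);
  MPI_Allgatherv(mine.data(), locSize, MPI_INT,
                 all.data(), sizes.data(), displs.data(), MPI_INT, comm);
  return all;
}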
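The else branch replaces that collective with point-to-point exchanges restricted to neighboringRanks: one nonblocking round for the sizes, a second for the payloads, with each neighbor's block packed at a running offset. A minimal standalone sketch of that handshake under the same simplifications (int payload, hypothetical exchangeWithNeighbors helper); note that the patch's std::reduce call relies on <numeric> being included in DistributedField.cpp:

// Illustrative sketch only (not part of the patch): size handshake followed by
// payload exchange with a known set of neighbors, mirroring the else branch.
// The helper name and the int payload are hypothetical.
#include <mpi.h>
#include <numeric>
#include <vector>

std::vector< int > exchangeWithNeighbors(const std::vector< int > &mine,
                                         const std::vector< int > &neighbors,
                                         MPI_Comm comm)
{
  int rank = 0;
  MPI_Comm_rank(comm, &rank);
  const size_t n = neighbors.size();

  // Round 1: post one nonblocking send/receive pair per neighbor for the sizes.
  // Sends are tagged with the sender's rank, so each receive matches on the
  // neighbor's rank, as in the patch.
  unsigned long long sendSize = mine.size();
  std::vector< unsigned long long > recvSizes(n);
  std::vector< MPI_Request > sendReqs(n), recvReqs(n);
  for(size_t k = 0; k < n; ++k) {
    MPI_Isend(&sendSize, 1, MPI_UNSIGNED_LONG_LONG, neighbors[k], rank, comm, &sendReqs[k]);
    MPI_Irecv(&recvSizes[k], 1, MPI_UNSIGNED_LONG_LONG, neighbors[k], neighbors[k], comm, &recvReqs[k]);
  }
  MPI_Waitall(static_cast< int >(n), sendReqs.data(), MPI_STATUSES_IGNORE);
  MPI_Waitall(static_cast< int >(n), recvReqs.data(), MPI_STATUSES_IGNORE);

  // Round 2: exchange the payloads, packing each neighbor's data at its offset.
  std::vector< int > all(std::reduce(recvSizes.begin(), recvSizes.end(), 0ull));
  size_t offset = 0;
  for(size_t k = 0; k < n; ++k) {
    MPI_Isend(mine.data(), static_cast< int >(mine.size()), MPI_INT, neighbors[k], rank, comm, &sendReqs[k]);
    MPI_Irecv(all.data() + offset, static_cast< int >(recvSizes[k]), MPI_INT, neighbors[k], neighbors[k], comm, &recvReqs[k]);
    offset += recvSizes[k];
  }
  MPI_Waitall(static_cast< int >(n), sendReqs.data(), MPI_STATUSES_IGNORE);
  MPI_Waitall(static_cast< int >(n), recvReqs.data(), MPI_STATUSES_IGNORE);
  return all;
}

This pattern only pays for traffic to ranks that actually share dofs instead of an all-to-all collective, but it assumes the neighbor lists are symmetric (if rank a lists b, then b lists a); otherwise the posted receives never match and the exchange deadlocks.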