From 93835b2f1317d317715ae13ed7946828f8fd5723 Mon Sep 17 00:00:00 2001 From: Jonathan Lambrechts <jonathan.lambrechts@uclouvain.be> Date: Thu, 30 Sep 2010 10:53:27 +0000 Subject: [PATCH] Solver : parallel dofManager and parallel linearSystemPETSc --- Solver/dofManager.h | 165 ++++++++++++++++++++++++++++++----- Solver/linearSystemPETSc.cpp | 34 +++++--- Solver/linearSystemPETSc.h | 27 ++++-- 3 files changed, 182 insertions(+), 44 deletions(-) diff --git a/Solver/dofManager.h b/Solver/dofManager.h index 292753e673..25ebc5a379 100644 --- a/Solver/dofManager.h +++ b/Solver/dofManager.h @@ -10,11 +10,16 @@ #include <string> #include <complex> #include <map> +#include <list> +#include "GmshConfig.h" #include "MVertex.h" #include "linearSystem.h" #include "fullMatrix.h" #include <iostream> +#ifdef HAVE_MPI + #include "mpi.h" +#endif class Dof{ protected: @@ -93,22 +98,33 @@ class dofManager{ // DofVec = dataVec std::map<Dof, dataVec> fixed; - // initial conditions + // initial conditions (not used ?) std::map<Dof, std::vector<dataVec> > initial; // numbering of unknown dof blocks std::map<Dof, int> unknown; - // associatations + // associatations (not used ?) std::map<Dof, Dof> associatedWith; // linearSystems std::map<const std::string, linearSystem<dataMat>*> _linearSystems; linearSystem<dataMat> *_current; + // ********** parallel section + private : + // those dof are images of ghost located on another proc (id givent by the map). + // this is a first try, maybe not the final implementation + std::map<Dof, int > ghosted; // dof => procId + int _localSize; + bool _parallelFinalized; + bool _isParallel; + public: + void _parallelFinalize(); + // ********** public: - dofManager(linearSystem<dataMat> *l) : _current(l) { _linearSystems["A"] = l; } - dofManager(linearSystem<dataMat> *l1, linearSystem<dataMat> *l2) : _current(l1) { + dofManager(linearSystem<dataMat> *l, bool isParallel=false) : _current(l),_isParallel(isParallel), _parallelFinalized(false) { _linearSystems["A"] = l; } + dofManager(linearSystem<dataMat> *l1, linearSystem<dataMat> *l2) : _current(l1),_isParallel(false), _parallelFinalized(false) { _linearSystems.insert(std::make_pair("A", l1)); _linearSystems.insert(std::make_pair("B", l2)); //_linearSystems.["A"] = l1; @@ -144,21 +160,22 @@ class dofManager{ { return isFixed(v->getNum(), Dof::createTypeWithTwoInts(iComp, iField)); } - + + inline void numberGhostDof (Dof key, int procId) { + if (fixed.find(key) != fixed.end()) return; + if (constraints.find(key) != constraints.end()) return; + if (ghosted.find(key) != ghosted.end()) return; + ghosted[key] = procId; + } inline void numberDof(Dof key) { - if(fixed.find(key) != fixed.end()) - { - return; - } - // constraints - if (constraints.find(key) != constraints.end()){ - return; - } - // + if (fixed.find(key) != fixed.end()) return; + if (constraints.find(key) != constraints.end()) return; + if (ghosted.find(key) != ghosted.end()) return; + std::map<Dof, int> :: iterator it = unknown.find(key); - if (it == unknown.end()){ + if (it == unknown.end()) { unsigned int size = unknown.size(); unknown[key] = size; } @@ -192,7 +209,7 @@ class dofManager{ { std::map<Dof, int>::const_iterator it = unknown.find(key); if (it != unknown.end()) { - _current->getFromSolution(it->second,val ); + _current->getFromSolution(it->second, val); return; } } @@ -254,7 +271,8 @@ class dofManager{ inline void assemble(const Dof &R, const Dof &C, const dataMat &value) { - if (!_current->isAllocated()) _current->allocate(unknown.size()); + if (_isParallel && !_parallelFinalized) _parallelFinalize(); + if (!_current->isAllocated()) _current->allocate (sizeOfR()); std::map<Dof, int>::iterator itR = unknown.find(R); if (itR != unknown.end()){ std::map<Dof, int>::iterator itC = unknown.find(C); @@ -278,7 +296,8 @@ class dofManager{ } inline void assemble(std::vector<Dof> &R, std::vector<Dof> &C, const fullMatrix<dataMat> &m) { - if (!_current->isAllocated()) _current->allocate(unknown.size()); + if (_isParallel && !_parallelFinalized) _parallelFinalize(); + if (!_current->isAllocated()) _current->allocate(sizeOfR()); std::vector<int> NR(R.size()), NC(C.size()); @@ -321,7 +340,8 @@ class dofManager{ virtual inline void assemble(std::vector<Dof> &R, const fullVector<dataMat> &m) // for linear forms { - if (!_current->isAllocated()) _current->allocate(unknown.size()); + if (_isParallel && !_parallelFinalized) _parallelFinalize(); + if (!_current->isAllocated()) _current->allocate(sizeOfR()); std::vector<int> NR(R.size()); for (unsigned int i = 0; i < R.size(); i++) { @@ -355,7 +375,8 @@ class dofManager{ virtual inline void assemble(std::vector<Dof> &R, const fullMatrix<dataMat> &m) { - if (!_current->isAllocated()) _current->allocate(unknown.size()); + if (_isParallel && !_parallelFinalized) _parallelFinalize(); + if (!_current->isAllocated()) _current->allocate(sizeOfR()); std::vector<int> NR(R.size()); for (unsigned int i = 0; i < R.size(); i++) { @@ -408,7 +429,8 @@ class dofManager{ } inline void assemble(const Dof &R, const dataMat &value) { - if(!_current->isAllocated()) _current->allocate(unknown.size()); + if (_isParallel && !_parallelFinalized) _parallelFinalize(); + if(!_current->isAllocated()) _current->allocate(sizeOfR()); std::map<Dof, int>::iterator itR = unknown.find(R); if(itR != unknown.end()){ _current->addToRightHandSide(itR->second, value); @@ -437,7 +459,7 @@ class dofManager{ { assemble(vR->getNum(), Dof::createTypeWithTwoInts(iCompR, iFieldR), value); } - int sizeOfR() const { return unknown.size(); } + int sizeOfR() const { return _isParallel ? _localSize : unknown.size(); } int sizeOfF() const { return fixed.size(); } void systemSolve(){ _current->systemSolve(); } void systemClear() @@ -506,12 +528,107 @@ class dofManager{ } } void getFixedDof(std::vector<Dof> &R){ + R.clear(); R.reserve(fixed.size()); - std::map<Dof, double>::iterator it; // template problem ?? //TODO + typename std::map<Dof, dataVec>::iterator it; for(it=fixed.begin(); it!=fixed.end();++it){ - R.push_back((*it).first); + R.push_back (it->first); } } }; +template<class T> +void dofManager<T>::_parallelFinalize() { +#ifdef HAVE_MPI + int _numStart; + int _numTotal; + MPI_Status status; + _localSize = unknown.size(); + if (Msg::GetCommRank() == 0){ + _numStart = 0; + }else + MPI_Recv (&_numStart, 1, MPI_INT, Msg::GetCommRank()-1, 0, MPI_COMM_WORLD, &status); + _numTotal = _numStart + _localSize; + if (Msg::GetCommRank() != Msg::GetCommSize()-1) + MPI_Send (&_numTotal, 1, MPI_INT, Msg::GetCommRank()+1, 0, MPI_COMM_WORLD); + MPI_Bcast(&_numTotal, 1, MPI_INT, Msg::GetCommSize()-1, MPI_COMM_WORLD); + for (std::map <Dof, int> ::iterator it = unknown.begin(); it!= unknown.end(); it++) + it->second += _numStart; + _parallelFinalized = true; + std::vector<std::list<Dof> > ghostedByProc; + int *nRequest = new int[Msg::GetCommSize()]; + int *nRequested = new int[Msg::GetCommSize()]; + for (int i = 0; i<Msg::GetCommSize(); i++) + nRequest[i] = 0; + for (std::map <Dof, int >::iterator it = ghosted.begin(); it != ghosted.end(); it++) + nRequest[it->second] ++; + MPI_Alltoall(nRequest,1,MPI_INT,nRequested,1,MPI_INT,MPI_COMM_WORLD); + long int **recv0 = new long int*[Msg::GetCommSize()]; + int **recv1 = new int*[Msg::GetCommSize()]; + long int **send0 = new long int*[Msg::GetCommSize()]; + int **send1 = new int*[Msg::GetCommSize()]; + MPI_Request *reqRecv0 = new MPI_Request[2*Msg::GetCommSize()]; + MPI_Request *reqRecv1 = reqRecv0 + Msg::GetCommSize(); + MPI_Request *reqSend0 = new MPI_Request[Msg::GetCommSize()]; + MPI_Request *reqSend1 = new MPI_Request[Msg::GetCommSize()]; + for (int i = 0; i < Msg::GetCommSize(); i++) { + send0[i] = new long int[nRequest[i]*2]; + recv0[i] = new long int[nRequested[i]*2]; + send1[i] = new int[nRequested[i]]; + recv1[i] = new int[nRequest[i]]; + reqSend0[i] = reqSend1[i] = reqRecv0[i] = reqRecv1[i] = MPI_REQUEST_NULL; + } + for (int i = 0; i<Msg::GetCommSize(); i++) + nRequest [i] = 0; + for (std::map <Dof, int >::iterator it = ghosted.begin(); it != ghosted.end(); it++) { + int proc = it->second; + send0 [proc] [nRequest[proc]*2] = it->first.getEntity(); + send0 [proc] [nRequest[proc]*2+1] = it->first.getType(); + nRequest [proc] ++; + } + for (int i = 0; i<Msg::GetCommSize(); i++) { + if (nRequested[i] > 0) { + MPI_Irecv (recv0[i], 2*nRequested[i], MPI_LONG, i, 0, MPI_COMM_WORLD, &reqRecv0[i]); + } + if (nRequest[i] > 0) { + MPI_Irecv (recv1[i], 2*nRequest[i], MPI_INT, i, 1, MPI_COMM_WORLD, &reqRecv1[i]); + MPI_Isend (send0[i], 2*nRequest[i], MPI_LONG, i, 0, MPI_COMM_WORLD, &reqSend0[i]); + } + } + int index; + while (MPI_Waitany (2*Msg::GetCommSize(), reqRecv0, &index, &status) == 0 && index != MPI_UNDEFINED) { + if (status.MPI_TAG == 0) { + for (int j = 0; j < nRequested[index]; j++) { + std::map<Dof,int>::iterator it = unknown.find (Dof(recv0[index][j*2], recv0[index][j*2+1])); + if (it == unknown.end ()) + Msg::Error ("ghost Dof does not exist on parent process"); + send1[index][j] = it->second; + } + MPI_Isend (send1[index], nRequested[index], MPI_INT, index, 1, MPI_COMM_WORLD, &reqSend1[index]); + } + } + for (int i = 0; i<Msg::GetCommSize(); i++) + nRequest[i] = 0; + for (std::map <Dof, int>::iterator it = ghosted.begin(); it != ghosted.end(); it++) { + int proc = it->second; + unknown[it->first] = recv1 [proc][nRequest[proc] ++]; + } + MPI_Waitall (Msg::GetCommSize(), reqSend0, MPI_STATUS_IGNORE); + MPI_Waitall (Msg::GetCommSize(), reqSend1, MPI_STATUS_IGNORE); + for (int i = 0; i < Msg::GetCommSize(); i++) { + delete [] send0[i]; + delete [] send1[i]; + delete [] recv0[i]; + delete [] recv1[i]; + } + delete [] send0; + delete [] send1; + delete [] recv0; + delete [] recv1; + delete [] reqSend0; + delete [] reqSend1; + delete [] reqRecv0; +#endif +} + #endif diff --git a/Solver/linearSystemPETSc.cpp b/Solver/linearSystemPETSc.cpp index 69634f6ffb..6d485cc1a1 100644 --- a/Solver/linearSystemPETSc.cpp +++ b/Solver/linearSystemPETSc.cpp @@ -46,33 +46,38 @@ void linearSystemPETSc<fullMatrix<PetscScalar> >::getFromMatrix(int row, int col template<> void linearSystemPETSc<fullMatrix<PetscScalar> >::getFromRightHandSide(int row, fullMatrix<PetscScalar> &val) const { +#if defined(PETSC_USE_COMPLEX) PetscScalar *tmp; _try(VecGetArray(_b, &tmp)); for (int i = 0; i < _blockSize; i++) { PetscScalar s = tmp[row * _blockSize + i]; -#if defined(PETSC_USE_COMPLEX) val(i,0) = s.real(); -#else - val(i,0) = s; -#endif } _try(VecRestoreArray(_b, &tmp)); +#else + for (int i = 0; i < _blockSize; i++) { + int ii = row*_blockSize +i; + VecGetValues ( _b, 1, &ii, &val(i,0)); + } +#endif } template<> -void linearSystemPETSc<fullMatrix<PetscScalar> >::getFromSolution(int row, fullMatrix<PetscScalar> &val) const -{ +void linearSystemPETSc<fullMatrix<PetscScalar> >::getFromSolution(int row, fullMatrix<PetscScalar> &val) const { +#if defined(PETSC_USE_COMPLEX) PetscScalar *tmp; _try(VecGetArray(_x, &tmp)); for (int i = 0; i < _blockSize; i++) { PetscScalar s = tmp[row * _blockSize + i]; -#if defined(PETSC_USE_COMPLEX) val(i,0) = s.real(); -#else - val(i,0) = s; -#endif } _try(VecRestoreArray(_x, &tmp)); +#else + for (int i = 0; i < _blockSize; i++) { + int ii = row*_blockSize +i; + VecGetValues ( _x, 1, &ii, &val(i,0)); + } +#endif } template<> @@ -85,15 +90,16 @@ void linearSystemPETSc<fullMatrix<PetscScalar> >::allocate(int nbRows) //printf("allocate linear system petsc \n"); //_try(PetscOptionsInsertString("-ksp_monitor_true_residual -ksp_rtol 1e-10")); _try(MatCreate(PETSC_COMM_WORLD, &_a)); - _try(MatSetSizes(_a, PETSC_DECIDE, PETSC_DECIDE, nbRows * _blockSize, nbRows * _blockSize)); - _try(MatSetType(_a, MATSEQBAIJ)); + _try(MatSetSizes(_a,nbRows * _blockSize, nbRows * _blockSize, PETSC_DETERMINE, PETSC_DETERMINE)); + //_try(MatSetType(_a, MATSEQBAIJ)); + _try(MatSetType(_a, MATMPIBAIJ)); // override the default options with the ones from the option // database (if any) _try(MatSetFromOptions(_a)); - _try(MatSeqBAIJSetPreallocation(_a, _blockSize, 5, NULL)); //todo preAllocate off-diagonal part + _try(MatMPIBAIJSetPreallocation(_a, _blockSize, 5, NULL, 0, NULL)); //_try(MatMPIBAIJSetPreallocation(_a, _blockSize, 4, NULL, 0, NULL)); //todo preAllocate off-diagonal part _try(VecCreate(PETSC_COMM_WORLD, &_x)); - _try(VecSetSizes(_x, PETSC_DECIDE, nbRows * _blockSize)); + _try(VecSetSizes(_x, nbRows * _blockSize, PETSC_DETERMINE)); // override the default options with the ones from the option // database (if any) _try(VecSetFromOptions(_x)); diff --git a/Solver/linearSystemPETSc.h b/Solver/linearSystemPETSc.h index 1972c4705e..f5a5434dd0 100644 --- a/Solver/linearSystemPETSc.h +++ b/Solver/linearSystemPETSc.h @@ -76,7 +76,7 @@ class linearSystemPETSc : public linearSystem<scalar> { { clear(); _try(MatCreate(PETSC_COMM_WORLD, &_a)); - _try(MatSetSizes(_a, PETSC_DECIDE, PETSC_DECIDE, nbRows, nbRows)); + _try(MatSetSizes(_a, nbRows, nbRows, PETSC_DETERMINE, PETSC_DETERMINE)); // override the default options with the ones from the option // database (if any) _try(MatSetFromOptions(_a)); @@ -86,13 +86,28 @@ class linearSystemPETSc : public linearSystem<scalar> { PetscOptionsGetInt(PETSC_NULL, "-petsc_prealloc", &prealloc, &set); _try(MatSeqAIJSetPreallocation(_a, prealloc, PETSC_NULL)); _try(VecCreate(PETSC_COMM_WORLD, &_x)); - _try(VecSetSizes(_x, PETSC_DECIDE, nbRows)); + _try(VecSetSizes(_x, nbRows, PETSC_DETERMINE)); // override the default options with the ones from the option // database (if any) _try(VecSetFromOptions(_x)); _try(VecDuplicate(_x, &_b)); _isAllocated = true; } + void print() { + _try(MatAssemblyBegin(_a, MAT_FINAL_ASSEMBLY)); + _try(MatAssemblyEnd(_a, MAT_FINAL_ASSEMBLY)); + _try(VecAssemblyBegin(_b)); + _try(VecAssemblyEnd(_b)); + if(Msg::GetCommRank()==0) + printf("a :\n"); + MatView(_a, PETSC_VIEWER_STDOUT_WORLD); + if(Msg::GetCommRank()==0) + printf("b :\n"); + VecView(_b, PETSC_VIEWER_STDOUT_WORLD); + if(Msg::GetCommRank()==0) + printf("x :\n"); + VecView(_x, PETSC_VIEWER_STDOUT_WORLD); + } virtual void clear() { if(_isAllocated){ @@ -114,15 +129,15 @@ class linearSystemPETSc : public linearSystem<scalar> { } virtual void getFromRightHandSide(int row, scalar &val) const { +#if defined(PETSC_USE_COMPLEX) PetscScalar *tmp; _try(VecGetArray(_b, &tmp)); PetscScalar s = tmp[row]; _try(VecRestoreArray(_b, &tmp)); // FIXME specialize this routine -#if defined(PETSC_USE_COMPLEX) val = s.real(); #else - val = s; + VecGetValues(_b, 1, &row, &val); #endif } virtual double normInfRightHandSide() const @@ -139,14 +154,14 @@ class linearSystemPETSc : public linearSystem<scalar> { } virtual void getFromSolution(int row, scalar &val) const { +#if defined(PETSC_USE_COMPLEX) PetscScalar *tmp; _try(VecGetArray(_x, &tmp)); PetscScalar s = tmp[row]; _try(VecRestoreArray(_x, &tmp)); -#if defined(PETSC_USE_COMPLEX) val = s.real(); #else - val = s; + VecGetValues(_x, 1, &row, &val); #endif } virtual void zeroMatrix() -- GitLab