diff --git a/src/problem/Formulation.cpp b/src/problem/Formulation.cpp
index 514ef49fc3268ebe4638e407fae100abbc09c37a..6ed735ce58da6fb7fc7a7501461e04a678319b69 100644
--- a/src/problem/Formulation.cpp
+++ b/src/problem/Formulation.cpp
@@ -42,7 +42,7 @@ namespace gmshddm
 
     template< class T_Scalar >
     Formulation< T_Scalar >::Formulation(const std::string &name, const std::vector< std::vector< unsigned int > > &topology) :
-      _name(name), _volume(), _interfaceFields(), _interfaceMapping(), _useMagma(false), _useGPUMRES(false), _numberOfIterations(0), _physicalCommutator(false), _artificialCommutator(false), _IA(false)
+      _name(name), _volume(), _interfaceFields(), _interfaceMapping(), _useMagma(false), _useGPUMRES(false), _useSparseMult(false), _numberOfIterations(0), _physicalCommutator(false), _artificialCommutator(false), _IA(false)
     {
       _topology.resize(topology.size());
       for(auto i = 0ULL; i < topology.size(); ++i) {
@@ -59,16 +59,19 @@ namespace gmshddm
     }
 
     template< class T_Scalar >
-    Formulation< T_Scalar >::Formulation(const std::string &name, const std::vector< std::vector< unsigned int > > &topology, bool useMagma, bool useGPUMRES) :
+    Formulation< T_Scalar >::Formulation(const std::string &name, const std::vector< std::vector< unsigned int > > &topology, bool useMagma, bool useGPUMRES, bool useSparseMult) :
       _name(name), _volume(), _interfaceFields(), _interfaceMapping(), _numberOfIterations(0), _physicalCommutator(false), _artificialCommutator(false), _IA(false)
     {
 #if defined(HAVE_MAGMA)
     _useMagma = useMagma;
     #if defined(USE_CUDA) || defined(USE_HIP)
     _useGPUMRES = useGPUMRES;
+    _useSparseMult = useSparseMult;
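+    // _useSparseMult switches the GPU RHS update from a batched MAGMA GEMV to a
+    // single block-diagonal cuSPARSE MatMult (only meaningful with _useGPUMRES).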
     #else
     if (useGPUMRES){
       gmshfem::msg::warning << "Neither CUDA nor HIP found ! PETSC GPU support won't be used." << gmshfem::msg::endl;
+      _useGPUMRES = false;
+      _useSparseMult = false;
     }
     #endif
     #ifdef USE_CUDA
@@ -80,7 +83,7 @@ namespace gmshddm
     #endif
     if (_useMagma){
       gmshfem::msg::info << "GMSHDDM has been compiled with MAGMA. MAGMA will be used." << gmshfem::msg::endl;
-      _magmaHelper = new MagmaHelper<T_Scalar>(_useMagma, _useGPUMRES, gmshddm::mpi::getMPIRank());
+      _magmaHelper = new MagmaHelper<T_Scalar>(_useMagma, _useGPUMRES, _useSparseMult, gmshddm::mpi::getMPIRank());
     }
 #else
     if(useMagma) {
@@ -88,6 +91,7 @@ namespace gmshddm
     }
 #endif
       _topology.resize(topology.size());
+      std::cout << "Topology size: " << topology.size() << std::endl;
       for(auto i = 0ULL; i < topology.size(); ++i) {
         gmshfem::msg::debug << "Create volume formulation (" << i << ")." << gmshfem::msg::endl;
         _volume.push_back(new gmshfem::problem::Formulation< T_Scalar >("volume_" + std::to_string(i)));
@@ -99,17 +103,18 @@ namespace gmshddm
         }
       
       }
+      std::cout << "End of Forumaltion constructor" << std::endl;
     }
 
     template< class T_Scalar >
     Formulation< T_Scalar >::Formulation(const std::string &name, const std::vector< gmshfem::problem::Formulation< T_Scalar > * > &volume, const std::vector< std::map< unsigned int, gmshfem::problem::Formulation< T_Scalar > * > > &surface) :
-      _name(name), _volume(volume), _interfaceFields(), _interfaceMapping(), _useMagma(false), _useGPUMRES(false), _numberOfIterations(0), _physicalCommutator(false), _artificialCommutator(false), _IA(false)
+      _name(name), _volume(volume), _interfaceFields(), _interfaceMapping(), _useMagma(false), _useGPUMRES(false), _useSparseMult(false), _numberOfIterations(0), _physicalCommutator(false), _artificialCommutator(false), _IA(false)
     {
       #warning "Still usable ?"
     }
 
     template< class T_Scalar >
-    Formulation< T_Scalar >::Formulation(const std::string &name, const std::vector< gmshfem::problem::Formulation< T_Scalar > * > &volume, const std::vector< std::map< unsigned int, gmshfem::problem::Formulation< T_Scalar > * > > &surface, bool useMagma, bool useGPUMRES) :
+    Formulation< T_Scalar >::Formulation(const std::string &name, const std::vector< gmshfem::problem::Formulation< T_Scalar > * > &volume, const std::vector< std::map< unsigned int, gmshfem::problem::Formulation< T_Scalar > * > > &surface, bool useMagma, bool useGPUMRES, bool useSparseMult) :
       _name(name), _volume(volume), _interfaceFields(), _interfaceMapping(), _numberOfIterations(0), _physicalCommutator(false), _artificialCommutator(false), _IA(false)
     {
       #warning "Still usable ?"
@@ -1211,7 +1216,7 @@ namespace gmshddm
 
         assemble();
         if (_useGPUMRES){ // _useGPUMRES
-          _magmaHelper->createOutgoingGVolumes(algebraicHelpers.fromVolumeToInterfaceScatter, algebraicHelpers.interfaceSizes, algebraicHelpers.interfaceIndices, algebraicHelpers.shiftedIdentity, algebraicHelpers.localGsize,algebraicHelpers.superOutgoingGVolume ,algebraicHelpers.outgoingGVolume);
+          _magmaHelper->createOutgoingGVolumes(algebraicHelpers.fromVolumeToInterfaceScatter, algebraicHelpers.interfaceSizes, algebraicHelpers.interfaceIndices, algebraicHelpers.shiftedIdentity, algebraicHelpers.localGsize, algebraicHelpers.superOutgoingGVolume, algebraicHelpers.outgoingGVolume);
           _magmaHelper->freeMagmaCPUArrays();
         }
         //VecView(algebraicHelpers.superOutgoingGVolume, PETSC_VIEWER_STDOUT_WORLD); 
@@ -1370,7 +1375,6 @@ namespace gmshddm
       PetscInt globSize;
 
       // Create Petsc distributed vector - let Petsc compute the global vector size
-      // THIS IS WHAT BREAKS GPUMRES ???
       if(_useGPUMRES) {
         #ifdef USE_CUDA
         VecCreateMPICUDA(MPI_COMM_WORLD, locSize, PETSC_DETERMINE, &RHS);
@@ -1479,14 +1483,14 @@ namespace gmshddm
 
       // NO NEED TO CALL ALLOCATEGREAD AND SETMULTRHS AFTER THIS POINT !!!
       if (_useMagma && _useGPUMRES){
-        _magmaHelper->allocateGread(algebraicHelpers.Gread, algebraicHelpers.localGsize);
         for (unsigned long incre = 0; incre < algebraicHelpers.myDoms.size(); ++incre) {
           unsigned long idom = algebraicHelpers.myDoms[incre];
           auto [shift, endIdx] = _offsets[idom];
           _localGsSize[idom] = endIdx - shift;
           _localGsCudaPtr[idom] = shift;
         }
-        _magmaHelper->setMultRHS(_localGsCudaPtr, _localGsSize);
+        _magmaHelper->allocateGread(algebraicHelpers.Gread, algebraicHelpers.localGsize, _localGsCudaPtr, _localGsSize);
+        // setMultRHS has been folded into allocateGread, which now also builds the batched RHS pointer array
       }
 
       // Call the AbstractIterativeSolver object
@@ -2240,7 +2244,7 @@ namespace gmshddm
       PetscLogEventRegister("Alg: offsetStuff", 0, &offsetStuff);
       PetscLogEventRegister("Alg: offsetDomain", 0, &offsetDomain);
 
-      PetscLogEventBegin(scatter, 0, 0, 0, 0); // Scatter Breaks GRead ???
+      PetscLogEventBegin(scatter, 0, 0, 0, 0);
       PetscCall(VecScatterBegin(algebraicHelpers.globalInterfaceScatter, G, algebraicHelpers.Gread, INSERT_VALUES, SCATTER_FORWARD));
       PetscCall(VecScatterEnd(algebraicHelpers.globalInterfaceScatter, G, algebraicHelpers.Gread, INSERT_VALUES, SCATTER_FORWARD));
       PetscLogEventEnd(scatter, 0, 0, 0, 0);
@@ -2249,7 +2253,7 @@ namespace gmshddm
 
       if (_useGPUMRES){
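+        // On the sparse path, updateRHS scatters Gread into the padded device layout before the MatMult.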
         PetscLogEventBegin(matmult, 0, 0, 0, 0);
-        _magmaHelper->updateRHS();
+        _magmaHelper->updateRHS(algebraicHelpers.Gread);
         PetscLogEventEnd(matmult, 0, 0, 0, 0);
         PetscFunctionReturn(0);
       }
@@ -2274,7 +2278,7 @@ namespace gmshddm
         _magmaHelper->uploadMultRHS(_localGs);
         PetscLogEventEnd(magmaMRHS, 0, 0, 0, 0);
         PetscLogEventBegin(matmult, 0, 0, 0, 0);
-        _magmaHelper->updateRHS();
+        _magmaHelper->updateRHS(algebraicHelpers.Gread);
         PetscLogEventEnd(matmult, 0, 0, 0, 0);
       } else {
         for(unsigned idom = 0; idom < size(); ++idom) {
@@ -2602,8 +2606,6 @@ namespace gmshddm
 
     template< class T_Scalar >
     PetscErrorCode Formulation< T_Scalar >::_solveAllVolumeAndExtractRhs()
-    // On ne construit pas explicitement le vecteur B, on le construit directement dans la boucle puis on le lie à la matrice LHS directement
-    // C'est pas comme avec les matrices LHS où on a un gros _mats qui fait plz
     {
       PetscFunctionBeginUser;
 
diff --git a/src/problem/Formulation.h b/src/problem/Formulation.h
index 53e8f0e3f28cd4c0654666a1e9801807a0efef9a..20f08cca1b9185b91394b3e3ea7892414d6551ce 100644
--- a/src/problem/Formulation.h
+++ b/src/problem/Formulation.h
@@ -82,6 +82,7 @@ namespace gmshddm
     public:
       bool _useMagma;
       bool _useGPUMRES;
+      bool _useSparseMult; // cuSPARSE block-diagonal MatMult path for the RHS update (only used with _useGPUMRES)
       std::unique_ptr<gmshfem::system::AbstractMonitor> _monitor;
 
       unsigned int _numberOfIterations;
@@ -175,8 +176,8 @@ namespace gmshddm
      public:
       Formulation(const std::string &name, const std::vector< std::vector< unsigned int > > &topology);
       Formulation(const std::string &name, const std::vector< gmshfem::problem::Formulation< T_Scalar > * > &volume, const std::vector< std::map< unsigned int, gmshfem::problem::Formulation< T_Scalar > * > > &surface);
-      Formulation(const std::string &name, const std::vector< std::vector< unsigned int > > &topology, bool useMagma, bool useGPUMRES);
-      Formulation(const std::string &name, const std::vector< gmshfem::problem::Formulation< T_Scalar > * > &volume, const std::vector< std::map< unsigned int, gmshfem::problem::Formulation< T_Scalar > * > > &surface, bool useMagma, bool useGPUMRES);
+      Formulation(const std::string &name, const std::vector< std::vector< unsigned int > > &topology, bool useMagma, bool useGPUMRES, bool useSparseMult);
+      Formulation(const std::string &name, const std::vector< gmshfem::problem::Formulation< T_Scalar > * > &volume, const std::vector< std::map< unsigned int, gmshfem::problem::Formulation< T_Scalar > * > > &surface, bool useMagma, bool useGPUMRES, bool useSparseMult);
       ~Formulation();
 
       unsigned int size() const;
diff --git a/src/problem/MagmaHelper.cpp b/src/problem/MagmaHelper.cpp
index 12381013ca5f291ea5f51eb0fa50fddca3015920..796ff8dd5c2da2a3006769c3e3ae7f61b96a96e8 100644
--- a/src/problem/MagmaHelper.cpp
+++ b/src/problem/MagmaHelper.cpp
@@ -14,10 +14,15 @@ namespace gmshddm
   namespace problem 
   {
     template< class T_Scalar >
-    MagmaHelper<T_Scalar>::MagmaHelper(bool useMagma, bool GPUMRES, int MPI_RANK)
+    MagmaHelper<T_Scalar>::MagmaHelper(bool useMagma, bool GPUMRES, bool sparseMult, int MPI_RANK)
     {
       #ifdef HAVE_MAGMA
       useGPUMRES = GPUMRES;
+      if (useGPUMRES){
+        useSparseMult = sparseMult;
+      } else {
+        useSparseMult = false;
+      }
       if (useMagma){
         _MPI_RANK = MPI_RANK;
         _totalNDom = 0;
@@ -229,6 +234,7 @@ namespace gmshddm
       //   magma_zprint_gpu( _lddx, _numMatrices, _d_X, _lddx, _queue);
       // }
       VecCreateSeqCUDAWithArray(PETSC_COMM_SELF, 1, _numMatrices * _lddx, (PetscScalar*)_d_X, &outgoingGVolumeVec);
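+      // Keep a PETSc handle on the _d_X-backed vector so the sparse MatMult path can write straight into it.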
+      _outgoingGVolumeVec = &outgoingGVolumeVec;
       #elif defined(USE_HIP)
       VecCreateSeqHIPWithArray(PETSC_COMM_SELF, 1, _numMatrices * _lddx, (PetscScalar*)_d_X, &outgoingGVolumeVec);
       #else
@@ -308,7 +314,6 @@ namespace gmshddm
       // Allocate device memory
       magma_malloc( (magma_ptr*) &_d_X, _lddx*numRhs*_numMatrices*sizeof(Magma_Scalar) );
       magma_malloc( (void**) &_dX_array,    _numMatrices * sizeof(Magma_Scalar*) );
-      magma_malloc( (magma_ptr*) &_d_B, _lddb*numRhs*_numMatrices*sizeof(Magma_Scalar) );
       magma_malloc_cpu( (void**) &_hB_array,    _numMatrices * sizeof(Magma_Scalar*) );
       magma_malloc( (void**) &_dB_array,    _numMatrices * sizeof(Magma_Scalar*) );
 
@@ -415,20 +420,6 @@ namespace gmshddm
       #endif
     }
 
-    template< class T_Scalar >
-    void MagmaHelper<T_Scalar>::setMultRHS(std::unordered_map< unsigned int, unsigned int> _localGsCudaPtr, std::unordered_map< unsigned int, unsigned int> _localGsSize){
-      #ifndef HAVE_MAGMA
-      throw gmshfem::common::Exception("MAGMA is not available");
-      #else
-      for (size_t i = 0; i < _numMatrices; ++i) {
-        auto pointer = reinterpret_cast<Magma_Scalar*>(_d_B + _localGsCudaPtr[_ids[i]]);
-        // Yes, I'm technically doing a out of bounds access and yes I don't care 😎 -> I'm not anymore (I think ???)
-        _hB_array[i] = pointer;
-      }
-      magma_setvector(_numMatrices, sizeof(Magma_Scalar*), _hB_array, 1, _dB_array, 1, _queue);
-      #endif
-    }
-
     template< class T_Scalar >
     void MagmaHelper<T_Scalar>::uploadMultRHS(std::unordered_map< unsigned, Mat >& _updatedRHS)
     {
@@ -553,17 +544,10 @@ namespace gmshddm
           _rhsMat_n = n;
         }
       }
-
-      std::cout << " - padded size :  " << _paddedMatrixSize << std::endl;
-      std::cout << " - rhsMat_m :  " << _rhsMat_m << std::endl;
-      std::cout << " - rhsMat_n :  " << _rhsMat_n << std::endl;
-      
-      if (_rhsMat_m != _paddedMatrixSize) {
-        throw gmshfem::common::Exception("Je comprends pas comment c'est possible HAHAHA Roland tu dois réfléchir !");
-      }
-      
-      std::cout << "Allocaiton ..." << std::endl;
-      // Allocate host memory
+      _ldb = _rhsMat_n;
+      _lddb = magma_roundup(_rhsMat_n, 32); // pad the leading dimension to a multiple of 32 for GPU alignment
       _ldMult = _rhsMat_m;
       _lddMult = magma_roundup(_rhsMat_m, 32);
       _sizeMult = (unsigned long long)_ldMult * (unsigned long long)_rhsMat_n * (unsigned long long)_numMatrices; // Ici on a une matrice rectangulaire !!!
@@ -578,75 +562,110 @@ namespace gmshddm
       size_t requiredMemory = _sizeMult * sizeof(Magma_Scalar) + _rhsMat_n * _numMatrices * sizeof(Magma_Scalar);
       std::cout << " - Required memory : " << requiredMemory / 1e9 << std::endl;
 
-      _h_Mult = (Magma_Scalar *)calloc(sizeof(Magma_Scalar), _sizeMult);
-      //magma_malloc_cpu( (void**) &_h_Mult, _sizeMult*sizeof(Magma_Scalar) );
-      
-      std::cout << "Allocation done" << std::endl;
-      // Fill the arrays
-      size_t sparseCount = 0;
-      for (size_t i = 0; i < _numMatrices; ++i) {
-        auto matrix = rhsMatricesSparsified[_ids[i]]; // Make sure that the order is correct
-        PetscInt m, n; // m = number of rows, n = number of columns
-        MatGetSize(matrix, &m, &n);
-        // Would love to do a MatGetValues directly into _h_Mult but I don't think I can do std::complex -> Magma_Scalar ?
-        // Also _h_Mult must be padded pffffff
-        for (PetscInt k = 0; k < n; ++k) {
-          for (PetscInt j = 0; j < m; ++j) {
-            PetscScalar value;
-            MatGetValue(matrix, j, k, &value);
-            if (!(value.real() == 0.0 && value.imag() == 0.0)) {
-              sparseCount++;
+      // NOTE: _d_B is sized for a single RHS column per matrix (numRhs == 1 only).
+      magma_malloc( (magma_ptr*) &_d_B, _lddb*1*_numMatrices*sizeof(Magma_Scalar) );
+
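+      // Sparse path: assemble every per-subdomain RHS-multiplication matrix as a
+      // diagonal block of one MATSEQAIJCUSPARSE matrix, so the whole RHS update
+      // becomes a single MatMult on the GPU (this branch currently assumes CUDA).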
+      if (useSparseMult) { // Sparse
+        MatCreate(PETSC_COMM_SELF, &_sparseFullMatrix);
+        MatSetType(_sparseFullMatrix, MATSEQAIJCUSPARSE);
+        MatSetSizes(_sparseFullMatrix, _lddMult * _numMatrices, _lddb * _numMatrices, PETSC_DECIDE, PETSC_DECIDE);
+        size_t sparseCount = 0;
+        for (size_t i = 0; i < _numMatrices; ++i) {
+          auto matrix = rhsMatricesSparsified[_ids[i]]; // Make sure that the order is correct
+          PetscInt m, n; // m = number of rows, n = number of columns
+          MatGetSize(matrix, &m, &n);
+          for (PetscInt j = 0; j < m; ++j) {
+            for (PetscInt k = 0; k < n; ++k) {
+              PetscScalar value;
+              MatGetValue(matrix, j, k, &value);
+              if (!(value.real() == 0.0 && value.imag() == 0.0)) {
+                sparseCount++;
+                MatSetValue(_sparseFullMatrix, i * _lddMult + j, i * _lddb + k, value, INSERT_VALUES); // element-by-element insertion: simple, but slow for large blocks
+              }
+              }
             }
-            if constexpr(std::is_same_v<T_Scalar, std::complex<double>>) {
-              _h_Mult[i*_ldMult*_rhsMat_n + k*_ldMult + j] = MAGMA_Z_MAKE(value.real(), value.imag());
-            } else if constexpr(std::is_same_v<T_Scalar, std::complex<float>>) {
-              _h_Mult[i*_ldMult*_rhsMat_n + k*_ldMult + j] = MAGMA_C_MAKE(value.real(), value.imag());
-            } else if constexpr(std::is_same_v<T_Scalar, double>) {
-              _h_Mult[i*_ldMult*_rhsMat_n + k*_ldMult + j] = value;
-            } else {
-              _h_Mult[i*_ldMult*_rhsMat_n + k*_ldMult + j] = value;
+          }
+        }
+        MatAssemblyBegin(_sparseFullMatrix, MAT_FINAL_ASSEMBLY);
+        MatAssemblyEnd(_sparseFullMatrix, MAT_FINAL_ASSEMBLY);
+        VecCreateSeqCUDAWithArray(PETSC_COMM_SELF, 1, _lddb * _numMatrices, (PetscScalar*)_d_B, &_multRHS);
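+        // _multRHS is a PETSc view of the MAGMA buffer _d_B: scattering into it
+        // fills the padded RHS directly on the device, with no extra copy.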
+        std::cout << "Required sparse memory : " << sparseCount * (sizeof(Magma_Scalar) + 2 * sizeof(size_t)) / 1e9 << std::endl;
+      } else {
+        if (_rhsMat_m != _paddedMatrixSize) {
+          throw gmshfem::common::Exception("Je comprends pas comment c'est possible HAHAHA Roland tu dois réfléchir !");
+        }
+        
+        std::cout << "Allocaiton ..." << std::endl;
+        // Allocate host memory
+
+        _h_Mult = (Magma_Scalar *)calloc(_sizeMult, sizeof(Magma_Scalar));
+        //magma_malloc_cpu( (void**) &_h_Mult, _sizeMult*sizeof(Magma_Scalar) );
+        
+        std::cout << "Allocation done" << std::endl;
+        // Fill the arrays
+        size_t sparseCount = 0;
+        for (size_t i = 0; i < _numMatrices; ++i) {
+          auto matrix = rhsMatricesSparsified[_ids[i]]; // Make sure that the order is correct
+          PetscInt m, n; // m = number of rows, n = number of columns
+          MatGetSize(matrix, &m, &n);
+          // A direct MatGetValues into _h_Mult would be nicer, but PetscScalar values
+          // cannot be written straight into Magma_Scalar storage, and _h_Mult also
+          // needs the padded layout.
+          for (PetscInt k = 0; k < n; ++k) {
+            for (PetscInt j = 0; j < m; ++j) {
+              PetscScalar value;
+              MatGetValue(matrix, j, k, &value);
+              if (!(value.real() == 0.0 && value.imag() == 0.0)) {
+                sparseCount++;
+              }
+              if constexpr(std::is_same_v<T_Scalar, std::complex<double>>) {
+                _h_Mult[i*_ldMult*_rhsMat_n + k*_ldMult + j] = MAGMA_Z_MAKE(value.real(), value.imag());
+              } else if constexpr(std::is_same_v<T_Scalar, std::complex<float>>) {
+                _h_Mult[i*_ldMult*_rhsMat_n + k*_ldMult + j] = MAGMA_C_MAKE(value.real(), value.imag());
+              } else if constexpr(std::is_same_v<T_Scalar, double>) {
+                _h_Mult[i*_ldMult*_rhsMat_n + k*_ldMult + j] = value;
+              } else {
+                _h_Mult[i*_ldMult*_rhsMat_n + k*_ldMult + j] = value;
+              }
             }
           }
         }
-      }
 
-      std::cout << "Required sparse memory : " << sparseCount * (sizeof(Magma_Scalar) + 2 * sizeof(size_t)) / 1e9 << std::endl;
+        std::cout << "Required sparse memory : " << sparseCount * (sizeof(Magma_Scalar) + 2 * sizeof(size_t)) / 1e9 << std::endl;
 
-      // Matrices filled
+        // Matrices filled
 
-      // Allocate device memory
-      magma_malloc( (magma_ptr*) &_d_Mult, _lddMult*_rhsMat_n*_numMatrices*sizeof(Magma_Scalar) );
-      magma_malloc( (void**) &_d_Mult_array,    _numMatrices * sizeof(Magma_Scalar*) );
+        // Allocate device memory
+        magma_malloc( (magma_ptr*) &_d_Mult, _lddMult*_rhsMat_n*_numMatrices*sizeof(Magma_Scalar) );
+        magma_malloc( (void**) &_d_Mult_array,    _numMatrices * sizeof(Magma_Scalar*) );
 
+        void* zeroedDMULT = calloc(_lddMult*_rhsMat_n*_numMatrices, sizeof(Magma_Scalar));
-      if (zeroedDMULT == NULL) {
-        throw gmshfem::common::Exception("Failed to allocate memory for zeroedDMULT");
-      }
-      magma_setvector(_lddMult*_rhsMat_n*_numMatrices, sizeof(Magma_Scalar), zeroedDMULT, 1, _d_Mult, 1, _queue);
-      free(zeroedDMULT);
-      
-      if constexpr(std::is_same_v<T_Scalar, std::complex<double>>) {
-        magma_zsetmatrix( _ldMult, _rhsMat_n*_numMatrices, _h_Mult, _ldMult, _d_Mult, _lddMult, _queue );
-        magma_zset_pointer( _d_Mult_array, _d_Mult, _ldMult, 0, 0, _lddMult*_rhsMat_n, _numMatrices, _queue );
-      } else if constexpr(std::is_same_v<T_Scalar, std::complex<float>>) {
-        magma_csetmatrix( _ldMult, _rhsMat_n*_numMatrices, _h_Mult, _ldMult, _d_Mult, _lddMult, _queue );
-        magma_cset_pointer( _d_Mult_array, _d_Mult, _ldMult, 0, 0, _lddMult*_rhsMat_n, _numMatrices, _queue );
-      } else if constexpr(std::is_same_v<T_Scalar, double>) {
-        magma_dsetmatrix( _ldMult, _rhsMat_n*_numMatrices, _h_Mult, _ldMult, _d_Mult, _lddMult, _queue );
-        magma_dset_pointer( _d_Mult_array, _d_Mult, _ldMult, 0, 0, _lddMult*_rhsMat_n, _numMatrices, _queue );
-      } else {
-        magma_ssetmatrix( _ldMult, _rhsMat_n*_numMatrices, _h_Mult, _ldMult, _d_Mult, _lddMult, _queue );
-        magma_sset_pointer( _d_Mult_array, _d_Mult, _ldMult, 0, 0, _lddMult*_rhsMat_n, _numMatrices, _queue );
+        void* zeroedDMULT = calloc(sizeof(Magma_Scalar), _lddMult*_rhsMat_n*_numMatrices);
+        if (zeroedDMULT == NULL) {
+          throw gmshfem::common::Exception("Failed to allocate memory for zeroedDMULT");
+        }
+        magma_setvector(_lddMult*_rhsMat_n*_numMatrices, sizeof(Magma_Scalar), zeroedDMULT, 1, _d_Mult, 1, _queue);
+        free(zeroedDMULT);
+        
+        if constexpr(std::is_same_v<T_Scalar, std::complex<double>>) {
+          magma_zsetmatrix( _ldMult, _rhsMat_n*_numMatrices, _h_Mult, _ldMult, _d_Mult, _lddMult, _queue );
+          magma_zset_pointer( _d_Mult_array, _d_Mult, _ldMult, 0, 0, _lddMult*_rhsMat_n, _numMatrices, _queue );
+        } else if constexpr(std::is_same_v<T_Scalar, std::complex<float>>) {
+          magma_csetmatrix( _ldMult, _rhsMat_n*_numMatrices, _h_Mult, _ldMult, _d_Mult, _lddMult, _queue );
+          magma_cset_pointer( _d_Mult_array, _d_Mult, _ldMult, 0, 0, _lddMult*_rhsMat_n, _numMatrices, _queue );
+        } else if constexpr(std::is_same_v<T_Scalar, double>) {
+          magma_dsetmatrix( _ldMult, _rhsMat_n*_numMatrices, _h_Mult, _ldMult, _d_Mult, _lddMult, _queue );
+          magma_dset_pointer( _d_Mult_array, _d_Mult, _ldMult, 0, 0, _lddMult*_rhsMat_n, _numMatrices, _queue );
+        } else {
+          magma_ssetmatrix( _ldMult, _rhsMat_n*_numMatrices, _h_Mult, _ldMult, _d_Mult, _lddMult, _queue );
+          magma_sset_pointer( _d_Mult_array, _d_Mult, _ldMult, 0, 0, _lddMult*_rhsMat_n, _numMatrices, _queue );
+        }
+        std::cout << " - padded size :  " << _paddedMatrixSize << std::endl;
+        std::cout << " - rhsMat_m :  " << _rhsMat_m << std::endl;
+        std::cout << " - rhsMat_n :  " << _rhsMat_n << std::endl;
+        std::cout << "setRHSMatrix end" <<std::endl;
       }
-      std::cout << " - padded size :  " << _paddedMatrixSize << std::endl;
-      std::cout << " - rhsMat_m :  " << _rhsMat_m << std::endl;
-      std::cout << " - rhsMat_n :  " << _rhsMat_n << std::endl;
-      std::cout << "setRHSMatrix end" <<std::endl;
       #endif
     }
 
     template< class T_Scalar >
-    void MagmaHelper<T_Scalar>::updateRHS()
+    void MagmaHelper<T_Scalar>::updateRHS(Vec& Gread)
     {
       #ifndef HAVE_MAGMA
       throw gmshfem::common::Exception("MAGMA is not available");
@@ -659,31 +678,38 @@ namespace gmshddm
       //   //magma_zprint_gpu(_lddb, _numRhs*_numMatrices, _d_B, _lddb, _queue);
       // }
 
+      if (useSparseMult){
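+        // Pack Gread into the padded device layout, then apply the block-diagonal
+        // matrix with a single MatMult; the result lands in the PETSc view of _d_X.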
+        VecScatterBegin(_fromPetscToPadded, Gread, _multRHS, INSERT_VALUES, SCATTER_FORWARD);
+        VecScatterEnd(_fromPetscToPadded, Gread, _multRHS, INSERT_VALUES, SCATTER_FORWARD);
+        MatMult(_sparseFullMatrix, _multRHS, *_outgoingGVolumeVec);
+        //VecView(*_outgoingGVolumeVec, PETSC_VIEWER_STDOUT_WORLD);
 
-
-      if (_numRhs == 1){
-        if constexpr(std::is_same_v<T_Scalar, std::complex<double>>) {
-          magmablas_zgemv_batched(MagmaNoTrans, _ldMult, _rhsMat_n, MAGMA_Z_MAKE(1.0,0.0), _d_Mult_array, _lddMult, _dB_array, 1, MAGMA_Z_MAKE(0.0,0.0), _dX_array, 1, _numMatrices, _queue);
-        } else if constexpr(std::is_same_v<T_Scalar, std::complex<float>>) {
-          magmablas_cgemv_batched(MagmaNoTrans, _ldMult, _rhsMat_n, MAGMA_C_MAKE(1.0,0.0), _d_Mult_array, _lddMult, _dB_array, 1, MAGMA_C_MAKE(0.0,0.0), _dX_array, 1, _numMatrices, _queue);
-        } else if constexpr(std::is_same_v<T_Scalar, double>) {
-          magmablas_dgemv_batched(MagmaNoTrans, _ldMult, _rhsMat_n, 1.0, _d_Mult_array, _lddMult, _dB_array, 1, 0.0, _dX_array, 1, _numMatrices, _queue);
-        } else {
-          magmablas_sgemv_batched(MagmaNoTrans, _ldMult, _rhsMat_n, 1.0, _d_Mult_array, _lddMult, _dB_array, 1, 0.0, _dX_array, 1, _numMatrices, _queue);
-        }
-        
       } else {
-        if constexpr(std::is_same_v<T_Scalar, std::complex<double>>) {
-          magma_zgemm_batched(MagmaNoTrans, MagmaNoTrans, _ldMult, _rhsMat_n, _numRhs, MAGMA_Z_MAKE(-1.0,0.0), _d_Mult_array, _lddMult, _dB_array, _lddb, MAGMA_Z_MAKE(0.0,0.0), _dX_array, _lddx, _numMatrices, _queue);
-        } else if constexpr(std::is_same_v<T_Scalar, std::complex<float>>) {
-          magma_cgemm_batched(MagmaNoTrans, MagmaNoTrans, _ldMult, _rhsMat_n, _numRhs, MAGMA_C_MAKE(-1.0,0.0), _d_Mult_array, _lddMult, _dB_array, _lddb, MAGMA_C_MAKE(0.0,0.0), _dX_array, _lddx, _numMatrices, _queue);
-        } else if constexpr(std::is_same_v<T_Scalar, double>) {
-          magma_dgemm_batched(MagmaNoTrans, MagmaNoTrans, _ldMult, _rhsMat_n, _numRhs, -1.0, _d_Mult_array, _lddMult, _dB_array, _lddb, 0.0, _dX_array, _lddx, _numMatrices, _queue);
+        if (_numRhs == 1){
+          if constexpr(std::is_same_v<T_Scalar, std::complex<double>>) {
+            magmablas_zgemv_batched(MagmaNoTrans, _ldMult, _rhsMat_n, MAGMA_Z_MAKE(1.0,0.0), _d_Mult_array, _lddMult, _dB_array, 1, MAGMA_Z_MAKE(0.0,0.0), _dX_array, 1, _numMatrices, _queue);
+          } else if constexpr(std::is_same_v<T_Scalar, std::complex<float>>) {
+            magmablas_cgemv_batched(MagmaNoTrans, _ldMult, _rhsMat_n, MAGMA_C_MAKE(1.0,0.0), _d_Mult_array, _lddMult, _dB_array, 1, MAGMA_C_MAKE(0.0,0.0), _dX_array, 1, _numMatrices, _queue);
+          } else if constexpr(std::is_same_v<T_Scalar, double>) {
+            magmablas_dgemv_batched(MagmaNoTrans, _ldMult, _rhsMat_n, 1.0, _d_Mult_array, _lddMult, _dB_array, 1, 0.0, _dX_array, 1, _numMatrices, _queue);
+          } else {
+            magmablas_sgemv_batched(MagmaNoTrans, _ldMult, _rhsMat_n, 1.0, _d_Mult_array, _lddMult, _dB_array, 1, 0.0, _dX_array, 1, _numMatrices, _queue);
+          }
+          
         } else {
-          magma_sgemm_batched(MagmaNoTrans, MagmaNoTrans, _ldMult, _rhsMat_n, _numRhs, -1.0, _d_Mult_array, _lddMult, _dB_array, _lddb, 0.0, _dX_array, _lddx, _numMatrices, _queue);
+          if constexpr(std::is_same_v<T_Scalar, std::complex<double>>) {
+            magma_zgemm_batched(MagmaNoTrans, MagmaNoTrans, _ldMult, _rhsMat_n, _numRhs, MAGMA_Z_MAKE(-1.0,0.0), _d_Mult_array, _lddMult, _dB_array, _lddb, MAGMA_Z_MAKE(0.0,0.0), _dX_array, _lddx, _numMatrices, _queue);
+          } else if constexpr(std::is_same_v<T_Scalar, std::complex<float>>) {
+            magma_cgemm_batched(MagmaNoTrans, MagmaNoTrans, _ldMult, _rhsMat_n, _numRhs, MAGMA_C_MAKE(-1.0,0.0), _d_Mult_array, _lddMult, _dB_array, _lddb, MAGMA_C_MAKE(0.0,0.0), _dX_array, _lddx, _numMatrices, _queue);
+          } else if constexpr(std::is_same_v<T_Scalar, double>) {
+            magma_dgemm_batched(MagmaNoTrans, MagmaNoTrans, _ldMult, _rhsMat_n, _numRhs, -1.0, _d_Mult_array, _lddMult, _dB_array, _lddb, 0.0, _dX_array, _lddx, _numMatrices, _queue);
+          } else {
+            magma_sgemm_batched(MagmaNoTrans, MagmaNoTrans, _ldMult, _rhsMat_n, _numRhs, -1.0, _d_Mult_array, _lddMult, _dB_array, _lddb, 0.0, _dX_array, _lddx, _numMatrices, _queue);
+          }
         }
-        
       }
       #endif
     }
 
@@ -745,7 +771,7 @@ namespace gmshddm
 
     // Allocate the GRead matrix so that I don't do outofbounds access (oups) 
     template< class T_Scalar >
-    void MagmaHelper<T_Scalar>::allocateGread(Vec& Gread, unsigned localGSize)
+    void MagmaHelper<T_Scalar>::allocateGread(Vec& Gread, unsigned localGSize, std::unordered_map< unsigned int, unsigned int> _localGsCudaPtr, std::unordered_map< unsigned int, unsigned int> _localGsSize)
     {
       #ifndef HAVE_MAGMA
       throw gmshfem::common::Exception("MAGMA is not available");
@@ -759,17 +785,54 @@ namespace gmshddm
       //std::cout << "allocate Gread HIP" << std::endl;
       VecHIPGetArray(Gread, &petscArray);
       #endif
-      // Copy to _dB
-      magma_copyvector_async(2 * localGSize, sizeof(double), petscArray, 1, _d_B, 1, _queue);
-      // Create new GSize
+      // With the sparse path the RHS lives in a padded layout: _localGsSize gives
+      // each subdomain's local size and _lddb the padded stride.
+      if (useSparseMult) { // Sparse
+        // Build the Gread <-> _multRHS scatters (a copy that skips the padding).
+        std::vector< PetscInt > inputIndices;
+        PetscInt rstart, rend;
+        VecGetOwnershipRange(Gread, &rstart, &rend);
+        PetscInt localSize = rend - rstart;
+        IS outIS;
+        ISCreateStride(MPI_COMM_WORLD, localSize, rstart, 1, &outIS);
+
+        for (size_t i = 0; i < _numMatrices; ++i) {
+          for (size_t j = 0; j < _localGsSize[_ids[i]]; ++j) {
+            inputIndices.push_back(i * _lddb + j);
+          }
+        }
+
+        IS indexIS;
+        ISCreateGeneral(MPI_COMM_WORLD, inputIndices.size(), inputIndices.data(), PETSC_COPY_VALUES, &indexIS);
+        VecScatterCreate(Gread, outIS, _multRHS, indexIS, &_fromPetscToPadded);
+        VecScatterCreate(_multRHS, indexIS, Gread, outIS, &_fromPaddedToPetsc);
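+        // Illustrative mapping: with _lddb = 32 and local sizes {30, 28}, Gread
+        // entries 0..29 go to padded positions 0..29, entries 30..57 go to
+        // positions 32..59, and positions 30..31 stay as zero padding.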
+
+      } else {
+        magma_copyvector_async(2 * localGSize, sizeof(double), petscArray, 1, _d_B, 1, _queue);
+        VecDestroy(&Gread);
+        #ifdef USE_CUDA
+        VecCreateMPICUDAWithArray(PETSC_COMM_WORLD, 1, localGSize, PETSC_DECIDE, (PetscScalar*)_d_B, &Gread);
+        #endif
+        #ifdef USE_HIP
+        VecCreateMPIHIPWithArray(PETSC_COMM_WORLD, 1, localGSize, PETSC_DECIDE, (PetscScalar*)_d_B, &Gread);
+        #endif
+        for (size_t i = 0; i < _numMatrices; ++i) {
+          auto pointer = reinterpret_cast<Magma_Scalar*>(_d_B + _localGsCudaPtr[_ids[i]]);
+          // Not an out-of-bounds access: _d_B is allocated with the full padding.
+          // A batched GEMV may still read past one subdomain's RHS into its
+          // neighbour's data, but the matrix is zero-padded there, so the result
+          // is unaffected.
+          _hB_array[i] = pointer;
+        }
+        magma_setvector(_numMatrices, sizeof(Magma_Scalar*), _hB_array, 1, _dB_array, 1, _queue); 
+      }
       
-      VecDestroy(&Gread);
-      #ifdef USE_CUDA
-      VecCreateMPICUDAWithArray(PETSC_COMM_WORLD, 1, localGSize, PETSC_DECIDE, (PetscScalar*)_d_B, &Gread);
-      #endif
-      #ifdef USE_HIP
-      VecCreateMPIHIPWithArray(PETSC_COMM_WORLD, 1, localGSize, PETSC_DECIDE, (PetscScalar*)_d_B, &Gread); // This doesn't directly break _d_B ... ????
-      #endif
       #endif
       // d_B has the correct value here !!!
     }
@@ -781,7 +844,9 @@ namespace gmshddm
       #else
       magma_free_cpu(_h_X);
       magma_free_cpu(_h_B);
-      magma_free_cpu(_h_Mult);
+      if (_h_Mult != NULL) {
+        magma_free_cpu(_h_Mult);
+      }
       #endif
     }
     
diff --git a/src/problem/MagmaHelper.h b/src/problem/MagmaHelper.h
index b662aec696cc92434382265d174cea0a907a3fa8..996dae74ceec5cf4ff90d4dfdeab4f80921d04d1 100644
--- a/src/problem/MagmaHelper.h
+++ b/src/problem/MagmaHelper.h
@@ -56,7 +56,7 @@ namespace gmshddm
       public:
         using Magma_Scalar = typename MagmaTypes<T_Scalar>::Magma_Scalar;
         using Magma_Pointer = typename MagmaTypes<T_Scalar>::Magma_Pointer;
-        MagmaHelper(bool useMagma, bool useGPUMRES, int MPI_RANK);
+        MagmaHelper(bool useMagma, bool useGPUMRES, bool sparseMult, int MPI_RANK);
         ~MagmaHelper();
 
         // Process unordered map of CRS matrices to be used in batched magma routines
@@ -64,15 +64,15 @@ namespace gmshddm
         void convertRHSVectorToBatchedRHSMatrix(std::unordered_map< unsigned int, Mat > &_rhs, unsigned numRhs);
         void factPlusSolve(); // Premier solve qui donne la factorisation LU pour les matrices
         void solveOnly(); // Solve une fois qu'on a la factorisation LU avec un nouvbeau RHS (partie "directe" du solveur DDM en gros)
-        void updateRHS(); // Met à jour le RHS
+        void updateRHS(Vec& Gread); // Updates the RHS (dense batched GEMV or sparse MatMult path)
         void uploadMultRHS(std::unordered_map< unsigned, Vec >& _updatedRHS); // Met à jour le RHS de la multiplication
         void uploadMultRHS(std::unordered_map< unsigned, Mat >& _updatedRHS); // Met à jour le RHS de la multiplication
         void getSolution(std::unordered_map< unsigned, Vec >& _whereToPutIt); // Récupère la solution
         void getSolution(std::unordered_map< unsigned, Mat >& _whereToPutIt); // Récupère la solution
         void setRHSMatrix(std::unordered_map< unsigned, Mat>& rhsMatricesSparsified);
-        void setMultRHS(std::unordered_map< unsigned int, unsigned int> _localGsCudaPtr, std::unordered_map< unsigned int, unsigned int> _localGsSize);
         void createOutgoingGVolumes(VecScatter &fromVolumeToInterfaceScatter, std::unordered_map< unsigned, unsigned >& _interfaceSizes, std::map<unsigned, IS>& _interfaceIndices, std::map<unsigned, IS> &_shiftedIdentity, unsigned localGSize, Vec &outgoingGVolumeVec, std::unordered_map< unsigned, Vec > &outgoingGVolumes);
-        void allocateGread(Vec& Gread, unsigned localGSize);
+        void allocateGread(Vec& Gread, unsigned localGSize, std::unordered_map< unsigned int, unsigned int> _localGsCudaPtr, std::unordered_map< unsigned int, unsigned int> _localGsSize);
         void freeMagmaCPUArrays();
 
       private:
@@ -88,13 +88,17 @@ namespace gmshddm
         int _MPI_RANK;
         unsigned _totalNDom;
         bool useGPUMRES;
+        bool useSparseMult; // cuSPARSE block-diagonal MatMult path (only used with useGPUMRES)
 
         #ifdef HAVE_MAGMA
         magma_queue_t _queue;
         // h_A -> Matrix of the systems
         // h_B -> RHS of the systems
         // h_Mult -> Compute RHS
-        Magma_Scalar *_h_A, *_h_B, *_h_X, *_h_Mult;
+        Magma_Scalar *_h_A = NULL;
+        Magma_Scalar *_h_B = NULL;
+        Magma_Scalar *_h_X = NULL;
+        Magma_Scalar *_h_Mult = NULL;
         Magma_Pointer _d_A, _d_B, _d_X, _d_Mult;
         magma_int_t _lda = 0, _ldda = 0, _ldb = 0, _lddb = 0, _ldMult = 0, _lddMult = 0, _ldx = 0, _lddx = 0;
         magma_int_t *_ipiv, *_info;
@@ -103,6 +107,12 @@ namespace gmshddm
         Magma_Scalar **_dB_array = NULL;
         Magma_Scalar **_dX_array = NULL;
 
+        // Sparse-mult path: one block-diagonal AIJCUSPARSE matrix applied with a single MatMult
+        Mat _sparseFullMatrix = nullptr;
+        Vec* _outgoingGVolumeVec = nullptr; // PETSc view of _d_X (set on the CUDA path)
+        Vec _multRHS = nullptr; // PETSc view of _d_B with the padded RHS layout
+        VecScatter _fromPaddedToPetsc = nullptr;
+        VecScatter _fromPetscToPadded = nullptr;
+
         Magma_Scalar **_d_Mult_array = NULL;
         Magma_Scalar **_hB_array = NULL;
 
diff --git a/tutorials/helmholtz/waveguide/main.cpp b/tutorials/helmholtz/waveguide/main.cpp
index a374aaae5dabd9e70e4c6a04f5a735a1be738170..605963b8f15ca12b8f556580582d55681c2a08c4 100644
--- a/tutorials/helmholtz/waveguide/main.cpp
+++ b/tutorials/helmholtz/waveguide/main.cpp
@@ -108,6 +108,7 @@ int main(int argc, char **argv)
   int order = 1;
   int magma = 0;
   int gpumres = 0;
+  int sparseMult = 0;
   int savesolution = 1;
   std::string iopath = ".";
   int bSaveFunct = 0;
@@ -124,8 +125,10 @@ int main(int argc, char **argv)
   gmshDdm.userDefinedParameter(savesolution, "saveSolution");
   gmshDdm.userDefinedParameter(iopath, "iopath");
   gmshDdm.userDefinedParameter(bSaveFunct, "saveFunction");
+  gmshDdm.userDefinedParameter(sparseMult, "sparseMult");
   bool useMagma = (magma != 0);
   bool useGPUMRES = (gpumres != 0);
+  bool useSparseMult = (sparseMult != 0);
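+  // Typical invocation (flag names from the userDefinedParameter calls above):
+  //   -magma 1 -gpumres 1 -sparseMult 1
+  // sparseMult only takes effect when GPU MRES is enabled (see the Formulation constructor).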
   bool saveSolution = (savesolution != 0);
   bool saveFunction = (bSaveFunct != 0);
 
@@ -155,6 +158,8 @@ int main(int argc, char **argv)
   // DUMP EDGE TAGS
   //gmsh::model::mesh::createEdges();
 
+  std::cout << "158" << std::endl;
+
   // Define domain
   Subdomain omega(nDom);
   Subdomain gammaInf(nDom);
@@ -163,29 +168,68 @@ int main(int argc, char **argv)
   Interface sigma(nDom);
   Domain corner = Domain("corner");
 
+  std::cout << "166" << std::endl;
+
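+  // Fetch all physical groups and their entities once and cache them in a map,
+  // so each Domain below is built from the cached entity list instead of a
+  // per-domain gmsh query (presumably the slow step this rewrite avoids).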
+  std::map< std::pair< int, int >, std::vector< std::pair<int, int> > > groupsMap;
+  std::vector< std::pair<int, int> > physGroups;
+  std::vector< std::vector < std::pair<int, int> > > entities;
+
+  gmsh::model::getPhysicalGroupsEntities(physGroups, entities);
+
+  for(unsigned int i = 0; i < physGroups.size(); ++i) {
+    groupsMap[physGroups[i]] = entities[i];
+  }
+
+  std::cout << "176" << std::endl;
+
   for(unsigned int i = 0; i < nDom; ++i) {
-    omega(i) = Domain(2, i + 1);
+    omega(i) = Domain(groupsMap[std::make_pair(2, i + 1)]);
 
     if(i == nDom - 1) {
-      gammaInf(i) = Domain(1, 2);
+      gammaInf(i) = Domain(groupsMap[std::make_pair(1, 2)]);
     }
 
     if(i == 0) {
-      gammaDir(i) = Domain(1, 1);
+      gammaDir(i) = Domain(groupsMap[std::make_pair(1, 1)]);
     }
 
-    border(i) = Domain(1, 100 + i);
+    border(i) = Domain(groupsMap[std::make_pair(1, 100 + i)]);
 
     if(i != 0) {
-      sigma(i, i - 1) = Domain(1, 10000 + i - 1);
+      sigma(i, i - 1) = Domain(groupsMap[std::make_pair(1, 10000 + i - 1)]);
     }
     if(i != nDom - 1) {
-      sigma(i, i + 1) = Domain(1, 10000 + i);
+      sigma(i, i + 1) = Domain(groupsMap[std::make_pair(1, 10000 + i)]);
     }
   }
 
+  std::cout << "190" << std::endl;
+
   // Define topology
   std::vector< std::vector< unsigned int > > topology(nDom);
   for(unsigned int i = 0; i < nDom; ++i) {
     if(i != 0) {
       topology[i].push_back(i - 1);
@@ -195,32 +239,41 @@ int main(int argc, char **argv)
     }
   }
 
-
+  std::cout << "199" << std::endl;
   // Create DDM formulation
-  gmshddm::problem::Formulation< std::complex< double > > formulation("Helmholtz", topology, useMagma, useGPUMRES);
+  gmshddm::problem::Formulation< std::complex< double > > formulation("Helmholtz", topology, useMagma, useGPUMRES, useSparseMult);
 
   // Create fields
  // SubdomainField< std::complex< double >, Form::Form0 > u("u", omega | gammaDir | gammaInf | border | sigma, FunctionSpaceTypeForm0::HierarchicalH1, order);
-  SubdomainField< std::complex< double >, Form::Form0 > u("u", omega | gammaDir | gammaInf | border | sigma, FunctionSpaceTypeForm0::Lagrange);
+  SubdomainField< std::complex< double >, Form::Form0 > u("u", omega | gammaDir | gammaInf | border | sigma, FunctionSpaceTypeForm0::Lagrange); // Takes time
+  std::cout << "SubdomainField constructor done" << std::endl;
   for(unsigned int i = 0; i < nDom; ++i) {
     u(i).addConstraint(border(i), 0.);
   }
   u(0).addConstraint(gammaDir(0), sin< std::complex< double > >(pi * y< std::complex< double > >() / l));
+
+  std::cout << "u constraints done" << std::endl;
   //InterfaceField< std::complex< double >, Form::Form0 > g("g", sigma, FunctionSpaceTypeForm0::HierarchicalH1, order);
-  InterfaceField< std::complex< double >, Form::Form0 > g("g", sigma, FunctionSpaceTypeForm0::Lagrange);
+  InterfaceField< std::complex< double >, Form::Form0 > g("g", sigma, FunctionSpaceTypeForm0::Lagrange); // Very slow
+  std::cout << "InterfaceField constructor done" << std::endl;
   for(unsigned int i = 0; i < nDom; ++i) {
     for(unsigned int jj = 0; jj < topology[i].size(); ++jj) {
       const unsigned int j = topology[i][jj];
       g(i, j).addConstraint(corner, 0.);
     }
-  }
+  } // Somewhat slow
 
+  std::cout << "262" << std::endl; // Takes lot of time before here
   std::complex< double > im(0., 1.);
 
   // Tell to the formulation that g is field that have to be exchanged between subdomains
   formulation.addInterfaceField(g);
 
   // Add terms to the formulation
+  std::cout << "Adding formulation terms..." << std::endl;
   for(unsigned int i = 0; i < nDom; ++i) {
     // VOLUME TERMS
     formulation(i).integral(grad(dof(u(i))), grad(tf(u(i))), omega(i), gauss);
@@ -248,6 +301,8 @@ int main(int argc, char **argv)
     }
   }
 
+  std::cout << "287" << std::endl; // Suprisingly fast
+
 
   // Solve the DDM formulation
   formulation.pre();