diff --git a/src/problem/Formulation.cpp b/src/problem/Formulation.cpp
index b539e1b7e274ac74ec62b571f775cecd83f68e95..44992937fe946616e1802ea30a7c47557c811433 100644
--- a/src/problem/Formulation.cpp
+++ b/src/problem/Formulation.cpp
@@ -1031,35 +1031,6 @@ namespace gmshddm
       _artificialCommutator = true;
       togglePhysicalAndArtificialSourceTerms();
 
-      auto deactivateBilinear = [=](const unsigned long long idom) {
-        for(auto formulation : *_volume[idom]) {
-          if(formulation->isBilinear()) {
-	    formulation->deactivate();
-          }
-        }
-        for(auto formulation : _surface[idom]) {
-          for(auto term : *formulation.second) {
-            if(term->isBilinear()) {
-              term->deactivate();
-            }
-          }
-        }
-      };
-
-      auto activateBilinear = [=](const unsigned long long idom) {
-        for(auto formulation : *_volume[idom]) {
-          if(formulation->isBilinear()) {
-            formulation->activate();
-          }
-        }
-        for(auto formulation : _surface[idom]) {
-          for(auto term : *formulation.second) {
-            if(term->isBilinear()) {
-              term->activate();
-            }
-          }
-        }
-      };
 
       // Assemble the volumic systems for each subdomain
       for(auto idom = 0ULL; idom < _volume.size(); ++idom) {
@@ -1068,7 +1039,7 @@ namespace gmshddm
           _volume[idom]->setAttribute("ddm::artificialCommutator", _artificialCommutator);
           gmshfem::common::Options::instance()->verbose = innerVerbosity;
           if(sameMatrixWithArtificialAndPhysicalSources) {
-            deactivateBilinear(idom);
+            _deactivateBilinear(idom);
             _volume[idom]->setRHSToZero();
             _volume[idom]->assemble();
           }
@@ -1077,7 +1048,7 @@ namespace gmshddm
             _volume[idom]->initSystem();
             _volume[idom]->pre();
             _volume[idom]->assemble();
-            deactivateBilinear(idom);
+            _deactivateBilinear(idom);
           }
           gmshfem::common::Options::instance()->verbose = outerVerbosity;
         }
@@ -1125,12 +1096,12 @@ namespace gmshddm
         if(mpi::isItMySubdomain(idom)) {
           gmshfem::common::Options::instance()->verbose = innerVerbosity;
           if(!sameMatrixWithArtificialAndPhysicalSources) {
-            activateBilinear(idom);
+            _activateBilinear(idom);
           }
           _volume[idom]->assemble();
           _volume[idom]->solve(true);
           if(sameMatrixWithArtificialAndPhysicalSources) {
-            activateBilinear(idom);
+            _activateBilinear(idom);
           }
           gmshfem::common::Options::instance()->verbose = outerVerbosity;
         }
@@ -1152,59 +1123,197 @@ namespace gmshddm
       return time;
     }
 
-    // TODO
     template< class T_Scalar >
     gmshfem::algebra::MatrixCCS< T_Scalar > Formulation< T_Scalar >::computeMatrix()
     {
+      gmshfem::common::Timer time;
+      time.tick();
+
       const int outerVerbosity = gmshfem::common::Options::instance()->verbose;
       const int innerVerbosity = (outerVerbosity != 0 ? outerVerbosity - 1 : 0);
 
-      if(mpi::getMPISize() != 1) {
-        throw gmshfem::common::Exception("Formulation< T_Scalar >::computeMatrix() does not work on multiple processes");
-      }
       Mat A;
-      Vec X, Y;
+      Vec X, Y, Y_seq;
+      VecScatter     ctx;
 
-      VecDuplicate(_rhs.getPetsc(), &X);
-      VecDuplicate(_rhs.getPetsc(), &Y);
+      // Create a distributed vector that acts as a RHS
+      PetscInt locSize = _rhs.size();
+      VecCreateMPI(MPI_COMM_WORLD, locSize, PETSC_DETERMINE, &Y);
+      VecDuplicate(Y, &X);
+      
+      // Create a sequential full vector and a scatter object to synchronize the global vector of outputs,
+      // Needed to build the matrix
+      PetscInt globSize;
+      VecGetSize(Y, &globSize);
+      VecCreateSeq(MPI_COMM_SELF, globSize, &Y_seq);
+      VecScatterCreateToAll(Y, &ctx, &Y_seq);
+
+      if (mpi::getMPIRank() == 0)
+        gmshfem::msg::info << "Local size:" << locSize << " and global size:" << globSize << "." << gmshfem::msg::endl;
 
-      MatCreateShell(MPI_COMM_SELF, _rhs.size(), _rhs.size(), _rhs.size(), _rhs.size(), this, &A);
-      MatShellSetOperation(A, MATOP_MULT, (void (*)(void)) & MatVectProductA< T_Scalar >);
+
+      // Setup the A matrix
+      MatCreateShell(MPI_COMM_WORLD, locSize, locSize, globSize, globSize, this, &A);
+      MatShellSetOperation(A, MATOP_MULT, (void (*)(void)) & MatVectProductI_A< T_Scalar >);
 
       _physicalCommutator = false;
       _artificialCommutator = true;
       togglePhysicalAndArtificialSourceTerms();
 
-      auto deactivateBilinear = [=](const unsigned long long idom) {
-        for(auto formulation : *_volume[idom]) {
-          if(formulation->isBilinear()) {
-	    formulation->deactivate();
-          }
-        }
-        for(auto formulation : _surface[idom]) {
-          for(auto term : *formulation.second) {
-            if(term->isBilinear()) {
-              term->deactivate();
-            }
-          }
+  
+      // Compute sub matrices then disable bilinear terms (to assemble only RHS)
+      for(auto idom = 0ULL; idom < _volume.size(); ++idom) {
+        _volume[idom]->setAttribute("ddm::physicalCommutator", _physicalCommutator);
+        _volume[idom]->setAttribute("ddm::artificialCommutator", _artificialCommutator);
+        gmshfem::common::Options::instance()->verbose = innerVerbosity;
+        _volume[idom]->removeSystem();
+        _volume[idom]->initSystem();
+        _volume[idom]->pre();
+        _volume[idom]->assemble();
+        _deactivateBilinear(idom);
+        gmshfem::common::Options::instance()->verbose = outerVerbosity;
+      }
+
+
+      gmshfem::algebra::Vector< T_Scalar > vec(globSize);
+      std::vector< PetscInt > aiVector(globSize);
+      for(auto i = 0ULL; i < globSize; ++i) {
+        aiVector[i] = i;
+      }
+      std::vector< PetscScalar > aVector(globSize);
+
+      std::vector< unsigned long long > aj(globSize + 1, 0);
+      std::vector< unsigned long long > ai;
+      ai.reserve(globSize);
+      std::vector< T_Scalar > a;
+      a.reserve(globSize);
+
+      std::vector<PetscScalar> zeros(globSize, 0);
+      // Init RHS vector
+      VecSet(X, 0.0);
+      VecSetValue(X, 0, 1.0, INSERT_VALUES);
+      VecAssemblyBegin(X);
+      VecAssemblyEnd(X);
+
+      for(auto i = 0ULL; i < globSize; ++i) {
+
+        VecSet(X, 0.0);
+        VecSetValue(X, i, 1.0, INSERT_VALUES);
+        VecAssemblyBegin(X);
+        VecAssemblyEnd(X);
+
+        if(mpi::getMPIRank() == 0) {
+         
+          gmshfem::msg::info << "Column number " << i << " over " << globSize << "." << gmshfem::msg::endl;
         }
-      };
+        MatMult(A, X, Y);
+
+        // Scatter to rank 0
+        VecScatterBegin(ctx, Y, Y_seq, INSERT_VALUES, SCATTER_FORWARD);
+        VecScatterEnd(ctx, Y, Y_seq, INSERT_VALUES, SCATTER_FORWARD);
 
-      auto activateBilinear = [=](const unsigned long long idom) {
-        for(auto formulation : *_volume[idom]) {
-          if(formulation->isBilinear()) {
-            formulation->activate();
+        VecGetValues(Y_seq, globSize, &aiVector[0], &aVector[0]);
+
+
+
+        aj[i + 1] = aj[i];
+        for(auto j = 0ULL; j < aVector.size(); ++j) {
+          if(aVector[j] != 0) {
+            aj[i + 1]++;
+            ai.push_back(j);
+            a.push_back(T_Scalar(aVector[j]));
           }
+          
         }
-        for(auto formulation : _surface[idom]) {
-          for(auto term : *formulation.second) {
-            if(term->isBilinear()) {
-              term->activate();
-            }
-          }
+
+        ai.reserve(aj.size() + globSize);
+        a.reserve(a.size() + globSize);
+
+        // Update RHS
+        VecSetValue(X, i, 0.0, INSERT_VALUES);
+        if(i + 1 < globSize)
+          VecSetValue(X, i + 1, 1.0, INSERT_VALUES);
+        VecAssemblyBegin(X);
+        VecAssemblyEnd(X);
+      }
+
+      for(auto idom = 0ULL; idom < _volume.size(); ++idom) {
+        _activateBilinear(idom);
+      }
+
+      VecScatterDestroy(&ctx);
+
+
+      gmshfem::algebra::MatrixCCS< T_Scalar > mat(std::move(ai), std::move(aj), std::move(a));
+
+      time.tock();
+      if(mpi::getMPIRank() == 0) {
+        gmshfem::msg::info << "Computed the matrix in " << time.time() << "s." << gmshfem::msg::endl;
+      }
+      return mat;
+    }
+
+    template< class T_Scalar >
+    gmshfem::algebra::MatrixCCS< T_Scalar > Formulation< T_Scalar >::computeMatrixBlock()
+    {
+      gmshfem::common::Timer time;
+      time.tick();
+
+      const int outerVerbosity = gmshfem::common::Options::instance()->verbose;
+      const int innerVerbosity = (outerVerbosity != 0 ? outerVerbosity - 1 : 0);
+
+      Mat A;
+      Vec Y, Y_seq;
+      VecScatter     ctx;
+
+      // Create a distributed vector that acts as a RHS
+      PetscInt locSize = _rhs.size();
+      VecCreateMPI(MPI_COMM_WORLD, locSize, PETSC_DETERMINE, &Y);
+      VecSetUp(Y);
+      
+      // Create a sequential full vector and a scatter object to synchronize the global vector of outputs,
+      // Needed to build the matrix
+      PetscInt globSize;
+      VecGetSize(Y, &globSize);
+
+      // Create the identity matrix
+      Mat ID;
+      {
+        MatCreate(PETSC_COMM_WORLD, &ID);
+        MatSetSizes(ID, locSize, PETSC_DECIDE, globSize, globSize);
+        MatSetType(ID, MATMPIDENSE);
+        MatSetUp(ID);
+        MatZeroEntries(ID);
+
+        PetscInt start, end;
+        MatGetOwnershipRange(ID, &start, &end);
+
+        for(PetscInt i = start; i < end; ++i) {
+          PetscScalar val = 1.0;
+          MatSetValues(ID, 1, &i, 1, &i, &val, INSERT_VALUES);
         }
-      };
 
+        MatAssemblyBegin(ID, MAT_FINAL_ASSEMBLY);
+        MatAssemblyEnd(ID, MAT_FINAL_ASSEMBLY);
+      }
+
+      VecCreateSeq(MPI_COMM_SELF, globSize, &Y_seq);
+      VecScatterCreateToAll(Y, &ctx, &Y_seq);
+
+      if (mpi::getMPIRank() == 0)
+        gmshfem::msg::info << "Local size:" << locSize << " and global size:" << globSize << "." << gmshfem::msg::endl;
+
+
+      // Setup the A matrix
+      MatCreateShell(MPI_COMM_WORLD, locSize, locSize, globSize, globSize, this, &A);
+      MatShellSetOperation(A, MATOP_MULT, (void (*)(void)) & MatVectProductI_A< T_Scalar >);
+
+      _physicalCommutator = false;
+      _artificialCommutator = true;
+      togglePhysicalAndArtificialSourceTerms();
+
+  
+      // Compute sub matrices then disable bilinear terms (to assemble only RHS)
       for(auto idom = 0ULL; idom < _volume.size(); ++idom) {
         _volume[idom]->setAttribute("ddm::physicalCommutator", _physicalCommutator);
         _volume[idom]->setAttribute("ddm::artificialCommutator", _artificialCommutator);
@@ -1212,30 +1321,44 @@ namespace gmshddm
         _volume[idom]->removeSystem();
         _volume[idom]->initSystem();
         _volume[idom]->pre();
-	_volume[idom]->assemble();
-	deactivateBilinear(idom);
+        _volume[idom]->assemble();
+        _deactivateBilinear(idom);
         gmshfem::common::Options::instance()->verbose = outerVerbosity;
       }
 
 
-      gmshfem::algebra::Vector< T_Scalar > vec(_rhs.size());
-      std::vector< PetscInt > aiVector(_rhs.size());
-      for(auto i = 0ULL; i < _rhs.size(); ++i) {
-        aiVector[i] = i;
-      }
-      std::vector< PetscScalar > aVector(_rhs.size());
+      Mat Yblock;
+      MatDuplicate(ID, MAT_DO_NOT_COPY_VALUES, &Yblock);
+      MatMatProductI_A< T_Scalar >(A, ID, Yblock);
 
-      std::vector< unsigned long long > aj(_rhs.size() + 1, 0);
+      std::vector< PetscInt > aiVector(globSize);
+      std::vector< PetscScalar > aVector(globSize);
+
+      std::vector< unsigned long long > aj(globSize + 1, 0);
       std::vector< unsigned long long > ai;
-      ai.reserve(_rhs.size());
+      ai.reserve(globSize);
       std::vector< T_Scalar > a;
-      a.reserve(_rhs.size());
-      for(auto i = 0ULL; i < _rhs.size(); ++i) {
-        gmshfem::msg::info << "Column number " << i << " over " << _rhs.size() << "." << gmshfem::msg::endl;
-        vec[i] = 1.;
-        vec.removePetsc();
-        MatVectProductI_A< T_Scalar >(A, vec.getPetsc(), Y);
-        VecGetValues(Y, _rhs.size(), &aiVector[0], &aVector[0]);
+      a.reserve(globSize);
+
+      for(auto i = 0ULL; i < globSize; ++i) {
+        aiVector[i] = i;
+      }
+
+      for(auto i = 0ULL; i < globSize; ++i) {
+
+        
+        if(mpi::getMPIRank() == 0) {
+
+          gmshfem::msg::info << "Column number " << i << " over " << globSize << "." << gmshfem::msg::endl;
+        }
+
+        // Scatter to rank 0
+        MatGetColumnVector(Yblock, Y, i);
+        VecScatterBegin(ctx, Y, Y_seq, INSERT_VALUES, SCATTER_FORWARD);
+        VecScatterEnd(ctx, Y, Y_seq, INSERT_VALUES, SCATTER_FORWARD);
+        VecGetValues(Y_seq, globSize, &aiVector[0], &aVector[0]);
+
+       
         aj[i + 1] = aj[i];
         for(auto j = 0ULL; j < aVector.size(); ++j) {
           if(aVector[j] != 0) {
@@ -1244,18 +1367,26 @@ namespace gmshddm
             a.push_back(T_Scalar(aVector[j]));
           }
         }
-        ai.reserve(aj.size() + _rhs.size());
-        a.reserve(a.size() + _rhs.size());
-        vec[i] = 0.;
+
+        ai.reserve(aj.size() + globSize);
+        a.reserve(a.size() + globSize);
+
       }
+      
 
       for(auto idom = 0ULL; idom < _volume.size(); ++idom) {
-        activateBilinear(idom);
+        _activateBilinear(idom);
       }
 
-
+      VecScatterDestroy(&ctx);
       gmshfem::algebra::MatrixCCS< T_Scalar > mat(std::move(ai), std::move(aj), std::move(a));
+
+      time.tock();
+      if(mpi::getMPIRank() == 0) {
+        gmshfem::msg::info << "Computed the matrix in " << time.time() << "s." << gmshfem::msg::endl;
+      }
       return mat;
+
     }
 
     template< class T_Scalar >
@@ -1288,6 +1419,18 @@ namespace gmshddm
       return MatVectProductImpl<T_Scalar>(A, X, Y, true);
     }
 
+    template< class T_Scalar >
+    int MatMatProductI_A(Mat A, Mat X, Mat Y)
+    {
+      return MatMatProductImpl<T_Scalar>(A, X, Y, true);
+    }
+
+    template< class T_Scalar >
+    int MatMatProductA(Mat A, Mat X, Mat Y)
+    {
+      return MatMatProductImpl<T_Scalar>(A, X, Y, false);
+    }
+
     template< class T_Scalar >
     void Formulation<T_Scalar>::_fromPetscToInterfaceFields(Vec G)
     {
@@ -1415,6 +1558,40 @@ namespace gmshddm
       }
     }
 
+    template< class T_Scalar >
+    void Formulation< T_Scalar >::_activateBilinear(const unsigned long long idom)
+    {
+      for(auto formulation : *_volume[idom]) {
+        if(formulation->isBilinear()) {
+          formulation->activate();
+        }
+      }
+      for(auto formulation : _surface[idom]) {
+        for(auto term : *formulation.second) {
+          if(term->isBilinear()) {
+            term->activate();
+          }
+        }
+      }
+    }
+
+    template< class T_Scalar >
+    void Formulation< T_Scalar >::_deactivateBilinear(const unsigned long long idom)
+    {
+      for(auto formulation : *_volume[idom]) {
+        if(formulation->isBilinear()) {
+          formulation->deactivate();
+        }
+      }
+      for(auto formulation : _surface[idom]) {
+        for(auto term : *formulation.second) {
+          if(term->isBilinear()) {
+            term->deactivate();
+          }
+        }
+      }
+    }
+
 
     template< class T_Scalar >
     int MatVectProductImpl(Mat A, Vec X, Vec Y, bool IA)
@@ -1497,6 +1674,44 @@ namespace gmshddm
       PetscFunctionReturn(0);
     }
 
+    template< class T_Scalar >
+    int MatMatProductImpl(Mat A, Mat X, Mat Y, bool IA)
+    {
+
+      // EXTREMLY UNOPTIMIZED
+      PetscFunctionBegin;
+
+      PetscInt n, p, n_local, p_local;
+      MatGetSize(X, &n, &p);
+      MatGetLocalSize(X, &n_local, &p_local);
+      Vec Xcol, Ycol;
+      VecCreateMPI(MPI_COMM_WORLD, n_local, n, &Xcol);
+      VecDuplicate(Xcol, &Ycol);
+
+    for (auto j = 0; j < p; ++j) {
+      MatGetColumnVector(X, Xcol, j);
+      MatVectProductImpl<T_Scalar>(A, Xcol, Ycol, IA);
+
+      // Copy to matrix
+      PetscInt low, high, i;
+      PetscScalar val;
+
+      // Get the ownership range for the current rank
+      VecGetOwnershipRange(Ycol, &low, &high);
+
+      // Loop over local rows and set the value for the specified column
+      for (i = low; i < high; ++i) {
+          VecGetValues(Ycol, 1, &i, &val);
+          MatSetValue(Y, i, j, val, INSERT_VALUES);
+      }
+
+    }
+
+      PetscFunctionReturn(0);
+    }
+
+    
+
 
     template class Formulation< std::complex< double > >;
     template class Formulation< std::complex< float > >;
diff --git a/src/problem/Formulation.h b/src/problem/Formulation.h
index a20579ec482b612a173e835f8ef227a5c096eabb..9cd95bb6593c268c2cee0cbe4edd7073f182b8cb 100644
--- a/src/problem/Formulation.h
+++ b/src/problem/Formulation.h
@@ -32,6 +32,12 @@ namespace gmshddm
     int MatVectProductI_A(Mat A, Vec X, Vec Y);
     template< class T_Scalar >
     int MatVectProductImpl(Mat A, Vec X, Vec Y, bool IA);
+    template< class T_Scalar >
+    int MatMatProductImpl(Mat A, Mat X, Mat Y, bool IA);
+    template< class T_Scalar >
+    int MatMatProductA(Mat A, Mat X, Mat Y);
+    template< class T_Scalar >
+    int MatMatProductI_A(Mat A, Mat X, Mat Y);
 
     template< class T_Scalar >
     class AbstractIterativeSolver;
@@ -79,6 +85,9 @@ namespace gmshddm
       void _assembleAndSolveSurface();
       void _extractRHS();
 
+      void _activateBilinear (const unsigned long long idom);
+      void _deactivateBilinear (const unsigned long long idom);
+
      public:
       Formulation(const std::string &name, const std::vector< std::vector< unsigned int > > &topology);
       Formulation(const std::string &name, const std::vector< gmshfem::problem::Formulation< T_Scalar > * > &volume, const std::vector< std::unordered_map< unsigned int, gmshfem::problem::Formulation< T_Scalar > * > > &surface);
@@ -118,6 +127,7 @@ namespace gmshddm
       gmshfem::common::Timer solve(const std::string &solver, const double tolerance = 1e-6, const int iterMax = 1000, const bool sameMatrixWithArtificialAndPhysicalSources = false, const bool skipFinalSolutionComputation = false);
       gmshfem::common::Timer solve(AbstractIterativeSolver<T_Scalar> &solver, const double tolerance = 1e-6, const int iterMax = 1000, const bool sameMatrixWithArtificialAndPhysicalSources = false, const bool skipFinalSolutionComputation = false);
       gmshfem::algebra::MatrixCCS< T_Scalar > computeMatrix();
+      gmshfem::algebra::MatrixCCS< T_Scalar > computeMatrixBlock();
 
       void setSolutionIntoInterfaceFields(const std::vector< T_Scalar > &solution);
       const gmshfem::algebra::Vector< T_Scalar > &getRHS() const;
@@ -127,7 +137,10 @@ namespace gmshddm
 
       friend int MatVectProductA< T_Scalar >(Mat A, Vec X, Vec Y);
       friend int MatVectProductI_A< T_Scalar >(Mat A, Vec X, Vec Y);
+      friend int MatMatProductI_A< T_Scalar >(Mat A, Mat X, Mat Y);
+      friend int MatMatProductA< T_Scalar >(Mat A, Mat X, Mat Y);
       friend int MatVectProductImpl< T_Scalar >(Mat A, Vec X, Vec Y, bool IA);
+      friend int MatMatProductImpl< T_Scalar >(Mat A, Mat X, Mat Y, bool IA);
 
 
       // T_Scalar, PetscScalar, T_Interger