diff --git a/include/connectivity.h b/include/connectivity.h
index 23a4fa2c569552a5bd5022dc8ca4e3fb202e47ec..83ba43508e54f3da2ffc9ebcf9d5568141973bce 100644
--- a/include/connectivity.h
+++ b/include/connectivity.h
@@ -330,4 +330,5 @@ public:
     }
 };
 
-#endif /* USE_MPI */
\ No newline at end of file
+#endif /* USE_MPI */
+
diff --git a/include/gmsh_io.h b/include/gmsh_io.h
index 10a780fee37b1f68e56374781ffcc8db117159bb..431e135048d3d1352f970352806f541023164952 100644
--- a/include/gmsh_io.h
+++ b/include/gmsh_io.h
@@ -77,7 +77,26 @@ struct boundary_descriptor
     }
 };
 
+#ifdef USE_MPI
+struct comm_descriptor
+{
+    int                     boundary_tag;
+    int                     partition_mine;
+    int                     partition_other;
+    int                     rank_mine;
+    int                     rank_other;
+    std::vector<size_t>     dof_mapping;
+    std::vector<element_key>    fks;
+};
 
+inline
+std::ostream& operator<<(std::ostream& os, const comm_descriptor& cd)
+{
+    os << "tag: " << cd.boundary_tag << ", PM: " << cd.partition_mine << ", PO: " << cd.partition_other;
+    os << ", RM: " << cd.rank_mine << ", RO: " << cd.rank_other << ", map size: " << cd.dof_mapping.size();
+    return os;
+}
+#endif /* USE_MPI */
 
 #define IMPLIES(a, b) ((not(a)) or (b))
 #define IFF(a,b) (IMPLIES(a,b) and IMPLIES(b,a))
@@ -104,7 +123,7 @@ class model
 #endif /* USE_MPI */
 
     /* Map from boundary tag to all the faces of the entity with that tag */
-    std::map<int, std::vector<element_key>>     boundary_map;   
+    std::map<int, std::vector<element_key>>     boundary_map;  
 
     std::vector<entity>                         entities;
     std::vector<boundary_descriptor>            bnd_descriptors;
@@ -130,6 +149,8 @@ class model
 
     /* RANK 0 only: copy of remote entities for postpro */
     std::map<int, std::vector<entity>>          remote_entities;
+
+    std::map<int, comm_descriptor>              ipc_boundary_comm_table; 
 #endif /* USE_MPI */
 
     using entofs_pair = std::pair<size_t, size_t>;
@@ -147,7 +168,8 @@ class model
     void    import_gmsh_entities_rankN(void);
     int     my_partition(void) const;
     void    make_partition_to_entities_map(void);
-    void    map_interprocess_boundaries();
+    void    map_interprocess_boundaries(void);
+    void    make_comm_descriptors(void);
 #endif /* USE_MPI */
 
     void    make_boundary_to_faces_map(void);
@@ -168,6 +190,10 @@ public:
     void populate_from_gmsh(void);
 
 #ifdef USE_MPI
+    const std::map<int, comm_descriptor>& comm_descriptors(void) const {
+        return ipc_boundary_comm_table;
+    }
+
     const std::vector<int>& world_entities_tags(void) const
     {
         return all_entities_tags;
diff --git a/include/gmsh_mpi.h b/include/gmsh_mpi.h
index b9a0ef9ee94492f738c19493b04f357846573015..f05a6be1227f604706d6db112b32450721ff5bc3 100644
--- a/include/gmsh_mpi.h
+++ b/include/gmsh_mpi.h
@@ -3,7 +3,10 @@
 #include <vector>
 #include <map>
 #include <array>
-#include <mpi/mpi.h>
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Weverything"
+#include <mpi.h>
+#pragma clang diagnostic pop
 
 #include "eigen.h"
 
@@ -245,4 +248,6 @@ priv_MPI_Bcast(std::map<T1, T2>& map, int root, MPI_Comm comm)
         for (size_t i = 0; i < left.size(); i++)
             map[ left[i] ] = right[i]; 
     }
-}
\ No newline at end of file
+}
+
+
diff --git a/include/maxwell_interface.h b/include/maxwell_interface.h
index 2c344cc66e3fd51646feccfb305efb27b6380aa5..46ec674c15549f0bf60e7628e77b410f8ec98967 100644
--- a/include/maxwell_interface.h
+++ b/include/maxwell_interface.h
@@ -19,7 +19,6 @@
 #endif
 
 #ifdef USE_MPI
-#include <mpi/mpi.h>
 #include "gmsh_mpi.h"
 #endif /* USE_MPI */
 
@@ -268,6 +267,10 @@ struct solver_state
     vecxd                               Jy_src;
     vecxd                               Jz_src;
     field                               bndsrcs;
+
+#ifdef USE_MPI
+    field                               ipc;                    
+#endif /* USE_MPI */
 };
 
 #ifdef ENABLE_GPU_SOLVER
@@ -326,7 +329,7 @@ void init_from_model(const model&, solver_state&);
 void init_matparams(const model&, solver_state&, const parameter_loader&);
 //void apply_operator(solver_state&, const field&, field&);
 void export_fields_to_silo(const model&, solver_state&, const parameter_loader&);
-void timestep(solver_state&, const parameter_loader&, time_integrator_type);
+void timestep(const model&, solver_state&, const parameter_loader&, time_integrator_type);
 void prepare_sources(const model&, maxwell::solver_state&, maxwell::parameter_loader&);
 void do_sources(const model&, maxwell::solver_state&, maxwell::parameter_loader&);
 void swap(maxwell::solver_state&, const parameter_loader&);
@@ -339,7 +342,7 @@ void init_from_model(const model&, solver_state_gpu&);
 void init_matparams(const model&, solver_state_gpu&, const parameter_loader&);
 //void apply_operator(solver_state_gpu&, const field_gpu&, field_gpu&);
 void export_fields_to_silo(const model&, solver_state_gpu&, const parameter_loader&);
-void timestep(solver_state_gpu&, const parameter_loader&, time_integrator_type);
+void timestep(const model&, solver_state_gpu&, const parameter_loader&, time_integrator_type);
 void prepare_sources(const model&, maxwell::solver_state_gpu&, maxwell::parameter_loader&);
 void do_sources(const model&, maxwell::solver_state_gpu&, maxwell::parameter_loader&);
 void swap(maxwell::solver_state_gpu&, const parameter_loader&);
diff --git a/include/silo_output.hpp b/include/silo_output.hpp
index ee7d1521b63f5838b87e813221439e1fade8ddc2..c5ac5c9c56870392a88bf302b0cde6a3756cf50d 100644
--- a/include/silo_output.hpp
+++ b/include/silo_output.hpp
@@ -1,7 +1,11 @@
 #pragma once
 
 #include <vector>
+
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Weverything"
 #include <silo.h>
+#pragma clang diagnostic pop
 
 class silo
 {
diff --git a/src/gmsh_io.cpp b/src/gmsh_io.cpp
index acc18cc645b32f2e44d640f919191926c449ccf9..8753340db0f204e45e48811736a62d59454e9f23 100644
--- a/src/gmsh_io.cpp
+++ b/src/gmsh_io.cpp
@@ -1,12 +1,13 @@
 #include <iostream>
 #include <sstream>
+#include <fstream>
 #include <cassert>
 
 #include "gmsh_io.h"
 #include "entity.h"
 
 #ifdef USE_MPI
-#include <mpi/mpi.h>
+#include <mpi.h>
 #endif /* USE_MPI */
 
 std::string
@@ -531,6 +532,111 @@ model::map_interprocess_boundaries(void)
             connect(e, part);
     }
 }
+
+void
+model::make_comm_descriptors(void)
+{
+    /* Make a vector mapping element_key to entity tag */
+    using bfk_t = std::pair<element_key, int>;
+    std::vector<bfk_t> bfk;
+    for (auto& [tag, keys] : boundary_map)
+    {
+        if ( not is_interprocess_boundary(tag) )
+            continue;
+        
+        for (auto& k : keys)
+            bfk.push_back( std::make_pair(k, tag) );
+    }
+    /* Sort it for lookups */
+    auto comp = [](const bfk_t& a, const bfk_t& b) -> bool {
+        return a.first < b.first;
+    };
+    std::sort(bfk.begin(), bfk.end(), comp);
+
+    auto lbcomp = [](const bfk_t& a, const element_key& b) -> bool {
+        return a.first < b;
+    };
+
+    size_t flux_base = 0;
+    /* For each entity */
+
+    struct lazy_hack {
+        element_key             fk;
+        size_t                  fbs;
+        std::vector<size_t>     offsets;
+        size_t                  ibtag;
+    };
+
+    std::vector<lazy_hack> lhs;
+
+    for (auto& e : entities)
+    {
+        /* and for each face */
+        for (size_t iF = 0; iF < e.num_faces(); iF++)
+        {
+            /* get the element key */
+            auto& pf = e.face(iF);
+            auto& rf = e.face_refelem(pf);
+            auto fk = element_key(pf);
+
+            /* and lookup it in the vector we created before. */
+            auto itor = std::lower_bound(bfk.begin(), bfk.end(), fk, lbcomp);
+            if ( (itor == bfk.end()) or not ((*itor).first == fk) )
+                continue;
+            auto ibtag = (*itor).second;
+            auto fbs = rf.num_basis_functions();
+            /*
+            auto& cd = ipc_boundary_comm_table[ibtag];
+            for (size_t i = 0; i < fbs; i++)
+                cd.dof_mapping.push_back(flux_base + fbs*iF + i);
+            cd.fks.push_back(fk);
+            */
+            lazy_hack lh;
+            lh.fk = fk;
+            lh.fbs = fbs;
+            for (size_t i = 0; i < fbs; i++)
+                lh.offsets.push_back(flux_base + fbs*iF + i);
+            lh.ibtag = ibtag;
+            lhs.push_back( std::move(lh) );
+        }
+
+        flux_base += e.num_fluxes();
+    }
+
+    auto lhcomp = [](const lazy_hack& a, const lazy_hack& b) -> bool {
+        return a.fk < b.fk;
+    };
+    std::sort(lhs.begin(), lhs.end(), lhcomp);
+    for (auto& lh : lhs)
+    {
+        auto& cd = ipc_boundary_comm_table[lh.ibtag];
+        for (size_t i = 0; i < lh.fbs; i++)
+            cd.dof_mapping.push_back( lh.offsets[i] );
+    }
+
+
+    for (auto& [tag, cd] : ipc_boundary_comm_table)
+    {
+        cd.boundary_tag = tag;
+        auto parts = comm_map.at(tag);
+        assert(parts.size() == 2);
+        if (parts[0] == my_partition())
+        {
+            cd.partition_mine = parts[0];
+            cd.partition_other = parts[1];
+            cd.rank_mine = parts[0]-1;
+            cd.rank_other = parts[1]-1;
+        }
+        else
+        {
+            cd.partition_mine = parts[1];
+            cd.partition_other = parts[0];
+            cd.rank_mine = parts[1]-1;
+            cd.rank_other = parts[0]-1;
+        }
+    }
+}
+
 #endif
 
 void
@@ -581,6 +687,7 @@ model::populate_from_gmsh_rank0()
 #ifdef USE_MPI
     map_interprocess_boundaries();
     ipconn.mpi_bcast(0, MPI_COMM_WORLD);
+    make_comm_descriptors();
 #endif /* USE_MPI */
 }
 
@@ -610,6 +717,7 @@ model::populate_from_gmsh_rankN()
     import_gmsh_entities_rankN();
     map_boundaries();
     ipconn.mpi_bcast(0, MPI_COMM_WORLD);
+    make_comm_descriptors();
 }
 #endif /* USE_MPI */
 
diff --git a/src/maxwell_common.cpp b/src/maxwell_common.cpp
index e6a0febad004b97ea4e765622e2bf5bef972347f..c8f83c026a65c3b2f94473159492049dd8ed5bf2 100644
--- a/src/maxwell_common.cpp
+++ b/src/maxwell_common.cpp
@@ -4,7 +4,9 @@
 
 #define DIRICHLET       2.0
 #define ROBIN           1.0
+#ifdef USE_MPI
 #define INTERPROCESS    1.0
+#endif /* USE_MPI */
 #define NEUMANN         0.0
 
 namespace maxwell {
@@ -30,12 +32,14 @@ init_boundary_conditions(const model& mod, const parameter_loader& mpl,
                 continue;
 
             double bc_coeff = DIRICHLET;
+#ifdef USE_MPI
             if (bd.type == face_type::INTERPROCESS_BOUNDARY)
             {
                 bc_coeff = INTERPROCESS;
             }
             else
             {
+#endif /* USE_MPI */
                 switch ( mpl.boundary_type(bd.material_tag()) )
                 {
                     case boundary_condition::UNSPECIFIED:
@@ -56,8 +60,9 @@ init_boundary_conditions(const model& mod, const parameter_loader& mpl,
                         bc_coeff = NEUMANN;
                         break;
                }
+#ifdef USE_MPI
             }
-
+#endif /* USE_MPI */
             auto& pf = e.face(iF);
             auto& rf = e.face_refelem(pf);
             auto num_bf = rf.num_basis_functions();
diff --git a/src/maxwell_cpu.cpp b/src/maxwell_cpu.cpp
index 8ebd00e3651def056241cad3805d4777525b2e3e..d0d07a4508e3ce942a94621c1ce9f8f2895e74dd 100644
--- a/src/maxwell_cpu.cpp
+++ b/src/maxwell_cpu.cpp
@@ -83,6 +83,15 @@ init_from_model(const model& mod, solver_state& state)
         state.eds.push_back( std::move(ed) );
     }
 
+    auto& cds = mod.comm_descriptors();
+
+    size_t dofs = 0;
+    for (auto& [tag, cd] : cds)
+        if (cd.dof_mapping.size() > dofs)
+            dofs = cd.dof_mapping.size();
+
+    state.ipc.resize(dofs);
+
     state.curr_time = 0.0;
     state.curr_timestep = 0;
 }
@@ -176,6 +185,56 @@ compute_field_jumps(solver_state& state, const field& in)
     }
 }
 
+static void
+exchange_boundary_data(const model& mod, solver_state& state)
+{
+    auto& cds = mod.comm_descriptors();
+    for (auto& [tag, cd] : cds)
+    {
+        if (cd.rank_mine < cd.rank_other)
+        {
+            for (size_t i = 0; i < cd.dof_mapping.size(); i++)
+            {
+                auto srci = cd.dof_mapping[i];
+                state.ipc.Ex[i] = state.jumps.Ex[srci];
+                state.ipc.Ey[i] = state.jumps.Ey[srci];
+                state.ipc.Ez[i] = state.jumps.Ez[srci];
+                state.ipc.Hx[i] = state.jumps.Hx[srci];
+                state.ipc.Hy[i] = state.jumps.Hy[srci];
+                state.ipc.Hz[i] = state.jumps.Hz[srci];
+            }
+            send_field(state.ipc, cd.rank_other, tag, 0, cd.dof_mapping.size(), MPI_COMM_WORLD);
+            receive_field(state.ipc, cd.rank_other, tag, 0, cd.dof_mapping.size(), MPI_COMM_WORLD);
+            for (size_t i = 0; i < cd.dof_mapping.size(); i++)
+            {
+                auto dsti = cd.dof_mapping[i];
+                state.jumps.Ex[dsti] -= state.ipc.Ex[i];
+                state.jumps.Ey[dsti] -= state.ipc.Ey[i];
+                state.jumps.Ez[dsti] -= state.ipc.Ez[i];
+                state.jumps.Hx[dsti] -= state.ipc.Hx[i];
+                state.jumps.Hy[dsti] -= state.ipc.Hy[i];
+                state.jumps.Hz[dsti] -= state.ipc.Hz[i];
+            }
+        }
+        else
+        {
+            receive_field(state.ipc, cd.rank_other, tag, 0, cd.dof_mapping.size(), MPI_COMM_WORLD);
+            for (size_t i = 0; i < cd.dof_mapping.size(); i++)
+            {
+                auto dsti = cd.dof_mapping[i];
+                double tmp;
+                tmp = state.jumps.Ex[dsti]; state.jumps.Ex[dsti] -= state.ipc.Ex[i]; state.ipc.Ex[i] = tmp;
+                tmp = state.jumps.Ey[dsti]; state.jumps.Ey[dsti] -= state.ipc.Ey[i]; state.ipc.Ey[i] = tmp;
+                tmp = state.jumps.Ez[dsti]; state.jumps.Ez[dsti] -= state.ipc.Ez[i]; state.ipc.Ez[i] = tmp;
+                tmp = state.jumps.Hx[dsti]; state.jumps.Hx[dsti] -= state.ipc.Hx[i]; state.ipc.Hx[i] = tmp;
+                tmp = state.jumps.Hy[dsti]; state.jumps.Hy[dsti] -= state.ipc.Hy[i]; state.ipc.Hy[i] = tmp;
+                tmp = state.jumps.Hz[dsti]; state.jumps.Hz[dsti] -= state.ipc.Hz[i]; state.ipc.Hz[i] = tmp;
+            }
+            send_field(state.ipc, cd.rank_other, tag, 0, cd.dof_mapping.size(), MPI_COMM_WORLD);
+        }
+    }
+}
+
 static void
 compute_field_jumps_E(solver_state& state, const field& in)
 {
@@ -371,9 +430,14 @@ compute_fluxes_planar_H(solver_state& state)
 }
 
 static void
-compute_fluxes(solver_state& state, const field& in, field& out)
+compute_fluxes(const model& mod, solver_state& state, const field& in, field& out)
 {
     compute_field_jumps(state, in);
+
+#ifdef USE_MPI
+    exchange_boundary_data(mod, state);
+#endif /* USE_MPI */
+
     compute_fluxes_planar(state);
     
     for (auto& ed : state.eds)
@@ -458,10 +522,10 @@ compute_rk4_weighted_sum(solver_state& state, const field& in, double dt, field&
 }
 
 static void
-apply_operator(solver_state& state, const field& curr, field& next)
+apply_operator(const model& mod, solver_state& state, const field& curr, field& next)
 {
     compute_curls(state, curr, next);
-    compute_fluxes(state, curr, next);
+    compute_fluxes(mod, state, curr, next);
 }
 
 static void
@@ -493,26 +557,26 @@ leapfrog(solver_state& state)
 }
 
 void
-timestep(solver_state& state, const parameter_loader&, time_integrator_type ti)
+timestep(const model& mod, solver_state& state, const parameter_loader&, time_integrator_type ti)
 {
     if (ti == time_integrator_type::EULER)
     {
-        apply_operator(state, state.emf_curr, state.tmp);
+        apply_operator(mod, state, state.emf_curr, state.tmp);
         compute_euler_update(state, state.emf_curr, state.tmp, state.delta_t, state.emf_next);
     }
 
     if (ti == time_integrator_type::RK4)
     {    
-        apply_operator(state, state.emf_curr, state.k1);
+        apply_operator(mod, state, state.emf_curr, state.k1);
 
         compute_euler_update(state, state.emf_curr, state.k1, 0.5*state.delta_t, state.tmp);
-        apply_operator(state, state.tmp, state.k2);
+        apply_operator(mod, state, state.tmp, state.k2);
 
         compute_euler_update(state, state.emf_curr, state.k2, 0.5*state.delta_t, state.tmp);
-        apply_operator(state, state.tmp, state.k3);
+        apply_operator(mod, state, state.tmp, state.k3);
         
         compute_euler_update(state, state.emf_curr, state.k3, state.delta_t, state.tmp);
-        apply_operator(state, state.tmp, state.k4);
+        apply_operator(mod, state, state.tmp, state.k4);
 
         compute_rk4_weighted_sum(state, state.emf_curr, state.delta_t, state.emf_next);
     }
diff --git a/src/maxwell_gpu.cpp b/src/maxwell_gpu.cpp
index 2314859e3ff2b17aec4ba9dbbdfd4b23799ebfa6..6756b64bbe831f2cb9c5d12a7dd456aa420c8357 100644
--- a/src/maxwell_gpu.cpp
+++ b/src/maxwell_gpu.cpp
@@ -413,7 +413,7 @@ compute_rk4_weighted_sum(solver_state_gpu& state, const field_gpu& in,
 }
 
 void
-timestep(solver_state_gpu& state, const parameter_loader& mpl, time_integrator_type ti)
+timestep(const model& mod, solver_state_gpu& state, const parameter_loader& mpl, time_integrator_type ti)
 {
     //timecounter tc;
     //tc.tic();
diff --git a/src/maxwell_solver.cpp b/src/maxwell_solver.cpp
index ebe62490f11b6ecf72fa1b456a7ce32d70877cff..27fab666e1d169a3d9035ac05d3bd06027567c37 100644
--- a/src/maxwell_solver.cpp
+++ b/src/maxwell_solver.cpp
@@ -6,7 +6,10 @@
 #endif
 
 #ifdef USE_MPI
-#include <mpi/mpi.h>
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Weverything"
+#include <mpi.h>
+#pragma clang diagnostic pop
 #endif
 
 #include <fstream>
@@ -219,7 +222,7 @@ void test_it(const model& mod, State& state, maxwell::parameter_loader& mpl)
     for(size_t i = 0; i < num_timesteps; i++)
     {
         mpl.call_timestep_callback(i);
-        timestep(state, mpl, ti);
+        timestep(mod, state, mpl, ti);
         do_sources(mod, state, mpl);
         
         if ( (silo_output_rate != 0) and (i%silo_output_rate == 0) )
@@ -237,7 +240,7 @@ void test_it(const model& mod, State& state, maxwell::parameter_loader& mpl)
     }
 }
 
-void
+static void
 register_lua_usertypes(maxwell::parameter_loader& mpl)
 {
 #ifdef ENABLE_EXP_GEOMETRIC_DIODE
@@ -253,7 +256,7 @@ register_lua_usertypes(maxwell::parameter_loader& mpl)
 #endif /* ENABLE_EXP_GEOMETRIC_DIODE */
 }
 
-void
+static void
 register_lua_usertypes_bystate(maxwell::parameter_loader& mpl,
     model& mod, maxwell::solver_state& state)
 {
@@ -268,7 +271,7 @@ register_lua_usertypes_bystate(maxwell::parameter_loader& mpl,
 }
 
 #ifdef ENABLE_GPU_SOLVER
-void
+static void
 register_lua_usertypes_bystate(maxwell::parameter_loader& mpl,
     model& mod, maxwell::solver_state_gpu& state)
 {}