diff --git a/doc/lua_api.md b/doc/lua_api.md
index 409abc65f8e8419bf9f065b493387cc153e20979..c1cf993f9e35e96e749f8fc4d9cdae6a278884ab 100644
--- a/doc/lua_api.md
+++ b/doc/lua_api.md
@@ -7,7 +7,7 @@ This file documents the Lua API available in the solver. API has a *general* par
 The *general* part has to do with configuration not related to a specific problem (i.e. timestep, geometry file), whereas the *problem-specific* part configures all the parameter that make sense only on a given problem (i.e. materials and sources). This separation is reflected also in how the configuration is handled internally.
 
 ## API version
-This document describes the API available on the version v0.1 of the solver.
+This document describes the API available in version v0.3 of the solver.
 
 ## General interface
 
@@ -20,16 +20,21 @@ This document describes the API available on the version v0.1 of the solver.
 - `sim.use_gpu` (0/1): enable/disable GPU usage.
 - `sim.approx_order` (integer): method approximation order.
 - `sim.geom_order` (integer): geometry order. Only order 1 supported for now.
-- `sim.time_integrator` (string): `"rk4"`, `"leapfrog"` or `"euler"`. Euler is only for test purporses, as it is numerically instable. Leapfrog works on CPU for now, use it if you have dissipative materials.
+- `sim.time_integrator` (string): `"rk4"`, `"leapfrog"` or `"euler"`. Euler is provided only for testing purposes, as it is numerically unstable.
 
 ### Postprocessing general variables
 
-- `postpro.silo_output_rate` (integer):
+- `postpro.silo_output_rate` (integer): export the fields to Silo every N timesteps; 0 disables Silo output.
 - `postpro.cycle_print_rate` (integer):
 
 ### Mesh variables
 - `mesh.scalefactor` (real): apply a scaling factor to the mesh.
 
+### Parallel execution information (available only if MPI support is enabled)
+The parallel solver runs as a set of separate MPI processes, each of which loads and executes its own instance of the configuration script. Be careful with global variables and other shared state: modifications made in one process are **not** reflected in the others. Although the configuration script should in general not depend on the rank of the current process, the following variables are exposed (see the example after the list):
+- `parallel.comm_rank` (integer): The MPI rank of the current solver process
+- `parallel.comm_size` (integer): The MPI communicator size
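+
+For example, a configuration script can restrict console output to a single process. This is a minimal sketch; the printed message is purely illustrative:
+
+```lua
+-- The 'parallel' table is defined only when MPI support is enabled.
+if parallel ~= nil and parallel.comm_rank == 0 then
+    print("Configuring a run on " .. parallel.comm_size .. " MPI processes")
+end
+```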
+
 ### Callable functions
 - `enable_boundary_sources()`: takes a boolean parameter that specifies if boundary sources should be enabled or not
 - `enable_interface_sources()`: takes a boolean parameter that specifies if the sources applied on internal interfaces should be enabled or not
@@ -137,6 +142,10 @@ None.
 - `internal.model`: opaque reference to the internal representation of the GMSH model. Valid only after the solver is fully initialized, i.e. inside the callback functions.
 - `internal.state`: opaque reference to the internal representation of the solver state. Valid only after the solver is fully initialized, i.e. inside the callback functions.
 
+### Debug and validation
+The solver provides facilities for validation and comparison with analytical solutions. These facilities are accessed via the `debug` table. In the current implementation the `debug` table is undefined by default, so it must be initialized explicitly in the configuration with the statement `debug = {}`. Possible members of the `debug` table are:
+- `analytical_solution(tag, x, y, z, t)`: defines the analytical solution of the problem. If present, the numerical solution is compared with the analytical solution over the whole domain at each timestep. The function must return six values, namely `Ex`, `Ey`, `Ez`, `Hx`, `Hy` and `Hz`; see the example below.
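+
+A minimal sketch of such a definition follows; the plane-wave expression is purely illustrative and must be adapted to the actual materials, sources and units of the problem:
+
+```lua
+debug = {}
+
+-- Return order is Ex, Ey, Ez, Hx, Hy, Hz.
+function debug.analytical_solution(tag, x, y, z, t)
+    local Ex = math.sin(math.pi*(z - t))
+    local Hy = Ex
+    return Ex, 0, 0, 0, Hy, 0
+end
+```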
+
 ### Postprocessing
 The export of the fields E, H and J to Silo is controlled using the appropriate sections in the `postpro` table.
 - `postpro["E"].silo_mode` controls the export of the electric field E. Possible values are `"none"` (no export), `"nodal"` (export as a Silo nodal variable) and `"zonal"` (export as a Silo zonal variable) 
diff --git a/include/gmsh_io.h b/include/gmsh_io.h
index 431e135048d3d1352f970352806f541023164952..7c530e09e6df91c6b7b2778d4f775176d1ac3d85 100644
--- a/include/gmsh_io.h
+++ b/include/gmsh_io.h
@@ -240,6 +240,11 @@ public:
     {
         return ipconn;
     }
+#else
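+    /* Without MPI the mesh is not partitioned, so the "world" mesh
+     * coincides with the local one. */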
+    size_t num_cells_world() const
+    {
+        return num_cells();
+    }
 
 #endif /* USE_MPI */
 
diff --git a/include/maxwell_interface.h b/include/maxwell_interface.h
index 46ec674c15549f0bf60e7628e77b410f8ec98967..d11acabb6c53dbfa49521ca8f83e1cb20757b4d4 100644
--- a/include/maxwell_interface.h
+++ b/include/maxwell_interface.h
@@ -74,6 +74,9 @@ public:
     vec3d   eval_boundary_source(int, const point_3d&, double) const;
     vec3d   eval_interface_source(int, const point_3d&, double) const;
 
+    bool    has_analytical_solution(void) const;
+    std::pair<vec3d, vec3d> eval_analytical_solution(int, const point_3d&, double t) const;
+
     boundary_condition  boundary_type(int tag) const;
     interface_condition interface_type(int tag) const;
     field_export_mode postpro_fieldExportMode(const std::string&) const;
diff --git a/src/gmsh_io.cpp b/src/gmsh_io.cpp
index 8753340db0f204e45e48811736a62d59454e9f23..0e1bd75abeaa8dd1d55333dd868bfec48392df57 100644
--- a/src/gmsh_io.cpp
+++ b/src/gmsh_io.cpp
@@ -557,9 +557,10 @@ model::make_comm_descriptors(void)
         return a.first < b;
     };
 
-    size_t flux_base = 0;
-    /* For each entity */
 
+    /* Not pretty, but the faces found in the next loop must end up in
+     * exactly the same order on both sides of the interprocess (IPC)
+     * boundary, so they are collected here and processed later. */
     struct lazy_hack {
         element_key             fk;
         size_t                  fbs;
@@ -569,6 +570,8 @@ model::make_comm_descriptors(void)
 
     std::vector<lazy_hack> lhs;
 
+    size_t flux_base = 0;
+    /* For each entity */
     for (auto& e : entities)
     {
         /* and for each face */
diff --git a/src/maxwell_common.cpp b/src/maxwell_common.cpp
index c8f83c026a65c3b2f94473159492049dd8ed5bf2..4fc24befbe6fe3bcfaad5d25124d4c27dba68ac7 100644
--- a/src/maxwell_common.cpp
+++ b/src/maxwell_common.cpp
@@ -327,9 +327,9 @@ void export_J_field_nodal(const model& mod, silo& sdb, const vecxd& Ex,
 void export_E_field_zonal(const model& mod, silo& sdb, const vecxd& Ex,
     const vecxd& Ey, const vecxd& Ez, const parameter_loader&)
 {
-    std::vector<double> out_Ex( mod.num_cells() );
-    std::vector<double> out_Ey( mod.num_cells() );
-    std::vector<double> out_Ez( mod.num_cells() );
+    std::vector<double> out_Ex( mod.num_cells_world() );
+    std::vector<double> out_Ey( mod.num_cells_world() );
+    std::vector<double> out_Ez( mod.num_cells_world() );
 
 #ifdef USE_MPI
     for (auto& etag : mod.world_entities_tags())
@@ -362,9 +362,9 @@ void export_E_field_zonal(const model& mod, silo& sdb, const vecxd& Ex,
 void export_H_field_zonal(const model& mod, silo& sdb, const vecxd& Hx,
     const vecxd& Hy, const vecxd& Hz, const parameter_loader&)
 {
-    std::vector<double> out_Hx( mod.num_cells() );
-    std::vector<double> out_Hy( mod.num_cells() );
-    std::vector<double> out_Hz( mod.num_cells() );
+    std::vector<double> out_Hx( mod.num_cells_world() );
+    std::vector<double> out_Hy( mod.num_cells_world() );
+    std::vector<double> out_Hz( mod.num_cells_world() );
 
 #ifdef USE_MPI
     for (auto& etag : mod.world_entities_tags())
@@ -397,9 +397,9 @@ void export_H_field_zonal(const model& mod, silo& sdb, const vecxd& Hx,
 void export_J_field_zonal(const model& mod, silo& sdb, const vecxd& Ex,
     const vecxd& Ey, const vecxd& Ez, const parameter_loader& mpl)
 {
-    std::vector<double> out_Jx( mod.num_cells() );
-    std::vector<double> out_Jy( mod.num_cells() );
-    std::vector<double> out_Jz( mod.num_cells() );
+    std::vector<double> out_Jx( mod.num_cells_world() );
+    std::vector<double> out_Jy( mod.num_cells_world() );
+    std::vector<double> out_Jz( mod.num_cells_world() );
 
 #ifdef USE_MPI
     for (auto& etag : mod.world_entities_tags())
diff --git a/src/maxwell_cpu.cpp b/src/maxwell_cpu.cpp
index d0d07a4508e3ce942a94621c1ce9f8f2895e74dd..4c74a983df73783dbd465b961ac6227462517b8f 100644
--- a/src/maxwell_cpu.cpp
+++ b/src/maxwell_cpu.cpp
@@ -83,14 +83,15 @@ init_from_model(const model& mod, solver_state& state)
         state.eds.push_back( std::move(ed) );
     }
 
+#ifdef USE_MPI
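+    /* Size the interprocess exchange buffer (state.ipc) to the largest
+     * dof mapping among all the communication descriptors. */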
     auto& cds = mod.comm_descriptors();
 
     size_t dofs = 0;
     for (auto& [tag, cd] : cds)
-        if (cd.dof_mapping.size() > dofs)
-            dofs = cd.dof_mapping.size();
+        dofs = std::max(cd.dof_mapping.size(), dofs);
 
     state.ipc.resize(dofs);
+#endif /* USE_MPI */
 
     state.curr_time = 0.0;
     state.curr_timestep = 0;
@@ -185,6 +186,7 @@ compute_field_jumps(solver_state& state, const field& in)
     }
 }
 
+#ifdef USE_MPI
 static void
 exchange_boundary_data(const model& mod, solver_state& state)
 {
@@ -234,6 +236,7 @@ exchange_boundary_data(const model& mod, solver_state& state)
         }
     }
 }
+#endif /* USE_MPI */
 
 static void
 compute_field_jumps_E(solver_state& state, const field& in)
@@ -266,6 +269,59 @@ compute_field_jumps_E(solver_state& state, const field& in)
     }
 }
 
+#ifdef USE_MPI
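+/* Exchange the E-field jump terms across the interprocess boundaries.
+ * For each comm descriptor the lower rank sends first and then receives,
+ * while the higher rank receives first and then sends back, so the matched
+ * blocking calls pair up without deadlocking; the neighbour's data is then
+ * subtracted from the local jump terms. */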
+static void
+exchange_boundary_data_E(const model& mod, solver_state& state)
+{
+    auto& cds = mod.comm_descriptors();
+    for (auto& [tag, cd] : cds)
+    {
+        size_t ndofs = cd.dof_mapping.size();
+        if (cd.rank_mine < cd.rank_other)
+        {
+            for (size_t i = 0; i < ndofs; i++)
+            {
+                auto srci = cd.dof_mapping[i];
+                state.ipc.Ex[i] = state.jumps.Ex[srci];
+                state.ipc.Ey[i] = state.jumps.Ey[srci];
+                state.ipc.Ez[i] = state.jumps.Ez[srci];
+            }
+            MPI_Send(state.ipc.Ex.data(), ndofs, MPI_DOUBLE, cd.rank_other, tag, MPI_COMM_WORLD);
+            MPI_Send(state.ipc.Ey.data(), ndofs, MPI_DOUBLE, cd.rank_other, tag, MPI_COMM_WORLD);
+            MPI_Send(state.ipc.Ez.data(), ndofs, MPI_DOUBLE, cd.rank_other, tag, MPI_COMM_WORLD);
+
+            MPI_Recv(state.ipc.Ex.data(), ndofs, MPI_DOUBLE, cd.rank_other, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+            MPI_Recv(state.ipc.Ey.data(), ndofs, MPI_DOUBLE, cd.rank_other, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+            MPI_Recv(state.ipc.Ez.data(), ndofs, MPI_DOUBLE, cd.rank_other, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+            for (size_t i = 0; i < cd.dof_mapping.size(); i++)
+            {
+                auto dsti = cd.dof_mapping[i];
+                state.jumps.Ex[dsti] -= state.ipc.Ex[i];
+                state.jumps.Ey[dsti] -= state.ipc.Ey[i];
+                state.jumps.Ez[dsti] -= state.ipc.Ez[i];
+            }
+        }
+        else
+        {
+            MPI_Recv(state.ipc.Ex.data(), ndofs, MPI_DOUBLE, cd.rank_other, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+            MPI_Recv(state.ipc.Ey.data(), ndofs, MPI_DOUBLE, cd.rank_other, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+            MPI_Recv(state.ipc.Ez.data(), ndofs, MPI_DOUBLE, cd.rank_other, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+            for (size_t i = 0; i < cd.dof_mapping.size(); i++)
+            {
+                auto dsti = cd.dof_mapping[i];
+                double tmp;
+                tmp = state.jumps.Ex[dsti]; state.jumps.Ex[dsti] -= state.ipc.Ex[i]; state.ipc.Ex[i] = tmp;
+                tmp = state.jumps.Ey[dsti]; state.jumps.Ey[dsti] -= state.ipc.Ey[i]; state.ipc.Ey[i] = tmp;
+                tmp = state.jumps.Ez[dsti]; state.jumps.Ez[dsti] -= state.ipc.Ez[i]; state.ipc.Ez[i] = tmp;
+            }
+            MPI_Send(state.ipc.Ex.data(), ndofs, MPI_DOUBLE, cd.rank_other, tag, MPI_COMM_WORLD);
+            MPI_Send(state.ipc.Ey.data(), ndofs, MPI_DOUBLE, cd.rank_other, tag, MPI_COMM_WORLD);
+            MPI_Send(state.ipc.Ez.data(), ndofs, MPI_DOUBLE, cd.rank_other, tag, MPI_COMM_WORLD);
+        }
+    }
+}
+#endif /* USE_MPI */
+
 static void
 compute_field_jumps_H(solver_state& state, const field& in)
 {
@@ -297,6 +353,59 @@ compute_field_jumps_H(solver_state& state, const field& in)
     }
 }
 
+#ifdef USE_MPI
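+/* Same send/receive ordering as exchange_boundary_data_E, applied to the
+ * H-field jump terms. */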
+static void
+exchange_boundary_data_H(const model& mod, solver_state& state)
+{
+    auto& cds = mod.comm_descriptors();
+    for (auto& [tag, cd] : cds)
+    {
+        size_t ndofs = cd.dof_mapping.size();
+        if (cd.rank_mine < cd.rank_other)
+        {
+            for (size_t i = 0; i < ndofs; i++)
+            {
+                auto srci = cd.dof_mapping[i];
+                state.ipc.Hx[i] = state.jumps.Hx[srci];
+                state.ipc.Hy[i] = state.jumps.Hy[srci];
+                state.ipc.Hz[i] = state.jumps.Hz[srci];
+            }
+            MPI_Send(state.ipc.Hx.data(), ndofs, MPI_DOUBLE, cd.rank_other, tag, MPI_COMM_WORLD);
+            MPI_Send(state.ipc.Hy.data(), ndofs, MPI_DOUBLE, cd.rank_other, tag, MPI_COMM_WORLD);
+            MPI_Send(state.ipc.Hz.data(), ndofs, MPI_DOUBLE, cd.rank_other, tag, MPI_COMM_WORLD);
+
+            MPI_Recv(state.ipc.Hx.data(), ndofs, MPI_DOUBLE, cd.rank_other, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+            MPI_Recv(state.ipc.Hy.data(), ndofs, MPI_DOUBLE, cd.rank_other, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+            MPI_Recv(state.ipc.Hz.data(), ndofs, MPI_DOUBLE, cd.rank_other, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+            for (size_t i = 0; i < cd.dof_mapping.size(); i++)
+            {
+                auto dsti = cd.dof_mapping[i];
+                state.jumps.Hx[dsti] -= state.ipc.Hx[i];
+                state.jumps.Hy[dsti] -= state.ipc.Hy[i];
+                state.jumps.Hz[dsti] -= state.ipc.Hz[i];
+            }
+        }
+        else
+        {
+            MPI_Recv(state.ipc.Hx.data(), ndofs, MPI_DOUBLE, cd.rank_other, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+            MPI_Recv(state.ipc.Hy.data(), ndofs, MPI_DOUBLE, cd.rank_other, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+            MPI_Recv(state.ipc.Hz.data(), ndofs, MPI_DOUBLE, cd.rank_other, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+            for (size_t i = 0; i < cd.dof_mapping.size(); i++)
+            {
+                auto dsti = cd.dof_mapping[i];
+                double tmp;
+                tmp = state.jumps.Hx[dsti]; state.jumps.Hx[dsti] -= state.ipc.Hx[i]; state.ipc.Hx[i] = tmp;
+                tmp = state.jumps.Hy[dsti]; state.jumps.Hy[dsti] -= state.ipc.Hy[i]; state.ipc.Hy[i] = tmp;
+                tmp = state.jumps.Hz[dsti]; state.jumps.Hz[dsti] -= state.ipc.Hz[i]; state.ipc.Hz[i] = tmp;
+            }
+            MPI_Send(state.ipc.Hx.data(), ndofs, MPI_DOUBLE, cd.rank_other, tag, MPI_COMM_WORLD);
+            MPI_Send(state.ipc.Hy.data(), ndofs, MPI_DOUBLE, cd.rank_other, tag, MPI_COMM_WORLD);
+            MPI_Send(state.ipc.Hz.data(), ndofs, MPI_DOUBLE, cd.rank_other, tag, MPI_COMM_WORLD);
+        }
+    }
+}
+#endif /* USE_MPI */
+
 static void
 compute_fluxes_planar(solver_state& state)
 {
@@ -452,9 +561,14 @@ compute_fluxes(const model& mod, solver_state& state, const field& in, field& ou
 }
 
 static void
-compute_fluxes_E(solver_state& state, const field& in, field& out)
+compute_fluxes_E(const model& mod, solver_state& state, const field& in, field& out)
 {
     compute_field_jumps_E(state, in);
+
+#ifdef USE_MPI
+    exchange_boundary_data_E(mod, state);
+#endif /* USE_MPI */
+
     compute_fluxes_planar_E(state);
     
     for (auto& ed : state.eds)
@@ -466,9 +580,14 @@ compute_fluxes_E(solver_state& state, const field& in, field& out)
 }
 
 static void
-compute_fluxes_H(solver_state& state, const field& in, field& out)
+compute_fluxes_H(const model& mod, solver_state& state, const field& in, field& out)
 {
     compute_field_jumps_H(state, in);
+
+#ifdef USE_MPI
+    exchange_boundary_data_H(mod, state);
+#endif /* USE_MPI */
+
     compute_fluxes_planar_H(state);
     
     for (auto& ed : state.eds)
@@ -529,11 +648,11 @@ apply_operator(const model& mod, solver_state& state, const field& curr, field&
 }
 
 static void
-leapfrog(solver_state& state)
+leapfrog(const model& mod, solver_state& state)
 {
     auto dt = state.delta_t;
     compute_curls_H(state, state.emf_curr, state.tmp);
-    compute_fluxes_H(state, state.emf_curr, state.tmp);
+    compute_fluxes_H(mod, state, state.emf_curr, state.tmp);
     for (size_t i = 0; i < state.emf_next.num_dofs; i++)
     {
         auto CRM = 1.0 - 0.5*dt*state.matparams.sigma_over_epsilon[i];
@@ -546,7 +665,7 @@ leapfrog(solver_state& state)
     }
 
     compute_curls_E(state, state.emf_next, state.tmp);
-    compute_fluxes_E(state, state.emf_next, state.tmp);
+    compute_fluxes_E(mod, state, state.emf_next, state.tmp);
     for (size_t i = 0; i < state.emf_next.num_dofs; i++)
     {
         auto CC = dt*state.matparams.inv_mu[i];
@@ -583,7 +702,7 @@ timestep(const model& mod, solver_state& state, const parameter_loader&, time_in
 
     if (ti == time_integrator_type::LEAPFROG)
     {
-        leapfrog(state);
+        leapfrog(mod, state);
     }
 
     state.curr_time += state.delta_t;
@@ -751,6 +870,20 @@ export_fields_to_silo(const model& mod, maxwell::solver_state& state,
                 export_J_field_zonal(mod, sdb, f.Ex, f.Ey, f.Ez, mpl);
                 break;
         }
+
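+        /* Export, as the zonal variable "entity_ranks", the rank of the
+         * process owning each cell. */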
+        std::vector<double> entranks( mod.num_cells_world(), 999999999 );
+        for (auto& etag : mod.world_entities_tags())
+        {
+            auto& e = mod.entity_world_lookup(etag);
+            int er = mod.entity_rank(etag);
+
+            for (size_t iT = 0; iT < e.num_cells(); iT++)
+            {
+                auto gi = e.cell_world_index_by_gmsh(iT);
+                entranks[gi] = er;
+            }
+        }
+        sdb.write_zonal_variable("entity_ranks", entranks);
     }
     else
     {
diff --git a/src/maxwell_interface.cpp b/src/maxwell_interface.cpp
index 9747e74fc6973c3da97798ac1dcbe7ef76a46c26..9bbaa66246b97c1199e5038b7040ff06ed184fbb 100644
--- a/src/maxwell_interface.cpp
+++ b/src/maxwell_interface.cpp
@@ -688,4 +688,22 @@ parameter_loader::postpro_fieldExportMode(const std::string& field) const
     return field_export_mode::NONE;
 }
 
+bool parameter_loader::has_analytical_solution(void) const
+{
+    auto asol_fun = lua["debug"]["analytical_solution"];
+    return asol_fun.valid();
+}
+
+std::pair<vec3d, vec3d>
+parameter_loader::eval_analytical_solution(int tag, const point_3d& pt, double t) const
+{
+    vec3d E, H;
+    sol::tie(E(0), E(1), E(2), H(0), H(1), H(2)) =
+        lua["debug"]["analytical_solution"](tag, pt.x(), pt.y(), pt.z(), t);
+    return std::make_pair(E,H);
+}
+
 } // namespace maxwell
diff --git a/src/maxwell_solver.cpp b/src/maxwell_solver.cpp
index 27fab666e1d169a3d9035ac05d3bd06027567c37..6cd728b4c3950571e9c101ee4ede7aeb33a456c0 100644
--- a/src/maxwell_solver.cpp
+++ b/src/maxwell_solver.cpp
@@ -193,9 +193,114 @@ reinit_materials(const model& mod, maxwell::solver_state& state, maxwell::parame
 }
 #endif /* ENABLE_EXP_GEOMETRIC_DIODE */
 
+struct maxwell_6ple {
+    double Ex;
+    double Ey;
+    double Ez;
+    double Hx;
+    double Hy;
+    double Hz;
+
+    maxwell_6ple()
+        : Ex(0.0), Ey(0.0), Ez(0.0),
+          Hx(0.0), Hy(0.0), Hz(0.0)
+    {}
+};
+
+std::ostream&
+operator<<(std::ostream& os, const maxwell_6ple& m)
+{
+    os << m.Ex << " " << m.Ey << " " << m.Ez << " ";
+    os << m.Hx << " " << m.Hy << " " << m.Hz;
+    return os;
+}
+
+maxwell_6ple sqrt(const maxwell_6ple& m)
+{
+    maxwell_6ple ret;
+    ret.Ex = std::sqrt(m.Ex);
+    ret.Ey = std::sqrt(m.Ey);
+    ret.Ez = std::sqrt(m.Ez);
+    ret.Hx = std::sqrt(m.Hx);
+    ret.Hy = std::sqrt(m.Hy);
+    ret.Hz = std::sqrt(m.Hz);
+    return ret;
+}
+
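+/* Compare the numerical solution with the analytical solution provided in the
+ * configuration: accumulate the quadrature-weighted squared error of each field
+ * component and the discrete electromagnetic energy, then append the energies
+ * and the error norms as one line of the output stream. */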
+static void
+validate(const model& mod, maxwell::solver_state& state, maxwell::parameter_loader& mpl, std::ofstream& ofs)
+{
+    maxwell_6ple err;
+    maxwell_6ple energy;
+
+    for (auto& e : mod)
+    {
+        for (size_t iT = 0; iT < e.num_cells(); iT++)
+        {
+            const auto& pe = e.cell(iT);
+            const auto& re = e.cell_refelem(pe);
+
+            const auto pqps = pe.integration_points();
+            const auto dets = pe.determinants();
+
+            const auto dof_base = e.cell_world_dof_offset(iT);
+            const auto cbs = re.num_basis_functions();
+
+            assert(pqps.size() == dets.size());
+            const auto bar = pe.barycenter();
+            auto eps = mpl.epsilon( e.material_tag(), bar );
+            auto mu = mpl.mu( e.material_tag(), bar );
+
+            for (size_t iQp = 0; iQp < pqps.size(); iQp++)
+            {
+                const auto& pqp = pqps[iQp];
+                const vecxd phi = re.basis_functions(iQp);
+                auto [E, H] = mpl.eval_analytical_solution(e.material_tag(), pqp.point(), state.curr_time);
+
+                maxwell_6ple Fh;
+                Fh.Ex = state.emf_next.Ex.segment(dof_base, cbs).dot(phi);
+                Fh.Ey = state.emf_next.Ey.segment(dof_base, cbs).dot(phi);
+                Fh.Ez = state.emf_next.Ez.segment(dof_base, cbs).dot(phi);
+                Fh.Hx = state.emf_next.Hx.segment(dof_base, cbs).dot(phi);
+                Fh.Hy = state.emf_next.Hy.segment(dof_base, cbs).dot(phi);
+                Fh.Hz = state.emf_next.Hz.segment(dof_base, cbs).dot(phi);
+
+                double ae;
+                ae = E(0) - Fh.Ex; err.Ex += pqp.weight() * ae * ae;
+                ae = E(1) - Fh.Ey; err.Ey += pqp.weight() * ae * ae;
+                ae = E(2) - Fh.Ez; err.Ez += pqp.weight() * ae * ae;
+                ae = H(0) - Fh.Hx; err.Hx += pqp.weight() * ae * ae;
+                ae = H(1) - Fh.Hy; err.Hy += pqp.weight() * ae * ae;
+                ae = H(2) - Fh.Hz; err.Hz += pqp.weight() * ae * ae;
+
+                energy.Ex += 0.5*eps*(Fh.Ex*Fh.Ex)*pqp.weight();
+                energy.Ey += 0.5*eps*(Fh.Ey*Fh.Ey)*pqp.weight();
+                energy.Ez += 0.5*eps*(Fh.Ez*Fh.Ez)*pqp.weight();
+                energy.Hx += 0.5*mu*(Fh.Hx*Fh.Hx)*pqp.weight();
+                energy.Hy += 0.5*mu*(Fh.Hy*Fh.Hy)*pqp.weight();
+                energy.Hz += 0.5*mu*(Fh.Hz*Fh.Hz)*pqp.weight();
+            }
+        }
+    }
+
+    ofs << energy.Ex << " " << energy.Ey << " " << energy.Ez << " " << energy.Hx;
+    ofs << " " << energy.Hy << " " << energy.Hz << " ";
+    ofs << energy.Ex + energy.Ey + energy.Ez + energy.Hx + energy.Hy + energy.Hz;
+    ofs << " " << sqrt(err) << std::endl;
+}
+
+/* TODO: validation against the analytical solution is not implemented yet
+ * for the GPU solver state. */
+
 template<typename State>
-void test_it(const model& mod, State& state, maxwell::parameter_loader& mpl)
+void solver_mainloop(const model& mod, State& state, maxwell::parameter_loader& mpl)
 {
+#ifdef USE_MPI
+    int proc_rank;
+    MPI_Comm_rank(MPI_COMM_WORLD, &proc_rank);
+#endif /* USE_MPI */
+
     initialize_solver(mod, state, mpl);
 
     state.delta_t = mpl.sim_dt();
@@ -204,10 +309,21 @@ void test_it(const model& mod, State& state, maxwell::parameter_loader& mpl)
     auto cycle_print_rate = mpl.postpro_cyclePrintRate();
     auto ti = mpl.sim_timeIntegrator();
 
+#ifdef USE_MPI
+    if (proc_rank == 0) {
+#endif /* USE_MPI */
     std::cout << "    BEGINNING SIMULATION" << std::endl;
     std::cout << "I will do " << num_timesteps << " of " << state.delta_t;
     std::cout << "s each." << std::endl;
     std::cout << "Time integrator: " << mpl.sim_timeIntegratorName() << std::endl;
+#ifdef USE_MPI
+    }
+#endif /* USE_MPI */
+
+    std::ofstream ofs;
+    bool validate_solution = mpl.has_analytical_solution();
+    if (validate_solution)
+        ofs.open("energy.txt");
 
     mpl.call_initialization_callback();
 #ifdef ENABLE_EXP_GEOMETRIC_DIODE
@@ -228,13 +344,27 @@ void test_it(const model& mod, State& state, maxwell::parameter_loader& mpl)
         if ( (silo_output_rate != 0) and (i%silo_output_rate == 0) )
             export_fields_to_silo(mod, state, mpl);
 
+        if ( validate_solution )
+            validate(mod, state, mpl, ofs);
+
         swap(state, mpl);
 
         if ( (cycle_print_rate != 0) and (i%cycle_print_rate == 0) )
         {
             double time = tc.toc();
+            double dofs_s_proc = (mod.num_dofs()*6*cycle_print_rate)/(time);
+#ifdef USE_MPI
+            double dofs_s_total;
+            MPI_Reduce(&dofs_s_proc, &dofs_s_total, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
+            if (proc_rank == 0)
+            {
+                std::cout << "Cycle " << i << ": t = " << state.curr_time << " s";
+                std::cout << ", DOFs/s: " << dofs_s_total << std::endl;
+            }
+#else /* USE_MPI */
             std::cout << "Cycle " << i << ": t = " << state.curr_time << " s";
-            std::cout << ", DOFs/s: " << (mod.num_dofs()*6*cycle_print_rate)/(time) << std::endl;
+            std::cout << ", DOFs/s: " << dofs_s_proc << std::endl;
+#endif /* USE_MPI */
             tc.tic();
         }
     }
@@ -306,12 +436,18 @@ int main(int argc, char *argv[])
 #ifdef DONT_USE_STD_FILESYSTEM
     mkdir(mpl.sim_name().c_str(), 0755);
 #else
-    std::filesystem::create_directory( mpl.sim_name() );;
+    std::filesystem::create_directory( mpl.sim_name() );
 #endif
 
+#ifdef USE_MPI
+    if (comm_rank == 0) {
+#endif /* USE_MPI */
     gmsh::initialize();
     gmsh::option::setNumber("General.Terminal", 0);
     gmsh::open( mpl.sim_gmshmodel() );
+#ifdef USE_MPI
+    }
+#endif /* USE_MPI */
 
     model mod( mpl.sim_geomorder(), mpl.sim_approxorder() );
 
@@ -349,7 +485,7 @@ int main(int argc, char *argv[])
 #ifdef ENABLE_GPU_SOLVER
         maxwell::solver_state_gpu state_g;
         register_lua_usertypes_bystate(mpl, mod, state_g);
-        test_it(mod, state_g, mpl);
+        solver_mainloop(mod, state_g, mpl);
 #else
         std::cout << "GPU solver not compiled. Exiting." << std::endl;
 #endif /* ENABLE_GPU_SOLVER */
@@ -358,7 +494,7 @@ int main(int argc, char *argv[])
     {
         maxwell::solver_state state_c;
         register_lua_usertypes_bystate(mpl, mod, state_c);
-        test_it(mod, state_c, mpl);
+        solver_mainloop(mod, state_c, mpl);
     }
 
     //gmsh::finalize();
diff --git a/src/param_loader.cpp b/src/param_loader.cpp
index eb9b97127e14d262dd3c981a3ea70db52230f038..deea7667d187abb9711f28d1e01f0c2507361bb5 100644
--- a/src/param_loader.cpp
+++ b/src/param_loader.cpp
@@ -1,4 +1,9 @@
 #include <iostream>
+
+#ifdef USE_MPI
+#include <mpi.h>
+#endif /* USE_MPI */
+
 #include "param_loader.h"
 
 parameter_loader_base::parameter_loader_base()
@@ -44,6 +49,15 @@ parameter_loader_base::init(void)
     lua.set_function("enable_volume_sources",
                      &parameter_loader_base::enable_volume_sources,
                      this);
+
+#ifdef USE_MPI
+    int comm_rank, comm_size;
+    MPI_Comm_rank(MPI_COMM_WORLD, &comm_rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &comm_size);
+    lua["parallel"] = lua.create_table();
+    lua["parallel"]["comm_rank"] = comm_rank;
+    lua["parallel"]["comm_size"] = comm_size;
+#endif /* USE_MPI */
 }
 
 bool