Commit 8bbfd799 authored by Matteo Cicuttin

Added MPI support to leapfrog/cpu.

parent 9d847052
@@ -7,7 +7,7 @@ This file documents the Lua API available in the solver. API has a *general* par
The *general* part has to do with configuration not related to a specific problem (e.g. timestep, geometry file), whereas the *problem-specific* part configures all the parameters that make sense only for a given problem (e.g. materials and sources). This separation is also reflected in how the configuration is handled internally.
## API version
This document describes the API available in version v0.3 of the solver.
## General interface
@@ -20,16 +20,21 @@ This document describes the API available on the version v0.1 of the solver.
- `sim.use_gpu` (0/1): enable/disable GPU usage.
- `sim.approx_order` (integer): method approximation order.
- `sim.geom_order` (integer): geometry order. Only order 1 is supported for now.
- `sim.time_integrator` (string): `"rk4"`, `"leapfrog"` or `"euler"`. Euler is only for test purposes, as it is numerically unstable.
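For reference, a minimal configuration fragment that sets only the simulation variables listed above (values are illustrative; the `sim` table is assumed to be predefined by the solver):

```lua
-- Illustrative values only; timestep, mesh and sources are configured elsewhere.
sim.use_gpu = 0
sim.approx_order = 2
sim.geom_order = 1
sim.time_integrator = "leapfrog"
```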
### Postprocessing general variables
- `postpro.silo_output_rate` (integer): write the fields to Silo every N timesteps; 0 disables Silo output.
- `postpro.cycle_print_rate` (integer): print cycle statistics (current time, DOFs/s) every N timesteps; 0 disables printing.
### Mesh variables
- `mesh.scalefactor` (real): apply a scaling factor to the mesh.
### Parallel execution information (available only if MPI support is enabled)
The parallel solver runs as separate MPI processes, so each process loads and executes its own instance of the configuration script. Beware of global variables and other shared state: modifications are **not** reflected across processes. Although the configuration script should in general not depend on the rank of the current process, the following variables are exposed (see the example after this list):
- `parallel.comm_rank` (integer): The MPI rank of the current solver process
- `parallel.comm_size` (integer): The MPI communicator size
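As an illustration, a configuration script can use these values to restrict side effects, such as printing or writing auxiliary files, to a single rank. A minimal sketch (the `parallel` table exists only in MPI builds, hence the nil check):

```lua
-- Print a summary from rank 0 only; the other ranks stay silent.
if parallel ~= nil and parallel.comm_rank == 0 then
    print("running on " .. parallel.comm_size .. " MPI processes")
end
```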
### Callable functions
- `enable_boundary_sources()`: takes a boolean parameter that specifies whether boundary sources should be enabled
- `enable_interface_sources()`: takes a boolean parameter that specifies whether the sources applied on internal interfaces should be enabled
@@ -137,6 +142,10 @@ None.
- `internal.model`: opaque reference to the internal representation of the GMSH model. Valid only after the solver is fully initialized, i.e. inside the callback functions.
- `internal.state`: opaque reference to the internal representation of the solver state. Valid only after the solver is fully initialized, i.e. inside the callback functions.
### Debug and validation
The solver provides some facilities for validation and comparison with analytical solutions. These facilities are accessed via the `debug` table. In the current implementation the `debug` table is undefined by default, so it must be initialized explicitly in the configuration with the statement `debug = {}`. Possible members of the `debug` table are:
- `analytical_solution(tag, x, y, z, t)`: define the analytical solution of the problem. If defined, the numerical solution is compared with the analytical solution at each timestep in the whole domain. The function has to return 6 values, namely `Ex`, `Ey`, `Ez`, `Hx`, `Hy` and `Hz`.
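For illustration, a sketch of a configuration fragment defining such a function; the plane-wave expression is purely an example and not tied to any particular test case:

```lua
debug = {}
-- Hypothetical analytical solution: a plane wave travelling along z.
debug.analytical_solution = function(tag, x, y, z, t)
    local v = math.sin(2*math.pi*(z - t))
    -- must return Ex, Ey, Ez, Hx, Hy, Hz
    return v, 0, 0, 0, v, 0
end
```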
### Postprocessing
The export of the fields E, H and J to Silo is controlled using the appropriate sections in the `postpro` table.
- `postpro["E"].silo_mode` controls the export of the electric field E. Possible values are `"none"` (no export), `"nodal"` (export as a Silo nodal variable) and `"zonal"` (export as a Silo zonal variable).
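A small sketch combining the output rate and the per-field settings above (it assumes that the `postpro["E"]`, `postpro["H"]` and `postpro["J"]` subtables are provided by the solver, as the text implies):

```lua
postpro.silo_output_rate = 100      -- write Silo files every 100 timesteps
postpro["E"].silo_mode = "zonal"    -- export E as a zonal variable
postpro["H"].silo_mode = "none"     -- skip H
```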
......
@@ -240,6 +240,11 @@ public:
{
return ipconn;
}
#else
size_t num_cells_world() const
{
return num_cells();
}
#endif /* USE_MPI */
......
@@ -74,6 +74,9 @@ public:
vec3d eval_boundary_source(int, const point_3d&, double) const;
vec3d eval_interface_source(int, const point_3d&, double) const;
bool has_analytical_solution(void) const;
std::pair<vec3d, vec3d> eval_analytical_solution(int, const point_3d&, double t) const;
boundary_condition boundary_type(int tag) const;
interface_condition interface_type(int tag) const;
field_export_mode postpro_fieldExportMode(const std::string&) const;
......
@@ -557,9 +557,10 @@ model::make_comm_descriptors(void)
return a.first < b;
};
/* Sorry for this. Basically I need to have the faces I will
* find in the next loop in the exact same order on both sides
* of the IPC boundary. */
struct lazy_hack {
element_key fk;
size_t fbs;
@@ -569,6 +570,8 @@ model::make_comm_descriptors(void)
std::vector<lazy_hack> lhs;
size_t flux_base = 0;
/* For each entity */
for (auto& e : entities)
{
/* and for each face */
......
@@ -5,7 +5,7 @@
#define DIRICHLET 2.0
#define ROBIN 1.0
#ifdef USE_MPI
#define INTERPROCESS 1.0
#endif /* USE_MPI */
#define NEUMANN 0.0
@@ -327,9 +327,9 @@ void export_J_field_nodal(const model& mod, silo& sdb, const vecxd& Ex,
void export_E_field_zonal(const model& mod, silo& sdb, const vecxd& Ex,
const vecxd& Ey, const vecxd& Ez, const parameter_loader&)
{
std::vector<double> out_Ex( mod.num_cells_world() );
std::vector<double> out_Ey( mod.num_cells_world() );
std::vector<double> out_Ez( mod.num_cells_world() );
#ifdef USE_MPI
for (auto& etag : mod.world_entities_tags())
@@ -362,9 +362,9 @@ void export_E_field_zonal(const model& mod, silo& sdb, const vecxd& Ex,
void export_H_field_zonal(const model& mod, silo& sdb, const vecxd& Hx,
const vecxd& Hy, const vecxd& Hz, const parameter_loader&)
{
std::vector<double> out_Hx( mod.num_cells_world() );
std::vector<double> out_Hy( mod.num_cells_world() );
std::vector<double> out_Hz( mod.num_cells_world() );
#ifdef USE_MPI
for (auto& etag : mod.world_entities_tags())
@@ -397,9 +397,9 @@ void export_H_field_zonal(const model& mod, silo& sdb, const vecxd& Hx,
void export_J_field_zonal(const model& mod, silo& sdb, const vecxd& Ex,
const vecxd& Ey, const vecxd& Ez, const parameter_loader& mpl)
{
std::vector<double> out_Jx( mod.num_cells_world() );
std::vector<double> out_Jy( mod.num_cells_world() );
std::vector<double> out_Jz( mod.num_cells_world() );
#ifdef USE_MPI
for (auto& etag : mod.world_entities_tags())
......
@@ -83,14 +83,15 @@ init_from_model(const model& mod, solver_state& state)
state.eds.push_back( std::move(ed) );
}
#ifdef USE_MPI
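/* Size the inter-process exchange buffer once, using the largest dof_mapping
 * among all communication descriptors, so the same buffer can be reused for
 * every inter-process boundary. */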
auto& cds = mod.comm_descriptors();
size_t dofs = 0;
for (auto& [tag, cd] : cds)
dofs = std::max(cd.dof_mapping.size(), dofs);
state.ipc.resize(dofs);
#endif /* USE_MPI */
state.curr_time = 0.0;
state.curr_timestep = 0;
@@ -185,6 +186,7 @@ compute_field_jumps(solver_state& state, const field& in)
}
}
#ifdef USE_MPI
static void
exchange_boundary_data(const model& mod, solver_state& state)
{
@@ -234,6 +236,7 @@ exchange_boundary_data(const model& mod, solver_state& state)
}
}
}
#endif /* USE_MPI */
static void
compute_field_jumps_E(solver_state& state, const field& in)
@@ -266,6 +269,59 @@ compute_field_jumps_E(solver_state& state, const field& in)
}
}
#ifdef USE_MPI
static void
exchange_boundary_data_E(const model& mod, solver_state& state)
{
auto& cds = mod.comm_descriptors();
for (auto& [tag, cd] : cds)
{
size_t ndofs = cd.dof_mapping.size();
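/* Both branches use blocking MPI_Send/MPI_Recv, so the two sides of each
 * inter-process boundary are ordered by rank: the lower rank packs and sends
 * its E jumps first and then receives, while the higher rank receives first
 * and sends afterwards, avoiding a send/send deadlock. On the higher rank the
 * original local jump is saved into the ipc buffer while the subtraction is
 * applied in place, so the value sent back is the pre-update jump. */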
if (cd.rank_mine < cd.rank_other)
{
for (size_t i = 0; i < ndofs; i++)
{
auto srci = cd.dof_mapping[i];
state.ipc.Ex[i] = state.jumps.Ex[srci];
state.ipc.Ey[i] = state.jumps.Ey[srci];
state.ipc.Ez[i] = state.jumps.Ez[srci];
}
MPI_Send(state.ipc.Ex.data(), ndofs, MPI_DOUBLE, cd.rank_other, tag, MPI_COMM_WORLD);
MPI_Send(state.ipc.Ey.data(), ndofs, MPI_DOUBLE, cd.rank_other, tag, MPI_COMM_WORLD);
MPI_Send(state.ipc.Ez.data(), ndofs, MPI_DOUBLE, cd.rank_other, tag, MPI_COMM_WORLD);
MPI_Recv(state.ipc.Ex.data(), ndofs, MPI_DOUBLE, cd.rank_other, tag, MPI_COMM_WORLD, nullptr);
MPI_Recv(state.ipc.Ey.data(), ndofs, MPI_DOUBLE, cd.rank_other, tag, MPI_COMM_WORLD, nullptr);
MPI_Recv(state.ipc.Ez.data(), ndofs, MPI_DOUBLE, cd.rank_other, tag, MPI_COMM_WORLD, nullptr);
for (size_t i = 0; i < cd.dof_mapping.size(); i++)
{
auto dsti = cd.dof_mapping[i];
state.jumps.Ex[dsti] -= state.ipc.Ex[i];
state.jumps.Ey[dsti] -= state.ipc.Ey[i];
state.jumps.Ez[dsti] -= state.ipc.Ez[i];
}
}
else
{
MPI_Recv(state.ipc.Ex.data(), ndofs, MPI_DOUBLE, cd.rank_other, tag, MPI_COMM_WORLD, nullptr);
MPI_Recv(state.ipc.Ey.data(), ndofs, MPI_DOUBLE, cd.rank_other, tag, MPI_COMM_WORLD, nullptr);
MPI_Recv(state.ipc.Ez.data(), ndofs, MPI_DOUBLE, cd.rank_other, tag, MPI_COMM_WORLD, nullptr);
for (size_t i = 0; i < cd.dof_mapping.size(); i++)
{
auto dsti = cd.dof_mapping[i];
double tmp;
tmp = state.jumps.Ex[dsti]; state.jumps.Ex[dsti] -= state.ipc.Ex[i]; state.ipc.Ex[i] = tmp;
tmp = state.jumps.Ey[dsti]; state.jumps.Ey[dsti] -= state.ipc.Ey[i]; state.ipc.Ey[i] = tmp;
tmp = state.jumps.Ez[dsti]; state.jumps.Ez[dsti] -= state.ipc.Ez[i]; state.ipc.Ez[i] = tmp;
}
MPI_Send(state.ipc.Ex.data(), ndofs, MPI_DOUBLE, cd.rank_other, tag, MPI_COMM_WORLD);
MPI_Send(state.ipc.Ey.data(), ndofs, MPI_DOUBLE, cd.rank_other, tag, MPI_COMM_WORLD);
MPI_Send(state.ipc.Ez.data(), ndofs, MPI_DOUBLE, cd.rank_other, tag, MPI_COMM_WORLD);
}
}
}
#endif /* USE_MPI */
static void
compute_field_jumps_H(solver_state& state, const field& in)
{
@@ -297,6 +353,59 @@ compute_field_jumps_H(solver_state& state, const field& in)
}
}
#ifdef USE_MPI
static void
exchange_boundary_data_H(const model& mod, solver_state& state)
{
auto& cds = mod.comm_descriptors();
for (auto& [tag, cd] : cds)
{
size_t ndofs = cd.dof_mapping.size();
if (cd.rank_mine < cd.rank_other)
{
for (size_t i = 0; i < ndofs; i++)
{
auto srci = cd.dof_mapping[i];
state.ipc.Hx[i] = state.jumps.Hx[srci];
state.ipc.Hy[i] = state.jumps.Hy[srci];
state.ipc.Hz[i] = state.jumps.Hz[srci];
}
MPI_Send(state.ipc.Hx.data(), ndofs, MPI_DOUBLE, cd.rank_other, tag, MPI_COMM_WORLD);
MPI_Send(state.ipc.Hy.data(), ndofs, MPI_DOUBLE, cd.rank_other, tag, MPI_COMM_WORLD);
MPI_Send(state.ipc.Hz.data(), ndofs, MPI_DOUBLE, cd.rank_other, tag, MPI_COMM_WORLD);
MPI_Recv(state.ipc.Hx.data(), ndofs, MPI_DOUBLE, cd.rank_other, tag, MPI_COMM_WORLD, nullptr);
MPI_Recv(state.ipc.Hy.data(), ndofs, MPI_DOUBLE, cd.rank_other, tag, MPI_COMM_WORLD, nullptr);
MPI_Recv(state.ipc.Hz.data(), ndofs, MPI_DOUBLE, cd.rank_other, tag, MPI_COMM_WORLD, nullptr);
for (size_t i = 0; i < cd.dof_mapping.size(); i++)
{
auto dsti = cd.dof_mapping[i];
state.jumps.Hx[dsti] -= state.ipc.Hx[i];
state.jumps.Hy[dsti] -= state.ipc.Hy[i];
state.jumps.Hz[dsti] -= state.ipc.Hz[i];
}
}
else
{
MPI_Recv(state.ipc.Hx.data(), ndofs, MPI_DOUBLE, cd.rank_other, tag, MPI_COMM_WORLD, nullptr);
MPI_Recv(state.ipc.Hy.data(), ndofs, MPI_DOUBLE, cd.rank_other, tag, MPI_COMM_WORLD, nullptr);
MPI_Recv(state.ipc.Hz.data(), ndofs, MPI_DOUBLE, cd.rank_other, tag, MPI_COMM_WORLD, nullptr);
for (size_t i = 0; i < cd.dof_mapping.size(); i++)
{
auto dsti = cd.dof_mapping[i];
double tmp;
tmp = state.jumps.Hx[dsti]; state.jumps.Hx[dsti] -= state.ipc.Hx[i]; state.ipc.Hx[i] = tmp;
tmp = state.jumps.Hy[dsti]; state.jumps.Hy[dsti] -= state.ipc.Hy[i]; state.ipc.Hy[i] = tmp;
tmp = state.jumps.Hz[dsti]; state.jumps.Hz[dsti] -= state.ipc.Hz[i]; state.ipc.Hz[i] = tmp;
}
MPI_Send(state.ipc.Hx.data(), ndofs, MPI_DOUBLE, cd.rank_other, tag, MPI_COMM_WORLD);
MPI_Send(state.ipc.Hy.data(), ndofs, MPI_DOUBLE, cd.rank_other, tag, MPI_COMM_WORLD);
MPI_Send(state.ipc.Hz.data(), ndofs, MPI_DOUBLE, cd.rank_other, tag, MPI_COMM_WORLD);
}
}
}
#endif /* USE_MPI */
static void
compute_fluxes_planar(solver_state& state)
{
@@ -452,9 +561,14 @@ compute_fluxes(const model& mod, solver_state& state, const field& in, field& ou
}
static void
compute_fluxes_E(const model& mod, solver_state& state, const field& in, field& out)
{
compute_field_jumps_E(state, in);
#ifdef USE_MPI
exchange_boundary_data_E(mod, state);
#endif /* USE_MPI */
compute_fluxes_planar_E(state);
for (auto& ed : state.eds)
@@ -466,9 +580,14 @@ compute_fluxes_E(solver_state& state, const field& in, field& out)
}
static void
compute_fluxes_H(const model& mod, solver_state& state, const field& in, field& out)
{
compute_field_jumps_H(state, in);
#ifdef USE_MPI
exchange_boundary_data_H(mod, state);
#endif /* USE_MPI */
compute_fluxes_planar_H(state);
for (auto& ed : state.eds)
@@ -529,11 +648,11 @@ apply_operator(const model& mod, solver_state& state, const field& curr, field&
}
static void
leapfrog(const model& mod, solver_state& state)
{
auto dt = state.delta_t;
compute_curls_H(state, state.emf_curr, state.tmp);
compute_fluxes_H(mod, state, state.emf_curr, state.tmp);
for (size_t i = 0; i < state.emf_next.num_dofs; i++)
{
auto CRM = 1.0 - 0.5*dt*state.matparams.sigma_over_epsilon[i];
@@ -546,7 +665,7 @@ leapfrog(solver_state& state)
}
compute_curls_E(state, state.emf_next, state.tmp);
compute_fluxes_E(mod, state, state.emf_next, state.tmp);
for (size_t i = 0; i < state.emf_next.num_dofs; i++)
{
auto CC = dt*state.matparams.inv_mu[i];
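For orientation, the lossy leapfrog update these two loops implement can be sketched as follows. This is a reconstruction from the visible coefficients only: `CRM` above is the damping numerator, and a matching denominator of the form `1 + 0.5*dt*sigma/epsilon` is assumed for the part of the loop not shown in this hunk, so the formula is indicative rather than a literal transcription of the code.

$$
\mathbf{E}^{n+1} = \frac{\left(1 - \frac{\Delta t\,\sigma}{2\varepsilon}\right)\mathbf{E}^{n} + \frac{\Delta t}{\varepsilon}\left(\nabla\times\mathbf{H} + \text{flux terms}\right)}{1 + \frac{\Delta t\,\sigma}{2\varepsilon}},
\qquad
\mathbf{H}^{n+1} = \mathbf{H}^{n} - \frac{\Delta t}{\mu}\left(\nabla\times\mathbf{E}^{n+1} + \text{flux terms}\right),
$$

with the E update using the curls and fluxes of the current H field, and the H update using the freshly updated E, which is the leapfrog staggering realized by the two loops.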
@@ -583,7 +702,7 @@ timestep(const model& mod, solver_state& state, const parameter_loader&, time_in
if (ti == time_integrator_type::LEAPFROG)
{
leapfrog(mod, state);
}
state.curr_time += state.delta_t;
@@ -751,6 +870,20 @@ export_fields_to_silo(const model& mod, maxwell::solver_state& state,
export_J_field_zonal(mod, sdb, f.Ex, f.Ey, f.Ez, mpl);
break;
}
std::vector<double> entranks( mod.num_cells_world(), 999999999 );
for (auto& etag : mod.world_entities_tags())
{
auto& e = mod.entity_world_lookup(etag);
int er = mod.entity_rank(etag);
for (size_t iT = 0; iT < e.num_cells(); iT++)
{
auto gi = e.cell_world_index_by_gmsh(iT);
entranks[gi] = er;
}
}
sdb.write_zonal_variable("entity_ranks", entranks);
}
else
{ {
......
@@ -688,4 +688,22 @@ parameter_loader::postpro_fieldExportMode(const std::string& field) const
return field_export_mode::NONE;
}
bool parameter_loader::has_analytical_solution(void) const
{
auto asol_fun = lua["debug"]["analytical_solution"];
if (asol_fun.valid())
return true;
return false;
}
std::pair<vec3d, vec3d>
parameter_loader::eval_analytical_solution(int tag, const point_3d& pt, double t) const
{
vec3d E, H;
sol::tie(E(0), E(1), E(2), H(0), H(1), H(2)) =
lua["debug"]["analytical_solution"](tag, pt.x(), pt.y(), pt.z(), t);
return std::make_pair(E,H);
}
} // namespace maxwell
@@ -193,9 +193,114 @@ reinit_materials(const model& mod, maxwell::solver_state& state, maxwell::parame
}
#endif /* ENABLE_EXP_GEOMETRIC_DIODE */
struct maxwell_6ple {
double Ex;
double Ey;
double Ez;
double Hx;
double Hy;
double Hz;
maxwell_6ple()
: Ex(0.0), Ey(0.0), Ez(0.0),
Hx(0.0), Hy(0.0), Hz(0.0)
{}
};
std::ostream&
operator<<(std::ostream& os, const maxwell_6ple& m)
{
os << m.Ex << " " << m.Ey << " " << m.Ez << " ";
os << m.Hx << " " << m.Hy << " " << m.Hz;
return os;
}
maxwell_6ple sqrt(const maxwell_6ple& m)
{
maxwell_6ple ret;
ret.Ex = std::sqrt(m.Ex);
ret.Ey = std::sqrt(m.Ey);
ret.Ez = std::sqrt(m.Ez);
ret.Hx = std::sqrt(m.Hx);
ret.Hy = std::sqrt(m.Hy);
ret.Hz = std::sqrt(m.Hz);
return ret;
}
static void
validate(const model& mod, maxwell::solver_state& state, maxwell::parameter_loader& mpl, std::ofstream& ofs)
{
maxwell_6ple err;
maxwell_6ple energy;
for (auto& e : mod)
{
for (size_t iT = 0; iT < e.num_cells(); iT++)
{
const auto& pe = e.cell(iT);
const auto& re = e.cell_refelem(pe);
const auto pqps = pe.integration_points();
const auto dets = pe.determinants();
const auto ofs = e.cell_world_dof_offset(iT);
const auto cbs = re.num_basis_functions();
assert(pqps.size() == dets.size());
const auto bar = pe.barycenter();
auto eps = mpl.epsilon( e.material_tag(), bar );
auto mu = mpl.mu( e.material_tag(), bar );
for (size_t iQp = 0; iQp < pqps.size(); iQp++)
{
const auto& pqp = pqps[iQp];
const vecxd phi = re.basis_functions(iQp);
auto [E, H] = mpl.eval_analytical_solution(e.material_tag(), pqp.point(), state.curr_time);
maxwell_6ple Fh;
Fh.Ex = state.emf_next.Ex.segment(ofs, cbs).dot(phi);
Fh.Ey = state.emf_next.Ey.segment(ofs, cbs).dot(phi);
Fh.Ez = state.emf_next.Ez.segment(ofs, cbs).dot(phi);
Fh.Hx = state.emf_next.Hx.segment(ofs, cbs).dot(phi);
Fh.Hy = state.emf_next.Hy.segment(ofs, cbs).dot(phi);
Fh.Hz = state.emf_next.Hz.segment(ofs, cbs).dot(phi);
double ae;
ae = E(0) - Fh.Ex; err.Ex += pqp.weight() * ae * ae;
ae = E(1) - Fh.Ey; err.Ey += pqp.weight() * ae * ae;
ae = E(2) - Fh.Ez; err.Ez += pqp.weight() * ae * ae;
ae = H(0) - Fh.Hx; err.Hx += pqp.weight() * ae * ae;
ae = H(1) - Fh.Hy; err.Hy += pqp.weight() * ae * ae;
ae = H(2) - Fh.Hz; err.Hz += pqp.weight() * ae * ae;
energy.Ex += 0.5*eps*(Fh.Ex*Fh.Ex)*pqp.weight();
energy.Ey += 0.5*eps*(Fh.Ey*Fh.Ey)*pqp.weight();
energy.Ez += 0.5*eps*(Fh.Ez*Fh.Ez)*pqp.weight();
energy.Hx += 0.5*mu*(Fh.Hx*Fh.Hx)*pqp.weight();
energy.Hy += 0.5*mu*(Fh.Hy*Fh.Hy)*pqp.weight();
energy.Hz += 0.5*mu*(Fh.Hz*Fh.Hz)*pqp.weight();
}
}
}
ofs << energy.Ex << " " << energy.Ey << " " << energy.Ez << " " << energy.Hx;
ofs << " " << energy.Hy << " " << energy.Hz << " ";
ofs << energy.Ex + energy.Ey + energy.Ez + energy.Hx + energy.Hy + energy.Hz;// << std::endl;
ofs << " " << sqrt(err) << std::endl;
}
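In compact form, the quantities accumulated by `validate()` are the discrete electromagnetic energy and, per Cartesian component, an L2-type error against the analytical solution (a sketch, assuming `pqp.weight()` already includes the Jacobian of the map to the physical element):

$$
\mathcal{E}_c = \sum_{T}\sum_{q} w_q\,\frac{\varepsilon}{2}\,F_{c,h}(x_q)^2
\quad\left(\text{with } \frac{\mu}{2} \text{ for the } H \text{ components}\right),
\qquad
e_c = \Bigl(\sum_{T}\sum_{q} w_q\,\bigl(F_c(x_q,t) - F_{c,h}(x_q)\bigr)^2\Bigr)^{1/2},
$$

where $F_c \in \{E_x, E_y, E_z, H_x, H_y, H_z\}$, $F_c$ is the analytical field and $F_{c,h}$ the discrete one. The six component energies, their sum and the six error norms are appended to `energy.txt` at every validated timestep.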
//static void
//validate(const model& mod, maxwell::solver_state_gpu& state, maxwell::parameter_loader& mpl)
//{}
template<typename State>
void solver_mainloop(const model& mod, State& state, maxwell::parameter_loader& mpl)
{
#ifdef USE_MPI
int proc_rank;
MPI_Comm_rank(MPI_COMM_WORLD, &proc_rank);
#endif /* USE_MPI */
initialize_solver(mod, state, mpl);
state.delta_t = mpl.sim_dt();
@@ -204,10 +309,21 @@ void test_it(const model& mod, State& state, maxwell::parameter_loader& mpl)
auto cycle_print_rate = mpl.postpro_cyclePrintRate();
auto ti = mpl.sim_timeIntegrator();
#ifdef USE_MPI
if (proc_rank == 0) {
#endif /* USE_MPI */
std::cout << " BEGINNING SIMULATION" << std::endl;
std::cout << "I will do " << num_timesteps << " of " << state.delta_t;
std::cout << "s each." << std::endl;
std::cout << "Time integrator: " << mpl.sim_timeIntegratorName() << std::endl;
#ifdef USE_MPI
}
#endif /* USE_MPI */
std::ofstream ofs;
bool validate_solution = mpl.has_analytical_solution();
if (validate_solution)
ofs.open("energy.txt");
mpl.call_initialization_callback();
#ifdef ENABLE_EXP_GEOMETRIC_DIODE
@@ -228,13 +344,27 @@ void test_it(const model& mod, State& state, maxwell::parameter_loader& mpl)
if ( (silo_output_rate != 0) and (i%silo_output_rate == 0) )
export_fields_to_silo(mod, state, mpl);
if ( validate_solution )
validate(mod, state, mpl, ofs);
swap(state, mpl);
if ( (cycle_print_rate != 0) and (i%cycle_print_rate == 0) )
{
double time = tc.toc();
double dofs_s_proc = (mod.num_dofs()*6*cycle_print_rate)/(time);
#ifdef USE_MPI
double dofs_s_total;
MPI_Reduce(&dofs_s_proc, &dofs_s_total, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
if (proc_rank == 0)
{
std::cout << "Cycle " << i << ": t = " << state.curr_time << " s";
std::cout << ", DOFs/s: " << dofs_s_total << std::endl;
}
#else /* USE_MPI */
std::cout << "Cycle " << i << ": t = " << state.curr_time << " s";
std::cout << ", DOFs/s: " << dofs_s_proc << std::endl;
#endif /* USE_MPI */
tc.tic();
}
}
@@ -306,12 +436,18 @@ int main(int argc, char *argv[])
#ifdef DONT_USE_STD_FILESYSTEM
mkdir(mpl.sim_name().c_str(), 0755);
#else
std::filesystem::create_directory( mpl.sim_name() );
#endif
#ifdef USE_MPI
if (comm_rank == 0) {
#endif /* USE_MPI */
gmsh::initialize();
gmsh::option::setNumber("General.Terminal", 0);
gmsh::open( mpl.sim_gmshmodel() );
#ifdef USE_MPI
}
#endif /* USE_MPI */
model mod( mpl.sim_geomorder(), mpl.sim_approxorder() );
@@ -349,7 +485,7 @@ int main(int argc, char *argv[])
#ifdef ENABLE_GPU_SOLVER
maxwell::solver_state_gpu state_g;
register_lua_usertypes_bystate(mpl, mod, state_g);
solver_mainloop(mod, state_g, mpl);
#else
std::cout << "GPU solver not compiled. Exiting." << std::endl;
#endif /* ENABLE_GPU_SOLVER */
@@ -358,7 +494,7 @@ int main(int argc, char *argv[])
{
maxwell::solver_state state_c;
register_lua_usertypes_bystate(mpl, mod, state_c);
solver_mainloop(mod, state_c, mpl);
}
//gmsh::finalize();
......
#include <iostream>
#ifdef USE_MPI
#include <mpi.h>
#endif /* USE_MPI */
#include "param_loader.h"
parameter_loader_base::parameter_loader_base()
@@ -44,6 +49,15 @@ parameter_loader_base::init(void)
lua.set_function("enable_volume_sources",
&parameter_loader_base::enable_volume_sources,
this);
#ifdef USE_MPI
int comm_rank, comm_size;
MPI_Comm_rank(MPI_COMM_WORLD, &comm_rank);
MPI_Comm_size(MPI_COMM_WORLD, &comm_size);
lua["parallel"] = lua.create_table();
lua["parallel"]["comm_rank"] = comm_rank;
lua["parallel"]["comm_size"] = comm_size;
#endif /* USE_MPI */
}
bool
......