Slight RK4 Maxwell optimization, CPU.

82e4a308 · Matteo Cicuttin · 0dcb0e72 · 82e4a308
Commit 82e4a308 authored 3 years ago by Matteo Cicuttin
--- a/src/maxwell/maxwell_cpu.cpp
+++ b/src/maxwell/maxwell_cpu.cpp
@@ -238,6 +238,128 @@ compute_field_jumps(solver_state& state, const field& in)
    }
 }
+/**************************************************************************
+ * Computational kernels: fluxes
+ */
+static void
+compute_field_jumps_and_fluxes(solver_state& state, const field& in)
+{
+    double jEx = 0.0;
+    double jEy = 0.0;
+    double jEz = 0.0;
+    double jHx = 0.0;
+    double jHy = 0.0;
+    double jHz = 0.0;
+    for (auto& ed : state.eds)
+    {
+        size_t num_all_faces = num_elems_all_orientations(ed) * ed.num_faces_per_elem;
+        for (size_t iF = 0; iF < num_all_faces; iF++)
+        {
+            auto n = ed.normals.row(iF);
+            auto nx = n[0];
+            auto ny = n[1];
+            auto nz = n[2];
+            auto face_det = ed.face_determinants(iF);
+            for (size_t i = 0; i < ed.num_fluxes; i++)
+            {
+                auto lofs = iF*ed.num_fluxes + i;
+                auto gofs = ed.flux_base + lofs;
+                if (ed.fluxdofs_neigh[lofs] != NOT_PRESENT)
+                {
+                    auto pos_mine  = ed.fluxdofs_mine[lofs];
+                    auto pos_neigh = ed.fluxdofs_neigh[lofs];
+                    jEx = in.Ex[pos_mine] - in.Ex[pos_neigh] - state.bndsrcs.Ex[gofs];
+                    jEy = in.Ey[pos_mine] - in.Ey[pos_neigh] - state.bndsrcs.Ey[gofs];
+                    jEz = in.Ez[pos_mine] - in.Ez[pos_neigh] - state.bndsrcs.Ez[gofs];
+                    jHx = in.Hx[pos_mine] - in.Hx[pos_neigh] + state.bndsrcs.Hx[gofs];
+                    jHy = in.Hy[pos_mine] - in.Hy[pos_neigh] + state.bndsrcs.Hy[gofs];
+                    jHz = in.Hz[pos_mine] - in.Hz[pos_neigh] + state.bndsrcs.Hx[gofs];
+                }
+                else
+                {
+                    auto bc = state.matparams.bc_coeffs[gofs];
+                    auto pos_mine  = ed.fluxdofs_mine[lofs];
+                    jEx = bc * in.Ex[pos_mine] - state.bndsrcs.Ex[gofs];
+                    jEy = bc * in.Ey[pos_mine] - state.bndsrcs.Ey[gofs];
+                    jEz = bc * in.Ez[pos_mine] - state.bndsrcs.Ez[gofs];
+                    jHx = (2.0 - bc) * in.Hx[pos_mine] + state.bndsrcs.Hx[gofs];
+                    jHy = (2.0 - bc) * in.Hy[pos_mine] + state.bndsrcs.Hy[gofs];
+                    jHz = (2.0 - bc) * in.Hz[pos_mine] + state.bndsrcs.Hz[gofs];
+                }
+                auto ndotE = nx*jEx + ny*jEy + nz*jEz;
+                auto ndotH = nx*jHx + ny*jHy + nz*jHz;
+                auto aE = face_det * state.matparams.aE[gofs];
+                auto bE = face_det * state.matparams.bE[gofs];
+                auto aH = face_det * state.matparams.aH[gofs];
+                auto bH = face_det * state.matparams.bH[gofs];
+                /* Compute fluxes */
+                state.fluxes.Ex[gofs] = aE*(nz*jHy - ny*jHz) + bE*(ndotE*nx - jEx);
+                state.fluxes.Ey[gofs] = aE*(nx*jHz - nz*jHx) + bE*(ndotE*ny - jEy);
+                state.fluxes.Ez[gofs] = aE*(ny*jHx - nx*jHy) + bE*(ndotE*nz - jEz);
+                state.fluxes.Hx[gofs] = aH*(ny*jEz - nz*jEy) + bH*(ndotH*nx - jHx);
+                state.fluxes.Hy[gofs] = aH*(nz*jEx - nx*jEz) + bH*(ndotH*ny - jHy);
+                state.fluxes.Hz[gofs] = aH*(nx*jEy - ny*jEx) + bH*(ndotH*nz - jHz);
+            }
+        }
+    }
+}
+static void
+compute_fluxes_planar(solver_state& state)
+{
+    for (auto& ed : state.eds)
+    {
+        size_t num_all_faces = num_elems_all_orientations(ed) * ed.num_faces_per_elem;
+        for (size_t iF = 0; iF < num_all_faces; iF++)
+        {
+            auto n = ed.normals.row(iF);
+            auto nx = n[0];
+            auto ny = n[1];
+            auto nz = n[2];
+            auto face_det = ed.face_determinants(iF);
+            for (size_t i = 0; i < ed.num_fluxes; i++)
+            {
+                auto lofs = iF*ed.num_fluxes + i;
+                auto gofs = ed.flux_base + lofs;
+                /* Sources are imposed on jumps */
+                auto jEx = state.jumps.Ex[gofs] - state.bndsrcs.Ex[gofs];
+                auto jEy = state.jumps.Ey[gofs] - state.bndsrcs.Ey[gofs];
+                auto jEz = state.jumps.Ez[gofs] - state.bndsrcs.Ez[gofs];
+                auto jHx = state.jumps.Hx[gofs] + state.bndsrcs.Hx[gofs];
+                auto jHy = state.jumps.Hy[gofs] + state.bndsrcs.Hy[gofs];
+                auto jHz = state.jumps.Hz[gofs] + state.bndsrcs.Hz[gofs];
+                auto ndotE = nx*jEx + ny*jEy + nz*jEz;
+                auto ndotH = nx*jHx + ny*jHy + nz*jHz;
+                auto aE = face_det * state.matparams.aE[gofs];
+                auto bE = face_det * state.matparams.bE[gofs];
+                auto aH = face_det * state.matparams.aH[gofs];
+                auto bH = face_det * state.matparams.bH[gofs];
+                /* Compute fluxes */
+                state.fluxes.Ex[gofs] = aE*(nz*jHy - ny*jHz) + bE*(ndotE*nx - jEx);
+                state.fluxes.Ey[gofs] = aE*(nx*jHz - nz*jHx) + bE*(ndotE*ny - jEy);
+                state.fluxes.Ez[gofs] = aE*(ny*jHx - nx*jHy) + bE*(ndotE*nz - jEz);
+                state.fluxes.Hx[gofs] = aH*(ny*jEz - nz*jEy) + bH*(ndotH*nx - jHx);
+                state.fluxes.Hy[gofs] = aH*(nz*jEx - nx*jEz) + bH*(ndotH*ny - jHy);
+                state.fluxes.Hz[gofs] = aH*(nx*jEy - ny*jEx) + bH*(ndotH*nz - jHz);
+            }
+        }
+    }
+}
 #ifdef USE_MPI
 static void
 exchange_boundary_data(const model& mod, solver_state& state)
@@ -458,57 +580,6 @@ exchange_boundary_data_H(const model& mod, solver_state& state)
 }
 #endif /* USE_MPI */
-/**************************************************************************
- * Computational kernels: fluxes
- */
-static void
-compute_fluxes_planar(solver_state& state)
-{
-    for (auto& ed : state.eds)
-    {
-        size_t num_all_faces = num_elems_all_orientations(ed) * ed.num_faces_per_elem;
-        for (size_t iF = 0; iF < num_all_faces; iF++)
-        {
-            auto n = ed.normals.row(iF);
-            auto nx = n[0];
-            auto ny = n[1];
-            auto nz = n[2];
-            auto face_det = ed.face_determinants(iF);
-            for (size_t i = 0; i < ed.num_fluxes; i++)
-            {
-                auto lofs = iF*ed.num_fluxes + i;
-                auto gofs = ed.flux_base + lofs;
-                /* Sources are imposed on jumps */
-                auto jEx = state.jumps.Ex[gofs] - state.bndsrcs.Ex[gofs];
-                auto jEy = state.jumps.Ey[gofs] - state.bndsrcs.Ey[gofs];
-                auto jEz = state.jumps.Ez[gofs] - state.bndsrcs.Ez[gofs];
-                auto jHx = state.jumps.Hx[gofs] + state.bndsrcs.Hx[gofs];
-                auto jHy = state.jumps.Hy[gofs] + state.bndsrcs.Hy[gofs];
-                auto jHz = state.jumps.Hz[gofs] + state.bndsrcs.Hz[gofs];
-                auto ndotE = nx*jEx + ny*jEy + nz*jEz;
-                auto ndotH = nx*jHx + ny*jHy + nz*jHz;
-                auto aE = face_det * state.matparams.aE[gofs];
-                auto bE = face_det * state.matparams.bE[gofs];
-                auto aH = face_det * state.matparams.aH[gofs];
-                auto bH = face_det * state.matparams.bH[gofs];
-                /* Compute fluxes */
-                state.fluxes.Ex[gofs] = aE*(nz*jHy - ny*jHz) + bE*(ndotE*nx - jEx);
-                state.fluxes.Ey[gofs] = aE*(nx*jHz - nz*jHx) + bE*(ndotE*ny - jEy);
-                state.fluxes.Ez[gofs] = aE*(ny*jHx - nx*jHy) + bE*(ndotE*nz - jEz);
-                state.fluxes.Hx[gofs] = aH*(ny*jEz - nz*jEy) + bH*(ndotH*nx - jHx);
-                state.fluxes.Hy[gofs] = aH*(nz*jEx - nx*jEz) + bH*(ndotH*ny - jHy);
-                state.fluxes.Hz[gofs] = aH*(nx*jEy - ny*jEx) + bH*(ndotH*nz - jHz);
-            }
-        }
-    }
-}
 static void
 compute_fluxes_planar_E(solver_state& state)
 {
@@ -599,13 +670,13 @@ compute_fluxes_planar_H(solver_state& state)
 static void
 compute_fluxes(const model& mod, solver_state& state, const field& in, field& out)
 {
-    compute_field_jumps(state, in);
 #ifdef USE_MPI
+    compute_field_jumps(state, in);
    exchange_boundary_data(mod, state);
-#endif /* USE_MPI */
    compute_fluxes_planar(state);
+#else
+    compute_field_jumps_and_fluxes(state, in);
+#endif /* USE_MPI */
    for (auto& ed : state.eds)
    {