diff --git a/doc/changelog.md b/doc/changelog.md index 4addfd7af35db989baab9c000f14f96123c76539..cc1ee8067da36fa2b702fa49a05435a0330f1f4f 100644 --- a/doc/changelog.md +++ b/doc/changelog.md @@ -18,3 +18,5 @@ Unavailable features: - Switched back to C++17 due to various problems on Eigen and Mac OS X. - Use system GMSH if installed and only after look at the hardcoded path (a55af8ab). - Improved export to SILO. E, H, and J fields are now exportable as nodal or zonal variables. It is also possible to disable the export of a specific field, see `lua_api.md` (8a831181). +- Preliminary implementation of leapfrog integration on CPU + diff --git a/doc/lua_api.md b/doc/lua_api.md index 7ba122f2d831d89ac208be1ce6e9ff47bab7089b..8610533dd2b8308a62ed5bd6d8e5149e57cbd1a3 100644 --- a/doc/lua_api.md +++ b/doc/lua_api.md @@ -20,6 +20,7 @@ This document describes the API available on the version v0.1 of the solver. - `sim.use_gpu` (0/1): enable/disable GPU usage. - `sim.approx_order` (integer): method approximation order. - `sim.geom_order` (integer): geometry order. Only order 1 supported for now. +- sim.time_integrator (string): `"rk4"`, `"leapfrog"` or `"euler"`. Euler is only for test purporses. ### Postprocessing general variables diff --git a/include/maxwell_common.h b/include/maxwell_common.h index 813655e6cee078c61ee5a8dfa5ab4236092031d9..d60047cc9783ca474d2477326f77961d67e47196 100644 --- a/include/maxwell_common.h +++ b/include/maxwell_common.h @@ -57,8 +57,8 @@ eval_boundary_sources(const model& mod, const parameter_loader& mpl, return mpl.eval_boundary_source(bd.gmsh_entity, pt, state.curr_time); }; auto fH = [&](const point_3d& pt) -> vec3d { - vec3d E = mpl.eval_boundary_source(bd.gmsh_entity, pt, state.curr_time); - return n.cross(E)/Z; + vec3d H = mpl.eval_boundary_source(bd.gmsh_entity, pt, state.curr_time); + return n.cross(H)/Z; }; matxd prjE = e.project_on_face(iF, fE); auto num_bf = prjE.rows(); diff --git a/include/maxwell_interface.h b/include/maxwell_interface.h index af801f5dbd0be2298e581be4e41de0f20c94daca..7e1f65d6f499000101f595c16f8e91018c5d172d 100644 --- a/include/maxwell_interface.h +++ b/include/maxwell_interface.h @@ -292,7 +292,7 @@ void init_from_model(const model&, solver_state&); void init_matparams(const model&, solver_state&, const parameter_loader&); //void apply_operator(solver_state&, const field&, field&); void export_fields_to_silo(const model&, const solver_state&, const parameter_loader&); -void timestep(solver_state&); +void timestep(solver_state&, time_integrator_type); void prepare_sources(const model&, maxwell::solver_state&, maxwell::parameter_loader&); void do_sources(const model&, maxwell::solver_state&, maxwell::parameter_loader&); void swap(maxwell::solver_state&); @@ -302,7 +302,7 @@ void init_from_model(const model&, solver_state_gpu&); void init_matparams(const model&, solver_state_gpu&, const parameter_loader&); //void apply_operator(solver_state_gpu&, const field_gpu&, field_gpu&); void export_fields_to_silo(const model&, const solver_state_gpu&, const parameter_loader&); -void timestep(solver_state_gpu&); +void timestep(solver_state_gpu&, time_integrator_type); void prepare_sources(const model&, maxwell::solver_state_gpu&, maxwell::parameter_loader&); void do_sources(const model&, maxwell::solver_state_gpu&, maxwell::parameter_loader&); void swap(maxwell::solver_state_gpu&); diff --git a/include/param_loader.h b/include/param_loader.h index dd304ae041c5dae3e8c9e6b3daa1739cef7380f9..f656a71a596586e162e481bd1c74f9555b9cd4a6 100644 --- a/include/param_loader.h +++ b/include/param_loader.h @@ -7,6 +7,18 @@ #include "gmsh_io.h" +enum class field_export_mode { + NONE, + NODAL, + ZONAL +}; + +enum class time_integrator_type { + RK4, + LEAPFROG, + EULER +}; + class parameter_loader_base { bool m_bnd_sources_active; @@ -33,6 +45,7 @@ public: size_t sim_timesteps(void) const; std::string sim_gmshmodel(void) const; bool sim_usegpu(void) const; + time_integrator_type sim_timeIntegrator(void) const; size_t postpro_siloOutputRate(void) const; size_t postpro_cyclePrintRate(void) const; @@ -50,9 +63,5 @@ public: void call_timestep_callback(size_t); }; -enum class field_export_mode { - NONE, - NODAL, - ZONAL -}; + diff --git a/src/maxwell_cpu.cpp b/src/maxwell_cpu.cpp index 03eac4cedb3547aa0d2d4456f2c89bf6561725af..35d8e642b569f9299d6688aefe6f26c46e2506e0 100644 --- a/src/maxwell_cpu.cpp +++ b/src/maxwell_cpu.cpp @@ -105,6 +105,37 @@ compute_curls(solver_state& state, const field& curr, field& next) next.Hz = state.dy.Ex - state.dx.Ey; } +static void +compute_curls_E(solver_state& state, const field& curr, field& next) +{ + for (const auto& ed : state.eds) + { + compute_field_derivatives(ed, curr.Ex, state.dx.Ex, state.dy.Ex, state.dz.Ex); + compute_field_derivatives(ed, curr.Ey, state.dx.Ey, state.dy.Ey, state.dz.Ey); + compute_field_derivatives(ed, curr.Ez, state.dx.Ez, state.dy.Ez, state.dz.Ez); + } + + next.Hx = state.dz.Ey - state.dy.Ez; + next.Hy = state.dx.Ez - state.dz.Ex; + next.Hz = state.dy.Ex - state.dx.Ey; +} + +static void +compute_curls_H(solver_state& state, const field& curr, field& next) +{ + for (const auto& ed : state.eds) + { + compute_field_derivatives(ed, curr.Hx, state.dx.Hx, state.dy.Hx, state.dz.Hx); + compute_field_derivatives(ed, curr.Hy, state.dx.Hy, state.dy.Hy, state.dz.Hy); + compute_field_derivatives(ed, curr.Hz, state.dx.Hz, state.dy.Hz, state.dz.Hz); + } + + next.Ex = state.dy.Hz - state.dz.Hy; + next.Ey = state.dz.Hx - state.dx.Hz; + next.Ez = state.dx.Hy - state.dy.Hx; +} + + static void compute_field_jumps(solver_state& state, const field& in) { @@ -143,6 +174,70 @@ compute_field_jumps(solver_state& state, const field& in) } } +static void +compute_field_jumps_E(solver_state& state, const field& in) +{ + for (auto& ed : state.eds) + { + size_t num_all_fluxes = num_elems_all_orientations(ed) * ed.num_faces_per_elem * ed.num_fluxes; + + #pragma omp parallel for + for (size_t i = 0; i < num_all_fluxes; i++) + { + auto lofs = i; + auto gofs = ed.flux_base + i; + if (ed.fluxdofs_neigh[lofs] != NOT_PRESENT) + { + auto pos_mine = ed.fluxdofs_mine[lofs]; + auto pos_neigh = ed.fluxdofs_neigh[lofs]; + state.jumps.Ex[gofs] = in.Ex[pos_mine] - in.Ex[pos_neigh]; + state.jumps.Ey[gofs] = in.Ey[pos_mine] - in.Ey[pos_neigh]; + state.jumps.Ez[gofs] = in.Ez[pos_mine] - in.Ez[pos_neigh]; + } + else + { + auto bc = state.matparams.bc_coeffs[gofs]; + auto pos_mine = ed.fluxdofs_mine[lofs]; + state.jumps.Ex[gofs] = bc * in.Ex[pos_mine]; + state.jumps.Ey[gofs] = bc * in.Ey[pos_mine]; + state.jumps.Ez[gofs] = bc * in.Ez[pos_mine]; + } + } + } +} + +static void +compute_field_jumps_H(solver_state& state, const field& in) +{ + for (auto& ed : state.eds) + { + size_t num_all_fluxes = num_elems_all_orientations(ed) * ed.num_faces_per_elem * ed.num_fluxes; + + #pragma omp parallel for + for (size_t i = 0; i < num_all_fluxes; i++) + { + auto lofs = i; + auto gofs = ed.flux_base + i; + if (ed.fluxdofs_neigh[lofs] != NOT_PRESENT) + { + auto pos_mine = ed.fluxdofs_mine[lofs]; + auto pos_neigh = ed.fluxdofs_neigh[lofs]; + state.jumps.Hx[gofs] = in.Hx[pos_mine] - in.Hx[pos_neigh]; + state.jumps.Hy[gofs] = in.Hy[pos_mine] - in.Hy[pos_neigh]; + state.jumps.Hz[gofs] = in.Hz[pos_mine] - in.Hz[pos_neigh]; + } + else + { + auto bc = state.matparams.bc_coeffs[gofs]; + auto pos_mine = ed.fluxdofs_mine[lofs]; + state.jumps.Hx[gofs] = (2.0 - bc) * in.Hx[pos_mine]; + state.jumps.Hy[gofs] = (2.0 - bc) * in.Hy[pos_mine]; + state.jumps.Hz[gofs] = (2.0 - bc) * in.Hz[pos_mine]; + } + } + } +} + static void compute_fluxes_planar(solver_state& state) { @@ -192,6 +287,82 @@ compute_fluxes_planar(solver_state& state) } } +/* In leapfrog we use centered fluxes for the moment */ +static void +compute_fluxes_planar_E(solver_state& state) +{ + for (auto& ed : state.eds) + { + size_t num_all_faces = num_elems_all_orientations(ed) * ed.num_faces_per_elem; + + #pragma omp parallel for + for (size_t iF = 0; iF < num_all_faces; iF++) + { + auto n = ed.normals.row(iF); + auto nx = n[0]; + auto ny = n[1]; + auto nz = n[2]; + auto face_det = ed.face_determinants(iF); + + for (size_t i = 0; i < ed.num_fluxes; i++) + { + auto lofs = iF*ed.num_fluxes + i; + auto gofs = ed.flux_base + lofs; + + /* Sources are imposed on jumps */ + auto jHx = state.jumps.Hx[gofs] + state.bndsrcs.Hx[gofs]; + auto jHy = state.jumps.Hy[gofs] + state.bndsrcs.Hy[gofs]; + auto jHz = state.jumps.Hz[gofs] + state.bndsrcs.Hz[gofs]; + + auto aE = face_det * 0.5; + + /* Compute fluxes */ + state.fluxes.Ex[gofs] = aE*(nz*jHy - ny*jHz); + state.fluxes.Ey[gofs] = aE*(nx*jHz - nz*jHx); + state.fluxes.Ez[gofs] = aE*(ny*jHx - nx*jHy); + } + } + } +} + +/* In leapfrog we use centered fluxes for the moment */ +static void +compute_fluxes_planar_H(solver_state& state) +{ + for (auto& ed : state.eds) + { + size_t num_all_faces = num_elems_all_orientations(ed) * ed.num_faces_per_elem; + + #pragma omp parallel for + for (size_t iF = 0; iF < num_all_faces; iF++) + { + auto n = ed.normals.row(iF); + auto nx = n[0]; + auto ny = n[1]; + auto nz = n[2]; + auto face_det = ed.face_determinants(iF); + + for (size_t i = 0; i < ed.num_fluxes; i++) + { + auto lofs = iF*ed.num_fluxes + i; + auto gofs = ed.flux_base + lofs; + + /* Sources are imposed on jumps */ + auto jEx = state.jumps.Ex[gofs] - state.bndsrcs.Ex[gofs]; + auto jEy = state.jumps.Ey[gofs] - state.bndsrcs.Ey[gofs]; + auto jEz = state.jumps.Ez[gofs] - state.bndsrcs.Ez[gofs]; + + auto aH = face_det * 0.5; + + /* Compute fluxes */ + state.fluxes.Hx[gofs] = aH*(ny*jEz - nz*jEy); + state.fluxes.Hy[gofs] = aH*(nz*jEx - nx*jEz); + state.fluxes.Hz[gofs] = aH*(nx*jEy - ny*jEx); + } + } + } +} + static void compute_fluxes(solver_state& state, const field& in, field& out) { @@ -209,13 +380,41 @@ compute_fluxes(solver_state& state, const field& in, field& out) } } +static void +compute_fluxes_E(solver_state& state, const field& in, field& out) +{ + compute_field_jumps_E(state, in); + compute_fluxes_planar_E(state); + + for (auto& ed : state.eds) + { + compute_flux_lifting(ed, state.fluxes.Hx, out.Hx); + compute_flux_lifting(ed, state.fluxes.Hy, out.Hy); + compute_flux_lifting(ed, state.fluxes.Hz, out.Hz); + } +} + +static void +compute_fluxes_H(solver_state& state, const field& in, field& out) +{ + compute_field_jumps_H(state, in); + compute_fluxes_planar_H(state); + + for (auto& ed : state.eds) + { + compute_flux_lifting(ed, state.fluxes.Ex, out.Ex); + compute_flux_lifting(ed, state.fluxes.Ey, out.Ey); + compute_flux_lifting(ed, state.fluxes.Ez, out.Ez); + } +} + static void compute_euler_update(solver_state& state, const field& y, const field& k, double dt, field& out) { #pragma omp parallel for for (size_t i = 0; i < out.num_dofs; i++) { - auto CR = 1 - dt*state.matparams.sigma_over_epsilon[i]; + auto CR = 1.0 - dt*state.matparams.sigma_over_epsilon[i]; auto CC = dt*state.matparams.inv_epsilon[i]; out.Ex[i] = y.Ex[i]*CR + k.Ex[i]*CC; out.Ey[i] = y.Ey[i]*CR + k.Ey[i]*CC; @@ -238,7 +437,7 @@ compute_rk4_weighted_sum(solver_state& state, const field& in, double dt, field& #pragma omp parallel for for (size_t i = 0; i < out.num_dofs; i++) { - auto CR = 1 - dt*state.matparams.sigma_over_epsilon[i]; + auto CR = 1.0 - dt*state.matparams.sigma_over_epsilon[i]; auto CC = dt*state.matparams.inv_epsilon[i]; out.Ex[i] = in.Ex[i]*CR + CC*(state.k1.Ex[i] + 2*state.k2.Ex[i] + 2*state.k3.Ex[i] + state.k4.Ex[i])/6; out.Ey[i] = in.Ey[i]*CR + CC*(state.k1.Ey[i] + 2*state.k2.Ey[i] + 2*state.k3.Ey[i] + state.k4.Ey[i])/6; @@ -262,26 +461,65 @@ apply_operator(solver_state& state, const field& curr, field& next) compute_fluxes(state, curr, next); } +static void +leapfrog(solver_state& state) +{ + auto dt = state.delta_t; + compute_curls_H(state, state.emf_curr, state.tmp); + compute_fluxes_H(state, state.emf_curr, state.tmp); + #pragma omp parallel for + for (size_t i = 0; i < state.emf_next.num_dofs; i++) + { + auto CRM = 1.0 - 0.5*dt*state.matparams.sigma_over_epsilon[i]; + auto CRP = 1.0 + 0.5*dt*state.matparams.sigma_over_epsilon[i]; + auto CR = CRM/CRP; + auto CC = dt*state.matparams.inv_epsilon[i]/CRP; + state.emf_next.Ex[i] = state.emf_curr.Ex[i]*CR + state.tmp.Ex[i]*CC; + state.emf_next.Ey[i] = state.emf_curr.Ey[i]*CR + state.tmp.Ey[i]*CC; + state.emf_next.Ez[i] = state.emf_curr.Ez[i]*CR + state.tmp.Ez[i]*CC; + } + + compute_curls_E(state, state.emf_next, state.tmp); + compute_fluxes_E(state, state.emf_next, state.tmp); + #pragma omp parallel for + for (size_t i = 0; i < state.emf_next.num_dofs; i++) + { + auto CC = dt*state.matparams.inv_mu[i]; + state.emf_next.Hx[i] = state.emf_curr.Hx[i] + state.tmp.Hx[i]*CC; + state.emf_next.Hy[i] = state.emf_curr.Hy[i] + state.tmp.Hy[i]*CC; + state.emf_next.Hz[i] = state.emf_curr.Hz[i] + state.tmp.Hz[i]*CC; + } +} + void -timestep(solver_state& state) +timestep(solver_state& state, time_integrator_type ti) { - /* - apply_operator(state, state.emf_curr, state.tmp); - compute_euler_update(state, state.emf_curr, state.tmp, state.delta_t, state.emf_next); - */ + if (ti == time_integrator_type::EULER) + { + apply_operator(state, state.emf_curr, state.tmp); + compute_euler_update(state, state.emf_curr, state.tmp, state.delta_t, state.emf_next); + } - apply_operator(state, state.emf_curr, state.k1); + if (ti == time_integrator_type::RK4) + { + apply_operator(state, state.emf_curr, state.k1); - compute_euler_update(state, state.emf_curr, state.k1, 0.5*state.delta_t, state.tmp); - apply_operator(state, state.tmp, state.k2); + compute_euler_update(state, state.emf_curr, state.k1, 0.5*state.delta_t, state.tmp); + apply_operator(state, state.tmp, state.k2); - compute_euler_update(state, state.emf_curr, state.k2, 0.5*state.delta_t, state.tmp); - apply_operator(state, state.tmp, state.k3); + compute_euler_update(state, state.emf_curr, state.k2, 0.5*state.delta_t, state.tmp); + apply_operator(state, state.tmp, state.k3); - compute_euler_update(state, state.emf_curr, state.k3, state.delta_t, state.tmp); - apply_operator(state, state.tmp, state.k4); + compute_euler_update(state, state.emf_curr, state.k3, state.delta_t, state.tmp); + apply_operator(state, state.tmp, state.k4); + + compute_rk4_weighted_sum(state, state.emf_curr, state.delta_t, state.emf_next); + } - compute_rk4_weighted_sum(state, state.emf_curr, state.delta_t, state.emf_next); + if (ti == time_integrator_type::LEAPFROG) + { + leapfrog(state); + } state.curr_time += state.delta_t; state.curr_timestep += 1; diff --git a/src/maxwell_gpu.cpp b/src/maxwell_gpu.cpp index 875b83420b67d5d099874e4c3993e3001583ad4c..9e154ff14b9b50d25bd04054e6c773ad9419b456 100644 --- a/src/maxwell_gpu.cpp +++ b/src/maxwell_gpu.cpp @@ -204,28 +204,32 @@ compute_rk4_weighted_sum(solver_state_gpu& state, const field_gpu& in, } void -timestep(solver_state_gpu& state) +timestep(solver_state_gpu& state, time_integrator_type ti) { //timecounter tc; //tc.tic(); - /* - apply_operator(state, state.emf_curr, state.tmp); - compute_euler_update(state, state.emf_curr, state.tmp, state.delta_t, state.emf_next); - */ + if (ti == time_integrator_type::EULER) + { + apply_operator(state, state.emf_curr, state.tmp); + compute_euler_update(state, state.emf_curr, state.tmp, state.delta_t, state.emf_next); + } - apply_operator(state, state.emf_curr, state.k1); + if (ti == time_integrator_type::RK4) + { + apply_operator(state, state.emf_curr, state.k1); - compute_euler_update(state, state.emf_curr, state.k1, 0.5*state.delta_t, state.tmp); - apply_operator(state, state.tmp, state.k2); + compute_euler_update(state, state.emf_curr, state.k1, 0.5*state.delta_t, state.tmp); + apply_operator(state, state.tmp, state.k2); - compute_euler_update(state, state.emf_curr, state.k2, 0.5*state.delta_t, state.tmp); - apply_operator(state, state.tmp, state.k3); + compute_euler_update(state, state.emf_curr, state.k2, 0.5*state.delta_t, state.tmp); + apply_operator(state, state.tmp, state.k3); - compute_euler_update(state, state.emf_curr, state.k3, state.delta_t, state.tmp); - apply_operator(state, state.tmp, state.k4); + compute_euler_update(state, state.emf_curr, state.k3, state.delta_t, state.tmp); + apply_operator(state, state.tmp, state.k4); - compute_rk4_weighted_sum(state, state.emf_curr, state.delta_t, state.emf_next); + compute_rk4_weighted_sum(state, state.emf_curr, state.delta_t, state.emf_next); + } state.curr_time += state.delta_t; state.curr_timestep += 1; diff --git a/src/maxwell_solver.cpp b/src/maxwell_solver.cpp index 71dcee781dfd0766b111ca2fac07faf95339282d..5ab9afd6b711ed93e1633c44b13c8d4afe6cb85d 100644 --- a/src/maxwell_solver.cpp +++ b/src/maxwell_solver.cpp @@ -80,11 +80,13 @@ void test_it(const model& mod, State& state, maxwell::parameter_loader& mpl) omp_set_num_threads(4); #endif + auto ti = mpl.sim_timeIntegrator(); + prepare_sources(mod, state, mpl); for(size_t i = 0; i < num_timesteps; i++) { mpl.call_timestep_callback(i); - timestep(state); + timestep(state, ti); do_sources(mod, state, mpl); diff --git a/src/param_loader.cpp b/src/param_loader.cpp index 034b0b728580a8e956b89a111dda61f783384ca4..0af01bd73006d123e9b02ee952510872dd6b6026 100644 --- a/src/param_loader.cpp +++ b/src/param_loader.cpp @@ -22,6 +22,7 @@ parameter_loader_base::init(void) //lua["sim"]["dt"]; //lua["sim"]["timesteps"]; //lua["sim"]["gmsh_model"]; + lua["sim"]["time_integrator"] = "rk4"; lua["materials"] = lua.create_table(); lua["bndconds"] = lua.create_table(); @@ -210,3 +211,26 @@ parameter_loader_base::source_was_cleared(void) { m_source_changed_state = false; } + +time_integrator_type +parameter_loader_base::sim_timeIntegrator(void) const +{ + std::string ti = lua["sim"]["time_integrator"]; + + if (ti == "rk4") + return time_integrator_type::RK4; + + if (ti == "leapfrog") + return time_integrator_type::LEAPFROG; + + if (ti == "euler") + { + std::cout << "[CONFIG] warning: Euler time integrator is only "; + std::cout << "for testing purposes" << std::endl; + return time_integrator_type::EULER; + } + + std::cout << "[CONFIG] warning: invalid time integrator '"; + std::cout << ti << "'" << std::endl; + return time_integrator_type::RK4; +} \ No newline at end of file