# Patch summary (content unchanged; only the line structure is restored).
#
# src/maxwell_gpu.cpp
#   timestep(solver_state_gpu&): the timecounter measurement and the
#   "Timestep N, X DOFs/s" console output are commented out.
#
# src/maxwell_solver.cpp
#   - The maxwell::solver_state overload of do_boundary_sources() is
#     renamed to do_sources().
#   - prepare_sources() and swap() overloads are added for
#     maxwell::solver_state: prepare_sources() forwards to do_sources(),
#     swap() exchanges emf_curr and emf_next.
#   - The maxwell::solver_state_gpu overloads are wrapped in
#     #ifdef ENABLE_GPU_SOLVER:
#       prepare_sources(): evaluates the boundary sources into bndsrcs_cpu,
#         copies them into bndsrcs_buf on memcpy_stream, waits on that
#         stream, then swaps bndsrcs with bndsrcs_buf.
#       do_sources(): evaluates and starts the copy, without waiting.
#       swap(): cudaDeviceSynchronize(), then swaps bndsrcs/bndsrcs_buf
#         and emf_curr/emf_next.
#   - test_it() now calls prepare_sources() before the time loop,
#     do_sources() after timestep(), and swap(state) in place of the
#     inline cudaDeviceSynchronize()/std::swap sequence.
#   - main() re-enables the maxwell::solver_state (state_c) run in the
#     else branch.
#
# NOTE(review): leading indentation of the hunk lines was not recoverable
# and is reconstructed with 4-space steps -- confirm against the target
# files, or apply with whitespace-insensitive matching.
# NOTE(review): the GPU prepare_sources() repeats the two statements of the
# GPU do_sources(); the timing code in timestep() is disabled by commenting
# rather than removed.
diff --git a/src/maxwell_gpu.cpp b/src/maxwell_gpu.cpp
index 1ef290d8d00372cfbddce51adcfb8f9dcae65cfb..98d3984d09316a65a458a7edbc07e11722b694a1 100644
--- a/src/maxwell_gpu.cpp
+++ b/src/maxwell_gpu.cpp
@@ -204,8 +204,8 @@ compute_rk4_weighted_sum(solver_state_gpu& state, const field_gpu& in,
 void
 timestep(solver_state_gpu& state)
 {
-    timecounter tc;
-    tc.tic();
+    //timecounter tc;
+    //tc.tic();
 
     /*
     apply_operator(state, state.emf_curr, state.tmp);
@@ -228,9 +228,9 @@ timestep(solver_state_gpu& state)
     state.curr_time += state.delta_t;
     state.curr_timestep += 1;
 
-    double time = tc.toc();
-    double dofs_per_sec = 6*state.emf_curr.num_dofs/time;
-    std::cout << "Timestep " << state.curr_timestep << ", " << dofs_per_sec << " DOFs/s" << std::endl;
+    //double time = tc.toc();
+    //double dofs_per_sec = 6*state.emf_curr.num_dofs/time;
+    //std::cout << "Timestep " << state.curr_timestep << ", " << dofs_per_sec << " DOFs/s" << std::endl;
 }
 
 
diff --git a/src/maxwell_solver.cpp b/src/maxwell_solver.cpp
index 04f13c8ed87068dacbccbea74bbafdf0ce45cf3d..9dda72de647cbd9a2b273d5dfc4cbe212613687f 100644
--- a/src/maxwell_solver.cpp
+++ b/src/maxwell_solver.cpp
@@ -61,7 +61,7 @@ void initialize_solver(const model& mod, State& state, const maxwell::parameter_
 }
 
 void
-do_boundary_sources(const model& mod, maxwell::solver_state& state,
+do_sources(const model& mod, maxwell::solver_state& state,
     const maxwell::parameter_loader& mpl)
 {
     maxwell::eval_boundary_sources(mod, mpl, state, state.bndsrcs);
@@ -69,15 +69,46 @@ do_boundary_sources(const model& mod, maxwell::solver_state& state,
 }
 
 void
-do_boundary_sources(const model& mod, maxwell::solver_state_gpu& state,
+prepare_sources(const model& mod, maxwell::solver_state& state,
+    const maxwell::parameter_loader& mpl)
+{
+    do_sources(mod, state, mpl);
+}
+
+void
+swap(maxwell::solver_state& state)
+{
+    std::swap(state.emf_curr, state.emf_next);
+}
+
+#ifdef ENABLE_GPU_SOLVER
+void
+prepare_sources(const model& mod, maxwell::solver_state_gpu& state,
     const maxwell::parameter_loader& mpl)
 {
     maxwell::eval_boundary_sources(mod, mpl, state, state.bndsrcs_cpu);
     state.bndsrcs_buf.copyin(state.bndsrcs_cpu, state.memcpy_stream);
-    //maxwell::eval_interface_sources(mod, mpl, state, state.bndsrcs_cpu);
-    //state.memcpy_stream.wait();
+    state.memcpy_stream.wait();
+    std::swap(state.bndsrcs, state.bndsrcs_buf);
 }
 
+void
+do_sources(const model& mod, maxwell::solver_state_gpu& state,
+    const maxwell::parameter_loader& mpl)
+{
+    maxwell::eval_boundary_sources(mod, mpl, state, state.bndsrcs_cpu);
+    state.bndsrcs_buf.copyin(state.bndsrcs_cpu, state.memcpy_stream);
+}
+
+void
+swap(maxwell::solver_state_gpu& state)
+{
+    cudaDeviceSynchronize();
+    std::swap(state.bndsrcs, state.bndsrcs_buf);
+    std::swap(state.emf_curr, state.emf_next);
+}
+#endif
+
 template<typename State>
 void test_it(const model& mod, State& state, const maxwell::parameter_loader& mpl)
 {
@@ -92,12 +123,11 @@ void test_it(const model& mod, State& state, const maxwell::parameter_loader& mp
     omp_set_num_threads(4);
 #endif
 
-    do_boundary_sources(mod, state, mpl);
-    std::swap(state.bndsrcs, state.bndsrcs_buf);
+    prepare_sources(mod, state, mpl);
     for(size_t i = 0; i < num_timesteps; i++)
     {
         timestep(state);
-        do_boundary_sources(mod, state, mpl);
+        do_sources(mod, state, mpl);
 
         std::stringstream ss;
         ss << mpl.sim_name() << "/timestep_" << i << ".silo";
@@ -109,9 +139,7 @@ void test_it(const model& mod, State& state, const maxwell::parameter_loader& mp
             std::cout << "Cycle " << i << ": t = " << state.curr_time << " s";
             std::cout << std::endl;
         }
-        cudaDeviceSynchronize();
-        std::swap(state.bndsrcs, state.bndsrcs_buf);
-        std::swap(state.emf_curr, state.emf_next);
+        swap(state);
     }
 }
 
@@ -155,8 +183,8 @@ int main(int argc, const char *argv[])
     else
     {
 #endif /* ENABLE_GPU_SOLVER */
-        //maxwell::solver_state state_c;
-        //test_it(mod, state_c, mpl);
+        maxwell::solver_state state_c;
+        test_it(mod, state_c, mpl);
 #ifdef ENABLE_GPU_SOLVER
     }
 #endif