Skip to content
Snippets Groups Projects
Commit 08833662 authored by Marco D'Antonio
Browse files

CPU profiling implemented

parent 40fe1636
No related branches found
No related tags found
No related merge requests found
......@@ -383,6 +383,7 @@ public:
size_t num_cells() const;
size_t num_faces() const;
/* Number of entities in the model; the definition returns entities.size(). */
size_t num_entities() const;
/* Total of num_cell_orientations() summed over every entity in the model. */
size_t num_orientations() const;
/* NOTE(review): begin()/end() presumably delimit the model's entity
 * container -- confirm against their definitions. */
std::vector<entity>::const_iterator begin() const;
std::vector<entity>::const_iterator end() const;
......
......@@ -1087,6 +1087,16 @@ model::num_entities(void) const
return entities.size();
}
/* Return the total number of cell orientations in the model, obtained by
 * adding up num_cell_orientations() of every entity. An empty model
 * yields 0. */
size_t
model::num_orientations() const
{
    size_t total_orientations{0};

    for (const auto& ent : entities)
    {
        total_orientations += ent.num_cell_orientations();
    }

    return total_orientations;
}
model::entofs_pair
......
......@@ -69,74 +69,116 @@ void initialize_solver(const model& mod, State& state, const maxwell::parameter_
/* Register the profilers used by the solver on state.logger and attach to
 * each one the estimated work ("GFlops") and memory traffic ("GB") derived
 * from the sizes of the model.
 *
 * mod   : model whose entity/cell/face/DoF/flux counts drive the estimates.
 * state : solver state; only state.logger is touched here.
 * ti    : time integrator; LEAPFROG registers separate H/E curl profilers,
 *         any other value registers the combined variants.
 *
 * NOTE(review): this listing interleaves lines from before and after the
 * commit (e.g. num_cells and num_entities are declared twice, and several
 * `.set_quantity("GB", 0);` lines are immediately followed by a second
 * `.set_quantity("GB", ...)` after the statement was already terminated).
 * Reconcile against the real file before compiling. */
template<typename State>
void initialize_profiling(const model& mod, State& state, time_integrator_type ti)
{
/* Problem sizes read from the model. */
auto num_entities = mod.num_entities();
auto num_cells = mod.num_cells_world();
auto num_all_faces = mod.num_faces();
/* Per-cell sizes, both derived from the approximation order. */
auto cell_dofs = num_dofs_3D(mod.approximation_order);
auto cell_fluxes = num_dofs_2D(mod.approximation_order);
auto num_all_dofs = mod.num_dofs();
auto num_all_fluxes = mod.num_fluxes();
auto num_cells = mod.num_cells_world();
auto num_entities = mod.num_entities();
auto num_all_orientations = mod.num_orientations();
/* Dump the sizes to stdout so the estimates below can be cross-checked. */
std::cout << "num_entities: " << num_entities << std::endl
<< "num_cells: " << num_cells << std::endl
<< "num_all_faces: " << num_all_faces << std::endl
<< "cell_dofs: " << cell_dofs << std::endl
<< "cell_fluxes: " << cell_fluxes << std::endl
<< "num_all_dofs: " << num_all_dofs << std::endl
<< "num_all_fluxes: " << num_all_fluxes << std::endl;
/* NOTE(review): the factor 6 is presumably the number of field components
 * (3 for E, 3 for H) -- confirm. */
state.logger.register_profiler("solver", TYPE)
.set_quantity("DoFs", mod.num_dofs()*6);
.set_quantity("DoFs", num_all_dofs*6);
/* Element counts shared by the curl estimates of both branches.
 * NOTE(review): in the "GB" expressions below, `*8/1e9` presumably converts
 * a count of 8-byte values into gigabytes -- confirm the scalar type. */
auto jacobians_read = 3*3*num_cells;
auto dm_read = 3*cell_dofs*cell_dofs;
auto curl_readwrite = 6*num_all_dofs;
auto sources_application = 3*num_all_dofs;
if (ti == time_integrator_type::LEAPFROG)
{
/* Leapfrog: H and E curls are profiled separately. */
#ifdef TM_NEW_CURLS
state.logger.register_profiler("compute_curls_H", TYPE)
.set_quantity("GFlops", ((45*cell_dofs+9)*num_all_dofs)/1e9)
.set_quantity("GB", 0);
.set_quantity("GB", (jacobians_read+dm_read+curl_readwrite+sources_application)*8/1e9);
state.logger.register_profiler("compute_curls_E", TYPE)
.set_quantity("GFlops", ((45*cell_dofs+6)*num_all_dofs)/1e9)
.set_quantity("GB", 0);
.set_quantity("GB", (jacobians_read+dm_read+curl_readwrite)*8/1e9);
#else
/* Without TM_NEW_CURLS the estimate is built from a `derivatives` term
 * (counted three times) plus a per-field curl term. */
auto derivatives = jacobians_read+dm_read+4*num_all_dofs;
auto curls_h = 4*num_all_dofs;
auto curls_e = 3*num_all_dofs;
state.logger.register_profiler("compute_curls_H", TYPE)
.set_quantity("GFlops", ((21*cell_dofs*num_all_dofs)*3 + 6*num_all_dofs)/1e9)
.set_quantity("GB", 0);
.set_quantity("GB", (derivatives*3+curls_h)*8/1e9);
state.logger.register_profiler("compute_curls_E", TYPE)
.set_quantity("GFlops", ((21*cell_dofs*num_all_dofs)*3 + 3*num_all_dofs)/1e9)
.set_quantity("GB", 0);
.set_quantity("GB", (derivatives*3+curls_e)*8/1e9);
#endif /* TM_NEW_CURLS */
state.logger.register_profiler("compute_field_jumps", TYPE)
.set_quantity("GFlops", 3*num_all_fluxes/1e9)
.set_quantity("GB", 0);
.set_quantity("GB", 9*num_all_fluxes*8/1e9);
/* Traffic terms for the planar flux computation. */
auto normals_read = num_all_faces;
auto dets_read = num_all_faces;
auto sources_imposition = 18*num_all_fluxes;
auto matparams_readwrite = 4*num_all_fluxes;
auto fluxes_write = 3*num_all_fluxes;
state.logger.register_profiler("compute_fluxes_planar", TYPE)
.set_quantity("GFlops", 37*num_all_fluxes/1e9)
.set_quantity("GB", 0);
.set_quantity("GB", (normals_read+dets_read+sources_imposition+matparams_readwrite+fluxes_write)*8/1e9);
/* dets_read is reused for the lifting estimate, now counted per cell
 * instead of per face. */
dets_read = num_cells;
auto lm_read = num_all_orientations*cell_dofs*4*cell_fluxes;
auto lifting_readwrite = num_all_fluxes+num_all_dofs;
state.logger.register_profiler("compute_flux_lifting", TYPE)
.set_quantity("GFlops", 21*cell_dofs*num_all_dofs*3/1e9)
.set_quantity("GB", 0);
.set_quantity("GB", 3*(dets_read+lm_read+lifting_readwrite)*8/1e9);
}
else
{
/* Any other integrator: a single "compute_curls" profiler covers both
 * fields, and the per-kernel factors are doubled relative to the
 * leapfrog branch (6 instead of 3 in the lifting estimate, for example). */
#ifdef TM_NEW_CURLS
state.logger.register_profiler("compute_curls", TYPE)
.set_quantity("GFlops", ((45*cell_dofs+6)*num_all_dofs + (45*cell_dofs+9)*num_all_dofs)/1e9)
.set_quantity("GB", 2*(6*num_all_dofs + num_entities*(3*cell_dofs*cell_dofs+3*3*num_cells))*8/1e9);
.set_quantity("GB", (2*(jacobians_read+dm_read+curl_readwrite)+sources_application)*8/1e9);
#else
auto derivatives = jacobians_read+dm_read+4*num_all_dofs;
auto curls = 4*num_all_dofs + 3*num_all_dofs;
state.logger.register_profiler("compute_curls", TYPE)
.set_quantity("GFlops", ((21*cell_dofs*num_all_dofs)*6 + 9*num_all_dofs)/1e9)
.set_quantity("GB", 0);
.set_quantity("GB", (derivatives*6+curls)*8/1e9);
#endif /* TM_NEW_CURLS */
/* With USE_MPI the jumps and the fluxes are profiled as two separate
 * kernels; otherwise a single fused profiler is registered. */
#ifdef USE_MPI
state.logger.register_profiler("compute_field_jumps", TYPE)
.set_quantity("GFlops", 6*num_all_fluxes/1e9)
.set_quantity("GB", 0);
.set_quantity("GB", 18*num_all_fluxes*8/1e9);
auto normals_read = num_all_faces;
auto dets_read = num_all_faces;
auto sources_imposition = 18*num_all_fluxes;
auto matparams_readwrite = 8*num_all_fluxes;
auto fluxes_write = 6*num_all_fluxes;
state.logger.register_profiler("compute_fluxes_planar", TYPE)
.set_quantity("GFlops", 68*num_all_fluxes/1e9)
.set_quantity("GB", 0);
.set_quantity("GB", (normals_read+dets_read+sources_imposition+matparams_readwrite+fluxes_write)*8/1e9);
#else
auto normals_read = num_all_faces;
auto dets_read = num_all_faces;
auto jumps_and_sources = 24*num_all_fluxes;
auto matparams_readwrite = 8*num_all_fluxes;
auto fluxes_write = 6*num_all_fluxes;
state.logger.register_profiler("compute_field_jumps_and_fluxes", TYPE)
.set_quantity("GFlops", 72*num_all_fluxes/1e9)
.set_quantity("GB", 0);
.set_quantity("GB", (normals_read+dets_read+jumps_and_sources+matparams_readwrite+fluxes_write)*8/1e9);
#endif /* USE_MPI */
/* dets_read (declared in whichever USE_MPI branch was compiled) is reused
 * for the lifting estimate, now counted per cell. */
dets_read = num_cells;
auto lm_read = num_all_orientations*cell_dofs*4*cell_fluxes;
auto lifting_readwrite = num_all_fluxes+num_all_dofs;
state.logger.register_profiler("compute_flux_lifting", TYPE)
.set_quantity("GFlops", 21*cell_dofs*num_all_dofs*6/1e9)
.set_quantity("GB", 0);
.set_quantity("GB", 6*(dets_read+lm_read+lifting_readwrite)*8/1e9);
}
}
#endif /* ENABLE_PROFILING */
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment