Skip to content
Snippets Groups Projects
Commit ce8f502b authored by Matteo Cicuttin's avatar Matteo Cicuttin
Browse files

Templated differentiation kernel.

parent 1ab9f794
Branches
Tags
No related merge requests found
......@@ -258,6 +258,7 @@ entity::populate_entity_data(entity_data_cpu<1>& ed) const
{
assert(g_order == 1);
assert(reference_cells.size() > 0);
ed.a_order = a_order;
ed.num_elems = num_elements_per_orientation();
ed.num_orientations = num_orientations();
ed.num_bf = reference_cells[0].num_basis_functions();
......
......@@ -8,6 +8,7 @@ struct entity_data_cpu;
template<>
struct entity_data_cpu<1>
{
int a_order;
std::vector<size_t> num_elems; /* No. of elements in the entity */
size_t num_orientations; /* No. of bf. orientations */
size_t num_bf; /* No. of dofs per elem */
......
#include "types.h"
#include "entity_data.h"
#include "kernels_cpu.h"
/* Field derivatives kernel, case for elements with planar faces */
void compute_field_derivatives(const entity_data_cpu<1>& ed,
const vecxd& f, vecxd& df_dx, vecxd& df_dy, vecxd& df_dz)
{
/* The elements must be ordered by orientation -> entity::sort_by_orientation() */
size_t orient_elem_base = 0;
for (size_t iO = 0; iO < ed.num_orientations; iO++)
switch (ed.a_order)
{
size_t num_elems = ed.num_elems[iO];
size_t orient_base = orient_elem_base * ed.num_bf;
case 1:
compute_field_derivatives_kernel<1>(ed, f, df_dx, df_dy, df_dz);
break;
for (size_t iT = 0; iT < num_elems; iT++)
{
size_t ent_iT = orient_elem_base + iT;
size_t dofofs = ed.dofs_base + orient_base + iT*ed.num_bf;
case 2:
compute_field_derivatives_kernel<2>(ed, f, df_dx, df_dy, df_dz);
break;
for (size_t odof = 0; odof < ed.num_bf; odof++)
{
double acc_df_dx = 0.0;
double acc_df_dy = 0.0;
double acc_df_dz = 0.0;
case 3:
compute_field_derivatives_kernel<3>(ed, f, df_dx, df_dy, df_dz);
break;
for (size_t refsys_dir = 0; refsys_dir < 3; refsys_dir++)
{
for (size_t idof = 0; idof < ed.num_bf; idof++)
{
auto dm_row = iO*ed.num_bf + odof;
auto dm_col = refsys_dir*ed.num_bf + idof;
double D = ed.differentiation_matrix(dm_row, dm_col);
acc_df_dx += ed.jacobians(3*ent_iT + 0, refsys_dir) * D * f(dofofs+idof);
acc_df_dy += ed.jacobians(3*ent_iT + 1, refsys_dir) * D * f(dofofs+idof);
acc_df_dz += ed.jacobians(3*ent_iT + 2, refsys_dir) * D * f(dofofs+idof);
}
}
case 4:
compute_field_derivatives_kernel<4>(ed, f, df_dx, df_dy, df_dz);
break;
df_dx(dofofs+odof) = acc_df_dx;
df_dy(dofofs+odof) = acc_df_dy;
df_dz(dofofs+odof) = acc_df_dz;
}
}
case 5:
compute_field_derivatives_kernel<5>(ed, f, df_dx, df_dy, df_dz);
break;
orient_elem_base += num_elems;
default:
std::cout << "compute_field_derivatives: invalid order" << std::endl;
}
}
\ No newline at end of file
#pragma once
template<size_t AK>
struct num_dofs_3D
{
static const size_t value = ((AK+3)*(AK+2)*(AK+1))/6;
};
template<size_t AK>
struct num_dofs_2D
{
static const size_t value = ((AK+2)*(AK+1))/2;
};
template<size_t AK>
struct kernel_cpu_sizes
{
static const size_t num_bf = num_dofs_3D<AK>::value;
static const size_t num_fluxes = num_dofs_2D<AK>::value;
static const size_t num_faces = 4;
static const size_t num_all_fluxes = num_fluxes * num_faces;
};
template<size_t AK>
void compute_field_derivatives_kernel(const entity_data_cpu<1>& ed,
const vecxd& f, vecxd& df_dx, vecxd& df_dy, vecxd& df_dz)
{
/* Flop count: 3*ed.num_bf*3*ed.num_bf*num_total_elems */
/* The elements must be ordered by orientation -> entity::sort_by_orientation() */
using KS = kernel_cpu_sizes<AK>;
assert(ed.num_bf == KS::num_bf);
size_t orient_elem_base = 0;
for (size_t iO = 0; iO < ed.num_orientations; iO++)
{
size_t num_elems = ed.num_elems[iO];
size_t orient_base = orient_elem_base * ed.num_bf;
for (size_t iT = 0; iT < num_elems; iT++)
{
size_t ent_iT = orient_elem_base + iT;
size_t dofofs = ed.dofs_base + orient_base + iT*ed.num_bf;
for (size_t odof = 0; odof < KS::num_bf; odof++)
{
double acc_df_dx = 0.0;
double acc_df_dy = 0.0;
double acc_df_dz = 0.0;
for (size_t refsys_dir = 0; refsys_dir < 3; refsys_dir++)
{
for (size_t idof = 0; idof < KS::num_bf; idof++)
{
auto dm_row = iO*ed.num_bf + odof;
auto dm_col = refsys_dir*ed.num_bf + idof;
double D = ed.differentiation_matrix(dm_row, dm_col);
acc_df_dx += ed.jacobians(3*ent_iT + 0, refsys_dir) * D * f(dofofs+idof);
acc_df_dy += ed.jacobians(3*ent_iT + 1, refsys_dir) * D * f(dofofs+idof);
acc_df_dz += ed.jacobians(3*ent_iT + 2, refsys_dir) * D * f(dofofs+idof);
}
}
df_dx(dofofs+odof) = acc_df_dx;
df_dy(dofofs+odof) = acc_df_dy;
df_dz(dofofs+odof) = acc_df_dz;
}
}
orient_elem_base += num_elems;
}
}
void compute_field_derivatives(const entity_data_cpu<1>& ed,
const vecxd& f, vecxd& df_dx, vecxd& df_dy, vecxd& df_dz);
\ No newline at end of file
......@@ -8,6 +8,7 @@
#include "sgr.hpp"
#include "entity_data.h"
#include "kernels_cpu.h"
#include "timecounter.h"
using namespace sgr;
......@@ -65,7 +66,13 @@ int test_differentiation_convergence(int geometric_order, int approximation_orde
entity_data_cpu<1> ed;
e0.populate_entity_data(ed);
timecounter tc;
tc.tic();
compute_field_derivatives(ed, Pf_e0, df_dx_e0, df_dy_e0, df_dz_e0);
double time = tc.toc();
std::cout << "Kernel runtime: " << time << " seconds. Estimated performance: ";
size_t flops = 3*ed.num_bf*3*ed.num_bf*e0.num_elements();
std::cout << flops/(1e9*time) << " GFlops/s" << std::endl;
vecxd Pdf_dx_e0 = e0.project(df_dx);
vecxd Pdf_dy_e0 = e0.project(df_dy);
......
#pragma once
#include <chrono>
#ifdef HAVE_CUDA_RT
#include <cuda_runtime.h>
#endif
class timecounter
{
std::chrono::time_point<std::chrono::steady_clock> begin, end;
public:
timecounter()
{}
void tic()
{
begin = std::chrono::steady_clock::now();
}
double toc()
{
end = std::chrono::steady_clock::now();
std::chrono::duration<double> diff = end-begin;
return diff.count();
}
};
#ifdef HAVE_CUDA_RT
class timecounter_cuda
{
cudaEvent_t start;
cudaEvent_t stop;
public:
timecounter_cuda()
{
cudaEventCreate(&start);
cudaEventCreate(&stop);
}
~timecounter_cuda()
{
cudaEventDestroy(start);
cudaEventDestroy(stop);
}
void tic()
{
cudaEventRecord(start, 0);
}
double toc()
{
cudaEventRecord(stop, 0);
float elapsed;
cudaEventSynchronize(stop);
cudaEventElapsedTime(&elapsed, start, stop);
return elapsed/1000.0;
}
};
#endif
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment