Commit 4aff7fa6 authored by Matteo Cicuttin

Differentiation matrix in texture.

parent 6f4a26c8
@@ -69,18 +69,18 @@ num_elems_all_orientations(const entity_data_cpu& ed)
 #ifdef ENABLE_GPU_SOLVER
 struct entity_data_gpu
 {
-    int g_order;
-    int a_order;
-    size_t num_all_elems;
-    size_t * element_orientation;
-    size_t num_bf;
+    int g_order;
+    int a_order;
+    size_t num_all_elems;
+    device_vector<int32_t> element_orientation;
+    size_t num_bf;
-    size_t dof_base;
+    size_t dof_base;
-    double* differentiation_matrices;
-    double* jacobians;
-    double* cell_determinants;
+    texture_allocator<double> differentiation_matrices;
+    device_vector<double> jacobians;
+    device_vector<double> cell_determinants;
     entity_data_gpu();
......
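The raw device pointers in entity_data_gpu are replaced by owning wrappers, so the manual gpuFree calls in the destructor (see below) become unnecessary. The device_vector and texture_allocator implementations are not part of this diff; a minimal sketch of a device_vector with the copyin()/data()/size() interface used here, written against the plain CUDA runtime API rather than the project's gpu* abstraction layer, could look like this:

#include <cuda_runtime.h>
#include <cstddef>

/* Illustrative sketch only: the real device_vector is not shown in this diff. */
template<typename T>
class device_vector
{
    T*      m_ptr  = nullptr;
    size_t  m_size = 0;

public:
    device_vector() = default;
    ~device_vector() { if (m_ptr) cudaFree(m_ptr); }

    /* Allocate device storage and copy 'size' elements from host memory. */
    void copyin(const T* host, size_t size)
    {
        if (m_ptr) cudaFree(m_ptr);
        cudaMalloc((void**)&m_ptr, size*sizeof(T));
        cudaMemcpy(m_ptr, host, size*sizeof(T), cudaMemcpyHostToDevice);
        m_size = size;
    }

    T*     data() const { return m_ptr; }
    size_t size() const { return m_size; }
};

With such a wrapper, element_orientation.copyin(eo.data(), eo.size()) in the constructor below replaces the previous gpu_copyin call and the matching manual free.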
@@ -41,6 +41,16 @@ struct kernel_gpu_sizes<3>
     static const size_t parallel_dblocks = 1;
 };
+template<>
+struct kernel_gpu_sizes<4>
+{
+    static const size_t num_bf = 35;
+    static const size_t cells_per_dblock = 12;
+    static const size_t dblock_bf = num_bf * cells_per_dblock;
+    static const size_t dblock_size = 128;
+    static const size_t parallel_dblocks = 1;
+};
 struct kernel_gpu_sizes_runtime
 {
     size_t num_bf;
......
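In the new kernel_gpu_sizes<4> specialization, num_bf = 35 is the dimension of the degree-4 polynomial space on a tetrahedron, (p+1)(p+2)(p+3)/6. A standalone compile-time check (illustrative, not part of the commit):

#include <cstddef>

/* Dimension of P_p on a tetrahedron: (p+1)(p+2)(p+3)/6. */
constexpr size_t tet_basis_size(size_t p)
{
    return (p+1)*(p+2)*(p+3)/6;
}

static_assert(tet_basis_size(4) == 35, "degree-4 tetrahedral basis has 35 functions");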
@@ -2,10 +2,6 @@
 entity_data_gpu::entity_data_gpu()
 {
-    element_orientation = nullptr;
-    differentiation_matrices = nullptr;
-    jacobians = nullptr;
-    cell_determinants = nullptr;
 }
 entity_data_gpu::entity_data_gpu(const entity_data_cpu& ed)
@@ -15,15 +11,15 @@ entity_data_gpu::entity_data_gpu(const entity_data_cpu& ed)
     num_all_elems = num_elems_all_orientations(ed);
-    std::vector<size_t> eo;
+    std::vector<int32_t> eo;
     eo.reserve(num_all_elems);
-    for (size_t iO = 0; iO < ed.num_orientations; iO++)
+    for (int32_t iO = 0; iO < ed.num_orientations; iO++)
     {
         for (size_t iT = 0; iT < ed.num_elems[iO]; iT++)
             eo.push_back(iO);
     }
     assert(eo.size() == num_all_elems);
-    gpu_copyin(eo, &element_orientation);
+    element_orientation.copyin(eo.data(), eo.size());
     num_bf = ed.num_bf;
@@ -44,28 +40,15 @@ entity_data_gpu::entity_data_gpu(const entity_data_cpu& ed)
             ed.differentiation_matrices.block(i*num_bf, 2*num_bf, num_bf, num_bf).transpose();
     }
-    gpu_copyin(dm, &differentiation_matrices);
+    differentiation_matrices.init(dm.data(), dm.size());
-    gpu_copyin(ed.jacobians, &jacobians);
+    jacobians.copyin(ed.jacobians.data(), ed.jacobians.size());
-    gpu_copyin(ed.cell_determinants, &cell_determinants);
+    cell_determinants.copyin(ed.cell_determinants.data(), ed.cell_determinants.size());
 }
 entity_data_gpu::~entity_data_gpu()
 {
-    /*
-    if (element_orientation)
-        checkGPU( gpuFree(element_orientation) );
-    if (differentiation_matrices)
-        checkGPU( gpuFree(differentiation_matrices) );
-    if (jacobians)
-        checkGPU( gpuFree(jacobians) );
-    if (cell_determinants)
-        checkGPU( gpuFree(cell_determinants) );
-    */
 }
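differentiation_matrices.init() now uploads the per-orientation differentiation matrices once and exposes them through a texture object, which the kernels obtain via get_texture(). The texture_allocator itself is not shown in the diff; a plausible sketch under the standard CUDA texture-object API, assuming the double buffer is described to the texture unit as int2 pairs (texture fetches cannot return double directly), is:

#include <cuda_runtime.h>
#include <cstddef>

/* Illustrative sketch only: the real texture_allocator is not shown in this diff. */
template<typename T> class texture_allocator;

template<>
class texture_allocator<double>
{
    double*             m_ptr = nullptr;
    cudaTextureObject_t m_tex = 0;

public:
    void init(const double* host, size_t size)
    {
        cudaMalloc((void**)&m_ptr, size*sizeof(double));
        cudaMemcpy(m_ptr, host, size*sizeof(double), cudaMemcpyHostToDevice);

        /* Describe the linear buffer as int2 elements: each double is fetched
         * as two 32-bit halves and reassembled in the kernel. */
        cudaResourceDesc rd = {};
        rd.resType                = cudaResourceTypeLinear;
        rd.res.linear.devPtr      = m_ptr;
        rd.res.linear.desc        = cudaCreateChannelDesc<int2>();
        rd.res.linear.sizeInBytes = size*sizeof(double);

        cudaTextureDesc td = {};
        td.readMode = cudaReadModeElementType;

        cudaCreateTextureObject(&m_tex, &rd, &td, nullptr);
    }

    cudaTextureObject_t get_texture() const { return m_tex; }

    ~texture_allocator()
    {
        if (m_tex) cudaDestroyTextureObject(m_tex);
        if (m_ptr) cudaFree(m_ptr);
    }
};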
@@ -3,117 +3,48 @@
 template<size_t K>
 __global__ void
-gpu_compute_field_derivatives_kernel_planar_CM(entity_data_gpu edg,
-    const double* F, double *dF_dx, double* dF_dy, double* dF_dz)
+gpu_deriv_planar(const double *F, const double *J, gpuTextureObject_t DM_tex,
+    double *dF_dx, double* dF_dy, double* dF_dz, int32_t num_all_elems,
+    int32_t* orients)
 {
     using KS = kernel_gpu_sizes<K>;
     int ofs_in_entity = (blockIdx.x * blockDim.x + threadIdx.x);
-    int32_t cur_dof_offset = edg.dof_base + ofs_in_entity;
-    if (cur_dof_offset >= edg.num_all_elems*edg.num_bf)
+    int32_t cur_dof_offset = /*edg.dof_base + */ofs_in_entity;
+    if (cur_dof_offset >= num_all_elems*KS::num_bf)
         return;
     int32_t iT = cur_dof_offset / KS::num_bf;
-    int32_t elem_dof_base = edg.dof_base + iT*edg.num_bf;
-    int32_t iO = edg.element_orientation[iT];
-    int32_t DM_row = ofs_in_entity % KS::num_bf;
-    int32_t DM_orient = 3*KS::num_bf*iO*KS::num_bf;
-    double *DM = edg.differentiation_matrices + DM_orient;
-    double accm_dF_dx = 0.0;
-    double accm_dF_dy = 0.0;
-    double accm_dF_dz = 0.0;
-    int32_t jac_ofs = 9*iT;
-    double J[9];
-    for (int32_t i = 0; i < 9; i++)
-        J[i] = edg.jacobians[jac_ofs+i];
-    for (int32_t dof = 0; dof < KS::num_bf; dof++)
-    {
-        int32_t d_ofs = DM_row + KS::num_bf*dof;
-        int32_t f_ofs = elem_dof_base + dof;
-        double v = DM[d_ofs] * F[f_ofs];
-        accm_dF_dx += J[0] * v;
-        accm_dF_dy += J[3] * v;
-        accm_dF_dz += J[6] * v;
-    }
-    for (int32_t dof = 0; dof < KS::num_bf; dof++)
-    {
-        int32_t d_ofs = DM_row + KS::num_bf*KS::num_bf + KS::num_bf*dof;
-        int32_t f_ofs = elem_dof_base + dof;
-        double v = DM[d_ofs] * F[f_ofs];
-        accm_dF_dx += J[1] * v;
-        accm_dF_dy += J[4] * v;
-        accm_dF_dz += J[7] * v;
-    }
-    for (int32_t dof = 0; dof < KS::num_bf; dof++)
-    {
-        int32_t d_ofs = DM_row + 2*KS::num_bf*KS::num_bf + KS::num_bf*dof;
-        int32_t f_ofs = elem_dof_base + dof;
-        double v = DM[d_ofs] * F[f_ofs];
-        accm_dF_dx += J[2] * v;
-        accm_dF_dy += J[5] * v;
-        accm_dF_dz += J[8] * v;
-    }
-    dF_dx[cur_dof_offset] = accm_dF_dx;
-    dF_dy[cur_dof_offset] = accm_dF_dy;
-    dF_dz[cur_dof_offset] = accm_dF_dz;
-}
-template<size_t K>
-__global__ void
-gpu_compute_field_derivatives_kernel_planar(entity_data_gpu edg,
-    const double* F, double *dF_dx, double* dF_dy, double* dF_dz)
-{
-    using KS = kernel_gpu_sizes<K>;
-    int ofs_in_entity = (blockIdx.x * blockDim.x + threadIdx.x);
-    int32_t cur_dof_offset = edg.dof_base + ofs_in_entity;
-    if (cur_dof_offset >= edg.num_all_elems*edg.num_bf)
-        return;
-    int32_t iT = cur_dof_offset / KS::num_bf;
-    int32_t elem_dof_base = edg.dof_base + iT*edg.num_bf;
-    int32_t iO = edg.element_orientation[iT];
+    int32_t elem_dof_base = /*edg.dof_base + */iT*KS::num_bf;
+    int32_t iO = orients[iT];
     int32_t DM_row = ofs_in_entity % KS::num_bf;
     int32_t DM_orient = 3*KS::num_bf*KS::num_bf*iO;
-    double *DM = edg.differentiation_matrices + DM_orient;
     double accm_dF_dx = 0.0;
     double accm_dF_dy = 0.0;
     double accm_dF_dz = 0.0;
     int32_t jac_ofs = 9*iT;
-    double J[9];
-    for (int32_t i = 0; i < 9; i++)
-        J[i] = edg.jacobians[jac_ofs+i];
     for (int32_t dof = 0; dof < KS::num_bf; dof++)
     {
-        int32_t d_ofs = DM_row + 3*KS::num_bf*dof;
+        int32_t d_ofs = DM_orient + DM_row + 3*KS::num_bf*dof;
         int32_t f_ofs = elem_dof_base + dof;
-        double v = DM[d_ofs] * F[f_ofs];
-        accm_dF_dx += J[0] * v;
-        accm_dF_dy += J[3] * v;
-        accm_dF_dz += J[6] * v;
+        double v = fetch_tex(DM_tex, d_ofs) * F[f_ofs];
+        accm_dF_dx += J[jac_ofs+0] * v;
+        accm_dF_dy += J[jac_ofs+3] * v;
+        accm_dF_dz += J[jac_ofs+6] * v;
-        d_ofs = DM_row + 3*KS::num_bf*dof + KS::num_bf;
-        v = DM[d_ofs] * F[f_ofs];
-        accm_dF_dx += J[1] * v;
-        accm_dF_dy += J[4] * v;
-        accm_dF_dz += J[7] * v;
+        d_ofs = DM_orient + DM_row + 3*KS::num_bf*dof + KS::num_bf;
+        v = fetch_tex(DM_tex, d_ofs) * F[f_ofs];
+        accm_dF_dx += J[jac_ofs+1] * v;
+        accm_dF_dy += J[jac_ofs+4] * v;
+        accm_dF_dz += J[jac_ofs+7] * v;
-        d_ofs = DM_row + 3*KS::num_bf*dof + 2*KS::num_bf;
-        v = DM[d_ofs] * F[f_ofs];
-        accm_dF_dx += J[2] * v;
-        accm_dF_dy += J[5] * v;
-        accm_dF_dz += J[8] * v;
+        d_ofs = DM_orient + DM_row + 3*KS::num_bf*dof + 2*KS::num_bf;
+        v = fetch_tex(DM_tex, d_ofs) * F[f_ofs];
+        accm_dF_dx += J[jac_ofs+2] * v;
+        accm_dF_dy += J[jac_ofs+5] * v;
+        accm_dF_dz += J[jac_ofs+8] * v;
     }
     dF_dx[cur_dof_offset] = accm_dF_dx;
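fetch_tex is likewise not defined in this diff; gpuTextureObject_t presumably aliases cudaTextureObject_t on CUDA builds. Under the int2-channel assumption sketched above, the conventional way to read a double through a texture object is:

#include <cuda_runtime.h>
#include <cstdint>

/* Sketch under the int2-channel assumption above, not taken from the diff:
 * fetch the two 32-bit halves and reassemble the double. */
static inline __device__ double
fetch_tex(cudaTextureObject_t tex, int32_t i)
{
    int2 v = tex1Dfetch<int2>(tex, i);
    return __hiloint2double(v.y, v.x);
}

Routing the differentiation matrices through the texture path lets those reads go through the texture cache, while the per-element Jacobian is now read directly from global memory instead of being staged into a local J[9] array as before.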
@@ -130,36 +61,46 @@ gpu_compute_field_derivatives(entity_data_gpu& edg,
     if (edg.num_bf*edg.num_all_elems % THREADS_PER_BLOCK)
         num_blocks += 1;
+    auto J = edg.jacobians.data();
+    auto Dtex = edg.differentiation_matrices.get_texture();
+    int32_t num_elems = edg.num_all_elems;
+    auto orients = edg.element_orientation.data();
+    auto num_orients = edg.element_orientation.size();
     switch (edg.a_order)
     {
     case 1:
         if (edg.g_order == 1)
-            gpu_compute_field_derivatives_kernel_planar<1><<<num_blocks, THREADS_PER_BLOCK>>>(edg, f, df_dx, df_dy, df_dz);
+            gpu_deriv_planar<1><<<num_blocks, THREADS_PER_BLOCK>>>(f, J,
+                Dtex, df_dx, df_dy, df_dz, num_elems, orients);
         //else
         //    compute_field_derivatives_kernel_curved<1>(ed, f, df_dx, df_dy, df_dz);
         break;
     case 2:
         if (edg.g_order == 1)
-            gpu_compute_field_derivatives_kernel_planar<2><<<num_blocks, THREADS_PER_BLOCK>>>(edg, f, df_dx, df_dy, df_dz);
+            gpu_deriv_planar<2><<<num_blocks, THREADS_PER_BLOCK>>>(f, J,
+                Dtex, df_dx, df_dy, df_dz, num_elems, orients);
         //else
        //    compute_field_derivatives_kernel_curved<2>(ed, f, df_dx, df_dy, df_dz);
         break;
     case 3:
         if (edg.g_order == 1)
-            gpu_compute_field_derivatives_kernel_planar<3><<<num_blocks, THREADS_PER_BLOCK>>>(edg, f, df_dx, df_dy, df_dz);
+            gpu_deriv_planar<3><<<num_blocks, THREADS_PER_BLOCK>>>(f, J,
+                Dtex, df_dx, df_dy, df_dz, num_elems, orients);
         //else
         //    compute_field_derivatives_kernel_curved<3>(ed, f, df_dx, df_dy, df_dz);
         break;
-#if 0
     case 4:
         if (edg.g_order == 1)
-            gpu_compute_field_derivatives_kernel_planar<4>(edg, f, df_dx, df_dy, df_dz);
+            gpu_deriv_planar<4><<<num_blocks, THREADS_PER_BLOCK>>>(f, J,
+                Dtex, df_dx, df_dy, df_dz, num_elems, orients);
         //else
         //    compute_field_derivatives_kernel_curved<4>(ed, f, df_dx, df_dy, df_dz);
         break;
+#if 0
     case 5:
         if (edg.g_order == 1)
             gpu_compute_field_derivatives_kernel_planar<5>(edg, f, df_dx, df_dy, df_dz);
......
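The dispatch launches one thread per degree of freedom, so the grid must cover num_bf * num_all_elems threads rounded up to whole blocks, which is what the num_blocks adjustment above does. A worked example with purely illustrative numbers (35 basis functions, 1000 elements, THREADS_PER_BLOCK taken as 128):

/* Illustrative grid sizing, one thread per DoF. */
size_t dofs       = 35 * 1000;          /* num_bf * num_all_elems            */
size_t num_blocks = dofs / 128;         /* 35000 / 128 = 273 full blocks     */
if (dofs % 128)
    num_blocks += 1;                    /* remainder 56, so 274 blocks total */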
@@ -162,7 +162,7 @@ int main(void)
     std::cout << Bmagentafg << " *** TESTING: DIFFERENTIATION ***" << reset << std::endl;
     for (size_t go = 1; go < 2; go++)
-        for (size_t ao = go; ao < 4; ao++)
+        for (size_t ao = go; ao < 5; ao++)
             failed_tests += test_differentiation_convergence(go, ao);
     return failed_tests;
......