Skip to content
Snippets Groups Projects
Commit 9ea23067 authored by Matteo Cicuttin's avatar Matteo Cicuttin
Browse files

Restrict pointers, field again in texture.

parent 19888b70
No related branches found
No related tags found
No related merge requests found
......@@ -129,4 +129,4 @@ void reshape_dofs(const entity_data_cpu&, const entity_data_gpu&, const vecxd&,
void
gpu_compute_field_derivatives(entity_data_gpu& edg,
const double* F, double *dF_dx, double* dF_dy, double* dF_dz);
gpuTextureObject_t F, double *dF_dx, double* dF_dy, double* dF_dz);
......@@ -3,9 +3,10 @@
template<size_t K>
__global__ void
gpu_deriv_planar(const double *F, const double *J, gpuTextureObject_t DM_tex,
double *dF_dx, double* dF_dy, double* dF_dz, int32_t num_all_elems,
int32_t* orients)
gpu_deriv_planar(gpuTextureObject_t F, const double * __restrict__ J,
gpuTextureObject_t DM_tex, double * __restrict__ dF_dx,
double * __restrict__ dF_dy, double * __restrict__ dF_dz,
int32_t num_all_elems, int32_t* orients)
{
using KS = kernel_gpu_sizes<K>;
......@@ -29,19 +30,19 @@ gpu_deriv_planar(const double *F, const double *J, gpuTextureObject_t DM_tex,
{
int32_t d_ofs = DM_orient + DM_row + 3*KS::num_bf*dof;
int32_t f_ofs = elem_dof_base + dof;
double v = fetch_tex(DM_tex, d_ofs) * F[f_ofs];
double v = fetch_tex(DM_tex, d_ofs) * fetch_tex(F, f_ofs);
accm_dF_dx += J[jac_ofs+0] * v;
accm_dF_dy += J[jac_ofs+3] * v;
accm_dF_dz += J[jac_ofs+6] * v;
d_ofs = DM_orient + DM_row + 3*KS::num_bf*dof + KS::num_bf;
v = fetch_tex(DM_tex, d_ofs) * F[f_ofs];
v = fetch_tex(DM_tex, d_ofs) * fetch_tex(F, f_ofs);
accm_dF_dx += J[jac_ofs+1] * v;
accm_dF_dy += J[jac_ofs+4] * v;
accm_dF_dz += J[jac_ofs+7] * v;
d_ofs = DM_orient + DM_row + 3*KS::num_bf*dof + 2*KS::num_bf;
v = fetch_tex(DM_tex, d_ofs) * F[f_ofs];
v = fetch_tex(DM_tex, d_ofs) * fetch_tex(F, f_ofs);
accm_dF_dx += J[jac_ofs+2] * v;
accm_dF_dy += J[jac_ofs+5] * v;
accm_dF_dz += J[jac_ofs+8] * v;
......@@ -55,7 +56,7 @@ gpu_deriv_planar(const double *F, const double *J, gpuTextureObject_t DM_tex,
template<size_t K>
void
launch_deriv_kernel(entity_data_gpu& edg,
const double* f, double *df_dx, double* df_dy, double* df_dz)
gpuTextureObject_t f, double *df_dx, double* df_dy, double* df_dz)
{
const auto THREADS_PER_BLOCK = kernel_gpu_sizes<K>::deriv_threads;
auto num_blocks = edg.num_bf*edg.num_all_elems/THREADS_PER_BLOCK;
......@@ -77,7 +78,7 @@ launch_deriv_kernel(entity_data_gpu& edg,
void
gpu_compute_field_derivatives(entity_data_gpu& edg,
const double* f, double *df_dx, double* df_dy, double* df_dz)
gpuTextureObject_t f, double *df_dx, double* df_dy, double* df_dz)
{
......@@ -108,6 +109,6 @@ gpu_compute_field_derivatives(entity_data_gpu& edg,
break;
default:
std::cout << "compute_field_derivatives: invalid order" << std::endl;
throw std::invalid_argument("compute_field_derivatives: invalid order");
}
}
\ No newline at end of file
......@@ -69,14 +69,14 @@ int test_differentiation_convergence(int geometric_order, int approximation_orde
entity_data_gpu edg(ed);
/* Prepare I/O vectors and call kernel */
device_vector<double> Pf_gpu(Pf_cpu.data(), Pf_cpu.size());
texture_allocator<double> Pf_gpu(Pf_cpu.data(), Pf_cpu.size());
device_vector<double> df_dx_gpu(Pf_cpu.size());
device_vector<double> df_dy_gpu(Pf_cpu.size());
device_vector<double> df_dz_gpu(Pf_cpu.size());
timecounter_gpu tc;
tc.tic();
gpu_compute_field_derivatives(edg, Pf_gpu.data(), df_dx_gpu.data(),
gpu_compute_field_derivatives(edg, Pf_gpu.get_texture(), df_dx_gpu.data(),
df_dy_gpu.data(), df_dz_gpu.data());
double time = tc.toc();
......@@ -162,7 +162,7 @@ int main(void)
std::cout << Bmagentafg << " *** TESTING: DIFFERENTIATION ***" << reset << std::endl;
for (size_t go = 1; go < 2; go++)
for (size_t ao = go; ao < 6; ao++)
for (size_t ao = go; ao < 7; ao++)
failed_tests += test_differentiation_convergence(go, ao);
return failed_tests;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment