Skip to content
Snippets Groups Projects
Commit d9441a2c authored by Matteo Cicuttin
Browse files

Better field in texture actually.

parent 43c92df9
Branches
Tags
No related merge requests found
......@@ -129,4 +129,4 @@ void reshape_dofs(const entity_data_cpu&, const entity_data_gpu&, const vecxd&,
void
gpu_compute_field_derivatives(entity_data_gpu& edg,
const double* F, double *dF_dx, double* dF_dy, double* dF_dz);
gpuTextureObject_t F, double *dF_dx, double* dF_dy, double* dF_dz);
......@@ -3,7 +3,7 @@
template<size_t K>
__global__ void
gpu_deriv_planar(const double *F, const double * __restrict__ J,
gpu_deriv_planar(gpuTextureObject_t F, const double * __restrict__ J,
gpuTextureObject_t DM_tex, double * __restrict__ dF_dx,
double * __restrict__ dF_dy, double * __restrict__ dF_dz,
int32_t num_all_elems, int32_t* orients, int32_t dof_base)
......@@ -30,19 +30,19 @@ gpu_deriv_planar(const double *F, const double * __restrict__ J,
{
int32_t d_ofs = DM_orient + DM_row + 3*KS::num_bf*dof;
int32_t f_ofs = elem_dof_base + dof;
double v = fetch_tex(DM_tex, d_ofs) * F[f_ofs];
double v = fetch_tex(DM_tex, d_ofs) * fetch_tex(F, f_ofs);
accm_dF_dx += J[jac_ofs+0] * v;
accm_dF_dy += J[jac_ofs+3] * v;
accm_dF_dz += J[jac_ofs+6] * v;
d_ofs = DM_orient + DM_row + 3*KS::num_bf*dof + KS::num_bf;
v = fetch_tex(DM_tex, d_ofs) * F[f_ofs];
v = fetch_tex(DM_tex, d_ofs) * fetch_tex(F, f_ofs);
accm_dF_dx += J[jac_ofs+1] * v;
accm_dF_dy += J[jac_ofs+4] * v;
accm_dF_dz += J[jac_ofs+7] * v;
d_ofs = DM_orient + DM_row + 3*KS::num_bf*dof + 2*KS::num_bf;
v = fetch_tex(DM_tex, d_ofs) * F[f_ofs];
v = fetch_tex(DM_tex, d_ofs) * fetch_tex(F, f_ofs);
accm_dF_dx += J[jac_ofs+2] * v;
accm_dF_dy += J[jac_ofs+5] * v;
accm_dF_dz += J[jac_ofs+8] * v;
......@@ -56,7 +56,7 @@ gpu_deriv_planar(const double *F, const double * __restrict__ J,
template<size_t K>
void
launch_deriv_kernel(entity_data_gpu& edg,
const double* f, double *df_dx, double* df_dy, double* df_dz)
gpuTextureObject_t f, double *df_dx, double* df_dy, double* df_dz)
{
const auto THREADS_PER_BLOCK = kernel_gpu_sizes<K>::deriv_threads;
auto num_blocks = edg.num_bf*edg.num_all_elems/THREADS_PER_BLOCK;
......@@ -78,7 +78,7 @@ launch_deriv_kernel(entity_data_gpu& edg,
void
gpu_compute_field_derivatives(entity_data_gpu& edg,
const double* f, double *df_dx, double* df_dy, double* df_dz)
gpuTextureObject_t f, double *df_dx, double* df_dy, double* df_dz)
{
......
......@@ -60,14 +60,14 @@ int profile_differentiation(int geometric_order, int approximation_order)
entity_data_gpu edg(ed);
/* Prepare I/O vectors and call kernel */
device_vector<double> Pf_gpu(Pf_cpu.data(), Pf_cpu.size());
texture_allocator<double> Pf_gpu(Pf_cpu.data(), Pf_cpu.size());
device_vector<double> df_dx_gpu(Pf_cpu.size());
device_vector<double> df_dy_gpu(Pf_cpu.size());
device_vector<double> df_dz_gpu(Pf_cpu.size());
timecounter_gpu tc;
tc.tic();
gpu_compute_field_derivatives(edg, Pf_gpu.data(), df_dx_gpu.data(),
gpu_compute_field_derivatives(edg, Pf_gpu.get_texture(), df_dx_gpu.data(),
df_dy_gpu.data(), df_dz_gpu.data());
double time = tc.toc();
......
......@@ -110,7 +110,7 @@ int test_differentiation_convergence(int geometric_order, int approximation_orde
}
/* Prepare I/O vectors and call kernel */
device_vector<double> Pf_gpu(Pf.data(), Pf.size());
texture_allocator<double> Pf_gpu(Pf.data(), Pf.size());
device_vector<double> df_dx_gpu(model_num_dofs);
device_vector<double> df_dy_gpu(model_num_dofs);
device_vector<double> df_dz_gpu(model_num_dofs);
......@@ -119,8 +119,8 @@ int test_differentiation_convergence(int geometric_order, int approximation_orde
{
timecounter_gpu tc;
tc.tic();
gpu_compute_field_derivatives(edg, Pf_gpu.data(), df_dx_gpu.data(),
df_dy_gpu.data(), df_dz_gpu.data());
gpu_compute_field_derivatives(edg, Pf_gpu.get_texture(),
df_dx_gpu.data(), df_dy_gpu.data(), df_dz_gpu.data());
double time = tc.toc();
auto num_cells = edg.num_all_elems;
......@@ -225,8 +225,8 @@ int main(void)
int failed_tests = 0;
std::cout << Bmagentafg << " *** TESTING: DIFFERENTIATION ***" << reset << std::endl;
for (size_t go = 1; go < 5; go++)
for (size_t ao = go; ao < 5; ao++)
for (size_t go = 1; go < 2; go++)
for (size_t ao = go; ao < 7; ao++)
failed_tests += test_differentiation_convergence(go, ao);
return failed_tests;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment