Skip to content
Snippets Groups Projects
Commit 935ec216 authored by Matteo Cicuttin
Browse files

OpenACC testing stuff.

parent 0de25e9a
No related branches found
No related tags found
No related merge requests found
...@@ -3,6 +3,8 @@ ...@@ -3,6 +3,8 @@
#include <cstdio> #include <cstdio>
#include <unistd.h> #include <unistd.h>
#include <openacc.h>
#include "fd_wave_cpu.hpp" #include "fd_wave_cpu.hpp"
/* pgc++ -O3 -I /home/math0471p/matteo/mysoft/silo/include/ -L /home/math0471p/matteo/mysoft/silo/lib/ -DHAVE_SILO -DSAVE_TIMESTEPS -acc -ta=nvidia:managed,time -Minfo=accel fd_openacc.cpp -lsilo */ /* pgc++ -O3 -I /home/math0471p/matteo/mysoft/silo/include/ -L /home/math0471p/matteo/mysoft/silo/lib/ -DHAVE_SILO -DSAVE_TIMESTEPS -acc -ta=nvidia:managed,time -Minfo=accel fd_openacc.cpp -lsilo */
...@@ -35,27 +37,23 @@ double solve_openacc(wave_equation_context<T>& wec) ...@@ -35,27 +37,23 @@ double solve_openacc(wave_equation_context<T>& wec)
static const T w4 = -1.0/560.0; static const T w4 = -1.0/560.0;
static const T w[9] = { w4, w3, w2, w1, w0, w1, w2, w3, w4 }; static const T w[9] = { w4, w3, w2, w1, w0, w1, w2, w3, w4 };
T * __restrict__ u_prev = wec.g_prev.data();
T * __restrict__ u_curr = wec.g_curr.data();
T * __restrict__ u_next = wec.g_next.data();
size_t nelem = wec.g_curr.size(); size_t nelem = wec.g_curr.size();
T *d_prev = acc_copyin(wec.g_prev.data(), nelem*sizeof(T));
T *d_curr = acc_copyin(wec.g_curr.data(), nelem*sizeof(T));
T *d_next = acc_copyin(wec.g_next.data(), nelem*sizeof(T));
#define U_OFFSET(i,j) ( (2*WAVE_8_HALO_SIZE+maxcol)*(i+WAVE_8_HALO_SIZE) + (j+WAVE_8_HALO_SIZE) ) #define U_OFFSET(i,j) ( (2*WAVE_8_HALO_SIZE+maxcol)*(i+WAVE_8_HALO_SIZE) + (j+WAVE_8_HALO_SIZE) )
//#pragma acc data copy(u_prev[0:nelem])
//#pragma acc data copy(u_curr[0:nelem])
//#pragma acc data copy(u_next[0:nelem])
for (size_t iter = 0; iter < wec.maxiter; iter++)
{
auto start = std::chrono::high_resolution_clock::now(); auto start = std::chrono::high_resolution_clock::now();
#pragma acc kernels
for (size_t iter = 0; iter < wec.maxiter; iter++)
{
T kx2 = c*c * dt*dt * (maxcol-1)*(maxcol-1); T kx2 = c*c * dt*dt * (maxcol-1)*(maxcol-1);
T ky2 = c*c * dt*dt * (maxrow-1)*(maxrow-1); T ky2 = c*c * dt*dt * (maxrow-1)*(maxrow-1);
T one_minus_adt = (1.0 - a*dt); T one_minus_adt = (1.0 - a*dt);
T two_minus_adt = (2.0 - a*dt); T two_minus_adt = (2.0 - a*dt);
#pragma omp parallel for shared(maxrow, maxcol, u_prev, u_curr, u_next)
#pragma acc kernels
#pragma acc loop independent #pragma acc loop independent
for (size_t i = 0; i < maxrow; i++) for (size_t i = 0; i < maxrow; i++)
{ {
...@@ -64,47 +62,36 @@ double solve_openacc(wave_equation_context<T>& wec) ...@@ -64,47 +62,36 @@ double solve_openacc(wave_equation_context<T>& wec)
{ {
T lapl = 0.0; T lapl = 0.0;
for (int k = -WAVE_8_HALO_SIZE; k <= WAVE_8_HALO_SIZE; k++) for (int k = -WAVE_8_HALO_SIZE; k <= WAVE_8_HALO_SIZE; k++)
lapl += kx2 * w[k+WAVE_8_HALO_SIZE] * u_curr[ U_OFFSET(i,j+k) ]; lapl += kx2 * w[k+WAVE_8_HALO_SIZE] * d_curr[ U_OFFSET(i,j+k) ];
for (int k = -WAVE_8_HALO_SIZE; k <= WAVE_8_HALO_SIZE; k++) for (int k = -WAVE_8_HALO_SIZE; k <= WAVE_8_HALO_SIZE; k++)
lapl += ky2 * w[k+WAVE_8_HALO_SIZE] * u_curr[ U_OFFSET(i+k,j) ]; lapl += ky2 * w[k+WAVE_8_HALO_SIZE] * d_curr[ U_OFFSET(i+k,j) ];
T val = lapl - T val = lapl -
one_minus_adt * u_prev[ U_OFFSET(i,j) ] + one_minus_adt * d_prev[ U_OFFSET(i,j) ] +
two_minus_adt * u_curr[ U_OFFSET(i,j) ]; two_minus_adt * d_curr[ U_OFFSET(i,j) ];
if ( (i == 0) or (j == 0) or (i == maxrow-1) or (j == maxcol-1) ) if ( (i == 0) or (j == 0) or (i == maxrow-1) or (j == maxcol-1) )
val = 0; val = 0;
u_next[ U_OFFSET(i,j) ] = val; d_next[ U_OFFSET(i,j) ] = val;
} }
} }
T *d_temp = d_prev;
d_prev = d_curr;
d_curr = d_next;
d_next = d_temp;
}
acc_copyout(wec.g_prev.data(), nelem*sizeof(T));
acc_copyout(wec.g_curr.data(), nelem*sizeof(T));
acc_copyout(wec.g_next.data(), nelem*sizeof(T));
auto stop = std::chrono::high_resolution_clock::now(); auto stop = std::chrono::high_resolution_clock::now();
std::chrono::duration<double, std::milli> ms = stop - start; std::chrono::duration<double, std::milli> ms = stop - start;
time += ms.count(); time += ms.count();
std::swap(u_prev, u_curr);
std::swap(u_curr, u_next);
#ifdef HAVE_SILO
#ifdef SAVE_TIMESTEPS
if ( (iter%100) == 0 )
{
//#pragma acc update self(u_curr[0:nelem])
std::stringstream ss;
ss << "wave_openacc_" << iter << ".silo";
visit_dump(wec.g_curr, ss.str());
}
#endif /* SAVE_TIMESTEPS */
#endif /* HAVE_SILO */
#ifdef SAVE_ITERTIME
ofs << i << " " << ms.count() << std::endl;
#endif /* SAVE_ITERTIME */
}
std::cout << "[Wave][OpenACC] Iteration Time: " << time/wec.maxiter << "ms" << std::endl; std::cout << "[Wave][OpenACC] Iteration Time: " << time/wec.maxiter << "ms" << std::endl;
std::cout << "[Wave][OpenACC] Wall Time: " << time << "ms" << std::endl; std::cout << "[Wave][OpenACC] Wall Time: " << time << "ms" << std::endl;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment