Select Git revision
Stommel.lua
Forked from
gmsh / gmsh
Source project has a limited visibility.
fd_openacc.cpp 4.48 KiB
#include <iostream>
#include <fstream>
#include <cstdio>
#include <unistd.h>
#include "fd_wave_cpu.hpp"
/* pgc++ -O3 -I /home/math0471p/matteo/mysoft/silo/include/ -L /home/math0471p/matteo/mysoft/silo/lib/ -DHAVE_SILO -DSAVE_TIMESTEPS -acc -ta=nvidia:managed,time -Minfo=accel fd_openacc.cpp -lsilo */
template<typename T>
double solve_openacc(wave_equation_context<T>& wec)
{
/* Simulation parameters */
int maxcol = wec.g_curr.domain_cols();
int maxrow = wec.g_curr.domain_rows();
T dt = wec.dt;
T c = wec.velocity;
T a = wec.damping;
assert(maxcol > 1);
assert(maxrow > 1);
double time = 0.0;
#ifdef SAVE_ITERTIME
std::ofstream ofs;
ofs.open("itertime-naive.txt");
#endif /* SAVE_ITERTIME */
/**** Initialize constants ****/
static const T w0 = -205.0/72.0;
static const T w1 = 8.0/5.0;
static const T w2 = -1.0/5.0;
static const T w3 = 8.0/315.0;
static const T w4 = -1.0/560.0;
static const T w[9] = { w4, w3, w2, w1, w0, w1, w2, w3, w4 };
T * __restrict__ u_prev = wec.g_prev.data();
T * __restrict__ u_curr = wec.g_curr.data();
T * __restrict__ u_next = wec.g_next.data();
size_t nelem = wec.g_curr.size();
#define U_OFFSET(i,j) ( (2*WAVE_8_HALO_SIZE+maxcol)*(i+WAVE_8_HALO_SIZE) + (j+WAVE_8_HALO_SIZE) )
//#pragma acc data copy(u_prev[0:nelem])
//#pragma acc data copy(u_curr[0:nelem])
//#pragma acc data copy(u_next[0:nelem])
for (size_t iter = 0; iter < wec.maxiter; iter++)
{
auto start = std::chrono::high_resolution_clock::now();
T kx2 = c*c * dt*dt * (maxcol-1)*(maxcol-1);
T ky2 = c*c * dt*dt * (maxrow-1)*(maxrow-1);
T one_minus_adt = (1.0 - a*dt);
T two_minus_adt = (2.0 - a*dt);
#pragma omp parallel for shared(maxrow, maxcol, u_prev, u_curr, u_next)
#pragma acc kernels
#pragma acc loop independent
for (size_t i = 0; i < maxrow; i++)
{
#pragma acc loop independent
for (size_t j = 0; j < maxcol; j++)
{
T lapl = 0.0;
for (int k = -WAVE_8_HALO_SIZE; k <= WAVE_8_HALO_SIZE; k++)
lapl += kx2 * w[k+WAVE_8_HALO_SIZE] * u_curr[ U_OFFSET(i,j+k) ];
for (int k = -WAVE_8_HALO_SIZE; k <= WAVE_8_HALO_SIZE; k++)
lapl += ky2 * w[k+WAVE_8_HALO_SIZE] * u_curr[ U_OFFSET(i+k,j) ];
T val = lapl -
one_minus_adt * u_prev[ U_OFFSET(i,j) ] +
two_minus_adt * u_curr[ U_OFFSET(i,j) ];
if ( (i == 0) or (j == 0) or (i == maxrow-1) or (j == maxcol-1) )
val = 0;
u_next[ U_OFFSET(i,j) ] = val;
}
}
auto stop = std::chrono::high_resolution_clock::now();
std::chrono::duration<double, std::milli> ms = stop - start;
time += ms.count();
std::swap(u_prev, u_curr);
std::swap(u_curr, u_next);
#ifdef HAVE_SILO
#ifdef SAVE_TIMESTEPS
if ( (iter%100) == 0 )
{
//#pragma acc update self(u_curr[0:nelem])
std::stringstream ss;
ss << "wave_openacc_" << iter << ".silo";
visit_dump(wec.g_curr, ss.str());
}
#endif /* SAVE_TIMESTEPS */
#endif /* HAVE_SILO */
#ifdef SAVE_ITERTIME
ofs << i << " " << ms.count() << std::endl;
#endif /* SAVE_ITERTIME */
}
std::cout << "[Wave][OpenACC] Iteration Time: " << time/wec.maxiter << "ms" << std::endl;
std::cout << "[Wave][OpenACC] Wall Time: " << time << "ms" << std::endl;
double itertime = time/wec.maxiter;
double gflops_s = 58*(maxrow*maxcol)/(1e6*itertime);
std::cout << "[Wave][OpenACC] GFlops/s: " << gflops_s << std::endl;
size_t kernel_bytes = 3*sizeof(T)*(maxrow*maxcol);
double gbytes_s = kernel_bytes/(1e6*itertime);
std::cout << "[Wave][OpenACC] Bandwidth: " << gbytes_s << "GB/s" << std::endl;
#ifdef HAVE_SILO
visit_dump(wec.g_curr, "wave_openacc_lastiter.silo");
#endif /* HAVE_SILO */
return time/wec.maxiter;
}
int main(void)
{
#ifdef SINGLE_PRECISION
using T = float;
std::cout << "Precision: single" << std::endl;
#else
using T = double;
std::cout << "Precision: double" << std::endl;
#endif
double time;
for (size_t sz = 256; sz <= 256; sz *= 2)
{
std::cout << sz << std::endl;
wave_equation_context<T> wec(sz, sz, 1, 0.1, 0.0001, 5000);
wec.init();
time = solve_openacc(wec);
}
return 0;
}