Skip to content
Snippets Groups Projects
Commit f2b143db authored by Matteo Cicuttin's avatar Matteo Cicuttin
Browse files

OpenACC working but slow.

parent 331bb9b0
No related branches found
No related tags found
No related merge requests found
......@@ -21,7 +21,19 @@ endif()
find_package(OpenACC)
if (OpenACC_CXX_FOUND)
add_definitions(-DHAVE_OPENACC)
if (ENABLE_SINGLE)
add_executable(fd_openacc_single fd_openacc.cpp)
target_compile_definitions(fd_openacc_single PUBLIC -DSINGLE_PRECISION)
target_compile_options(fd_openacc_single PUBLIC ${OpenACC_CXX_FLAGS})
target_link_libraries(fd_openacc_single ${LINK_LIBS} -lgomp)
endif()
if (ENABLE_DOUBLE)
add_executable(fd_openacc_double fd_openacc.cpp)
target_compile_options(fd_openacc_double PUBLIC ${OpenACC_CXX_FLAGS})
target_link_libraries(fd_openacc_double ${LINK_LIBS} -lgomp)
endif()
endif()
option(ENABLE_TIMESTEP_OUTPUT "Save timesteps (don't use during perf meas)" OFF)
......
......@@ -20,7 +20,7 @@ visit_dump(const fd_grid<T>& g, const std::string& fn)
static_assert(std::is_same<T,double>::value or std::is_same<T,float>::value, "Only double or float");
DBfile *db = nullptr;
db = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL, "visit output", DB_HDF5);
db = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL, "visit output", DB_PDB);
if (!db)
{
std::cout << "Cannot write simulation output" << std::endl;
......
......@@ -5,9 +5,123 @@
#include "fd_wave_cpu.hpp"
/* pgc++ -O3 -I /home/math0471p/matteo/mysoft/silo/include/ -L /home/math0471p/matteo/mysoft/silo/lib/ -DHAVE_SILO -DSAVE_TIMESTEPS -acc -ta=nvidia:managed,time -Minfo=accel fd_openacc.cpp -lsilo */
template<typename T>
double solve_openacc(wave_equation_context<T>& wec, size_t nths)
double solve_openacc(wave_equation_context<T>& wec)
{
/* Simulation parameters */
int maxcol = wec.g_curr.domain_cols();
int maxrow = wec.g_curr.domain_rows();
T dt = wec.dt;
T c = wec.velocity;
T a = wec.damping;
assert(maxcol > 1);
assert(maxrow > 1);
double time = 0.0;
#ifdef SAVE_ITERTIME
std::ofstream ofs;
ofs.open("itertime-naive.txt");
#endif /* SAVE_ITERTIME */
/**** Initialize constants ****/
static const T w0 = -205.0/72.0;
static const T w1 = 8.0/5.0;
static const T w2 = -1.0/5.0;
static const T w3 = 8.0/315.0;
static const T w4 = -1.0/560.0;
static const T w[9] = { w4, w3, w2, w1, w0, w1, w2, w3, w4 };
T * __restrict__ u_prev = wec.g_prev.data();
T * __restrict__ u_curr = wec.g_curr.data();
T * __restrict__ u_next = wec.g_next.data();
size_t nelem = wec.g_curr.size();
#define U_OFFSET(i,j) ( (2*WAVE_8_HALO_SIZE+maxcol)*(i+WAVE_8_HALO_SIZE) + (j+WAVE_8_HALO_SIZE) )
//#pragma acc data copy(u_prev[0:nelem])
//#pragma acc data copy(u_curr[0:nelem])
//#pragma acc data copy(u_next[0:nelem])
for (size_t iter = 0; iter < wec.maxiter; iter++)
{
auto start = std::chrono::high_resolution_clock::now();
T kx2 = c*c * dt*dt * (maxcol-1)*(maxcol-1);
T ky2 = c*c * dt*dt * (maxrow-1)*(maxrow-1);
T one_minus_adt = (1.0 - a*dt);
T two_minus_adt = (2.0 - a*dt);
#pragma omp parallel for shared(maxrow, maxcol, u_prev, u_curr, u_next)
#pragma acc kernels
#pragma acc loop independent
for (size_t i = 0; i < maxrow; i++)
{
#pragma acc loop independent
for (size_t j = 0; j < maxcol; j++)
{
T lapl = 0.0;
for (int k = -WAVE_8_HALO_SIZE; k <= WAVE_8_HALO_SIZE; k++)
lapl += kx2 * w[k+WAVE_8_HALO_SIZE] * u_curr[ U_OFFSET(i,j+k) ];
for (int k = -WAVE_8_HALO_SIZE; k <= WAVE_8_HALO_SIZE; k++)
lapl += ky2 * w[k+WAVE_8_HALO_SIZE] * u_curr[ U_OFFSET(i+k,j) ];
T val = lapl -
one_minus_adt * u_prev[ U_OFFSET(i,j) ] +
two_minus_adt * u_curr[ U_OFFSET(i,j) ];
if ( (i == 0) or (j == 0) or (i == maxrow-1) or (j == maxcol-1) )
val = 0;
u_next[ U_OFFSET(i,j) ] = val;
}
}
auto stop = std::chrono::high_resolution_clock::now();
std::chrono::duration<double, std::milli> ms = stop - start;
time += ms.count();
std::swap(u_prev, u_curr);
std::swap(u_curr, u_next);
#ifdef HAVE_SILO
#ifdef SAVE_TIMESTEPS
if ( (iter%100) == 0 )
{
//#pragma acc update self(u_curr[0:nelem])
std::stringstream ss;
ss << "wave_openacc_" << iter << ".silo";
visit_dump(wec.g_curr, ss.str());
}
#endif /* SAVE_TIMESTEPS */
#endif /* HAVE_SILO */
#ifdef SAVE_ITERTIME
ofs << i << " " << ms.count() << std::endl;
#endif /* SAVE_ITERTIME */
}
std::cout << "[Wave][OpenACC] Iteration Time: " << time/wec.maxiter << "ms" << std::endl;
std::cout << "[Wave][OpenACC] Wall Time: " << time << "ms" << std::endl;
double itertime = time/wec.maxiter;
double gflops_s = 58*(maxrow*maxcol)/(1e6*itertime);
std::cout << "[Wave][OpenACC] GFlops/s: " << gflops_s << std::endl;
size_t kernel_bytes = 3*sizeof(T)*(maxrow*maxcol);
double gbytes_s = kernel_bytes/(1e6*itertime);
std::cout << "[Wave][OpenACC] Bandwidth: " << gbytes_s << "GB/s" << std::endl;
#ifdef HAVE_SILO
visit_dump(wec.g_curr, "wave_openacc_lastiter.silo");
#endif /* HAVE_SILO */
return time/wec.maxiter;
}
int main(void)
......@@ -20,8 +134,11 @@ int main(void)
std::cout << "Precision: double" << std::endl;
#endif
for (size_t sz = 128; sz <= 1024; sz *= 2)
double time;
for (size_t sz = 256; sz <= 256; sz *= 2)
{
std::cout << sz << std::endl;
wave_equation_context<T> wec(sz, sz, 1, 0.1, 0.0001, 5000);
wec.init();
......@@ -30,3 +147,5 @@ int main(void)
return 0;
}
......@@ -14,6 +14,8 @@
#include <sstream>
#include <string.h>
#include <pmmintrin.h>
#include <xmmintrin.h>
#include "fd_grid.hpp"
#include "fd_dataio.hpp"
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment