From 935ec21679caee358cecefd925471babaeb840f6 Mon Sep 17 00:00:00 2001 From: Matteo Cicuttin <datafl4sh@toxicnet.eu> Date: Mon, 30 Mar 2020 14:38:48 +0200 Subject: [PATCH] OpenACC testing stuff. --- kokkos-testing/fd_catalog/fd_openacc.cpp | 63 ++++++++++-------------- 1 file changed, 25 insertions(+), 38 deletions(-) diff --git a/kokkos-testing/fd_catalog/fd_openacc.cpp b/kokkos-testing/fd_catalog/fd_openacc.cpp index ad4ffea..0d80c0c 100644 --- a/kokkos-testing/fd_catalog/fd_openacc.cpp +++ b/kokkos-testing/fd_catalog/fd_openacc.cpp @@ -3,6 +3,8 @@ #include <cstdio> #include <unistd.h> +#include <openacc.h> + #include "fd_wave_cpu.hpp" /* pgc++ -O3 -I /home/math0471p/matteo/mysoft/silo/include/ -L /home/math0471p/matteo/mysoft/silo/lib/ -DHAVE_SILO -DSAVE_TIMESTEPS -acc -ta=nvidia:managed,time -Minfo=accel fd_openacc.cpp -lsilo */ @@ -35,27 +37,23 @@ double solve_openacc(wave_equation_context<T>& wec) static const T w4 = -1.0/560.0; static const T w[9] = { w4, w3, w2, w1, w0, w1, w2, w3, w4 }; - T * __restrict__ u_prev = wec.g_prev.data(); - T * __restrict__ u_curr = wec.g_curr.data(); - T * __restrict__ u_next = wec.g_next.data(); size_t nelem = wec.g_curr.size(); + T *d_prev = acc_copyin(wec.g_prev.data(), nelem*sizeof(T)); + T *d_curr = acc_copyin(wec.g_curr.data(), nelem*sizeof(T)); + T *d_next = acc_copyin(wec.g_next.data(), nelem*sizeof(T)); #define U_OFFSET(i,j) ( (2*WAVE_8_HALO_SIZE+maxcol)*(i+WAVE_8_HALO_SIZE) + (j+WAVE_8_HALO_SIZE) ) -//#pragma acc data copy(u_prev[0:nelem]) -//#pragma acc data copy(u_curr[0:nelem]) -//#pragma acc data copy(u_next[0:nelem]) + auto start = std::chrono::high_resolution_clock::now(); + +#pragma acc kernels for (size_t iter = 0; iter < wec.maxiter; iter++) { - auto start = std::chrono::high_resolution_clock::now(); - T kx2 = c*c * dt*dt * (maxcol-1)*(maxcol-1); T ky2 = c*c * dt*dt * (maxrow-1)*(maxrow-1); T one_minus_adt = (1.0 - a*dt); T two_minus_adt = (2.0 - a*dt); -#pragma omp parallel for shared(maxrow, maxcol, u_prev, u_curr, u_next) -#pragma acc kernels #pragma acc loop independent for (size_t i = 0; i < maxrow; i++) { @@ -64,46 +62,35 @@ double solve_openacc(wave_equation_context<T>& wec) { T lapl = 0.0; for (int k = -WAVE_8_HALO_SIZE; k <= WAVE_8_HALO_SIZE; k++) - lapl += kx2 * w[k+WAVE_8_HALO_SIZE] * u_curr[ U_OFFSET(i,j+k) ]; + lapl += kx2 * w[k+WAVE_8_HALO_SIZE] * d_curr[ U_OFFSET(i,j+k) ]; for (int k = -WAVE_8_HALO_SIZE; k <= WAVE_8_HALO_SIZE; k++) - lapl += ky2 * w[k+WAVE_8_HALO_SIZE] * u_curr[ U_OFFSET(i+k,j) ]; + lapl += ky2 * w[k+WAVE_8_HALO_SIZE] * d_curr[ U_OFFSET(i+k,j) ]; T val = lapl - - one_minus_adt * u_prev[ U_OFFSET(i,j) ] + - two_minus_adt * u_curr[ U_OFFSET(i,j) ]; + one_minus_adt * d_prev[ U_OFFSET(i,j) ] + + two_minus_adt * d_curr[ U_OFFSET(i,j) ]; if ( (i == 0) or (j == 0) or (i == maxrow-1) or (j == maxcol-1) ) val = 0; - u_next[ U_OFFSET(i,j) ] = val; + d_next[ U_OFFSET(i,j) ] = val; } } - auto stop = std::chrono::high_resolution_clock::now(); - - std::chrono::duration<double, std::milli> ms = stop - start; - time += ms.count(); - - std::swap(u_prev, u_curr); - std::swap(u_curr, u_next); - -#ifdef HAVE_SILO -#ifdef SAVE_TIMESTEPS - if ( (iter%100) == 0 ) - { - //#pragma acc update self(u_curr[0:nelem]) - std::stringstream ss; - ss << "wave_openacc_" << iter << ".silo"; - visit_dump(wec.g_curr, ss.str()); - } -#endif /* SAVE_TIMESTEPS */ -#endif /* HAVE_SILO */ - -#ifdef SAVE_ITERTIME - ofs << i << " " << ms.count() << std::endl; -#endif /* SAVE_ITERTIME */ + T *d_temp = d_prev; + d_prev = d_curr; + d_curr = d_next; + d_next = d_temp; } + + acc_copyout(wec.g_prev.data(), nelem*sizeof(T)); + acc_copyout(wec.g_curr.data(), nelem*sizeof(T)); + acc_copyout(wec.g_next.data(), nelem*sizeof(T)); + auto stop = std::chrono::high_resolution_clock::now(); + + std::chrono::duration<double, std::milli> ms = stop - start; + time += ms.count(); std::cout << "[Wave][OpenACC] Iteration Time: " << time/wec.maxiter << "ms" << std::endl; std::cout << "[Wave][OpenACC] Wall Time: " << time << "ms" << std::endl; -- GitLab