From 935ec21679caee358cecefd925471babaeb840f6 Mon Sep 17 00:00:00 2001
From: Matteo Cicuttin <datafl4sh@toxicnet.eu>
Date: Mon, 30 Mar 2020 14:38:48 +0200
Subject: [PATCH] OpenACC testing: stage grids with acc_copyin/acc_copyout and time the whole solve.

---
 kokkos-testing/fd_catalog/fd_openacc.cpp | 63 ++++++++++--------------
 1 file changed, 25 insertions(+), 38 deletions(-)

diff --git a/kokkos-testing/fd_catalog/fd_openacc.cpp b/kokkos-testing/fd_catalog/fd_openacc.cpp
index ad4ffea..0d80c0c 100644
--- a/kokkos-testing/fd_catalog/fd_openacc.cpp
+++ b/kokkos-testing/fd_catalog/fd_openacc.cpp
@@ -3,6 +3,8 @@
 #include <cstdio>
 #include <unistd.h>
 
+#include <openacc.h>
+
 #include "fd_wave_cpu.hpp"
 
 /* pgc++ -O3 -I /home/math0471p/matteo/mysoft/silo/include/ -L /home/math0471p/matteo/mysoft/silo/lib/ -DHAVE_SILO -DSAVE_TIMESTEPS -acc -ta=nvidia:managed,time -Minfo=accel fd_openacc.cpp -lsilo */
@@ -35,27 +37,23 @@ double solve_openacc(wave_equation_context<T>& wec)
     static const T w4 = -1.0/560.0;
     static const T w[9] = { w4, w3, w2, w1, w0, w1, w2, w3, w4 };
 
-    T * __restrict__ u_prev       = wec.g_prev.data();
-    T * __restrict__ u_curr       = wec.g_curr.data();
-    T * __restrict__ u_next       = wec.g_next.data();
     size_t nelem    = wec.g_curr.size();
+	T *d_prev = acc_copyin(wec.g_prev.data(), nelem*sizeof(T));
+	T *d_curr = acc_copyin(wec.g_curr.data(), nelem*sizeof(T));
+	T *d_next = acc_copyin(wec.g_next.data(), nelem*sizeof(T));
 
 #define U_OFFSET(i,j) ( (2*WAVE_8_HALO_SIZE+maxcol)*(i+WAVE_8_HALO_SIZE) + (j+WAVE_8_HALO_SIZE) )
 
-//#pragma acc data copy(u_prev[0:nelem])
-//#pragma acc data copy(u_curr[0:nelem])
-//#pragma acc data copy(u_next[0:nelem])
+    auto start = std::chrono::high_resolution_clock::now();
+
+#pragma acc kernels
     for (size_t iter = 0; iter < wec.maxiter; iter++)
     {
-        auto start = std::chrono::high_resolution_clock::now();
-
         T kx2 = c*c * dt*dt * (maxcol-1)*(maxcol-1);
         T ky2 = c*c * dt*dt * (maxrow-1)*(maxrow-1);
         T one_minus_adt = (1.0 - a*dt);
         T two_minus_adt = (2.0 - a*dt);
 
-#pragma omp parallel for shared(maxrow, maxcol, u_prev, u_curr, u_next)
-#pragma acc kernels
         #pragma acc loop independent
         for (size_t i = 0; i < maxrow; i++)
         {
@@ -64,46 +62,35 @@ double solve_openacc(wave_equation_context<T>& wec)
             {
                 T lapl = 0.0;
                 for (int k = -WAVE_8_HALO_SIZE; k <= WAVE_8_HALO_SIZE; k++)
-                    lapl += kx2 * w[k+WAVE_8_HALO_SIZE] * u_curr[ U_OFFSET(i,j+k) ];
+                    lapl += kx2 * w[k+WAVE_8_HALO_SIZE] * d_curr[ U_OFFSET(i,j+k) ];
         
                 for (int k = -WAVE_8_HALO_SIZE; k <= WAVE_8_HALO_SIZE; k++)
-                    lapl += ky2 * w[k+WAVE_8_HALO_SIZE] * u_curr[ U_OFFSET(i+k,j) ];
+                    lapl += ky2 * w[k+WAVE_8_HALO_SIZE] * d_curr[ U_OFFSET(i+k,j) ];
 
                 T val = lapl - 
-                        one_minus_adt * u_prev[ U_OFFSET(i,j) ] + 
-                        two_minus_adt * u_curr[ U_OFFSET(i,j) ];
+                        one_minus_adt * d_prev[ U_OFFSET(i,j) ] + 
+                        two_minus_adt * d_curr[ U_OFFSET(i,j) ];
 
                 if ( (i == 0) or (j == 0) or (i == maxrow-1) or (j == maxcol-1) )
                     val = 0;
                 
-                u_next[ U_OFFSET(i,j) ] = val;
+                d_next[ U_OFFSET(i,j) ] = val;
             }
         }
 
-        auto stop = std::chrono::high_resolution_clock::now();
-
-        std::chrono::duration<double, std::milli> ms = stop - start;
-        time += ms.count();
-        
-        std::swap(u_prev, u_curr);
-        std::swap(u_curr, u_next);
-
-#ifdef HAVE_SILO
-#ifdef SAVE_TIMESTEPS
-        if ( (iter%100) == 0 )
-        {
-            //#pragma acc update self(u_curr[0:nelem])
-            std::stringstream ss;
-            ss << "wave_openacc_" << iter << ".silo";
-            visit_dump(wec.g_curr, ss.str());
-        }
-#endif /* SAVE_TIMESTEPS */
-#endif /* HAVE_SILO */
-
-#ifdef SAVE_ITERTIME
-        ofs << i << " " << ms.count() << std::endl;
-#endif /* SAVE_ITERTIME */
+		T *d_temp = d_prev;
+		d_prev = d_curr;
+		d_curr = d_next;
+		d_next = d_temp;
     }
+	
+	acc_copyout(wec.g_prev.data(), nelem*sizeof(T));
+	acc_copyout(wec.g_curr.data(), nelem*sizeof(T));
+	acc_copyout(wec.g_next.data(), nelem*sizeof(T));
+    auto stop = std::chrono::high_resolution_clock::now();
+
+    std::chrono::duration<double, std::milli> ms = stop - start;
+    time += ms.count();
     
     std::cout << "[Wave][OpenACC] Iteration Time: " << time/wec.maxiter << "ms" << std::endl;
     std::cout << "[Wave][OpenACC] Wall Time: " << time << "ms" << std::endl;
-- 
GitLab