From 97af8e34f0ca1f723bc337b6b0f36708124c4742 Mon Sep 17 00:00:00 2001
From: Matteo Cicuttin <datafl4sh@toxicnet.eu>
Date: Mon, 10 May 2021 19:56:54 +0000
Subject: [PATCH] Hipified common kernels. HIP can't compile Eigen.

---
 CMakeLists.txt              | 50 +++++++++++++-------
 include/connectivity.h      |  4 +-
 include/eigen.h             |  7 +++
 include/entity_data.h       |  4 +-
 include/gmsh_io.h           | 16 +++++--
 include/gpu_support.hpp     | 91 +++++++++++++++++++++++++------------
 include/kernels_gpu.h       | 15 +++---
 include/maxwell_interface.h |  4 +-
 include/physical_element.h  |  1 +
 include/reference_element.h |  1 +
 include/types.h             |  8 +---
 src/entity.cpp              |  3 +-
 src/maxwell_common.cpp      |  4 +-
 src/maxwell_gpu.cpp         | 13 +++---
 src/maxwell_interface.cpp   | 41 +++++++++--------
 15 files changed, 166 insertions(+), 96 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index d230b70..9d8d058 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3,7 +3,7 @@ project(gmsh_gpu_dg)
 include(CheckLanguage)
 
 
-set(CMAKE_CXX_STANDARD 20)
+set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 set(CMAKE_CUDA_ARCHITECTURES 35)
 
@@ -24,7 +24,6 @@ set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH})
 
 ######################################################################
 ## Find required libraries
-
 find_package(Eigen3 REQUIRED)
 set(LINK_LIBS ${LINK_LIBS} Eigen3::Eigen)
 
@@ -95,13 +94,15 @@ if (OPT_ENABLE_GPU_SOLVER)
             set(LINK_LIBS ${LINK_LIBS} CUDA::nvml)
             add_definitions(-DHAVE_CUDA_NVML)
         endif()
-	link_directories("/usr/local/cuda/lib64")
+    link_directories("/usr/local/cuda/lib64")
     endif()
 
     ######################################################################
     ## Find HIP stuff
     find_package(HIP QUIET MODULE)
     if (HIP_FOUND)
+        execute_process(COMMAND sh -c "/opt/rocm/hip/bin/hipify-perl ${CMAKE_SOURCE_DIR}/src/kernels_cuda.cu > ${CMAKE_SOURCE_DIR}/src/hipified_kernels_cuda.cpp")
+        execute_process(COMMAND sh -c "/opt/rocm/hip/bin/hipify-perl ${CMAKE_SOURCE_DIR}/src/maxwell_kernels_cuda.cu > ${CMAKE_SOURCE_DIR}/src/hipified_maxwell_kernels_cuda.cpp")
         add_definitions(-DHAVE_HIP)
         include_directories("/opt/rocm/include")
         set(CMAKE_HIP_LINK_EXECUTABLE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_HOME} <FLAGS> <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>")
@@ -117,7 +118,9 @@ if (OPT_ENABLE_GPU_SOLVER)
         if (OPT_USE_HIP)
             add_definitions(-DGPU_USE_HIP)
             add_definitions(-D__HIP_PLATFORM_HCC__)
+            set(LINK_LIBS ${LINK_LIBS})
         endif()
+
     endif()
 
     option(OPT_BLOCKED_GPU_KERNELS "Use blocked kernels on GPU (DO NOT USE)" OFF)
@@ -249,6 +252,10 @@ if (OPT_GMSH_484_BEFORE_SPLIT)
     add_definitions(-DUSE_INITIAL_4_8_4_API)
 endif()
 
+option(OPT_DISABLE_TEXTURE_CACHE "On ROCm 4.1 texture stuff is broken, disable it" OFF)
+if (OPT_DISABLE_TEXTURE_CACHE)
+    add_definitions(-DDISABLE_TEXTURE_CACHE)
+endif()
 ######################################################################
 ## Configure targets
 include_directories(contrib/sgr)
@@ -267,14 +274,20 @@ set(LIBGMSHDG_SOURCES   src/gmsh_io.cpp
                     )
 
 if (OPT_ENABLE_GPU_SOLVER)
-    set(LIBGMSHDG_SOURCES ${LIBGMSHDG_SOURCES}
-                        src/kernels_gpu_mi.cpp
-                        src/kernels_cuda.cu
-                    )
+    set(LIBGMSHDG_SOURCES ${LIBGMSHDG_SOURCES} src/kernels_gpu_mi.cpp)
+
+    if (OPT_USE_HIP)
+        set_source_files_properties(src/hipified_kernels_cuda.cpp PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
+        set(LIBGMSHDG_SOURCES ${LIBGMSHDG_SOURCES} src/hipified_kernels_cuda.cpp)
+        hip_add_library(gmshdg SHARED ${LIBGMSHDG_SOURCES})
+        target_link_libraries(gmshdg ${LINK_LIBS})
+    else()
+        set(LIBGMSHDG_SOURCES ${LIBGMSHDG_SOURCES} src/kernels_cuda.cu)
+        add_library(gmshdg SHARED ${LIBGMSHDG_SOURCES})
+        target_link_libraries(gmshdg ${LINK_LIBS})
+    endif()
 endif()
 
-add_library(gmshdg SHARED ${LIBGMSHDG_SOURCES})
-target_link_libraries(gmshdg ${LINK_LIBS})
 
 ########## maxwell solver
 set(MAXWELL_SOURCES     src/maxwell_interface.cpp
@@ -284,14 +297,19 @@ set(MAXWELL_SOURCES     src/maxwell_interface.cpp
                     )
 
 if (OPT_ENABLE_GPU_SOLVER)
-    set(MAXWELL_SOURCES ${MAXWELL_SOURCES}
-                        src/maxwell_gpu.cpp
-                        src/maxwell_kernels_cuda.cu
-    )
-endif()
+    set(MAXWELL_SOURCES ${MAXWELL_SOURCES} src/maxwell_gpu.cpp)
 
-add_executable(maxwell_solver ${MAXWELL_SOURCES})
-target_link_libraries(maxwell_solver gmshdg)
+    if (OPT_USE_HIP)
+        set_source_files_properties(src/hipified_maxwell_kernels_cuda.cpp PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
+        set(MAXWELL_SOURCES ${MAXWELL_SOURCES} src/hipified_maxwell_kernels_cuda.cpp)
+        hip_add_executable(maxwell_solver ${MAXWELL_SOURCES} HIPCC_OPTIONS "-std=c++17")
+        target_link_libraries(maxwell_solver gmshdg)
+    else()
+        set(MAXWELL_SOURCES ${MAXWELL_SOURCES} src/maxwell_kernels_cuda.cu)
+        add_executable(maxwell_solver ${MAXWELL_SOURCES})
+        target_link_libraries(maxwell_solver gmshdg)
+    endif()
+endif()
 
 ########## tests
 option(OPT_BUILD_TESTS "Build tests" OFF)
diff --git a/include/connectivity.h b/include/connectivity.h
index 3730eba..275ca63 100644
--- a/include/connectivity.h
+++ b/include/connectivity.h
@@ -187,7 +187,7 @@ public:
     std::set<cell_descriptor>
     neighbours_as_cell_descriptors(const cell_descriptor& cd) const
     {
-        auto filter = [](const ConnData& connd) -> auto {
+        auto filter = [](const ConnData& connd) {
             cell_descriptor ret;
             ret.iE = connd.second.iE;
             ret.iT = connd.second.iT;
@@ -218,7 +218,7 @@ public:
     element_keys() const
     {
         using PT = std::pair<cell_descriptor, std::set<ConnData>>;
-        auto get_node = [](const PT& adjpair) -> auto {
+        auto get_node = [](const PT& adjpair) {
             return adjpair.first;
         };
 
diff --git a/include/eigen.h b/include/eigen.h
index 16d1dd5..89d5618 100644
--- a/include/eigen.h
+++ b/include/eigen.h
@@ -15,3 +15,10 @@ using MatxCM = Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor>
 template<typename T>
 using MatxRM = Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
 
+using vec3d = Eigen::Matrix<double, 3, 1>;
+using mat3d = Eigen::Matrix<double, 3, 3, Eigen::RowMajor>;
+using vecxd = Eigen::Matrix<double, Eigen::Dynamic, 1>;
+using matxd = Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
+using vec_mat3d = eigen_compatible_stdvector<mat3d>;
+using vec_matxd = eigen_compatible_stdvector<matxd>;
+
diff --git a/include/entity_data.h b/include/entity_data.h
index 2db0c82..d778a15 100644
--- a/include/entity_data.h
+++ b/include/entity_data.h
@@ -1,7 +1,6 @@
 #pragma once
 
 #include <numeric>
-
 #include "eigen.h"
 #include "types.h"
 
@@ -100,6 +99,7 @@ struct entity_data_gpu
     ~entity_data_gpu();
 };
 
+#if 0
 template<typename T>
 void
 gpu_copyin(const std::vector<T> src, T** dst)
@@ -131,7 +131,7 @@ gpu_copyin(const Eigen::Matrix<T, Eigen::Dynamic, 1>& src, double** dst)
     checkGPU( gpuMalloc( (void**)dst, src.size()*sizeof(T) ) );
     checkGPU( gpuMemcpy( *dst, src.data(), src.size()*sizeof(T), gpuMemcpyHostToDevice) );
 }
-
+#endif
 #endif /* ENABLE_GPU_SOLVER */
 
 
diff --git a/include/gmsh_io.h b/include/gmsh_io.h
index bfed218..7585f30 100644
--- a/include/gmsh_io.h
+++ b/include/gmsh_io.h
@@ -38,9 +38,19 @@ struct boundary_descriptor
     face_type   type;
     int         gmsh_entity;
 
-#ifndef HIDE_THIS_FROM_NVCC
-    auto operator<=>(const boundary_descriptor&) const = default;
-#endif /* HIDE_THIS_FROM_NVCC */
+    //auto operator <=> (const boundary_descriptor&) const = default;
+
+    bool operator<(const boundary_descriptor& other) const
+    {
+        return (type < other.type) or
+               ( (type == other.type) and (gmsh_entity < other.gmsh_entity) );
+    }
+
+    bool operator==(const boundary_descriptor& other) const
+    {
+        return (type == other.type) and (gmsh_entity == other.gmsh_entity);
+    }
+
     boundary_descriptor()
         : type(face_type::NONE), gmsh_entity(0)
     {}
diff --git a/include/gpu_support.hpp b/include/gpu_support.hpp
index 9e3b346..475a962 100644
--- a/include/gpu_support.hpp
+++ b/include/gpu_support.hpp
@@ -7,7 +7,12 @@
 #ifdef GPU_USE_HIP
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wunused-result"
+#pragma GCC diagnostic ignored "-Wshadow"
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Weverything"
 #include <hip/hip_runtime.h>
+#include <hip/hip_runtime_api.h>
+#pragma clang diagnostic pop
 #pragma GCC diagnostic pop
 #else /* GPU_USE_HIP */
 #include <cuda_runtime.h>
@@ -26,7 +31,11 @@
 #define gpuMemcpyDefault                hipMemcpyDefault
 #define gpuTextureDesc                  hipTextureDesc
 #define gpuResourceDesc                 hipResourceDesc
-#define gpuTextureObject_t              hipTextureObject_t
+#ifdef DISABLE_TEXTURE_CACHE
+    #define gpuTextureObject_t          double *
+#else
+    #define gpuTextureObject_t          hipTextureObject_t
+#endif
 #define gpuCreateTextureObject          hipCreateTextureObject
 #define gpuDestroyTextureObject         hipDestroyTextureObject
 #define gpuAddressModeClamp             hipAddressModeClamp
@@ -39,6 +48,13 @@
 #define gpuEventRecord                  hipEventRecord
 #define gpuEventSynchronize             hipEventSynchronize
 #define gpuEventElapsedTime             hipEventElapsedTime
+#define gpuStream_t                     hipStream_t
+#define gpuStreamCreate                 hipStreamCreate
+#define gpuStreamDestroy                hipStreamDestroy
+#define gpuStreamSynchronize            hipStreamSynchronize
+#define gpuMallocHost                   hipHostMalloc
+#define gpuFreeHost                     hipHostFree
+#define gpuMemcpyAsync                  hipMemcpyAsync
 
 #else /* GPU_USE_HIP */
 
@@ -53,7 +69,11 @@
 #define gpuMemcpyDefault                cudaMemcpyDefault 
 #define gpuTextureDesc                  cudaTextureDesc
 #define gpuResourceDesc                 cudaResourceDesc
-#define gpuTextureObject_t              cudaTextureObject_t
+#ifdef DISABLE_TEXTURE_CACHE
+    #define gpuTextureObject_t          double *
+#else
+    #define gpuTextureObject_t          cudaTextureObject_t
+#endif
 #define gpuCreateTextureObject          cudaCreateTextureObject
 #define gpuDestroyTextureObject         cudaDestroyTextureObject
 #define gpuAddressModeClamp             cudaAddressModeClamp
@@ -66,9 +86,17 @@
 #define gpuEventRecord                  cudaEventRecord
 #define gpuEventSynchronize             cudaEventSynchronize
 #define gpuEventElapsedTime             cudaEventElapsedTime
-
+#define gpuStream_t                     cudaStream_t
+#define gpuStreamCreate                 cudaStreamCreate
+#define gpuStreamDestroy                cudaStreamDestroy
+#define gpuStreamSynchronize            cudaStreamSynchronize
+#define gpuMallocHost                   cudaMallocHost
+#define gpuFreeHost                     cudaFreeHost
+#define gpuMemcpyAsync                  cudaMemcpyAsync
 #endif /* GPU_USE_HIP */
 
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunused-parameter"
 inline void
 checkGPU(gpuError_t result)
 {
@@ -84,6 +112,7 @@ checkGPU(gpuError_t result)
 #endif /* GPU_DEBUG */
 //    return result;
 }
+#pragma clang diagnostic pop
 
 inline gpuError_t
 gpuMalloc(void **ptr, size_t size)
@@ -126,32 +155,27 @@ gpuMemcpy(void *dst, const void *src, size_t size, gpuMemcpyKind kind)
 }
 
 
-#ifdef GPU_USE_HIP
+#if defined(__NVCC__) || defined(__HIP__)
 static __inline__ __device__ double
-fetch_tex(hipTextureObject_t tex, int32_t ofs)
+fetch_tex(gpuTextureObject_t tex, int32_t ofs)
 {
-    uint2 val = tex1Dfetch<uint2>(tex, ofs);
-    return __hiloint2double(val.y, val.x);
-}
+#ifdef DISABLE_TEXTURE_CACHE
+    return tex[ofs];
 #else
-#ifdef __NVCC__
-static __inline__ __device__ double
-fetch_tex(cudaTextureObject_t tex, int32_t ofs)
-{
     uint2 val = tex1Dfetch<uint2>(tex, ofs);
     return __hiloint2double(val.y, val.x);
-}
 #endif
+}
 #endif
 
 class stream
 {
-    cudaStream_t    m_stream;
+    gpuStream_t    m_stream;
 
 public:
     stream()
     {
-        checkGPU( cudaStreamCreate(&m_stream) );
+        checkGPU( gpuStreamCreate(&m_stream) );
     }
 
     stream(const stream&) = delete;
@@ -161,17 +185,17 @@ public:
 
     ~stream()
     {
-        checkGPU( cudaStreamDestroy(m_stream) );
+        checkGPU( gpuStreamDestroy(m_stream) );
     }
 
-    operator cudaStream_t() const
+    operator gpuStream_t() const
     {
         return m_stream;
     }
 
     void wait(void) const
     {
-        cudaStreamSynchronize(m_stream);
+        checkGPU( gpuStreamSynchronize(m_stream) );
     }
 };
 
@@ -228,7 +252,7 @@ public:
     void copyin(const T *src, size_t size, const stream& st)
     {
         assert(size == m_size);
-        (void)checkGPU( cudaMemcpyAsync(m_devptr, src, size*sizeof(T), gpuMemcpyHostToDevice, st) );
+        (void)checkGPU( gpuMemcpyAsync(m_devptr, src, size*sizeof(T), gpuMemcpyHostToDevice, st) );
     }
 
     void copyout(T *dst) const
@@ -307,10 +331,11 @@ class texture_allocator;
 template<>
 class texture_allocator<double>
 {
+#ifndef DISABLE_TEXTURE_CACHE
     gpuTextureDesc          cTexDesc;
     gpuResourceDesc         cResDesc;
     gpuTextureObject_t      cTexObj;
-
+#endif
     double                  *d_texDataPtr;
     size_t                  data_size;
 
@@ -327,9 +352,11 @@ public:
 
     texture_allocator(texture_allocator&& other)
     {
+#ifndef DISABLE_TEXTURE_CACHE
         cTexDesc            = other.cTexDesc;
         cResDesc            = other.cResDesc;
         cTexObj             = other.cTexObj;
+#endif
         d_texDataPtr        = other.d_texDataPtr;
         other.d_texDataPtr  = nullptr;
         data_size           = other.data_size;
@@ -353,6 +380,7 @@ public:
         else
             checkGPU( gpuMemset(d_texDataPtr, 0, size*sizeof(double)) );
 
+#ifndef DISABLE_TEXTURE_CACHE
         /* cudaTextureDesc */
         memset(&cTexDesc, 0, sizeof(cTexDesc));
         cTexDesc.normalizedCoords = 0;
@@ -370,12 +398,17 @@ public:
 
         /* create texture */
         checkGPU( gpuCreateTextureObject(&cTexObj, &cResDesc, &cTexDesc, nullptr) );
+#endif
         data_size = size;
     }
 
     gpuTextureObject_t get_texture(void) const
     {
+#ifdef DISABLE_TEXTURE_CACHE
+        return d_texDataPtr;
+#else
         return cTexObj;
+#endif
     }
 
     double * data(void) const
@@ -392,7 +425,9 @@ public:
     {
         if (d_texDataPtr)
         {
+#ifndef DISABLE_TEXTURE_CACHE
             checkGPU( gpuDestroyTextureObject(cTexObj) );
+#endif
             checkGPU( gpuFree(d_texDataPtr) );
         }
 
@@ -411,28 +446,28 @@ class timecounter_gpu
 public:
     timecounter_gpu()
     {
-        gpuEventCreate(&start);
-        gpuEventCreate(&stop);
+        checkGPU( gpuEventCreate(&start) );
+        checkGPU( gpuEventCreate(&stop) );
     }
 
     ~timecounter_gpu()
     {
-        gpuEventDestroy(start);
-        gpuEventDestroy(stop);
+        checkGPU( gpuEventDestroy(start) );
+        checkGPU( gpuEventDestroy(stop) );
     }
 
     void tic()
     {
-        gpuEventRecord(start, 0);
+        checkGPU( gpuEventRecord(start, 0) );
     }
 
     double toc()
     {
-        gpuEventRecord(stop, 0);
+        checkGPU( gpuEventRecord(stop, 0) );
 
         float elapsed;
-        gpuEventSynchronize(stop);
-        gpuEventElapsedTime(&elapsed, start, stop);
+        checkGPU( gpuEventSynchronize(stop) );
+        checkGPU( gpuEventElapsedTime(&elapsed, start, stop) );
         return double(elapsed)/1000.0;
     }
 };
diff --git a/include/kernels_gpu.h b/include/kernels_gpu.h
index ea04177..fbf43a5 100644
--- a/include/kernels_gpu.h
+++ b/include/kernels_gpu.h
@@ -141,35 +141,34 @@ size_t gpu_dblocks_dofs(const entity_data_cpu&);
 void reshape_dofs(const entity_data_cpu&, const entity_data_gpu&, const vecxd&, vecxd&, bool);
 
 
-
 void
 gpu_compute_field_derivatives(const entity_data_gpu& edg, const double *F,
-    double *dF_dx, double* dF_dy, double* dF_dz, double alpha, cudaStream_t stream = 0);
+    double *dF_dx, double* dF_dy, double* dF_dz, double alpha, gpuStream_t stream = 0);
 
 void gpu_compute_flux_lifting(entity_data_gpu&, const double *, double *,
-    cudaStream_t stream = 0);
+    gpuStream_t stream = 0);
 
 void gpu_subtract(double *, const double *, const double *, size_t,
-    cudaStream_t stream = 0);
+    gpuStream_t stream = 0);
 
 void
 gpu_compute_euler_update(double *out, const double *y, const double *k,
     const double *reaction, const double *material, size_t num_dofs, double dt,
-    cudaStream_t stream = 0);
+    gpuStream_t stream = 0);
 
 void
 gpu_compute_euler_update(double *out, const double *y, const double *k,
     const double *material, size_t num_dofs, double dt,
-    cudaStream_t stream = 0);
+    gpuStream_t stream = 0);
 
 void
 gpu_compute_rk4_weighted_sum(double *out, const double *in, const double *k1,
     const double *k2, const double *k3, const double *k4,
     const double *material, size_t num_dofs, double dt,
-    cudaStream_t stream = 0);
+    gpuStream_t stream = 0);
 
 void
 gpu_compute_rk4_weighted_sum(double *out, const double *in, const double *k1,
     const double *k2, const double *k3, const double *k4, const double *reaction,
     const double *material, size_t num_dofs, double dt,
-    cudaStream_t stream = 0);
+    gpuStream_t stream = 0);
diff --git a/include/maxwell_interface.h b/include/maxwell_interface.h
index 92db3e1..5bc8414 100644
--- a/include/maxwell_interface.h
+++ b/include/maxwell_interface.h
@@ -363,9 +363,9 @@ init_H_field(const model& mod, solver_state_gpu& state, const Function& H)
 }
 
 void gpu_compute_jumps(const entity_data_gpu&, const field_gpu&, field_gpu&,
-    double *, cudaStream_t stream = 0);
+    double *, gpuStream_t stream = 0);
 void gpu_compute_fluxes(const entity_data_gpu&, const field_gpu&, field_gpu&,
-    const field_gpu&, const material_params_gpu&, cudaStream_t stream = 0);
+    const field_gpu&, const material_params_gpu&, gpuStream_t stream = 0);
 #endif /* ENABLE_GPU_SOLVER */
 
 
diff --git a/include/physical_element.h b/include/physical_element.h
index 4c7c271..b8dad8c 100644
--- a/include/physical_element.h
+++ b/include/physical_element.h
@@ -2,6 +2,7 @@
 
 #include "gmsh.h"
 #include "point.hpp"
+#include "eigen.h"
 #include "types.h"
 
 /* This is a key for a basis function. Did this
diff --git a/include/reference_element.h b/include/reference_element.h
index 22e3930..19961b4 100644
--- a/include/reference_element.h
+++ b/include/reference_element.h
@@ -1,5 +1,6 @@
 #pragma once
 
+#include "eigen.h"
 #include "types.h"
 
 class reference_element
diff --git a/include/types.h b/include/types.h
index 6ec9c22..a192f3b 100644
--- a/include/types.h
+++ b/include/types.h
@@ -1,7 +1,7 @@
 #pragma once
 
+#include <vector>
 #include "point.hpp"
-#include "eigen.h"
 
 #ifdef __NVCC__
 #define HIDE_THIS_FROM_NVCC
@@ -12,12 +12,6 @@ using vec_size_t = std::vector<size_t>;
 using vec_double = std::vector<double>;
 using vec_point_3d = std::vector<point_3d>;
 using vec_quadpt_3d = std::vector<quadrature_point_3d>;
-using vec3d = Eigen::Matrix<double, 3, 1>;
-using mat3d = Eigen::Matrix<double, 3, 3, Eigen::RowMajor>;
-using vecxd = Eigen::Matrix<double, Eigen::Dynamic, 1>;
-using matxd = Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
-using vec_mat3d = eigen_compatible_stdvector<mat3d>;
-using vec_matxd = eigen_compatible_stdvector<matxd>;
 
 struct entity_params
 {
diff --git a/src/entity.cpp b/src/entity.cpp
index 54de2bc..30e3379 100644
--- a/src/entity.cpp
+++ b/src/entity.cpp
@@ -158,7 +158,8 @@ entity::project(const vector_function& function, vecxd& pfx, vecxd& pfy, vecxd&
             rhs_z += pqp.weight() * fval(2) * phi; 
         }
 
-        Eigen::LDLT<matxd> mass_ldlt(mass);
+        Eigen::LDLT<matxd> mass_ldlt;
+        mass_ldlt.compute(mass);
         pfx.segment(m_dof_base + iT*num_bf, num_bf) = mass_ldlt.solve(rhs_x);
         pfy.segment(m_dof_base + iT*num_bf, num_bf) = mass_ldlt.solve(rhs_y);
         pfz.segment(m_dof_base + iT*num_bf, num_bf) = mass_ldlt.solve(rhs_z);
diff --git a/src/maxwell_common.cpp b/src/maxwell_common.cpp
index 8cd1541..3ef7ed6 100644
--- a/src/maxwell_common.cpp
+++ b/src/maxwell_common.cpp
@@ -192,7 +192,7 @@ parameter_loader::validate_materials(const std::string& mpn, int tag) const
 }
 
 bool
-parameter_loader::validate_boundary_condition(const model& mod, int tag) const
+parameter_loader::validate_boundary_condition(const model&, int tag) const
 {
     auto bc = lua[SEC_BCONDS][tag];
     if (not bc.valid())
@@ -232,7 +232,7 @@ parameter_loader::validate_boundary_condition(const model& mod, int tag) const
 }
 
 bool
-parameter_loader::validate_interface_condition(const model& mod, int tag) const
+parameter_loader::validate_interface_condition(const model&, int tag) const
 {
     auto ic = lua[SEC_IFCONDS][tag];
     if (not ic.valid())
diff --git a/src/maxwell_gpu.cpp b/src/maxwell_gpu.cpp
index a9c79c7..170006b 100644
--- a/src/maxwell_gpu.cpp
+++ b/src/maxwell_gpu.cpp
@@ -98,7 +98,8 @@ void init_from_model(const model& mod, solver_state_gpu& state)
 }
 
 
-void compute_curls(solver_state_gpu& state, const field_gpu& curr, field_gpu& next)
+static void
+compute_curls(solver_state_gpu& state, const field_gpu& curr, field_gpu& next)
 {
     auto currp = curr.data();
     auto nextp = next.data();
@@ -125,7 +126,7 @@ void compute_curls(solver_state_gpu& state, const field_gpu& curr, field_gpu& ne
     gpu_subtract(nextp.Hz, dyp.Ex, dxp.Ey, nextp.num_dofs, state.compute_stream);
 }
 
-void
+static void
 compute_fluxes(solver_state_gpu& state, const field_gpu& in, field_gpu& out)
 {
     auto matparams_ptrs = state.matparams.data();
@@ -152,14 +153,14 @@ compute_fluxes(solver_state_gpu& state, const field_gpu& in, field_gpu& out)
     }
 }
 
-void
+static void
 apply_operator(solver_state_gpu& state, const field_gpu& curr, field_gpu& next)
 {
     compute_curls(state, curr, next);
     compute_fluxes(state, curr, next);
 }
 
-void
+static void
 compute_euler_update(solver_state_gpu& state, const field_gpu& y,
     const field_gpu& k, double dt, field_gpu& out)
 {
@@ -179,7 +180,7 @@ compute_euler_update(solver_state_gpu& state, const field_gpu& y,
     gpu_compute_euler_update(outp.Hz, yp.Hz, kp.Hz, mu, out.num_dofs, dt, state.compute_stream);
 }
 
-void
+static void
 compute_rk4_weighted_sum(solver_state_gpu& state, const field_gpu& in,
     double dt, field_gpu& out)
 {
@@ -276,7 +277,7 @@ do_sources(const model& mod, maxwell::solver_state_gpu& state,
 void
 swap(maxwell::solver_state_gpu& state)
 {
-    cudaDeviceSynchronize();
+    checkGPU( gpuDeviceSynchronize() );
     std::swap(state.bndsrcs, state.bndsrcs_buf);
     std::swap(state.emf_curr, state.emf_next);
 }
diff --git a/src/maxwell_interface.cpp b/src/maxwell_interface.cpp
index 4a72a54..bd023c9 100644
--- a/src/maxwell_interface.cpp
+++ b/src/maxwell_interface.cpp
@@ -47,30 +47,32 @@ pinned_field::zero()
 void
 pinned_field::resize(size_t sz)
 {
-    if (Ex and sz != num_dofs) cudaFreeHost(Ex);
-    if (Ey and sz != num_dofs) cudaFreeHost(Ey);
-    if (Ez and sz != num_dofs) cudaFreeHost(Ez);
-    if (Ex and sz != num_dofs) cudaFreeHost(Ex);
-    if (Ey and sz != num_dofs) cudaFreeHost(Ey);
-    if (Ez and sz != num_dofs) cudaFreeHost(Ez);
-
-    checkGPU( cudaMallocHost((void**)&Ex, sz*sizeof(double)) );
-    checkGPU( cudaMallocHost((void**)&Ey, sz*sizeof(double)) );
-    checkGPU( cudaMallocHost((void**)&Ez, sz*sizeof(double)) );
-    checkGPU( cudaMallocHost((void**)&Hx, sz*sizeof(double)) );
-    checkGPU( cudaMallocHost((void**)&Hy, sz*sizeof(double)) );
-    checkGPU( cudaMallocHost((void**)&Hz, sz*sizeof(double)) );
+    if (Ex and sz != num_dofs) checkGPU( gpuFreeHost(Ex) );
+    if (Ey and sz != num_dofs) checkGPU( gpuFreeHost(Ey) );
+    if (Ez and sz != num_dofs) checkGPU( gpuFreeHost(Ez) );
+    if (Hx and sz != num_dofs) checkGPU( gpuFreeHost(Hx) );
+    if (Hy and sz != num_dofs) checkGPU( gpuFreeHost(Hy) );
+    if (Hz and sz != num_dofs) checkGPU( gpuFreeHost(Hz) );
+    
+    zero();
+
+    checkGPU( gpuMallocHost((void**)&Ex, sz*sizeof(double)) );
+    checkGPU( gpuMallocHost((void**)&Ey, sz*sizeof(double)) );
+    checkGPU( gpuMallocHost((void**)&Ez, sz*sizeof(double)) );
+    checkGPU( gpuMallocHost((void**)&Hx, sz*sizeof(double)) );
+    checkGPU( gpuMallocHost((void**)&Hy, sz*sizeof(double)) );
+    checkGPU( gpuMallocHost((void**)&Hz, sz*sizeof(double)) );
     num_dofs = sz;
 }
 
 pinned_field::~pinned_field()
 {
-    if (Ex) cudaFreeHost(Ex);
-    if (Ey) cudaFreeHost(Ey);
-    if (Ez) cudaFreeHost(Ez);
-    if (Ex) cudaFreeHost(Ex);
-    if (Ey) cudaFreeHost(Ey);
-    if (Ez) cudaFreeHost(Ez);
+    if (Ex) checkGPU( gpuFreeHost(Ex) );
+    if (Ey) checkGPU( gpuFreeHost(Ey) );
+    if (Ez) checkGPU( gpuFreeHost(Ez) );
+    if (Hx) checkGPU( gpuFreeHost(Hx) );
+    if (Hy) checkGPU( gpuFreeHost(Hy) );
+    if (Hz) checkGPU( gpuFreeHost(Hz) );
 }
 
 void
@@ -92,6 +94,7 @@ field_gpu::resize(size_t p_num_dofs)
     Hx.resize(p_num_dofs);
     Hy.resize(p_num_dofs);
     Hz.resize(p_num_dofs);
+    zero();
     num_dofs = p_num_dofs;
 }
 
-- 
GitLab