From 97af8e34f0ca1f723bc337b6b0f36708124c4742 Mon Sep 17 00:00:00 2001 From: Matteo Cicuttin <datafl4sh@toxicnet.eu> Date: Mon, 10 May 2021 19:56:54 +0000 Subject: [PATCH] Hipified common kernels. HIP can't compile Eigen. --- CMakeLists.txt | 50 +++++++++++++------- include/connectivity.h | 4 +- include/eigen.h | 7 +++ include/entity_data.h | 4 +- include/gmsh_io.h | 16 +++++-- include/gpu_support.hpp | 91 +++++++++++++++++++++++++------------ include/kernels_gpu.h | 15 +++--- include/maxwell_interface.h | 4 +- include/physical_element.h | 1 + include/reference_element.h | 1 + include/types.h | 8 +--- src/entity.cpp | 3 +- src/maxwell_common.cpp | 4 +- src/maxwell_gpu.cpp | 13 +++--- src/maxwell_interface.cpp | 41 +++++++++-------- 15 files changed, 166 insertions(+), 96 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d230b70..9d8d058 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,7 +3,7 @@ project(gmsh_gpu_dg) include(CheckLanguage) -set(CMAKE_CXX_STANDARD 20) +set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CUDA_ARCHITECTURES 35) @@ -24,7 +24,6 @@ set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH}) ###################################################################### ## Find required libraries - find_package(Eigen3 REQUIRED) set(LINK_LIBS ${LINK_LIBS} Eigen3::Eigen) @@ -95,13 +94,15 @@ if (OPT_ENABLE_GPU_SOLVER) set(LINK_LIBS ${LINK_LIBS} CUDA::nvml) add_definitions(-DHAVE_CUDA_NVML) endif() - link_directories("/usr/local/cuda/lib64") + link_directories("/usr/local/cuda/lib64") endif() ###################################################################### ## Find HIP stuff find_package(HIP QUIET MODULE) if (HIP_FOUND) + execute_process(COMMAND sh -c "/opt/rocm/hip/bin/hipify-perl ${CMAKE_SOURCE_DIR}/src/kernels_cuda.cu > ${CMAKE_SOURCE_DIR}/src/hipified_kernels_cuda.cpp") + execute_process(COMMAND sh -c "/opt/rocm/hip/bin/hipify-perl ${CMAKE_SOURCE_DIR}/src/maxwell_kernels_cuda.cu > ${CMAKE_SOURCE_DIR}/src/hipified_maxwell_kernels_cuda.cpp") add_definitions(-DHAVE_HIP) include_directories("/opt/rocm/include") set(CMAKE_HIP_LINK_EXECUTABLE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_HOME} <FLAGS> <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>") @@ -117,7 +118,9 @@ if (OPT_ENABLE_GPU_SOLVER) if (OPT_USE_HIP) add_definitions(-DGPU_USE_HIP) add_definitions(-D__HIP_PLATFORM_HCC__) + set(LINK_LIBS ${LINK_LIBS}) endif() + endif() option(OPT_BLOCKED_GPU_KERNELS "Use blocked kernels on GPU (DO NOT USE)" OFF) @@ -249,6 +252,10 @@ if (OPT_GMSH_484_BEFORE_SPLIT) add_definitions(-DUSE_INITIAL_4_8_4_API) endif() +option(OPT_DISABLE_TEXTURE_CACHE "On ROCm 4.1 texture stuff is broken, disable it" OFF) +if (OPT_DISABLE_TEXTURE_CACHE) + add_definitions(-DDISABLE_TEXTURE_CACHE) +endif() ###################################################################### ## Configure targets include_directories(contrib/sgr) @@ -267,14 +274,20 @@ set(LIBGMSHDG_SOURCES src/gmsh_io.cpp ) if (OPT_ENABLE_GPU_SOLVER) - set(LIBGMSHDG_SOURCES ${LIBGMSHDG_SOURCES} - src/kernels_gpu_mi.cpp - src/kernels_cuda.cu - ) + set(LIBGMSHDG_SOURCES ${LIBGMSHDG_SOURCES} src/kernels_gpu_mi.cpp) + + if (OPT_USE_HIP) + set_source_files_properties(src/hipified_kernels_cuda.cpp PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1) + set(LIBGMSHDG_SOURCES ${LIBGMSHDG_SOURCES} src/hipified_kernels_cuda.cpp) + hip_add_library(gmshdg SHARED ${LIBGMSHDG_SOURCES}) + target_link_libraries(gmshdg ${LINK_LIBS}) + else() + set(LIBGMSHDG_SOURCES ${LIBGMSHDG_SOURCES} src/kernels_cuda.cu) + add_library(gmshdg SHARED ${LIBGMSHDG_SOURCES}) + target_link_libraries(gmshdg ${LINK_LIBS}) + endif() endif() -add_library(gmshdg SHARED ${LIBGMSHDG_SOURCES}) -target_link_libraries(gmshdg ${LINK_LIBS}) ########## maxwell solver set(MAXWELL_SOURCES src/maxwell_interface.cpp @@ -284,14 +297,19 @@ set(MAXWELL_SOURCES src/maxwell_interface.cpp ) if (OPT_ENABLE_GPU_SOLVER) - set(MAXWELL_SOURCES ${MAXWELL_SOURCES} - src/maxwell_gpu.cpp - src/maxwell_kernels_cuda.cu - ) -endif() + set(MAXWELL_SOURCES ${MAXWELL_SOURCES} src/maxwell_gpu.cpp) -add_executable(maxwell_solver ${MAXWELL_SOURCES}) -target_link_libraries(maxwell_solver gmshdg) + if (OPT_USE_HIP) + set_source_files_properties(src/hipified_maxwell_kernels_cuda.cpp PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1) + set(MAXWELL_SOURCES ${MAXWELL_SOURCES} src/hipified_maxwell_kernels_cuda.cpp) + hip_add_executable(maxwell_solver ${MAXWELL_SOURCES} HIPCC_OPTIONS "-std=c++17") + target_link_libraries(maxwell_solver gmshdg) + else() + set(MAXWELL_SOURCES ${MAXWELL_SOURCES} src/maxwell_kernels_cuda.cu) + add_executable(maxwell_solver ${MAXWELL_SOURCES}) + target_link_libraries(maxwell_solver gmshdg) + endif() +endif() ########## tests option(OPT_BUILD_TESTS "Build tests" OFF) diff --git a/include/connectivity.h b/include/connectivity.h index 3730eba..275ca63 100644 --- a/include/connectivity.h +++ b/include/connectivity.h @@ -187,7 +187,7 @@ public: std::set<cell_descriptor> neighbours_as_cell_descriptors(const cell_descriptor& cd) const { - auto filter = [](const ConnData& connd) -> auto { + auto filter = [](const ConnData& connd) { cell_descriptor ret; ret.iE = connd.second.iE; ret.iT = connd.second.iT; @@ -218,7 +218,7 @@ public: element_keys() const { using PT = std::pair<cell_descriptor, std::set<ConnData>>; - auto get_node = [](const PT& adjpair) -> auto { + auto get_node = [](const PT& adjpair) { return adjpair.first; }; diff --git a/include/eigen.h b/include/eigen.h index 16d1dd5..89d5618 100644 --- a/include/eigen.h +++ b/include/eigen.h @@ -15,3 +15,10 @@ using MatxCM = Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor> template<typename T> using MatxRM = Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>; +using vec3d = Eigen::Matrix<double, 3, 1>; +using mat3d = Eigen::Matrix<double, 3, 3, Eigen::RowMajor>; +using vecxd = Eigen::Matrix<double, Eigen::Dynamic, 1>; +using matxd = Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>; +using vec_mat3d = eigen_compatible_stdvector<mat3d>; +using vec_matxd = eigen_compatible_stdvector<matxd>; + diff --git a/include/entity_data.h b/include/entity_data.h index 2db0c82..d778a15 100644 --- a/include/entity_data.h +++ b/include/entity_data.h @@ -1,7 +1,6 @@ #pragma once #include <numeric> - #include "eigen.h" #include "types.h" @@ -100,6 +99,7 @@ struct entity_data_gpu ~entity_data_gpu(); }; +#if 0 template<typename T> void gpu_copyin(const std::vector<T> src, T** dst) @@ -131,7 +131,7 @@ gpu_copyin(const Eigen::Matrix<T, Eigen::Dynamic, 1>& src, double** dst) checkGPU( gpuMalloc( (void**)dst, src.size()*sizeof(T) ) ); checkGPU( gpuMemcpy( *dst, src.data(), src.size()*sizeof(T), gpuMemcpyHostToDevice) ); } - +#endif #endif /* ENABLE_GPU_SOLVER */ diff --git a/include/gmsh_io.h b/include/gmsh_io.h index bfed218..7585f30 100644 --- a/include/gmsh_io.h +++ b/include/gmsh_io.h @@ -38,9 +38,19 @@ struct boundary_descriptor face_type type; int gmsh_entity; -#ifndef HIDE_THIS_FROM_NVCC - auto operator<=>(const boundary_descriptor&) const = default; -#endif /* HIDE_THIS_FROM_NVCC */ + //auto operator <=> (const boundary_descriptor&) const = default; + + bool operator<(const boundary_descriptor& other) const + { + return (type < other.type) or + ( (type == other.type) and (gmsh_entity < other.gmsh_entity) ); + } + + bool operator==(const boundary_descriptor& other) const + { + return (type == other.type) and (gmsh_entity == other.gmsh_entity); + } + boundary_descriptor() : type(face_type::NONE), gmsh_entity(0) {} diff --git a/include/gpu_support.hpp b/include/gpu_support.hpp index 9e3b346..475a962 100644 --- a/include/gpu_support.hpp +++ b/include/gpu_support.hpp @@ -7,7 +7,12 @@ #ifdef GPU_USE_HIP #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-result" +#pragma GCC diagnostic ignored "-Wshadow" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Weverything" #include <hip/hip_runtime.h> +#include <hip/hip_runtime_api.h> +#pragma clang diagnostic pop #pragma GCC diagnostic pop #else /* GPU_USE_HIP */ #include <cuda_runtime.h> @@ -26,7 +31,11 @@ #define gpuMemcpyDefault hipMemcpyDefault #define gpuTextureDesc hipTextureDesc #define gpuResourceDesc hipResourceDesc -#define gpuTextureObject_t hipTextureObject_t +#ifdef DISABLE_TEXTURE_CACHE + #define gpuTextureObject_t double * +#else + #define gpuTextureObject_t hipTextureObject_t +#endif #define gpuCreateTextureObject hipCreateTextureObject #define gpuDestroyTextureObject hipDestroyTextureObject #define gpuAddressModeClamp hipAddressModeClamp @@ -39,6 +48,13 @@ #define gpuEventRecord hipEventRecord #define gpuEventSynchronize hipEventSynchronize #define gpuEventElapsedTime hipEventElapsedTime +#define gpuStream_t hipStream_t +#define gpuStreamCreate hipStreamCreate +#define gpuStreamDestroy hipStreamDestroy +#define gpuStreamSynchronize hipStreamSynchronize +#define gpuMallocHost hipHostMalloc +#define gpuFreeHost hipHostFree +#define gpuMemcpyAsync hipMemcpyAsync #else /* GPU_USE_HIP */ @@ -53,7 +69,11 @@ #define gpuMemcpyDefault cudaMemcpyDefault #define gpuTextureDesc cudaTextureDesc #define gpuResourceDesc cudaResourceDesc -#define gpuTextureObject_t cudaTextureObject_t +#ifdef DISABLE_TEXTURE_CACHE + #define gpuTextureObject_t double * +#else + #define gpuTextureObject_t cudaTextureObject_t +#endif #define gpuCreateTextureObject cudaCreateTextureObject #define gpuDestroyTextureObject cudaDestroyTextureObject #define gpuAddressModeClamp cudaAddressModeClamp @@ -66,9 +86,17 @@ #define gpuEventRecord cudaEventRecord #define gpuEventSynchronize cudaEventSynchronize #define gpuEventElapsedTime cudaEventElapsedTime - +#define gpuStream_t cudaStream_t +#define gpuStreamCreate cudaStreamCreate +#define gpuStreamDestroy cudaStreamDestroy +#define gpuStreamSynchronize cudaStreamSynchronize +#define gpuMallocHost cudaMallocHost +#define gpuFreeHost cudaFreeHost +#define gpuMemcpyAsync cudaMemcpyAsync #endif /* GPU_USE_HIP */ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunused-parameter" inline void checkGPU(gpuError_t result) { @@ -84,6 +112,7 @@ checkGPU(gpuError_t result) #endif /* GPU_DEBUG */ // return result; } +#pragma clang diagnostic pop inline gpuError_t gpuMalloc(void **ptr, size_t size) @@ -126,32 +155,27 @@ gpuMemcpy(void *dst, const void *src, size_t size, gpuMemcpyKind kind) } -#ifdef GPU_USE_HIP +#if defined(__NVCC__) || defined(__HIP__) static __inline__ __device__ double -fetch_tex(hipTextureObject_t tex, int32_t ofs) +fetch_tex(gpuTextureObject_t tex, int32_t ofs) { - uint2 val = tex1Dfetch<uint2>(tex, ofs); - return __hiloint2double(val.y, val.x); -} +#ifdef DISABLE_TEXTURE_CACHE + return tex[ofs]; #else -#ifdef __NVCC__ -static __inline__ __device__ double -fetch_tex(cudaTextureObject_t tex, int32_t ofs) -{ uint2 val = tex1Dfetch<uint2>(tex, ofs); return __hiloint2double(val.y, val.x); -} #endif +} #endif class stream { - cudaStream_t m_stream; + gpuStream_t m_stream; public: stream() { - checkGPU( cudaStreamCreate(&m_stream) ); + checkGPU( gpuStreamCreate(&m_stream) ); } stream(const stream&) = delete; @@ -161,17 +185,17 @@ public: ~stream() { - checkGPU( cudaStreamDestroy(m_stream) ); + checkGPU( gpuStreamDestroy(m_stream) ); } - operator cudaStream_t() const + operator gpuStream_t() const { return m_stream; } void wait(void) const { - cudaStreamSynchronize(m_stream); + checkGPU( gpuStreamSynchronize(m_stream) ); } }; @@ -228,7 +252,7 @@ public: void copyin(const T *src, size_t size, const stream& st) { assert(size == m_size); - (void)checkGPU( cudaMemcpyAsync(m_devptr, src, size*sizeof(T), gpuMemcpyHostToDevice, st) ); + (void)checkGPU( gpuMemcpyAsync(m_devptr, src, size*sizeof(T), gpuMemcpyHostToDevice, st) ); } void copyout(T *dst) const @@ -307,10 +331,11 @@ class texture_allocator; template<> class texture_allocator<double> { +#ifndef DISABLE_TEXTURE_CACHE gpuTextureDesc cTexDesc; gpuResourceDesc cResDesc; gpuTextureObject_t cTexObj; - +#endif double *d_texDataPtr; size_t data_size; @@ -327,9 +352,11 @@ public: texture_allocator(texture_allocator&& other) { +#ifndef DISABLE_TEXTURE_CACHE cTexDesc = other.cTexDesc; cResDesc = other.cResDesc; cTexObj = other.cTexObj; +#endif d_texDataPtr = other.d_texDataPtr; other.d_texDataPtr = nullptr; data_size = other.data_size; @@ -353,6 +380,7 @@ public: else checkGPU( gpuMemset(d_texDataPtr, 0, size*sizeof(double)) ); +#ifndef DISABLE_TEXTURE_CACHE /* cudaTextureDesc */ memset(&cTexDesc, 0, sizeof(cTexDesc)); cTexDesc.normalizedCoords = 0; @@ -370,12 +398,17 @@ public: /* create texture */ checkGPU( gpuCreateTextureObject(&cTexObj, &cResDesc, &cTexDesc, nullptr) ); +#endif data_size = size; } gpuTextureObject_t get_texture(void) const { +#ifdef DISABLE_TEXTURE_CACHE + return d_texDataPtr; +#else return cTexObj; +#endif } double * data(void) const @@ -392,7 +425,9 @@ public: { if (d_texDataPtr) { +#ifndef DISABLE_TEXTURE_CACHE checkGPU( gpuDestroyTextureObject(cTexObj) ); +#endif checkGPU( gpuFree(d_texDataPtr) ); } @@ -411,28 +446,28 @@ class timecounter_gpu public: timecounter_gpu() { - gpuEventCreate(&start); - gpuEventCreate(&stop); + checkGPU( gpuEventCreate(&start) ); + checkGPU( gpuEventCreate(&stop) ); } ~timecounter_gpu() { - gpuEventDestroy(start); - gpuEventDestroy(stop); + checkGPU( gpuEventDestroy(start) ); + checkGPU( gpuEventDestroy(stop) ); } void tic() { - gpuEventRecord(start, 0); + checkGPU( gpuEventRecord(start, 0) ); } double toc() { - gpuEventRecord(stop, 0); + checkGPU( gpuEventRecord(stop, 0) ); float elapsed; - gpuEventSynchronize(stop); - gpuEventElapsedTime(&elapsed, start, stop); + checkGPU( gpuEventSynchronize(stop) ); + checkGPU( gpuEventElapsedTime(&elapsed, start, stop) ); return double(elapsed)/1000.0; } }; diff --git a/include/kernels_gpu.h b/include/kernels_gpu.h index ea04177..fbf43a5 100644 --- a/include/kernels_gpu.h +++ b/include/kernels_gpu.h @@ -141,35 +141,34 @@ size_t gpu_dblocks_dofs(const entity_data_cpu&); void reshape_dofs(const entity_data_cpu&, const entity_data_gpu&, const vecxd&, vecxd&, bool); - void gpu_compute_field_derivatives(const entity_data_gpu& edg, const double *F, - double *dF_dx, double* dF_dy, double* dF_dz, double alpha, cudaStream_t stream = 0); + double *dF_dx, double* dF_dy, double* dF_dz, double alpha, gpuStream_t stream = 0); void gpu_compute_flux_lifting(entity_data_gpu&, const double *, double *, - cudaStream_t stream = 0); + gpuStream_t stream = 0); void gpu_subtract(double *, const double *, const double *, size_t, - cudaStream_t stream = 0); + gpuStream_t stream = 0); void gpu_compute_euler_update(double *out, const double *y, const double *k, const double *reaction, const double *material, size_t num_dofs, double dt, - cudaStream_t stream = 0); + gpuStream_t stream = 0); void gpu_compute_euler_update(double *out, const double *y, const double *k, const double *material, size_t num_dofs, double dt, - cudaStream_t stream = 0); + gpuStream_t stream = 0); void gpu_compute_rk4_weighted_sum(double *out, const double *in, const double *k1, const double *k2, const double *k3, const double *k4, const double *material, size_t num_dofs, double dt, - cudaStream_t stream = 0); + gpuStream_t stream = 0); void gpu_compute_rk4_weighted_sum(double *out, const double *in, const double *k1, const double *k2, const double *k3, const double *k4, const double *reaction, const double *material, size_t num_dofs, double dt, - cudaStream_t stream = 0); + gpuStream_t stream = 0); diff --git a/include/maxwell_interface.h b/include/maxwell_interface.h index 92db3e1..5bc8414 100644 --- a/include/maxwell_interface.h +++ b/include/maxwell_interface.h @@ -363,9 +363,9 @@ init_H_field(const model& mod, solver_state_gpu& state, const Function& H) } void gpu_compute_jumps(const entity_data_gpu&, const field_gpu&, field_gpu&, - double *, cudaStream_t stream = 0); + double *, gpuStream_t stream = 0); void gpu_compute_fluxes(const entity_data_gpu&, const field_gpu&, field_gpu&, - const field_gpu&, const material_params_gpu&, cudaStream_t stream = 0); + const field_gpu&, const material_params_gpu&, gpuStream_t stream = 0); #endif /* ENABLE_GPU_SOLVER */ diff --git a/include/physical_element.h b/include/physical_element.h index 4c7c271..b8dad8c 100644 --- a/include/physical_element.h +++ b/include/physical_element.h @@ -2,6 +2,7 @@ #include "gmsh.h" #include "point.hpp" +#include "eigen.h" #include "types.h" /* This is a key for a basis function. Did this diff --git a/include/reference_element.h b/include/reference_element.h index 22e3930..19961b4 100644 --- a/include/reference_element.h +++ b/include/reference_element.h @@ -1,5 +1,6 @@ #pragma once +#include "eigen.h" #include "types.h" class reference_element diff --git a/include/types.h b/include/types.h index 6ec9c22..a192f3b 100644 --- a/include/types.h +++ b/include/types.h @@ -1,7 +1,7 @@ #pragma once +#include <vector> #include "point.hpp" -#include "eigen.h" #ifdef __NVCC__ #define HIDE_THIS_FROM_NVCC @@ -12,12 +12,6 @@ using vec_size_t = std::vector<size_t>; using vec_double = std::vector<double>; using vec_point_3d = std::vector<point_3d>; using vec_quadpt_3d = std::vector<quadrature_point_3d>; -using vec3d = Eigen::Matrix<double, 3, 1>; -using mat3d = Eigen::Matrix<double, 3, 3, Eigen::RowMajor>; -using vecxd = Eigen::Matrix<double, Eigen::Dynamic, 1>; -using matxd = Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>; -using vec_mat3d = eigen_compatible_stdvector<mat3d>; -using vec_matxd = eigen_compatible_stdvector<matxd>; struct entity_params { diff --git a/src/entity.cpp b/src/entity.cpp index 54de2bc..30e3379 100644 --- a/src/entity.cpp +++ b/src/entity.cpp @@ -158,7 +158,8 @@ entity::project(const vector_function& function, vecxd& pfx, vecxd& pfy, vecxd& rhs_z += pqp.weight() * fval(2) * phi; } - Eigen::LDLT<matxd> mass_ldlt(mass); + Eigen::LDLT<matxd> mass_ldlt; + mass_ldlt.compute(mass); pfx.segment(m_dof_base + iT*num_bf, num_bf) = mass_ldlt.solve(rhs_x); pfy.segment(m_dof_base + iT*num_bf, num_bf) = mass_ldlt.solve(rhs_y); pfz.segment(m_dof_base + iT*num_bf, num_bf) = mass_ldlt.solve(rhs_z); diff --git a/src/maxwell_common.cpp b/src/maxwell_common.cpp index 8cd1541..3ef7ed6 100644 --- a/src/maxwell_common.cpp +++ b/src/maxwell_common.cpp @@ -192,7 +192,7 @@ parameter_loader::validate_materials(const std::string& mpn, int tag) const } bool -parameter_loader::validate_boundary_condition(const model& mod, int tag) const +parameter_loader::validate_boundary_condition(const model&, int tag) const { auto bc = lua[SEC_BCONDS][tag]; if (not bc.valid()) @@ -232,7 +232,7 @@ parameter_loader::validate_boundary_condition(const model& mod, int tag) const } bool -parameter_loader::validate_interface_condition(const model& mod, int tag) const +parameter_loader::validate_interface_condition(const model&, int tag) const { auto ic = lua[SEC_IFCONDS][tag]; if (not ic.valid()) diff --git a/src/maxwell_gpu.cpp b/src/maxwell_gpu.cpp index a9c79c7..170006b 100644 --- a/src/maxwell_gpu.cpp +++ b/src/maxwell_gpu.cpp @@ -98,7 +98,8 @@ void init_from_model(const model& mod, solver_state_gpu& state) } -void compute_curls(solver_state_gpu& state, const field_gpu& curr, field_gpu& next) +static void +compute_curls(solver_state_gpu& state, const field_gpu& curr, field_gpu& next) { auto currp = curr.data(); auto nextp = next.data(); @@ -125,7 +126,7 @@ void compute_curls(solver_state_gpu& state, const field_gpu& curr, field_gpu& ne gpu_subtract(nextp.Hz, dyp.Ex, dxp.Ey, nextp.num_dofs, state.compute_stream); } -void +static void compute_fluxes(solver_state_gpu& state, const field_gpu& in, field_gpu& out) { auto matparams_ptrs = state.matparams.data(); @@ -152,14 +153,14 @@ compute_fluxes(solver_state_gpu& state, const field_gpu& in, field_gpu& out) } } -void +static void apply_operator(solver_state_gpu& state, const field_gpu& curr, field_gpu& next) { compute_curls(state, curr, next); compute_fluxes(state, curr, next); } -void +static void compute_euler_update(solver_state_gpu& state, const field_gpu& y, const field_gpu& k, double dt, field_gpu& out) { @@ -179,7 +180,7 @@ compute_euler_update(solver_state_gpu& state, const field_gpu& y, gpu_compute_euler_update(outp.Hz, yp.Hz, kp.Hz, mu, out.num_dofs, dt, state.compute_stream); } -void +static void compute_rk4_weighted_sum(solver_state_gpu& state, const field_gpu& in, double dt, field_gpu& out) { @@ -276,7 +277,7 @@ do_sources(const model& mod, maxwell::solver_state_gpu& state, void swap(maxwell::solver_state_gpu& state) { - cudaDeviceSynchronize(); + checkGPU( gpuDeviceSynchronize() ); std::swap(state.bndsrcs, state.bndsrcs_buf); std::swap(state.emf_curr, state.emf_next); } diff --git a/src/maxwell_interface.cpp b/src/maxwell_interface.cpp index 4a72a54..bd023c9 100644 --- a/src/maxwell_interface.cpp +++ b/src/maxwell_interface.cpp @@ -47,30 +47,32 @@ pinned_field::zero() void pinned_field::resize(size_t sz) { - if (Ex and sz != num_dofs) cudaFreeHost(Ex); - if (Ey and sz != num_dofs) cudaFreeHost(Ey); - if (Ez and sz != num_dofs) cudaFreeHost(Ez); - if (Ex and sz != num_dofs) cudaFreeHost(Ex); - if (Ey and sz != num_dofs) cudaFreeHost(Ey); - if (Ez and sz != num_dofs) cudaFreeHost(Ez); - - checkGPU( cudaMallocHost((void**)&Ex, sz*sizeof(double)) ); - checkGPU( cudaMallocHost((void**)&Ey, sz*sizeof(double)) ); - checkGPU( cudaMallocHost((void**)&Ez, sz*sizeof(double)) ); - checkGPU( cudaMallocHost((void**)&Hx, sz*sizeof(double)) ); - checkGPU( cudaMallocHost((void**)&Hy, sz*sizeof(double)) ); - checkGPU( cudaMallocHost((void**)&Hz, sz*sizeof(double)) ); + if (Ex and sz != num_dofs) checkGPU( gpuFreeHost(Ex) ); + if (Ey and sz != num_dofs) checkGPU( gpuFreeHost(Ey) ); + if (Ez and sz != num_dofs) checkGPU( gpuFreeHost(Ez) ); + if (Hx and sz != num_dofs) checkGPU( gpuFreeHost(Hx) ); + if (Hy and sz != num_dofs) checkGPU( gpuFreeHost(Hy) ); + if (Hz and sz != num_dofs) checkGPU( gpuFreeHost(Hz) ); + + zero(); + + checkGPU( gpuMallocHost((void**)&Ex, sz*sizeof(double)) ); + checkGPU( gpuMallocHost((void**)&Ey, sz*sizeof(double)) ); + checkGPU( gpuMallocHost((void**)&Ez, sz*sizeof(double)) ); + checkGPU( gpuMallocHost((void**)&Hx, sz*sizeof(double)) ); + checkGPU( gpuMallocHost((void**)&Hy, sz*sizeof(double)) ); + checkGPU( gpuMallocHost((void**)&Hz, sz*sizeof(double)) ); num_dofs = sz; } pinned_field::~pinned_field() { - if (Ex) cudaFreeHost(Ex); - if (Ey) cudaFreeHost(Ey); - if (Ez) cudaFreeHost(Ez); - if (Ex) cudaFreeHost(Ex); - if (Ey) cudaFreeHost(Ey); - if (Ez) cudaFreeHost(Ez); + if (Ex) checkGPU( gpuFreeHost(Ex) ); + if (Ey) checkGPU( gpuFreeHost(Ey) ); + if (Ez) checkGPU( gpuFreeHost(Ez) ); + if (Hx) checkGPU( gpuFreeHost(Hx) ); + if (Hy) checkGPU( gpuFreeHost(Hy) ); + if (Hz) checkGPU( gpuFreeHost(Hz) ); } void @@ -92,6 +94,7 @@ field_gpu::resize(size_t p_num_dofs) Hx.resize(p_num_dofs); Hy.resize(p_num_dofs); Hz.resize(p_num_dofs); + zero(); num_dofs = p_num_dofs; } -- GitLab