diff --git a/CMakeLists.txt b/CMakeLists.txt index d230b70c048ec39753c3d661ced54711044cd7f1..047b0f97cba73a12d3091877931f771f2385a3c8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,7 +3,7 @@ project(gmsh_gpu_dg) include(CheckLanguage) -set(CMAKE_CXX_STANDARD 20) +set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CUDA_ARCHITECTURES 35) @@ -22,9 +22,15 @@ if(NOT DEFINED HIP_PATH) endif() set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH}) +if (CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang") + cmake_host_system_information(RESULT OSVER QUERY OS_RELEASE) + if (OSVER VERSION_LESS "10.15") + add_definitions(-DDONT_USE_STD_FILESYSTEM) + endif() +endif() + ###################################################################### ## Find required libraries - find_package(Eigen3 REQUIRED) set(LINK_LIBS ${LINK_LIBS} Eigen3::Eigen) @@ -95,7 +101,7 @@ if (OPT_ENABLE_GPU_SOLVER) set(LINK_LIBS ${LINK_LIBS} CUDA::nvml) add_definitions(-DHAVE_CUDA_NVML) endif() - link_directories("/usr/local/cuda/lib64") + link_directories("/usr/local/cuda/lib64") endif() ###################################################################### @@ -115,9 +121,13 @@ if (OPT_ENABLE_GPU_SOLVER) set(HIP_NVCC_FLAGS "-ccbin g++-8" ${HIP_NVCC_FLAGS}) option(OPT_USE_HIP "Use HIP GPU code path" OFF) if (OPT_USE_HIP) + execute_process(COMMAND sh -c "/opt/rocm/hip/bin/hipify-perl ${CMAKE_SOURCE_DIR}/src/kernels_cuda.cu > ${CMAKE_SOURCE_DIR}/src/hipified_kernels_cuda.cpp") + execute_process(COMMAND sh -c "/opt/rocm/hip/bin/hipify-perl ${CMAKE_SOURCE_DIR}/src/maxwell_kernels_cuda.cu > ${CMAKE_SOURCE_DIR}/src/hipified_maxwell_kernels_cuda.cpp") add_definitions(-DGPU_USE_HIP) add_definitions(-D__HIP_PLATFORM_HCC__) + set(LINK_LIBS ${LINK_LIBS}) endif() + endif() option(OPT_BLOCKED_GPU_KERNELS "Use blocked kernels on GPU (DO NOT USE)" OFF) @@ -153,8 +163,8 @@ if (COMPILER_IS_CLANG) -Wno-padded -Wno-shorten-64-to-32 -Wno-sign-conversion -Wno-old-style-cast \ -Wno-sign-compare -Wno-c99-extensions -Wno-extra-semi-stmt -Wno-source-uses-openmp") else() - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wno-sign-compare -Wshadow \ - -Wno-unknown-pragmas") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wno-sign-compare -Wshadow \ + -Wno-unknown-pragmas -Wno-unused-parameter") endif() set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g -fpermissive") set(CMAKE_CXX_FLAGS_RELEASE "-O3 -g -DNDEBUG -fpermissive") @@ -249,6 +259,10 @@ if (OPT_GMSH_484_BEFORE_SPLIT) add_definitions(-DUSE_INITIAL_4_8_4_API) endif() +option(OPT_DISABLE_TEXTURE_CACHE "On ROCm 4.1 texture stuff is broken, disable it" OFF) +if (OPT_DISABLE_TEXTURE_CACHE) + add_definitions(-DDISABLE_TEXTURE_CACHE) +endif() ###################################################################### ## Configure targets include_directories(contrib/sgr) @@ -267,14 +281,23 @@ set(LIBGMSHDG_SOURCES src/gmsh_io.cpp ) if (OPT_ENABLE_GPU_SOLVER) - set(LIBGMSHDG_SOURCES ${LIBGMSHDG_SOURCES} - src/kernels_gpu_mi.cpp - src/kernels_cuda.cu - ) + set(LIBGMSHDG_SOURCES ${LIBGMSHDG_SOURCES} src/kernels_gpu_mi.cpp) + + if (OPT_USE_HIP) + set_source_files_properties(src/hipified_kernels_cuda.cpp PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1) + set(LIBGMSHDG_SOURCES ${LIBGMSHDG_SOURCES} src/hipified_kernels_cuda.cpp) + hip_add_library(gmshdg SHARED ${LIBGMSHDG_SOURCES}) + target_link_libraries(gmshdg ${LINK_LIBS}) + else() + set(LIBGMSHDG_SOURCES ${LIBGMSHDG_SOURCES} src/kernels_cuda.cu) + add_library(gmshdg SHARED ${LIBGMSHDG_SOURCES}) + target_link_libraries(gmshdg ${LINK_LIBS}) + endif() 
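+  # The else() below handles the CPU-only build (OPT_ENABLE_GPU_SOLVER off):
+  # gmshdg is built from the base sources, without any GPU kernel files.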
+else() + add_library(gmshdg SHARED ${LIBGMSHDG_SOURCES}) + target_link_libraries(gmshdg ${LINK_LIBS}) endif() -add_library(gmshdg SHARED ${LIBGMSHDG_SOURCES}) -target_link_libraries(gmshdg ${LINK_LIBS}) ########## maxwell solver set(MAXWELL_SOURCES src/maxwell_interface.cpp @@ -284,38 +307,46 @@ set(MAXWELL_SOURCES src/maxwell_interface.cpp ) if (OPT_ENABLE_GPU_SOLVER) - set(MAXWELL_SOURCES ${MAXWELL_SOURCES} - src/maxwell_gpu.cpp - src/maxwell_kernels_cuda.cu - ) -endif() + set(MAXWELL_SOURCES ${MAXWELL_SOURCES} src/maxwell_gpu.cpp) -add_executable(maxwell_solver ${MAXWELL_SOURCES}) -target_link_libraries(maxwell_solver gmshdg) + if (OPT_USE_HIP) + set_source_files_properties(src/hipified_maxwell_kernels_cuda.cpp PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1) + set(MAXWELL_SOURCES ${MAXWELL_SOURCES} src/hipified_maxwell_kernels_cuda.cpp) + hip_add_executable(maxwell_solver ${MAXWELL_SOURCES} HIPCC_OPTIONS "-std=c++17") + target_link_libraries(maxwell_solver gmshdg) + else() + set(MAXWELL_SOURCES ${MAXWELL_SOURCES} src/maxwell_kernels_cuda.cu) + add_executable(maxwell_solver ${MAXWELL_SOURCES}) + target_link_libraries(maxwell_solver gmshdg) + endif() +else() + add_executable(maxwell_solver ${MAXWELL_SOURCES}) + target_link_libraries(maxwell_solver gmshdg) +endif() ########## tests option(OPT_BUILD_TESTS "Build tests" OFF) if (OPT_BUILD_TESTS) add_executable(test_basics tests/test_basics.cpp) - target_link_libraries(test_basics gmshdg) + target_link_libraries(test_basics gmshdg ${LINK_LIBS}) add_executable(test_mass tests/test_mass.cpp) - target_link_libraries(test_mass gmshdg) + target_link_libraries(test_mass gmshdg ${LINK_LIBS}) add_executable(test_differentiation tests/test_differentiation.cpp) - target_link_libraries(test_differentiation gmshdg) + target_link_libraries(test_differentiation gmshdg ${LINK_LIBS}) add_executable(test_lifting tests/test_lifting.cpp) - target_link_libraries(test_lifting gmshdg) + target_link_libraries(test_lifting gmshdg ${LINK_LIBS}) if (OPT_ENABLE_GPU_SOLVER) add_executable(test_differentiation_gpu tests/test_differentiation_gpu.cpp) - target_link_libraries(test_differentiation_gpu gmshdg) + target_link_libraries(test_differentiation_gpu gmshdg ${LINK_LIBS}) add_executable(profile_differentiation_gpu tests/profile_differentiation_gpu.cpp) - target_link_libraries(profile_differentiation_gpu gmshdg) + target_link_libraries(profile_differentiation_gpu gmshdg ${LINK_LIBS}) add_executable(test_lifting_gpu tests/test_lifting_gpu.cpp) - target_link_libraries(test_lifting_gpu gmshdg) + target_link_libraries(test_lifting_gpu gmshdg ${LINK_LIBS}) endif() endif() diff --git a/doc/changelog.md b/doc/changelog.md index 83e6c43822eabfe5b378ef020cf3ff32481c8eb2..9305240c6a3435057ff5c1a1b2f98b0da4985d37 100644 --- a/doc/changelog.md +++ b/doc/changelog.md @@ -12,5 +12,12 @@ Unavailable features: - In the Maxwell solver, the RK4 time integrator limits the practical values of the conducibility `sigma`. If the absolute value of `1 - sigma*sim.dt/(epsilon*const.eps0)` is greater than 1, the computation fails. This will be solved by leapfrog time integration in a subsequent version. - Repetitive sources on GPU. -## Version v0.2 (NOT RELEASED YET) -- Fixed performance issue in the evaluation of boundary/interface sources (4463681c). \ No newline at end of file +## Version v0.2 +- Fixed performance issue in the evaluation of boundary/interface sources (4463681c). +- Compressed source data for upload on GPU (27bb4c44). +- Added HIP support via Hipify (e6f5a020). 
+- Switched back to C++17 due to various problems with Eigen and Mac OS X.
+- Use the system GMSH if it is installed, and look at the hardcoded path only as a fallback (a55af8ab).
+- Improved export to SILO. E, H, and J fields are now exportable as nodal or zonal variables. It is also possible to disable the export of a specific field, see `lua_api.md` (8a831181).
+- Leapfrog integration on CPU & GPU (e3d95022).
+
diff --git a/doc/lua_api.md b/doc/lua_api.md
index 41ed455647ee915497a62d0bf68a3d86189a52ae..363323459c11af48f402e5a8a34edebdf34392b3 100644
--- a/doc/lua_api.md
+++ b/doc/lua_api.md
@@ -20,6 +20,7 @@ This document describes the API available on the version v0.1 of the solver.
 - `sim.use_gpu` (0/1): enable/disable GPU usage.
 - `sim.approx_order` (integer): method approximation order.
 - `sim.geom_order` (integer): geometry order. Only order 1 supported for now.
+- `sim.time_integrator` (string): `"rk4"`, `"leapfrog"` or `"euler"`. Euler is only for test purposes, as it is numerically unstable. Leapfrog works on the CPU for now; use it if you have dissipative materials.
 
 ### Postprocessing general variables
 
@@ -112,5 +113,19 @@ None.
 - `electric_initial_condition(x, y, z)`: if defined, the solver calls this function at the beginning of the simulation in order to initialize the electric field. The parameters `x`, `y` and `z` represent the point at which the field needs to be evaluated. The function has to return a triple specifying the three field components, i.e. `return Ex, Ey, Ez`.
 - `magnetic_initial_condition(x, y, z)`: if defined, the solver calls this function at the beginning of the simulation in order to initialize the magnetic field. The parameters `x`, `y` and `z` represent the point at which the field needs to be evaluated. The function has to return a triple specifying the three field components, i.e. `return Hx, Hy, Hz`.
 
+### Postprocessing
+The export of the E, H and J fields to Silo is controlled through the corresponding sections of the `postpro` table.
+- `postpro["E"].silo_mode` controls the export of the electric field E.
+- `postpro["H"].silo_mode` controls the export of the magnetic field H.
+- `postpro["J"].silo_mode` controls the export of the current density field J.
+
+For each field, the possible values are `"none"` (no export), `"nodal"` (export as a Silo nodal variable) and `"zonal"` (export as a Silo zonal variable).
+
+Nodal variables represent the field at the mesh nodes. The nodal value is reconstructed by averaging the solution evaluated at the node over all the elements adjacent to that node.
+
+Zonal variables represent the field as a piecewise-constant field evaluated at the barycenter of each element.
+
+The default mode is `"nodal"`.
+
 ### Considerations on the evaluation of sources and the computation on GPU
-Currently sources for the timestep `t+1` are evaluated asynchronously on the CPU while the GPU is computing the timestep `t`. When the GPU is done, the updated sources are uploaded from the CPU to the GPU and the cycle restarts. This usually works relatively well on big domains with high approximation order, however on small domains at low approximation order does not give any speedup. For this reason, if the sources do not need to be evaluated along the whole simulation, it is suggested to turn them off at the appropriate timestep by using the `on_timestep()` callback and the appropriate functions described below. A future version of the solver will implement repetitive boundary sources, that will be precomputed on the CPU and uploaded on the GPU only once.
\ No newline at end of file
+Currently, sources for the timestep `t+1` are evaluated asynchronously on the CPU while the GPU is computing the timestep `t`. The sources at `t+1` are then uploaded asynchronously from the CPU to the GPU. This usually works relatively well, especially on big domains with a high approximation order; on small domains at a low approximation order, however, you may notice slowdowns. For this reason, if the sources do not need to be evaluated throughout the whole simulation, it is suggested to turn them off at the appropriate timestep by using the `on_timestep()` callback and the appropriate functions described below. That said, I am not sure that implementing repetitive boundary sources will be necessary.
diff --git a/include/connectivity.h b/include/connectivity.h
index 3730eba1edafaffac99138ccde07ec887c109e2b..275ca63dee1e1321003decbcc2de6a4c0e94abd9 100644
--- a/include/connectivity.h
+++ b/include/connectivity.h
@@ -187,7 +187,7 @@ public:
     std::set<cell_descriptor>
     neighbours_as_cell_descriptors(const cell_descriptor& cd) const
     {
-        auto filter = [](const ConnData& connd) -> auto {
+        auto filter = [](const ConnData& connd) {
             cell_descriptor ret;
             ret.iE = connd.second.iE;
             ret.iT = connd.second.iT;
@@ -218,7 +218,7 @@ public:
     element_keys() const
     {
         using PT = std::pair<cell_descriptor, std::set<ConnData>>;
-        auto get_node = [](const PT& adjpair) -> auto {
+        auto get_node = [](const PT& adjpair) {
             return adjpair.first;
         };
diff --git a/include/eigen.h b/include/eigen.h
index 16d1dd5926953853b5663c473b065649b091b13f..89d56185d62ebe63c6a09da1fd938873a4f45de8 100644
--- a/include/eigen.h
+++ b/include/eigen.h
@@ -15,3 +15,10 @@ using MatxCM = Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor>
 
 template<typename T>
 using MatxRM = Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
+using vec3d = Eigen::Matrix<double, 3, 1>;
+using mat3d = Eigen::Matrix<double, 3, 3, Eigen::RowMajor>;
+using vecxd = Eigen::Matrix<double, Eigen::Dynamic, 1>;
+using matxd = Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
+using vec_mat3d = eigen_compatible_stdvector<mat3d>;
+using vec_matxd = eigen_compatible_stdvector<matxd>;
+
diff --git a/include/entity_data.h b/include/entity_data.h
index 2db0c821947c428af419481c28213360757b256d..d778a15831561fb9c7fc83da0f6994dabe5be136 100644
--- a/include/entity_data.h
+++ b/include/entity_data.h
@@ -1,7 +1,6 @@
 #pragma once
 
 #include <numeric>
-
 #include "eigen.h"
 #include "types.h"
 
@@ -100,6 +99,7 @@ struct entity_data_gpu
     ~entity_data_gpu();
 };
 
+#if 0
 template<typename T>
 void
 gpu_copyin(const std::vector<T> src, T** dst)
@@ -131,7 +131,7 @@ gpu_copyin(const Eigen::Matrix<T, Eigen::Dynamic, 1>& src, double** dst)
 {
     checkGPU( gpuMalloc( (void**)dst, src.size()*sizeof(T) ) );
     checkGPU( gpuMemcpy( *dst, src.data(), src.size()*sizeof(T), gpuMemcpyHostToDevice) );
 }
-
+#endif
 
 #endif /* ENABLE_GPU_SOLVER */
diff --git a/include/gmsh_io.h b/include/gmsh_io.h
index bfed21812975523435437102ccb1e54f3898cc81..7585f304322d8c578dc530c39c87aa35b9b8d060 100644
--- a/include/gmsh_io.h
+++ b/include/gmsh_io.h
@@ -38,9 +38,19 @@ struct
boundary_descriptor face_type type; int gmsh_entity; -#ifndef HIDE_THIS_FROM_NVCC - auto operator<=>(const boundary_descriptor&) const = default; -#endif /* HIDE_THIS_FROM_NVCC */ + //auto operator <=> (const boundary_descriptor&) const = default; + + bool operator<(const boundary_descriptor& other) const + { + return (type < other.type) or + ( (type == other.type) and (gmsh_entity < other.gmsh_entity) ); + } + + bool operator==(const boundary_descriptor& other) const + { + return (type == other.type) and (gmsh_entity == other.gmsh_entity); + } + boundary_descriptor() : type(face_type::NONE), gmsh_entity(0) {} diff --git a/include/gpu_support.hpp b/include/gpu_support.hpp index 9e3b34643fbcda4c170df968f2a1465fc45b0856..475a962528f0c2050f8abdb1002ddaf2ee707d5c 100644 --- a/include/gpu_support.hpp +++ b/include/gpu_support.hpp @@ -7,7 +7,12 @@ #ifdef GPU_USE_HIP #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-result" +#pragma GCC diagnostic ignored "-Wshadow" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Weverything" #include <hip/hip_runtime.h> +#include <hip/hip_runtime_api.h> +#pragma clang diagnostic pop #pragma GCC diagnostic pop #else /* GPU_USE_HIP */ #include <cuda_runtime.h> @@ -26,7 +31,11 @@ #define gpuMemcpyDefault hipMemcpyDefault #define gpuTextureDesc hipTextureDesc #define gpuResourceDesc hipResourceDesc -#define gpuTextureObject_t hipTextureObject_t +#ifdef DISABLE_TEXTURE_CACHE + #define gpuTextureObject_t double * +#else + #define gpuTextureObject_t hipTextureObject_t +#endif #define gpuCreateTextureObject hipCreateTextureObject #define gpuDestroyTextureObject hipDestroyTextureObject #define gpuAddressModeClamp hipAddressModeClamp @@ -39,6 +48,13 @@ #define gpuEventRecord hipEventRecord #define gpuEventSynchronize hipEventSynchronize #define gpuEventElapsedTime hipEventElapsedTime +#define gpuStream_t hipStream_t +#define gpuStreamCreate hipStreamCreate +#define gpuStreamDestroy hipStreamDestroy +#define gpuStreamSynchronize hipStreamSynchronize +#define gpuMallocHost hipHostMalloc +#define gpuFreeHost hipHostFree +#define gpuMemcpyAsync hipMemcpyAsync #else /* GPU_USE_HIP */ @@ -53,7 +69,11 @@ #define gpuMemcpyDefault cudaMemcpyDefault #define gpuTextureDesc cudaTextureDesc #define gpuResourceDesc cudaResourceDesc -#define gpuTextureObject_t cudaTextureObject_t +#ifdef DISABLE_TEXTURE_CACHE + #define gpuTextureObject_t double * +#else + #define gpuTextureObject_t cudaTextureObject_t +#endif #define gpuCreateTextureObject cudaCreateTextureObject #define gpuDestroyTextureObject cudaDestroyTextureObject #define gpuAddressModeClamp cudaAddressModeClamp @@ -66,9 +86,17 @@ #define gpuEventRecord cudaEventRecord #define gpuEventSynchronize cudaEventSynchronize #define gpuEventElapsedTime cudaEventElapsedTime - +#define gpuStream_t cudaStream_t +#define gpuStreamCreate cudaStreamCreate +#define gpuStreamDestroy cudaStreamDestroy +#define gpuStreamSynchronize cudaStreamSynchronize +#define gpuMallocHost cudaMallocHost +#define gpuFreeHost cudaFreeHost +#define gpuMemcpyAsync cudaMemcpyAsync #endif /* GPU_USE_HIP */ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunused-parameter" inline void checkGPU(gpuError_t result) { @@ -84,6 +112,7 @@ checkGPU(gpuError_t result) #endif /* GPU_DEBUG */ // return result; } +#pragma clang diagnostic pop inline gpuError_t gpuMalloc(void **ptr, size_t size) @@ -126,32 +155,27 @@ gpuMemcpy(void *dst, const void *src, size_t size, gpuMemcpyKind kind) } -#ifdef GPU_USE_HIP +#if 
defined(__NVCC__) || defined(__HIP__) static __inline__ __device__ double -fetch_tex(hipTextureObject_t tex, int32_t ofs) +fetch_tex(gpuTextureObject_t tex, int32_t ofs) { - uint2 val = tex1Dfetch<uint2>(tex, ofs); - return __hiloint2double(val.y, val.x); -} +#ifdef DISABLE_TEXTURE_CACHE + return tex[ofs]; #else -#ifdef __NVCC__ -static __inline__ __device__ double -fetch_tex(cudaTextureObject_t tex, int32_t ofs) -{ uint2 val = tex1Dfetch<uint2>(tex, ofs); return __hiloint2double(val.y, val.x); -} #endif +} #endif class stream { - cudaStream_t m_stream; + gpuStream_t m_stream; public: stream() { - checkGPU( cudaStreamCreate(&m_stream) ); + checkGPU( gpuStreamCreate(&m_stream) ); } stream(const stream&) = delete; @@ -161,17 +185,17 @@ public: ~stream() { - checkGPU( cudaStreamDestroy(m_stream) ); + checkGPU( gpuStreamDestroy(m_stream) ); } - operator cudaStream_t() const + operator gpuStream_t() const { return m_stream; } void wait(void) const { - cudaStreamSynchronize(m_stream); + checkGPU( gpuStreamSynchronize(m_stream) ); } }; @@ -228,7 +252,7 @@ public: void copyin(const T *src, size_t size, const stream& st) { assert(size == m_size); - (void)checkGPU( cudaMemcpyAsync(m_devptr, src, size*sizeof(T), gpuMemcpyHostToDevice, st) ); + (void)checkGPU( gpuMemcpyAsync(m_devptr, src, size*sizeof(T), gpuMemcpyHostToDevice, st) ); } void copyout(T *dst) const @@ -307,10 +331,11 @@ class texture_allocator; template<> class texture_allocator<double> { +#ifndef DISABLE_TEXTURE_CACHE gpuTextureDesc cTexDesc; gpuResourceDesc cResDesc; gpuTextureObject_t cTexObj; - +#endif double *d_texDataPtr; size_t data_size; @@ -327,9 +352,11 @@ public: texture_allocator(texture_allocator&& other) { +#ifndef DISABLE_TEXTURE_CACHE cTexDesc = other.cTexDesc; cResDesc = other.cResDesc; cTexObj = other.cTexObj; +#endif d_texDataPtr = other.d_texDataPtr; other.d_texDataPtr = nullptr; data_size = other.data_size; @@ -353,6 +380,7 @@ public: else checkGPU( gpuMemset(d_texDataPtr, 0, size*sizeof(double)) ); +#ifndef DISABLE_TEXTURE_CACHE /* cudaTextureDesc */ memset(&cTexDesc, 0, sizeof(cTexDesc)); cTexDesc.normalizedCoords = 0; @@ -370,12 +398,17 @@ public: /* create texture */ checkGPU( gpuCreateTextureObject(&cTexObj, &cResDesc, &cTexDesc, nullptr) ); +#endif data_size = size; } gpuTextureObject_t get_texture(void) const { +#ifdef DISABLE_TEXTURE_CACHE + return d_texDataPtr; +#else return cTexObj; +#endif } double * data(void) const @@ -392,7 +425,9 @@ public: { if (d_texDataPtr) { +#ifndef DISABLE_TEXTURE_CACHE checkGPU( gpuDestroyTextureObject(cTexObj) ); +#endif checkGPU( gpuFree(d_texDataPtr) ); } @@ -411,28 +446,28 @@ class timecounter_gpu public: timecounter_gpu() { - gpuEventCreate(&start); - gpuEventCreate(&stop); + checkGPU( gpuEventCreate(&start) ); + checkGPU( gpuEventCreate(&stop) ); } ~timecounter_gpu() { - gpuEventDestroy(start); - gpuEventDestroy(stop); + checkGPU( gpuEventDestroy(start) ); + checkGPU( gpuEventDestroy(stop) ); } void tic() { - gpuEventRecord(start, 0); + checkGPU( gpuEventRecord(start, 0) ); } double toc() { - gpuEventRecord(stop, 0); + checkGPU( gpuEventRecord(stop, 0) ); float elapsed; - gpuEventSynchronize(stop); - gpuEventElapsedTime(&elapsed, start, stop); + checkGPU( gpuEventSynchronize(stop) ); + checkGPU( gpuEventElapsedTime(&elapsed, start, stop) ); return double(elapsed)/1000.0; } }; diff --git a/include/kernels_gpu.h b/include/kernels_gpu.h index ea041770a4847e0244c0ed224d3fe9898283765b..454c1df8d8c59a4ed08fa2d0040c3de779dd242b 100644 --- a/include/kernels_gpu.h +++ 
b/include/kernels_gpu.h @@ -141,35 +141,45 @@ size_t gpu_dblocks_dofs(const entity_data_cpu&); void reshape_dofs(const entity_data_cpu&, const entity_data_gpu&, const vecxd&, vecxd&, bool); - void gpu_compute_field_derivatives(const entity_data_gpu& edg, const double *F, - double *dF_dx, double* dF_dy, double* dF_dz, double alpha, cudaStream_t stream = 0); + double *dF_dx, double* dF_dy, double* dF_dz, double alpha, gpuStream_t stream = 0); void gpu_compute_flux_lifting(entity_data_gpu&, const double *, double *, - cudaStream_t stream = 0); + gpuStream_t stream = 0); void gpu_subtract(double *, const double *, const double *, size_t, - cudaStream_t stream = 0); + gpuStream_t stream = 0); void gpu_compute_euler_update(double *out, const double *y, const double *k, const double *reaction, const double *material, size_t num_dofs, double dt, - cudaStream_t stream = 0); + gpuStream_t stream = 0); void gpu_compute_euler_update(double *out, const double *y, const double *k, const double *material, size_t num_dofs, double dt, - cudaStream_t stream = 0); + gpuStream_t stream = 0); void gpu_compute_rk4_weighted_sum(double *out, const double *in, const double *k1, const double *k2, const double *k3, const double *k4, const double *material, size_t num_dofs, double dt, - cudaStream_t stream = 0); + gpuStream_t stream = 0); void gpu_compute_rk4_weighted_sum(double *out, const double *in, const double *k1, const double *k2, const double *k3, const double *k4, const double *reaction, const double *material, size_t num_dofs, double dt, - cudaStream_t stream = 0); + gpuStream_t stream = 0); + +void +gpu_compute_leapfrog_update(double *next, const double *curr, const double *soe, + const double *tmp, const double *ie, size_t num_dofs, double dt, + gpuStream_t stream = 0); + +void +gpu_compute_leapfrog_update(double *next, const double *curr, + const double *tmp, const double *im, size_t num_dofs, double dt, + gpuStream_t stream = 0); + diff --git a/include/maxwell_common.h b/include/maxwell_common.h index c096869ee6aa63c5dd8fed60e4888c7380601177..d60047cc9783ca474d2477326f77961d67e47196 100644 --- a/include/maxwell_common.h +++ b/include/maxwell_common.h @@ -1,6 +1,7 @@ #pragma once #include "maxwell_interface.h" +#include "silo_output.hpp" namespace maxwell { @@ -56,8 +57,8 @@ eval_boundary_sources(const model& mod, const parameter_loader& mpl, return mpl.eval_boundary_source(bd.gmsh_entity, pt, state.curr_time); }; auto fH = [&](const point_3d& pt) -> vec3d { - vec3d E = mpl.eval_boundary_source(bd.gmsh_entity, pt, state.curr_time); - return n.cross(E)/Z; + vec3d H = mpl.eval_boundary_source(bd.gmsh_entity, pt, state.curr_time); + return n.cross(H)/Z; }; matxd prjE = e.project_on_face(iF, fE); auto num_bf = prjE.rows(); @@ -155,5 +156,23 @@ eval_interface_sources(const model& mod, const parameter_loader& mpl, } } +void export_E_field_nodal(const model&, silo&, const vecxd&, const vecxd&, + const vecxd&, const parameter_loader&); + +void export_H_field_nodal(const model&, silo&, const vecxd&, const vecxd&, + const vecxd&, const parameter_loader&); + +void export_J_field_nodal(const model&, silo&, const vecxd&, const vecxd&, + const vecxd&, const parameter_loader&); + +void export_E_field_zonal(const model&, silo&, const vecxd&, const vecxd&, + const vecxd&, const parameter_loader&); + +void export_H_field_zonal(const model&, silo&, const vecxd&, const vecxd&, + const vecxd&, const parameter_loader&); + +void export_J_field_zonal(const model&, silo&, const vecxd&, const vecxd&, + const vecxd&, const 
parameter_loader&); + } // namespace maxwell diff --git a/include/maxwell_interface.h b/include/maxwell_interface.h index 92db3e188b8474ed8afb8b86868e587419b71a36..aec2ef6a834fc8305b2d9e23aa05b82bda26d1b6 100644 --- a/include/maxwell_interface.h +++ b/include/maxwell_interface.h @@ -70,6 +70,7 @@ public: boundary_condition boundary_type(int tag) const; interface_condition interface_type(int tag) const; + field_export_mode postpro_fieldExportMode(const std::string&) const; }; #endif /* HIDE_THIS_FROM_NVCC */ @@ -280,7 +281,13 @@ struct solver_state_gpu field_gpu bndsrcs; field_gpu bndsrcs_buf; + + field bndsrcs_decomp_cpu; pinned_field bndsrcs_cpu; + + std::vector<size_t> bndsrcs_decomp_table_cpu; + device_vector<size_t> bndsrcs_decomp_table; + stream memcpy_stream; stream compute_stream; }; @@ -290,8 +297,8 @@ struct solver_state_gpu void init_from_model(const model&, solver_state&); void init_matparams(const model&, solver_state&, const parameter_loader&); //void apply_operator(solver_state&, const field&, field&); -void export_to_silo(const model&, const solver_state&, const std::string&); -void timestep(solver_state&); +void export_fields_to_silo(const model&, const solver_state&, const parameter_loader&); +void timestep(solver_state&, time_integrator_type); void prepare_sources(const model&, maxwell::solver_state&, maxwell::parameter_loader&); void do_sources(const model&, maxwell::solver_state&, maxwell::parameter_loader&); void swap(maxwell::solver_state&); @@ -300,8 +307,8 @@ void swap(maxwell::solver_state&); void init_from_model(const model&, solver_state_gpu&); void init_matparams(const model&, solver_state_gpu&, const parameter_loader&); //void apply_operator(solver_state_gpu&, const field_gpu&, field_gpu&); -void export_to_silo(const model&, const solver_state_gpu&, const std::string&); -void timestep(solver_state_gpu&); +void export_fields_to_silo(const model&, const solver_state_gpu&, const parameter_loader&); +void timestep(solver_state_gpu&, time_integrator_type); void prepare_sources(const model&, maxwell::solver_state_gpu&, maxwell::parameter_loader&); void do_sources(const model&, maxwell::solver_state_gpu&, maxwell::parameter_loader&); void swap(maxwell::solver_state_gpu&); @@ -362,12 +369,28 @@ init_H_field(const model& mod, solver_state_gpu& state, const Function& H) state.emf_curr.Hz.copyin(Hz.data(), Hz.size()); } + +#endif /* ENABLE_GPU_SOLVER */ + void gpu_compute_jumps(const entity_data_gpu&, const field_gpu&, field_gpu&, - double *, cudaStream_t stream = 0); + double *, gpuStream_t stream = 0); + +void gpu_compute_jumps_E(const entity_data_gpu&, const field_gpu&, field_gpu&, + double *, gpuStream_t stream = 0); + +void gpu_compute_jumps_H(const entity_data_gpu&, const field_gpu&, field_gpu&, + double *, gpuStream_t stream = 0); + void gpu_compute_fluxes(const entity_data_gpu&, const field_gpu&, field_gpu&, - const field_gpu&, const material_params_gpu&, cudaStream_t stream = 0); -#endif /* ENABLE_GPU_SOLVER */ + const field_gpu&, const material_params_gpu&, gpuStream_t stream = 0); + +void gpu_compute_fluxes_E(const entity_data_gpu&, const field_gpu&, field_gpu&, + const field_gpu&, const material_params_gpu&, gpuStream_t stream = 0); +void gpu_compute_fluxes_H(const entity_data_gpu&, const field_gpu&, field_gpu&, + const field_gpu&, const material_params_gpu&, gpuStream_t stream = 0); +void decompress_bndsrc(const solver_state_gpu& state, const field_gpu& csrcs, + field_gpu& srcs); } // namespace maxwell diff --git a/include/param_loader.h 
b/include/param_loader.h index d33fe330dae39bba2c85dcabecb420d8ee18601a..fd2f5eac9b7af2da537533d2f98c0286e57dc16c 100644 --- a/include/param_loader.h +++ b/include/param_loader.h @@ -7,6 +7,18 @@ #include "gmsh_io.h" +enum class field_export_mode { + NONE, + NODAL, + ZONAL +}; + +enum class time_integrator_type { + RK4, + LEAPFROG, + EULER +}; + class parameter_loader_base { bool m_bnd_sources_active; @@ -33,6 +45,8 @@ public: size_t sim_timesteps(void) const; std::string sim_gmshmodel(void) const; bool sim_usegpu(void) const; + time_integrator_type sim_timeIntegrator(void) const; + std::string sim_timeIntegratorName(void) const; size_t postpro_siloOutputRate(void) const; size_t postpro_cyclePrintRate(void) const; @@ -51,3 +65,4 @@ public: }; + diff --git a/include/physical_element.h b/include/physical_element.h index 4c7c271e295780a606944752d6a4cc148ac1067e..b8dad8c262954a5d952dd056d8005ee04584b39a 100644 --- a/include/physical_element.h +++ b/include/physical_element.h @@ -2,6 +2,7 @@ #include "gmsh.h" #include "point.hpp" +#include "eigen.h" #include "types.h" /* This is a key for a basis function. Did this diff --git a/include/reference_element.h b/include/reference_element.h index 22e3930aa5c1dee634e0357122b37e5e58b4f050..19961b43c1ee4ebce017646a365e967a3c987e65 100644 --- a/include/reference_element.h +++ b/include/reference_element.h @@ -1,5 +1,6 @@ #pragma once +#include "eigen.h" #include "types.h" class reference_element diff --git a/include/silo_output.hpp b/include/silo_output.hpp index cfee85b929018eaa92ea5edc499d6b1ca395ee52..ee7d1521b63f5838b87e813221439e1fade8ddc2 100644 --- a/include/silo_output.hpp +++ b/include/silo_output.hpp @@ -9,7 +9,7 @@ class silo std::vector<double> node_coords_x; std::vector<double> node_coords_y; std::vector<double> node_coords_z; - + std::vector<size_t> nodeTags; std::vector<int> nodelist; int num_cells; @@ -31,5 +31,10 @@ public: bool write_zonal_variable(const std::string&, const std::vector<float>&); bool write_scalar_expression(const std::string& name, const std::string& expression); bool write_vector_expression(const std::string& name, const std::string& expression); + size_t num_nodes(void) const; + size_t node_tag_offset(size_t tag) const; void close_db(); + + + bool write_nodal_variable(const std::string& name, const std::vector<double>& data); }; diff --git a/include/types.h b/include/types.h index 6ec9c226a04c4ce7f1287b52e35415a4e1fde1ef..a192f3b690a812e42429f6f1df0359127eeffa0d 100644 --- a/include/types.h +++ b/include/types.h @@ -1,7 +1,7 @@ #pragma once +#include <vector> #include "point.hpp" -#include "eigen.h" #ifdef __NVCC__ #define HIDE_THIS_FROM_NVCC @@ -12,12 +12,6 @@ using vec_size_t = std::vector<size_t>; using vec_double = std::vector<double>; using vec_point_3d = std::vector<point_3d>; using vec_quadpt_3d = std::vector<quadrature_point_3d>; -using vec3d = Eigen::Matrix<double, 3, 1>; -using mat3d = Eigen::Matrix<double, 3, 3, Eigen::RowMajor>; -using vecxd = Eigen::Matrix<double, Eigen::Dynamic, 1>; -using matxd = Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>; -using vec_mat3d = eigen_compatible_stdvector<mat3d>; -using vec_matxd = eigen_compatible_stdvector<matxd>; struct entity_params { diff --git a/share/binde_simplified/params_binde_simplified.lua b/share/binde_simplified/params_binde_simplified.lua index e47b43b1bf38d7b5921650266900119a6da5541a..c138e16a099ed1212c62ae8e5a1f681ba31b95e5 100644 --- a/share/binde_simplified/params_binde_simplified.lua +++ 
b/share/binde_simplified/params_binde_simplified.lua @@ -2,7 +2,7 @@ sim.name = "binde_simplified" -- simulation name sim.dt = 1e-11 -- timestep size sim.timesteps = 10000 -- num of iterations sim.gmsh_model = "binde_simplified.geo" -- gmsh model filename -sim.use_gpu = 1 -- 0: cpu, 1: gpu +sim.use_gpu = 0 -- 0: cpu, 1: gpu sim.approx_order = 1 -- approximation order --sim_geom_order = 1 -- geometric order, only 1 for now postpro.silo_output_rate = 10 -- rate at which to write silo files diff --git a/src/entity.cpp b/src/entity.cpp index 54de2bcd44a7a2bdfe77ff3b1f71e842c74e8e22..30e33790cf7ff868d8f035845ff4f13acfb392db 100644 --- a/src/entity.cpp +++ b/src/entity.cpp @@ -158,7 +158,8 @@ entity::project(const vector_function& function, vecxd& pfx, vecxd& pfy, vecxd& rhs_z += pqp.weight() * fval(2) * phi; } - Eigen::LDLT<matxd> mass_ldlt(mass); + Eigen::LDLT<matxd> mass_ldlt; + mass_ldlt.compute(mass); pfx.segment(m_dof_base + iT*num_bf, num_bf) = mass_ldlt.solve(rhs_x); pfy.segment(m_dof_base + iT*num_bf, num_bf) = mass_ldlt.solve(rhs_y); pfz.segment(m_dof_base + iT*num_bf, num_bf) = mass_ldlt.solve(rhs_z); diff --git a/src/kernels_cuda.cu b/src/kernels_cuda.cu index 744b0c1519df8ac303545508850705b7c4df6b1c..99918c51133c664c5f5bc8f1f0dd4dd4ef9dd72f 100644 --- a/src/kernels_cuda.cu +++ b/src/kernels_cuda.cu @@ -435,3 +435,63 @@ gpu_compute_rk4_weighted_sum(double *out, const double *in, const double *k1, gpu_rk4_weighted_sum_kernel<<<gs, RK4_THREADS, 0, stream>>>(out, in, k1, k2, k3, k4, reaction, material, num_dofs, dt); } + + +void __global__ +gpu_leapfrog_update_kernel(double * __restrict__ next, const double * __restrict__ curr, + const double * __restrict__ soe, const double * __restrict__ tmp, + const double * __restrict__ ie, size_t num_dofs, double dt) +{ + int32_t dof = blockIdx.x * blockDim.x + threadIdx.x; + if (dof < num_dofs) + { + double dt_sigma_over_2eps = 0.5*dt*soe[dof]; + double CR = (1.0 - dt_sigma_over_2eps)/(1.0 + dt_sigma_over_2eps); + double CC = dt*ie[dof]/(1.0 + dt_sigma_over_2eps); + + next[dof] = curr[dof]*CR + tmp[dof]*CC; + } +} + +void +gpu_compute_leapfrog_update(double *next, const double *curr, const double *soe, + const double *tmp, const double *ie, size_t num_dofs, double dt, + cudaStream_t stream) +{ + static const size_t EULER_THREADS = 256; + + auto gs = num_dofs/EULER_THREADS; + if (num_dofs % EULER_THREADS != 0) + gs += 1; + + gpu_leapfrog_update_kernel<<<gs, EULER_THREADS, 0, stream>>>(next, curr, soe, + tmp, ie, num_dofs, dt); +} + +void __global__ +gpu_leapfrog_update_kernel(double * __restrict__ next, const double * __restrict__ curr, + const double * __restrict__ tmp, const double * __restrict__ im, + size_t num_dofs, double dt) +{ + int32_t dof = blockIdx.x * blockDim.x + threadIdx.x; + if (dof < num_dofs) + { + double CC = dt*im[dof]; + next[dof] = curr[dof] + tmp[dof]*CC; + } +} + +void +gpu_compute_leapfrog_update(double *next, const double *curr, + const double *tmp, const double *im, size_t num_dofs, double dt, + cudaStream_t stream) +{ + static const size_t EULER_THREADS = 256; + + auto gs = num_dofs/EULER_THREADS; + if (num_dofs % EULER_THREADS != 0) + gs += 1; + + gpu_leapfrog_update_kernel<<<gs, EULER_THREADS, 0, stream>>>(next, curr, + tmp, im, num_dofs, dt); +} diff --git a/src/maxwell_common.cpp b/src/maxwell_common.cpp index 8cd1541eddb2dcc82c259e3e8d90cd84637e1dea..8976fd8189af17cebf0ff32847d2d799129283bf 100644 --- a/src/maxwell_common.cpp +++ b/src/maxwell_common.cpp @@ -6,33 +6,6 @@ #define ROBIN 1.0 #define NEUMANN 0.0 
-#define SEC_BCONDS "bndconds" -#define BC_KIND "kind" -#define BC_SOURCE "source" -#define BCOND_TYPE_PEC "pec" -#define BCOND_TYPE_PMC "pmc" -#define BCOND_TYPE_IMPEDANCE "impedance" -#define BCOND_TYPE_EPLANEW "plane_wave_E" -#define BCOND_TYPE_HPLANEW "plane_wave_H" -#define BCOND_TYPE_EFIELD "E_field" -#define BCOND_TYPE_HFIELD "H_field" -#define BCOND_TYPE_SURFCURR "surface_current" - -#define SEC_IFCONDS "ifaceconds" -#define IFC_KIND "kind" -#define IFC_SOURCE "source" -#define IFCOND_TYPE_EFIELD "E_field" -#define IFCOND_TYPE_HFIELD "H_field" -#define IFCOND_TYPE_SURFCURR "surface_current" - - -#define SEC_MATERIALS "materials" -#define MAT_EPS "epsilon" -#define MAT_EPS0 "eps0" -#define MAT_MU "mu" -#define MAT_MU0 "mu0" -#define MAT_SIGMA "sigma" - namespace maxwell { /* Take a model and a parameter loader and return a vector with the @@ -164,308 +137,229 @@ init_lax_milgram(const model& mod, const parameter_loader& mpl, } -parameter_loader::parameter_loader() -{ - lua["const"][MAT_EPS0] = 8.8541878128e-12; - lua["const"][MAT_MU0] = 4e-7*M_PI; -} - -bool -parameter_loader::validate_materials(const std::string& mpn, int tag) const -{ - auto mfun = lua[SEC_MATERIALS][mpn]; - if ( mfun.valid() ) - return true; - - auto mparams = lua[SEC_MATERIALS][tag]; - if ( mparams.valid() ) - { - auto matparams_mpn = lua[SEC_MATERIALS][tag][mpn]; - if ( matparams_mpn.valid() ) - return true; - } - std::cout << "[CONFIG] '" SEC_MATERIALS ".[" << tag << "]." << mpn; - std::cout << "' not defined and '" SEC_MATERIALS "." << mpn; - std::cout << "(tag,x,y,z)' not present." << std::endl; - return false; -} -bool -parameter_loader::validate_boundary_condition(const model& mod, int tag) const -{ - auto bc = lua[SEC_BCONDS][tag]; - if (not bc.valid()) - return true; - if ( not bc[BC_KIND].valid() ) - { - std::cout << "[CONFIG] '" BC_KIND "' not specified on interface "; - std::cout << tag << std::endl; - return false; - } - std::string bc_kind = bc[BC_KIND]; - if (bc_kind == BCOND_TYPE_PEC) - return true; - if (bc_kind == BCOND_TYPE_PMC) - return true; +void export_E_field_nodal(const model& mod, silo& sdb, const vecxd& Ex, + const vecxd& Ey, const vecxd& Ez, const parameter_loader&) +{ + std::vector<double> accum_Ex( sdb.num_nodes(), 0.0 ); + std::vector<double> accum_Ey( sdb.num_nodes(), 0.0 ); + std::vector<double> accum_Ez( sdb.num_nodes(), 0.0 ); + std::vector<size_t> count( sdb.num_nodes(), 0 ); - if (bc_kind == BCOND_TYPE_IMPEDANCE) - return true; + for (auto& e : mod) + { + for (size_t iT = 0; iT < e.num_cells(); iT++) + { + auto& pe = e.cell(iT); + auto cgofs = e.cell_global_dof_offset(iT); + auto nodetags = pe.node_tags(); + for (size_t iD = 0; iD < 4; iD++) + { + size_t tag = nodetags[iD]; + size_t svofs = sdb.node_tag_offset(tag); + accum_Ex[svofs] += Ex[cgofs+iD]; + accum_Ey[svofs] += Ey[cgofs+iD]; + accum_Ez[svofs] += Ez[cgofs+iD]; + count[svofs]++; + } + } + } - if (bc_kind == BCOND_TYPE_EPLANEW) + for (size_t i = 0; i < sdb.num_nodes(); i++) { - if (bc[BC_SOURCE].valid()) - return true; - - std::cout << "[CONFIG] '" BC_SOURCE "' not specified for plane "; - std::cout << "wave condition on surface " << tag << std::endl; - return false; + accum_Ex[i] /= count[i]; + accum_Ey[i] /= count[i]; + accum_Ez[i] /= count[i]; } - std::cout << "[CONFIG] boundary condition not implemented on "; - std::cout << "surface " << tag << std::endl; - return false; + sdb.write_nodal_variable("Ex", accum_Ex); + sdb.write_nodal_variable("Ey", accum_Ey); + sdb.write_nodal_variable("Ez", accum_Ez); + 
sdb.write_vector_expression("E", "{Ex, Ey, Ez}"); } -bool -parameter_loader::validate_interface_condition(const model& mod, int tag) const +void export_H_field_nodal(const model& mod, silo& sdb, const vecxd& Hx, + const vecxd& Hy, const vecxd& Hz, const parameter_loader&) { - auto ic = lua[SEC_IFCONDS][tag]; - if (not ic.valid()) - return true; + std::vector<double> accum_Hx( sdb.num_nodes(), 0.0 ); + std::vector<double> accum_Hy( sdb.num_nodes(), 0.0 ); + std::vector<double> accum_Hz( sdb.num_nodes(), 0.0 ); + std::vector<size_t> count( sdb.num_nodes(), 0 ); - if ( not ic[IFC_KIND].valid() ) + for (auto& e : mod) { - std::cout << "[CONFIG '" IFC_KIND "' not specified on interface "; - std::cout << tag << std::endl; - return false; - } - - std::string ic_kind = ic[IFC_KIND]; + for (size_t iT = 0; iT < e.num_cells(); iT++) + { + auto& pe = e.cell(iT); + auto cgofs = e.cell_global_dof_offset(iT); + auto nodetags = pe.node_tags(); + for (size_t iD = 0; iD < 4; iD++) + { + size_t tag = nodetags[iD]; + size_t svofs = sdb.node_tag_offset(tag); + accum_Hx[svofs] += Hx[cgofs+iD]; + accum_Hy[svofs] += Hy[cgofs+iD]; + accum_Hz[svofs] += Hz[cgofs+iD]; + count[svofs]++; + } + } + } - if (ic_kind == IFCOND_TYPE_SURFCURR) + for (size_t i = 0; i < sdb.num_nodes(); i++) { - if (ic[IFC_SOURCE].valid()) - return true; - - std::cout << "[CONFIG] '" IFC_SOURCE "' not specified for surface "; - std::cout << "current condition on surface " << tag << std::endl; - return false; + accum_Hx[i] /= count[i]; + accum_Hy[i] /= count[i]; + accum_Hz[i] /= count[i]; } - std::cout << "[CONFIG] interface condition not implemented on "; - std::cout << "surface " << tag << std::endl; - return false; + sdb.write_nodal_variable("Hx", accum_Hx); + sdb.write_nodal_variable("Hy", accum_Hy); + sdb.write_nodal_variable("Hz", accum_Hz); + sdb.write_vector_expression("H", "{Hx, Hy, Hz}"); } -bool -parameter_loader::validate_conditions(const model& mod) const +void export_J_field_nodal(const model& mod, silo& sdb, const vecxd& Ex, + const vecxd& Ey, const vecxd& Ez, const parameter_loader& mpl) { - bool success = true; - auto bds = mod.boundary_descriptors(); - std::sort(bds.begin(), bds.end()); - bds.erase( std::unique(bds.begin(), bds.end()), bds.end() ); - - auto ft_none = [](const boundary_descriptor& bd) -> bool { - return bd.type == face_type::NONE; - }; - - bds.erase( std::remove_if(bds.begin(), bds.end(), ft_none), bds.end() ); + std::vector<double> accum_Jx( sdb.num_nodes(), 0.0 ); + std::vector<double> accum_Jy( sdb.num_nodes(), 0.0 ); + std::vector<double> accum_Jz( sdb.num_nodes(), 0.0 ); + std::vector<size_t> count( sdb.num_nodes(), 0 ); - for (auto& bd : bds) + for (auto& e : mod) { - switch (bd.type) + auto etag = e.gmsh_tag(); + for (size_t iT = 0; iT < e.num_cells(); iT++) { - case face_type::NONE: - assert(false); - break; - - case face_type::BOUNDARY: - if (not validate_boundary_condition(mod, bd.gmsh_entity) ) - success = false; - break; - - case face_type::INTERFACE: - if (not validate_interface_condition(mod, bd.gmsh_entity) ) - success = false; - break; + auto& pe = e.cell(iT); + auto bar = pe.barycenter(); + auto sigma = mpl.sigma(etag, bar); + auto cgofs = e.cell_global_dof_offset(iT); + auto nodetags = pe.node_tags(); + for (size_t iD = 0; iD < 4; iD++) + { + size_t tag = nodetags[iD]; + size_t svofs = sdb.node_tag_offset(tag); + accum_Jx[svofs] += sigma*Ex[cgofs+iD]; + accum_Jy[svofs] += sigma*Ey[cgofs+iD]; + accum_Jz[svofs] += sigma*Ez[cgofs+iD]; + count[svofs]++; + } } - } - - return success; -} + } -bool 
-parameter_loader::validate_model_params(const model& mod) const -{ - bool success = true; - for (auto& e : mod) + for (size_t i = 0; i < sdb.num_nodes(); i++) { - auto tag = e.gmsh_tag(); - bool eps_valid = validate_materials(MAT_EPS, tag); - bool mu_valid = validate_materials(MAT_MU, tag); - bool sigma_valid = validate_materials(MAT_SIGMA, tag); - if ( (not eps_valid) or (not mu_valid) or (not sigma_valid) ) - success = false; + accum_Jx[i] /= count[i]; + accum_Jy[i] /= count[i]; + accum_Jz[i] /= count[i]; } - if ( not validate_conditions(mod) ) - success = false; - - return success; -} - -bool -parameter_loader::initial_Efield_defined(void) const -{ - auto eic = lua["electric_initial_condition"]; - return eic.valid(); + sdb.write_nodal_variable("Jx", accum_Jx); + sdb.write_nodal_variable("Jy", accum_Jy); + sdb.write_nodal_variable("Jz", accum_Jz); + sdb.write_vector_expression("J", "{Jx, Jy, Jz}"); } -vec3d -parameter_loader::initial_Efield(const point_3d& pt) const -{ - vec3d ret; - sol::tie(ret(0), ret(1), ret(2)) = - lua["electric_initial_condition"](pt.x(), pt.y(), pt.z()); - return ret; -} -bool -parameter_loader::initial_Hfield_defined(void) const -{ - auto mic = lua["magnetic_initial_condition"]; - return mic.valid(); -} -vec3d -parameter_loader::initial_Hfield(const point_3d& pt) const +void export_E_field_zonal(const model& mod, silo& sdb, const vecxd& Ex, + const vecxd& Ey, const vecxd& Ez, const parameter_loader&) { - vec3d ret; - sol::tie(ret(0), ret(1), ret(2)) = - lua["magnetic_initial_condition"](pt.x(), pt.y(), pt.z()); - return ret; -} + std::vector<double> out_Ex( mod.num_cells() ); + std::vector<double> out_Ey( mod.num_cells() ); + std::vector<double> out_Ez( mod.num_cells() ); -double -parameter_loader::epsilon(int tag, const point_3d& pt) const -{ - double eps0 = lua["const"]["eps0"]; - auto eps_dom = lua[SEC_MATERIALS][tag][MAT_EPS]; - if (eps_dom.valid()) + for (size_t iE = 0; iE < mod.num_entities(); iE++) { - double ret = eps_dom; - return eps0*ret; + auto& e = mod.entity_at(iE); + for (size_t iT = 0; iT < e.num_cells(); iT++) + { + auto& pe = e.cell(iT); + auto& re = e.cell_refelem(pe); + vecxd phi_bar = re.basis_functions({1./3., 1./3., 1./3.}); + auto num_bf = re.num_basis_functions(); + auto ofs = e.cell_global_dof_offset(iT); + auto gi = e.cell_global_index_by_gmsh(iT); + out_Ex[gi] = Ex.segment(ofs, num_bf).dot(phi_bar); + out_Ey[gi] = Ey.segment(ofs, num_bf).dot(phi_bar); + out_Ez[gi] = Ez.segment(ofs, num_bf).dot(phi_bar); + } } - double ret = lua[SEC_MATERIALS][MAT_EPS](tag, pt.x(), pt.y(), pt.z()); - return eps0*ret; + sdb.write_zonal_variable("Ex", out_Ex); + sdb.write_zonal_variable("Ey", out_Ey); + sdb.write_zonal_variable("Ez", out_Ez); + sdb.write_vector_expression("E", "{Ex, Ey, Ez}"); } -double -parameter_loader::mu(int tag, const point_3d& pt) const +void export_H_field_zonal(const model& mod, silo& sdb, const vecxd& Hx, + const vecxd& Hy, const vecxd& Hz, const parameter_loader&) { - double mu0 = lua["const"]["mu0"]; - auto mu_dom = lua[SEC_MATERIALS][tag][MAT_MU]; - if (mu_dom.valid()) + std::vector<double> out_Hx( mod.num_cells() ); + std::vector<double> out_Hy( mod.num_cells() ); + std::vector<double> out_Hz( mod.num_cells() ); + + for (size_t iE = 0; iE < mod.num_entities(); iE++) { - double ret = mu_dom; - return mu0*ret; + auto& e = mod.entity_at(iE); + for (size_t iT = 0; iT < e.num_cells(); iT++) + { + auto& pe = e.cell(iT); + auto& re = e.cell_refelem(pe); + vecxd phi_bar = re.basis_functions({1./3., 1./3., 1./3.}); + auto num_bf = 
re.num_basis_functions(); + auto ofs = e.cell_global_dof_offset(iT); + auto gi = e.cell_global_index_by_gmsh(iT); + out_Hx[gi] = Hx.segment(ofs, num_bf).dot(phi_bar); + out_Hy[gi] = Hy.segment(ofs, num_bf).dot(phi_bar); + out_Hz[gi] = Hz.segment(ofs, num_bf).dot(phi_bar); + } } - double ret = lua[SEC_MATERIALS][MAT_MU](tag, pt.x(), pt.y(), pt.z()); - return mu0*ret; -} - -double -parameter_loader::sigma(int tag, const point_3d& pt) const -{ - auto sigma_dom = lua[SEC_MATERIALS][tag][MAT_SIGMA]; - if (sigma_dom.valid()) - return sigma_dom; - - return lua[SEC_MATERIALS][MAT_SIGMA](tag, pt.x(), pt.y(), pt.z()); -} - -boundary_condition -parameter_loader::boundary_type(int tag) const -{ - auto bnd_data = lua[SEC_BCONDS][tag]; - if (not bnd_data.valid()) - return boundary_condition::UNSPECIFIED; - - std::string kind_str = bnd_data[BC_KIND]; - - if (kind_str == BCOND_TYPE_PEC) - return boundary_condition::PEC; - - if (kind_str == BCOND_TYPE_PMC) - return boundary_condition::PMC; - - if (kind_str == BCOND_TYPE_IMPEDANCE) - return boundary_condition::IMPEDANCE; - - if (kind_str == BCOND_TYPE_EPLANEW) - return boundary_condition::PLANE_WAVE_E; - - if (kind_str == BCOND_TYPE_HPLANEW) - return boundary_condition::PLANE_WAVE_H; - - if (kind_str == BCOND_TYPE_EFIELD) - return boundary_condition::E_FIELD; - - if (kind_str == BCOND_TYPE_HFIELD) - return boundary_condition::H_FIELD; - - if (kind_str == BCOND_TYPE_SURFCURR) - return boundary_condition::SURFACE_CURRENT; - - return boundary_condition::UNSPECIFIED; + sdb.write_zonal_variable("Hx", out_Hx); + sdb.write_zonal_variable("Hy", out_Hy); + sdb.write_zonal_variable("Hz", out_Hz); + sdb.write_vector_expression("H", "{Hx, Hy, Hz}"); } -interface_condition -parameter_loader::interface_type(int tag) const +void export_J_field_zonal(const model& mod, silo& sdb, const vecxd& Ex, + const vecxd& Ey, const vecxd& Ez, const parameter_loader& mpl) { - auto if_data = lua[SEC_IFCONDS][tag]; - if (not if_data.valid()) - return interface_condition::UNSPECIFIED; - - auto kind = if_data[IFC_KIND]; - - std::string kind_str = kind; - - if (kind_str == IFCOND_TYPE_EFIELD) - return interface_condition::E_FIELD; - - if (kind_str == IFCOND_TYPE_HFIELD) - return interface_condition::H_FIELD; - - if (kind_str == IFCOND_TYPE_SURFCURR) - return interface_condition::SURFACE_CURRENT; + std::vector<double> out_Jx( mod.num_cells() ); + std::vector<double> out_Jy( mod.num_cells() ); + std::vector<double> out_Jz( mod.num_cells() ); - return interface_condition::UNSPECIFIED; -} + for (size_t iE = 0; iE < mod.num_entities(); iE++) + { + auto& e = mod.entity_at(iE); + auto etag = e.gmsh_tag(); + for (size_t iT = 0; iT < e.num_cells(); iT++) + { + auto& pe = e.cell(iT); + auto& re = e.cell_refelem(pe); + auto bar = pe.barycenter(); + auto sigma = mpl.sigma(etag, bar); + vecxd phi_bar = re.basis_functions({1./3., 1./3., 1./3.}); + auto num_bf = re.num_basis_functions(); + auto ofs = e.cell_global_dof_offset(iT); + auto gi = e.cell_global_index_by_gmsh(iT); + out_Jx[gi] = sigma*Ex.segment(ofs, num_bf).dot(phi_bar); + out_Jy[gi] = sigma*Ey.segment(ofs, num_bf).dot(phi_bar); + out_Jz[gi] = sigma*Ez.segment(ofs, num_bf).dot(phi_bar); + } + } -vec3d -parameter_loader::eval_boundary_source(int tag, const point_3d& pt, double t) const -{ - vec3d ret; - sol::tie(ret(0), ret(1), ret(2)) = - lua[SEC_BCONDS][tag][BC_SOURCE](tag, pt.x(), pt.y(), pt.z(), t); - return ret; + sdb.write_zonal_variable("Jx", out_Jx); + sdb.write_zonal_variable("Jy", out_Jy); + sdb.write_zonal_variable("Jz", out_Jz); + 
sdb.write_vector_expression("J", "{Jx, Jy, Jz}"); } -vec3d -parameter_loader::eval_interface_source(int tag, const point_3d& pt, double t) const -{ - vec3d ret; - sol::tie(ret(0), ret(1), ret(2)) = - lua[SEC_IFCONDS][tag][IFC_SOURCE](tag, pt.x(), pt.y(), pt.z(), t); - return ret; -} } // namespace maxwell diff --git a/src/maxwell_cpu.cpp b/src/maxwell_cpu.cpp index 5099f17417b3494f7a238f10a349a5f473b974a4..a5a13a54949a64bf9298d694bc045d03b5a9cb48 100644 --- a/src/maxwell_cpu.cpp +++ b/src/maxwell_cpu.cpp @@ -105,6 +105,37 @@ compute_curls(solver_state& state, const field& curr, field& next) next.Hz = state.dy.Ex - state.dx.Ey; } +static void +compute_curls_E(solver_state& state, const field& curr, field& next) +{ + for (const auto& ed : state.eds) + { + compute_field_derivatives(ed, curr.Ex, state.dx.Ex, state.dy.Ex, state.dz.Ex); + compute_field_derivatives(ed, curr.Ey, state.dx.Ey, state.dy.Ey, state.dz.Ey); + compute_field_derivatives(ed, curr.Ez, state.dx.Ez, state.dy.Ez, state.dz.Ez); + } + + next.Hx = state.dz.Ey - state.dy.Ez; + next.Hy = state.dx.Ez - state.dz.Ex; + next.Hz = state.dy.Ex - state.dx.Ey; +} + +static void +compute_curls_H(solver_state& state, const field& curr, field& next) +{ + for (const auto& ed : state.eds) + { + compute_field_derivatives(ed, curr.Hx, state.dx.Hx, state.dy.Hx, state.dz.Hx); + compute_field_derivatives(ed, curr.Hy, state.dx.Hy, state.dy.Hy, state.dz.Hy); + compute_field_derivatives(ed, curr.Hz, state.dx.Hz, state.dy.Hz, state.dz.Hz); + } + + next.Ex = state.dy.Hz - state.dz.Hy; + next.Ey = state.dz.Hx - state.dx.Hz; + next.Ez = state.dx.Hy - state.dy.Hx; +} + + static void compute_field_jumps(solver_state& state, const field& in) { @@ -143,6 +174,70 @@ compute_field_jumps(solver_state& state, const field& in) } } +static void +compute_field_jumps_E(solver_state& state, const field& in) +{ + for (auto& ed : state.eds) + { + size_t num_all_fluxes = num_elems_all_orientations(ed) * ed.num_faces_per_elem * ed.num_fluxes; + + #pragma omp parallel for + for (size_t i = 0; i < num_all_fluxes; i++) + { + auto lofs = i; + auto gofs = ed.flux_base + i; + if (ed.fluxdofs_neigh[lofs] != NOT_PRESENT) + { + auto pos_mine = ed.fluxdofs_mine[lofs]; + auto pos_neigh = ed.fluxdofs_neigh[lofs]; + state.jumps.Ex[gofs] = in.Ex[pos_mine] - in.Ex[pos_neigh]; + state.jumps.Ey[gofs] = in.Ey[pos_mine] - in.Ey[pos_neigh]; + state.jumps.Ez[gofs] = in.Ez[pos_mine] - in.Ez[pos_neigh]; + } + else + { + auto bc = state.matparams.bc_coeffs[gofs]; + auto pos_mine = ed.fluxdofs_mine[lofs]; + state.jumps.Ex[gofs] = bc * in.Ex[pos_mine]; + state.jumps.Ey[gofs] = bc * in.Ey[pos_mine]; + state.jumps.Ez[gofs] = bc * in.Ez[pos_mine]; + } + } + } +} + +static void +compute_field_jumps_H(solver_state& state, const field& in) +{ + for (auto& ed : state.eds) + { + size_t num_all_fluxes = num_elems_all_orientations(ed) * ed.num_faces_per_elem * ed.num_fluxes; + + #pragma omp parallel for + for (size_t i = 0; i < num_all_fluxes; i++) + { + auto lofs = i; + auto gofs = ed.flux_base + i; + if (ed.fluxdofs_neigh[lofs] != NOT_PRESENT) + { + auto pos_mine = ed.fluxdofs_mine[lofs]; + auto pos_neigh = ed.fluxdofs_neigh[lofs]; + state.jumps.Hx[gofs] = in.Hx[pos_mine] - in.Hx[pos_neigh]; + state.jumps.Hy[gofs] = in.Hy[pos_mine] - in.Hy[pos_neigh]; + state.jumps.Hz[gofs] = in.Hz[pos_mine] - in.Hz[pos_neigh]; + } + else + { + auto bc = state.matparams.bc_coeffs[gofs]; + auto pos_mine = ed.fluxdofs_mine[lofs]; + state.jumps.Hx[gofs] = (2.0 - bc) * in.Hx[pos_mine]; + state.jumps.Hy[gofs] = (2.0 - bc) * 
in.Hy[pos_mine]; + state.jumps.Hz[gofs] = (2.0 - bc) * in.Hz[pos_mine]; + } + } + } +} + static void compute_fluxes_planar(solver_state& state) { @@ -192,6 +287,92 @@ compute_fluxes_planar(solver_state& state) } } +static void +compute_fluxes_planar_E(solver_state& state) +{ + for (auto& ed : state.eds) + { + size_t num_all_faces = num_elems_all_orientations(ed) * ed.num_faces_per_elem; + + #pragma omp parallel for + for (size_t iF = 0; iF < num_all_faces; iF++) + { + auto n = ed.normals.row(iF); + auto nx = n[0]; + auto ny = n[1]; + auto nz = n[2]; + auto face_det = ed.face_determinants(iF); + + for (size_t i = 0; i < ed.num_fluxes; i++) + { + auto lofs = iF*ed.num_fluxes + i; + auto gofs = ed.flux_base + lofs; + + /* Sources are imposed on jumps */ + auto jEx = state.jumps.Ex[gofs] - state.bndsrcs.Ex[gofs]; + auto jEy = state.jumps.Ey[gofs] - state.bndsrcs.Ey[gofs]; + auto jEz = state.jumps.Ez[gofs] - state.bndsrcs.Ez[gofs]; + auto jHx = state.jumps.Hx[gofs] + state.bndsrcs.Hx[gofs]; + auto jHy = state.jumps.Hy[gofs] + state.bndsrcs.Hy[gofs]; + auto jHz = state.jumps.Hz[gofs] + state.bndsrcs.Hz[gofs]; + + auto ndotE = nx*jEx + ny*jEy + nz*jEz; + + auto aE = face_det * state.matparams.aE[gofs]; + auto bE = face_det * state.matparams.bE[gofs]; + + /* Compute fluxes */ + state.fluxes.Ex[gofs] = aE*(nz*jHy - ny*jHz) + bE*(ndotE*nx - jEx); + state.fluxes.Ey[gofs] = aE*(nx*jHz - nz*jHx) + bE*(ndotE*ny - jEy); + state.fluxes.Ez[gofs] = aE*(ny*jHx - nx*jHy) + bE*(ndotE*nz - jEz); + } + } + } +} + +static void +compute_fluxes_planar_H(solver_state& state) +{ + for (auto& ed : state.eds) + { + size_t num_all_faces = num_elems_all_orientations(ed) * ed.num_faces_per_elem; + + #pragma omp parallel for + for (size_t iF = 0; iF < num_all_faces; iF++) + { + auto n = ed.normals.row(iF); + auto nx = n[0]; + auto ny = n[1]; + auto nz = n[2]; + auto face_det = ed.face_determinants(iF); + + for (size_t i = 0; i < ed.num_fluxes; i++) + { + auto lofs = iF*ed.num_fluxes + i; + auto gofs = ed.flux_base + lofs; + + /* Sources are imposed on jumps */ + auto jEx = state.jumps.Ex[gofs] - state.bndsrcs.Ex[gofs]; + auto jEy = state.jumps.Ey[gofs] - state.bndsrcs.Ey[gofs]; + auto jEz = state.jumps.Ez[gofs] - state.bndsrcs.Ez[gofs]; + auto jHx = state.jumps.Hx[gofs] + state.bndsrcs.Hx[gofs]; + auto jHy = state.jumps.Hy[gofs] + state.bndsrcs.Hy[gofs]; + auto jHz = state.jumps.Hz[gofs] + state.bndsrcs.Hz[gofs]; + + auto ndotH = nx*jHx + ny*jHy + nz*jHz; + + auto aH = face_det * state.matparams.aH[gofs]; + auto bH = face_det * state.matparams.bH[gofs]; + + /* Compute fluxes */ + state.fluxes.Hx[gofs] = aH*(ny*jEz - nz*jEy) + bH*(ndotH*nx - jHx); + state.fluxes.Hy[gofs] = aH*(nz*jEx - nx*jEz) + bH*(ndotH*ny - jHy); + state.fluxes.Hz[gofs] = aH*(nx*jEy - ny*jEx) + bH*(ndotH*nz - jHz); + } + } + } +} + static void compute_fluxes(solver_state& state, const field& in, field& out) { @@ -209,13 +390,41 @@ compute_fluxes(solver_state& state, const field& in, field& out) } } +static void +compute_fluxes_E(solver_state& state, const field& in, field& out) +{ + compute_field_jumps_E(state, in); + compute_fluxes_planar_E(state); + + for (auto& ed : state.eds) + { + compute_flux_lifting(ed, state.fluxes.Hx, out.Hx); + compute_flux_lifting(ed, state.fluxes.Hy, out.Hy); + compute_flux_lifting(ed, state.fluxes.Hz, out.Hz); + } +} + +static void +compute_fluxes_H(solver_state& state, const field& in, field& out) +{ + compute_field_jumps_H(state, in); + compute_fluxes_planar_H(state); + + for (auto& ed : state.eds) + { + 
compute_flux_lifting(ed, state.fluxes.Ex, out.Ex); + compute_flux_lifting(ed, state.fluxes.Ey, out.Ey); + compute_flux_lifting(ed, state.fluxes.Ez, out.Ez); + } +} + static void compute_euler_update(solver_state& state, const field& y, const field& k, double dt, field& out) { #pragma omp parallel for for (size_t i = 0; i < out.num_dofs; i++) { - auto CR = 1 - dt*state.matparams.sigma_over_epsilon[i]; + auto CR = 1.0 - dt*state.matparams.sigma_over_epsilon[i]; auto CC = dt*state.matparams.inv_epsilon[i]; out.Ex[i] = y.Ex[i]*CR + k.Ex[i]*CC; out.Ey[i] = y.Ey[i]*CR + k.Ey[i]*CC; @@ -238,7 +447,7 @@ compute_rk4_weighted_sum(solver_state& state, const field& in, double dt, field& #pragma omp parallel for for (size_t i = 0; i < out.num_dofs; i++) { - auto CR = 1 - dt*state.matparams.sigma_over_epsilon[i]; + auto CR = 1.0 - dt*state.matparams.sigma_over_epsilon[i]; auto CC = dt*state.matparams.inv_epsilon[i]; out.Ex[i] = in.Ex[i]*CR + CC*(state.k1.Ex[i] + 2*state.k2.Ex[i] + 2*state.k3.Ex[i] + state.k4.Ex[i])/6; out.Ey[i] = in.Ey[i]*CR + CC*(state.k1.Ey[i] + 2*state.k2.Ey[i] + 2*state.k3.Ey[i] + state.k4.Ey[i])/6; @@ -262,26 +471,65 @@ apply_operator(solver_state& state, const field& curr, field& next) compute_fluxes(state, curr, next); } +static void +leapfrog(solver_state& state) +{ + auto dt = state.delta_t; + compute_curls_H(state, state.emf_curr, state.tmp); + compute_fluxes_H(state, state.emf_curr, state.tmp); + #pragma omp parallel for + for (size_t i = 0; i < state.emf_next.num_dofs; i++) + { + auto CRM = 1.0 - 0.5*dt*state.matparams.sigma_over_epsilon[i]; + auto CRP = 1.0 + 0.5*dt*state.matparams.sigma_over_epsilon[i]; + auto CR = CRM/CRP; + auto CC = dt*state.matparams.inv_epsilon[i]/CRP; + state.emf_next.Ex[i] = state.emf_curr.Ex[i]*CR + state.tmp.Ex[i]*CC; + state.emf_next.Ey[i] = state.emf_curr.Ey[i]*CR + state.tmp.Ey[i]*CC; + state.emf_next.Ez[i] = state.emf_curr.Ez[i]*CR + state.tmp.Ez[i]*CC; + } + + compute_curls_E(state, state.emf_next, state.tmp); + compute_fluxes_E(state, state.emf_next, state.tmp); + #pragma omp parallel for + for (size_t i = 0; i < state.emf_next.num_dofs; i++) + { + auto CC = dt*state.matparams.inv_mu[i]; + state.emf_next.Hx[i] = state.emf_curr.Hx[i] + state.tmp.Hx[i]*CC; + state.emf_next.Hy[i] = state.emf_curr.Hy[i] + state.tmp.Hy[i]*CC; + state.emf_next.Hz[i] = state.emf_curr.Hz[i] + state.tmp.Hz[i]*CC; + } +} + void -timestep(solver_state& state) +timestep(solver_state& state, time_integrator_type ti) { - /* - apply_operator(state, state.emf_curr, state.tmp); - compute_euler_update(state, state.emf_curr, state.tmp, state.delta_t, state.emf_next); - */ + if (ti == time_integrator_type::EULER) + { + apply_operator(state, state.emf_curr, state.tmp); + compute_euler_update(state, state.emf_curr, state.tmp, state.delta_t, state.emf_next); + } - apply_operator(state, state.emf_curr, state.k1); + if (ti == time_integrator_type::RK4) + { + apply_operator(state, state.emf_curr, state.k1); - compute_euler_update(state, state.emf_curr, state.k1, 0.5*state.delta_t, state.tmp); - apply_operator(state, state.tmp, state.k2); + compute_euler_update(state, state.emf_curr, state.k1, 0.5*state.delta_t, state.tmp); + apply_operator(state, state.tmp, state.k2); - compute_euler_update(state, state.emf_curr, state.k2, 0.5*state.delta_t, state.tmp); - apply_operator(state, state.tmp, state.k3); + compute_euler_update(state, state.emf_curr, state.k2, 0.5*state.delta_t, state.tmp); + apply_operator(state, state.tmp, state.k3); - compute_euler_update(state, state.emf_curr, state.k3, 
state.delta_t, state.tmp); - apply_operator(state, state.tmp, state.k4); + compute_euler_update(state, state.emf_curr, state.k3, state.delta_t, state.tmp); + apply_operator(state, state.tmp, state.k4); + + compute_rk4_weighted_sum(state, state.emf_curr, state.delta_t, state.emf_next); + } - compute_rk4_weighted_sum(state, state.emf_curr, state.delta_t, state.emf_next); + if (ti == time_integrator_type::LEAPFROG) + { + leapfrog(state); + } state.curr_time += state.delta_t; state.curr_timestep += 1; @@ -318,69 +566,64 @@ swap(maxwell::solver_state& state) } void -export_to_silo(const model& mod, const solver_state& state, const std::string& filename) +export_fields_to_silo(const model& mod, const maxwell::solver_state& state, + const maxwell::parameter_loader& mpl) { + std::stringstream ss; + ss << mpl.sim_name() << "/timestep_" << state.curr_timestep << ".silo"; + silo sdb; - sdb.create_db(filename); + sdb.create_db(ss.str()); sdb.import_mesh_from_gmsh(); sdb.write_mesh(state.curr_time, state.curr_timestep); - std::vector<double> curr_Ex; curr_Ex.resize( mod.num_cells() ); - std::vector<double> curr_Ey; curr_Ey.resize( mod.num_cells() ); - std::vector<double> curr_Ez; curr_Ez.resize( mod.num_cells() ); - std::vector<double> curr_Hx; curr_Hx.resize( mod.num_cells() ); - std::vector<double> curr_Hy; curr_Hy.resize( mod.num_cells() ); - std::vector<double> curr_Hz; curr_Hz.resize( mod.num_cells() ); - - std::vector<double> next_Ex; next_Ex.resize( mod.num_cells() ); - std::vector<double> next_Ey; next_Ey.resize( mod.num_cells() ); - std::vector<double> next_Ez; next_Ez.resize( mod.num_cells() ); - std::vector<double> next_Hx; next_Hx.resize( mod.num_cells() ); - std::vector<double> next_Hy; next_Hy.resize( mod.num_cells() ); - std::vector<double> next_Hz; next_Hz.resize( mod.num_cells() ); - - for (size_t iE = 0; iE < mod.num_entities(); iE++) + switch( mpl.postpro_fieldExportMode("E") ) { - auto& e = mod.entity_at(iE); - for (size_t iT = 0; iT < e.num_cells(); iT++) - { - auto& pe = e.cell(iT); - auto& re = e.cell_refelem(pe); - vecxd phi_bar = re.basis_functions({1./3., 1./3., 1./3.}); - auto num_bf = re.num_basis_functions(); - auto ofs = e.cell_global_dof_offset(iT); - auto gi = e.cell_global_index_by_gmsh(iT); - curr_Ex[gi] = state.emf_curr.Ex.segment(ofs, num_bf).dot(phi_bar); - curr_Ey[gi] = state.emf_curr.Ey.segment(ofs, num_bf).dot(phi_bar); - curr_Ez[gi] = state.emf_curr.Ez.segment(ofs, num_bf).dot(phi_bar); - curr_Hx[gi] = state.emf_curr.Hx.segment(ofs, num_bf).dot(phi_bar); - curr_Hy[gi] = state.emf_curr.Hy.segment(ofs, num_bf).dot(phi_bar); - curr_Hz[gi] = state.emf_curr.Hz.segment(ofs, num_bf).dot(phi_bar); - next_Ex[gi] = state.emf_next.Ex.segment(ofs, num_bf).dot(phi_bar); - next_Ey[gi] = state.emf_next.Ey.segment(ofs, num_bf).dot(phi_bar); - next_Ez[gi] = state.emf_next.Ez.segment(ofs, num_bf).dot(phi_bar); - next_Hx[gi] = state.emf_next.Hx.segment(ofs, num_bf).dot(phi_bar); - next_Hy[gi] = state.emf_next.Hy.segment(ofs, num_bf).dot(phi_bar); - next_Hz[gi] = state.emf_next.Hz.segment(ofs, num_bf).dot(phi_bar); - } + case field_export_mode::NONE: + break; + + case field_export_mode::NODAL: + export_E_field_nodal(mod, sdb, state.emf_next.Ex, state.emf_next.Ey, + state.emf_next.Ez, mpl); + break; + + case field_export_mode::ZONAL: + export_E_field_zonal(mod, sdb, state.emf_next.Ex, state.emf_next.Ey, + state.emf_next.Ez, mpl); + break; } - sdb.write_zonal_variable("curr_Ex", curr_Ex); - sdb.write_zonal_variable("curr_Ey", curr_Ey); - sdb.write_zonal_variable("curr_Ez", curr_Ez); 
- sdb.write_zonal_variable("curr_Hx", curr_Hx); - sdb.write_zonal_variable("curr_Hy", curr_Hy); - sdb.write_zonal_variable("curr_Hz", curr_Hz); - sdb.write_vector_expression("curr_E", "{curr_Ex, curr_Ey, curr_Ez}"); - sdb.write_vector_expression("curr_H", "{curr_Hx, curr_Hy, curr_Hz}"); - sdb.write_zonal_variable("next_Ex", next_Ex); - sdb.write_zonal_variable("next_Ey", next_Ey); - sdb.write_zonal_variable("next_Ez", next_Ez); - sdb.write_zonal_variable("next_Hx", next_Hx); - sdb.write_zonal_variable("next_Hy", next_Hy); - sdb.write_zonal_variable("next_Hz", next_Hz); - sdb.write_vector_expression("next_E", "{next_Ex, next_Ey, next_Ez}"); - sdb.write_vector_expression("next_H", "{next_Hx, next_Hy, next_Hz}"); + switch( mpl.postpro_fieldExportMode("H") ) + { + case field_export_mode::NONE: + break; + + case field_export_mode::NODAL: + export_H_field_nodal(mod, sdb, state.emf_next.Hx, state.emf_next.Hy, + state.emf_next.Hz, mpl); + break; + + case field_export_mode::ZONAL: + export_H_field_zonal(mod, sdb, state.emf_next.Hx, state.emf_next.Hy, + state.emf_next.Hz, mpl); + break; + } + + switch( mpl.postpro_fieldExportMode("J") ) + { + case field_export_mode::NONE: + break; + + case field_export_mode::NODAL: + export_J_field_nodal(mod, sdb, state.emf_next.Ex, state.emf_next.Ey, + state.emf_next.Ez, mpl); + break; + + case field_export_mode::ZONAL: + export_J_field_zonal(mod, sdb, state.emf_next.Ex, state.emf_next.Ey, + state.emf_next.Ez, mpl); + break; + } } } // namespace maxwell diff --git a/src/maxwell_gpu.cpp b/src/maxwell_gpu.cpp index a9c79c7365b86d0b93380c84d44169341cf5a1e0..bf432a50f43ac6f5e1e486ce6056e788275cd491 100644 --- a/src/maxwell_gpu.cpp +++ b/src/maxwell_gpu.cpp @@ -15,14 +15,13 @@ void init_matparams(const model& mod, solver_state_gpu& state, const parameter_loader& mpl) { + state.matparams.num_dofs = mod.num_dofs(); + state.matparams.num_fluxes = mod.num_fluxes(); vecxd inv_eps = vecxd::Zero( mod.num_dofs() ); vecxd inv_mu = vecxd::Zero( mod.num_dofs() ); vecxd sigma = vecxd::Zero( mod.num_dofs() ); vecxd sigma_over_eps = vecxd::Zero( mod.num_dofs() ); - vecxd Z = vecxd::Zero( mod.num_dofs() ); - vecxd Y = vecxd::Zero( mod.num_dofs() ); - for (auto& e : mod) { auto tag = e.gmsh_tag(); @@ -81,8 +80,7 @@ void init_from_model(const model& mod, solver_state_gpu& state) state.tmp.resize( mod.num_dofs() ); state.bndsrcs.resize( mod.num_fluxes() ); - state.bndsrcs_buf.resize( mod.num_fluxes() ); - state.bndsrcs_cpu.resize( mod.num_fluxes() ); + state.bndsrcs_decomp_cpu.resize( mod.num_fluxes() ); for (auto& e : mod) { @@ -93,12 +91,56 @@ void init_from_model(const model& mod, solver_state_gpu& state) state.edgs.push_back( std::move(edg) ); } + auto& bds = mod.boundary_descriptors(); + size_t face_num_base = 0; + for (auto& e : mod) + { + for (size_t iF = 0; iF < e.num_faces(); iF++) + { + auto& pf = e.face(iF); + auto& rf = e.face_refelem(pf); + auto num_fluxes = rf.num_basis_functions(); + + auto gfnum = face_num_base + iF; + auto bd = bds[gfnum]; + if (bd.type == face_type::BOUNDARY or bd.type == face_type::INTERFACE) + { + for (size_t i = 0; i < num_fluxes; i++) + { + state.bndsrcs_decomp_table_cpu.push_back(gfnum*num_fluxes+i); + } + } + } + face_num_base += e.num_faces(); + } + + state.bndsrcs_decomp_table.copyin( state.bndsrcs_decomp_table_cpu.data(), + state.bndsrcs_decomp_table_cpu.size() ); + + state.bndsrcs_cpu.resize( state.bndsrcs_decomp_table_cpu.size() ); + state.bndsrcs_buf.resize( state.bndsrcs_decomp_table_cpu.size() ); + state.curr_time = 0.0; 
state.curr_timestep = 0; } +void +compress_bndsrc(const solver_state_gpu& state, const field& srcs, pinned_field& csrcs) +{ + for (size_t i = 0; i < csrcs.num_dofs; i++) + { + auto from_ofs = state.bndsrcs_decomp_table_cpu[i]; + csrcs.Ex[i] = srcs.Ex[from_ofs]; + csrcs.Ey[i] = srcs.Ey[from_ofs]; + csrcs.Ez[i] = srcs.Ez[from_ofs]; + csrcs.Hx[i] = srcs.Hx[from_ofs]; + csrcs.Hy[i] = srcs.Hy[from_ofs]; + csrcs.Hz[i] = srcs.Hz[from_ofs]; + } +} -void compute_curls(solver_state_gpu& state, const field_gpu& curr, field_gpu& next) +static void +compute_curls(solver_state_gpu& state, const field_gpu& curr, field_gpu& next) { auto currp = curr.data(); auto nextp = next.data(); @@ -125,7 +167,51 @@ void compute_curls(solver_state_gpu& state, const field_gpu& curr, field_gpu& ne gpu_subtract(nextp.Hz, dyp.Ex, dxp.Ey, nextp.num_dofs, state.compute_stream); } -void +static void +compute_curls_E(solver_state_gpu& state, const field_gpu& curr, field_gpu& next) +{ + auto currp = curr.data(); + auto nextp = next.data(); + auto dxp = state.dx.data(); + auto dyp = state.dy.data(); + auto dzp = state.dz.data(); + + for (const auto& edg : state.edgs) + { + gpu_compute_field_derivatives(edg, currp.Ex, dxp.Ex, dyp.Ex, dzp.Ex, 1.0, state.compute_stream); + gpu_compute_field_derivatives(edg, currp.Ey, dxp.Ey, dyp.Ey, dzp.Ey, 1.0, state.compute_stream); + gpu_compute_field_derivatives(edg, currp.Ez, dxp.Ez, dyp.Ez, dzp.Ez, 1.0, state.compute_stream); + } + + + gpu_subtract(nextp.Hx, dzp.Ey, dyp.Ez, nextp.num_dofs, state.compute_stream); + gpu_subtract(nextp.Hy, dxp.Ez, dzp.Ex, nextp.num_dofs, state.compute_stream); + gpu_subtract(nextp.Hz, dyp.Ex, dxp.Ey, nextp.num_dofs, state.compute_stream); +} + +static void +compute_curls_H(solver_state_gpu& state, const field_gpu& curr, field_gpu& next) +{ + auto currp = curr.data(); + auto nextp = next.data(); + auto dxp = state.dx.data(); + auto dyp = state.dy.data(); + auto dzp = state.dz.data(); + + for (const auto& edg : state.edgs) + { + gpu_compute_field_derivatives(edg, currp.Hx, dxp.Hx, dyp.Hx, dzp.Hx, 1.0, state.compute_stream); + gpu_compute_field_derivatives(edg, currp.Hy, dxp.Hy, dyp.Hy, dzp.Hy, 1.0, state.compute_stream); + gpu_compute_field_derivatives(edg, currp.Hz, dxp.Hz, dyp.Hz, dzp.Hz, 1.0, state.compute_stream); + } + + + gpu_subtract(nextp.Ex, dyp.Hz, dzp.Hy, nextp.num_dofs, state.compute_stream); + gpu_subtract(nextp.Ey, dzp.Hx, dxp.Hz, nextp.num_dofs, state.compute_stream); + gpu_subtract(nextp.Ez, dxp.Hy, dyp.Hx, nextp.num_dofs, state.compute_stream); +} + +static void compute_fluxes(solver_state_gpu& state, const field_gpu& in, field_gpu& out) { auto matparams_ptrs = state.matparams.data(); @@ -152,14 +238,95 @@ compute_fluxes(solver_state_gpu& state, const field_gpu& in, field_gpu& out) } } -void +static void +compute_fluxes_E(solver_state_gpu& state, const field_gpu& in, field_gpu& out) +{ + auto matparams_ptrs = state.matparams.data(); + double *bc_coeffs = matparams_ptrs.bc_coeffs; + + for (const auto& edg : state.edgs) + maxwell::gpu_compute_jumps_E(edg, in, state.jumps, bc_coeffs, state.compute_stream); + + for (const auto& edg : state.edgs) + maxwell::gpu_compute_fluxes_E(edg, state.jumps, state.fluxes, state.bndsrcs, + state.matparams, state.compute_stream); + + auto fluxesp = state.fluxes.data(); + auto outp = out.data(); + + for (auto& edg : state.edgs) + { + gpu_compute_flux_lifting(edg, fluxesp.Hx, outp.Hx, state.compute_stream); + gpu_compute_flux_lifting(edg, fluxesp.Hy, outp.Hy, state.compute_stream); + gpu_compute_flux_lifting(edg, 
fluxesp.Hz, outp.Hz, state.compute_stream); + } +} + +static void +compute_fluxes_H(solver_state_gpu& state, const field_gpu& in, field_gpu& out) +{ + auto matparams_ptrs = state.matparams.data(); + double *bc_coeffs = matparams_ptrs.bc_coeffs; + + for (const auto& edg : state.edgs) + maxwell::gpu_compute_jumps_H(edg, in, state.jumps, bc_coeffs, state.compute_stream); + + for (const auto& edg : state.edgs) + maxwell::gpu_compute_fluxes_H(edg, state.jumps, state.fluxes, state.bndsrcs, + state.matparams, state.compute_stream); + + auto fluxesp = state.fluxes.data(); + auto outp = out.data(); + + for (auto& edg : state.edgs) + { + gpu_compute_flux_lifting(edg, fluxesp.Ex, outp.Ex, state.compute_stream); + gpu_compute_flux_lifting(edg, fluxesp.Ey, outp.Ey, state.compute_stream); + gpu_compute_flux_lifting(edg, fluxesp.Ez, outp.Ez, state.compute_stream); + } +} + +static void +leapfrog(solver_state_gpu& state) +{ + double *r = state.matparams.sigma_over_epsilon.data(); + double *eps = state.matparams.inv_epsilon.data(); + double *mu = state.matparams.inv_mu.data(); + + auto currp = state.emf_curr.data(); + auto nextp = state.emf_next.data(); + auto tmpp = state.tmp.data(); + + auto dt = state.delta_t; + compute_curls_H(state, state.emf_curr, state.tmp); + compute_fluxes_H(state, state.emf_curr, state.tmp); + + gpu_compute_leapfrog_update(nextp.Ex, currp.Ex, r, tmpp.Ex, eps, nextp.num_dofs, + dt, state.compute_stream); + gpu_compute_leapfrog_update(nextp.Ey, currp.Ey, r, tmpp.Ey, eps, nextp.num_dofs, + dt, state.compute_stream); + gpu_compute_leapfrog_update(nextp.Ez, currp.Ez, r, tmpp.Ez, eps, nextp.num_dofs, + dt, state.compute_stream); + + compute_curls_E(state, state.emf_next, state.tmp); + compute_fluxes_E(state, state.emf_next, state.tmp); + + gpu_compute_leapfrog_update(nextp.Hx, currp.Hx, tmpp.Hx, mu, nextp.num_dofs, + dt, state.compute_stream); + gpu_compute_leapfrog_update(nextp.Hy, currp.Hy, tmpp.Hy, mu, nextp.num_dofs, + dt, state.compute_stream); + gpu_compute_leapfrog_update(nextp.Hz, currp.Hz, tmpp.Hz, mu, nextp.num_dofs, + dt, state.compute_stream); +} + +static void apply_operator(solver_state_gpu& state, const field_gpu& curr, field_gpu& next) { compute_curls(state, curr, next); compute_fluxes(state, curr, next); } -void +static void compute_euler_update(solver_state_gpu& state, const field_gpu& y, const field_gpu& k, double dt, field_gpu& out) { @@ -179,7 +346,7 @@ compute_euler_update(solver_state_gpu& state, const field_gpu& y, gpu_compute_euler_update(outp.Hz, yp.Hz, kp.Hz, mu, out.num_dofs, dt, state.compute_stream); } -void +static void compute_rk4_weighted_sum(solver_state_gpu& state, const field_gpu& in, double dt, field_gpu& out) { @@ -203,28 +370,37 @@ compute_rk4_weighted_sum(solver_state_gpu& state, const field_gpu& in, } void -timestep(solver_state_gpu& state) +timestep(solver_state_gpu& state, time_integrator_type ti) { //timecounter tc; //tc.tic(); - /* - apply_operator(state, state.emf_curr, state.tmp); - compute_euler_update(state, state.emf_curr, state.tmp, state.delta_t, state.emf_next); - */ + if (ti == time_integrator_type::EULER) + { + apply_operator(state, state.emf_curr, state.tmp); + compute_euler_update(state, state.emf_curr, state.tmp, state.delta_t, state.emf_next); + } - apply_operator(state, state.emf_curr, state.k1); + if (ti == time_integrator_type::RK4) + { + apply_operator(state, state.emf_curr, state.k1); - compute_euler_update(state, state.emf_curr, state.k1, 0.5*state.delta_t, state.tmp); - apply_operator(state, state.tmp, state.k2); + 
compute_euler_update(state, state.emf_curr, state.k1, 0.5*state.delta_t, state.tmp); + apply_operator(state, state.tmp, state.k2); - compute_euler_update(state, state.emf_curr, state.k2, 0.5*state.delta_t, state.tmp); - apply_operator(state, state.tmp, state.k3); + compute_euler_update(state, state.emf_curr, state.k2, 0.5*state.delta_t, state.tmp); + apply_operator(state, state.tmp, state.k3); - compute_euler_update(state, state.emf_curr, state.k3, state.delta_t, state.tmp); - apply_operator(state, state.tmp, state.k4); + compute_euler_update(state, state.emf_curr, state.k3, state.delta_t, state.tmp); + apply_operator(state, state.tmp, state.k4); - compute_rk4_weighted_sum(state, state.emf_curr, state.delta_t, state.emf_next); + compute_rk4_weighted_sum(state, state.emf_curr, state.delta_t, state.emf_next); + } + + if (ti == time_integrator_type::LEAPFROG) + { + leapfrog(state); + } state.curr_time += state.delta_t; state.curr_timestep += 1; @@ -239,14 +415,15 @@ prepare_sources(const model& mod, maxwell::solver_state_gpu& state, maxwell::parameter_loader& mpl) { if ( mpl.boundary_sources_enabled() ) - maxwell::eval_boundary_sources(mod, mpl, state, state.bndsrcs_cpu); + maxwell::eval_boundary_sources(mod, mpl, state, state.bndsrcs_decomp_cpu); if ( mpl.interface_sources_enabled() ) - maxwell::eval_interface_sources(mod, mpl, state, state.bndsrcs_cpu); + maxwell::eval_interface_sources(mod, mpl, state, state.bndsrcs_decomp_cpu); + compress_bndsrc(state, state.bndsrcs_decomp_cpu, state.bndsrcs_cpu); state.bndsrcs_buf.copyin(state.bndsrcs_cpu, state.memcpy_stream); state.memcpy_stream.wait(); - std::swap(state.bndsrcs, state.bndsrcs_buf); + decompress_bndsrc(state, state.bndsrcs_buf, state.bndsrcs); } void @@ -255,6 +432,7 @@ do_sources(const model& mod, maxwell::solver_state_gpu& state, { if ( mpl.source_has_changed_state() ) { + state.bndsrcs_decomp_cpu.zero(); state.bndsrcs_cpu.zero(); mpl.source_was_cleared(); } @@ -264,95 +442,83 @@ do_sources(const model& mod, maxwell::solver_state_gpu& state, auto ve = mpl.volume_sources_enabled(); if ( be ) - maxwell::eval_boundary_sources(mod, mpl, state, state.bndsrcs_cpu); + maxwell::eval_boundary_sources(mod, mpl, state, state.bndsrcs_decomp_cpu); if ( ie ) - maxwell::eval_interface_sources(mod, mpl, state, state.bndsrcs_cpu); + maxwell::eval_interface_sources(mod, mpl, state, state.bndsrcs_decomp_cpu); if ( be or ie or ve ) + { + compress_bndsrc(state, state.bndsrcs_decomp_cpu, state.bndsrcs_cpu); state.bndsrcs_buf.copyin(state.bndsrcs_cpu, state.memcpy_stream); + } } void swap(maxwell::solver_state_gpu& state) { - cudaDeviceSynchronize(); - std::swap(state.bndsrcs, state.bndsrcs_buf); + checkGPU( gpuDeviceSynchronize() ); + decompress_bndsrc(state, state.bndsrcs_buf, state.bndsrcs); std::swap(state.emf_curr, state.emf_next); } void -export_to_silo(const model& mod, const solver_state_gpu& state, const std::string& filename) +export_fields_to_silo(const model& mod, const maxwell::solver_state_gpu& state, + const maxwell::parameter_loader& mpl) { + std::stringstream ss; + ss << mpl.sim_name() << "/timestep_" << state.curr_timestep << ".silo"; + silo sdb; - sdb.create_db(filename); + sdb.create_db(ss.str()); sdb.import_mesh_from_gmsh(); sdb.write_mesh(state.curr_time, state.curr_timestep); - std::vector<double> curr_Ex; curr_Ex.resize( mod.num_cells() ); - std::vector<double> curr_Ey; curr_Ey.resize( mod.num_cells() ); - std::vector<double> curr_Ez; curr_Ez.resize( mod.num_cells() ); - std::vector<double> curr_Hx; curr_Hx.resize( mod.num_cells() 
); - std::vector<double> curr_Hy; curr_Hy.resize( mod.num_cells() ); - std::vector<double> curr_Hz; curr_Hz.resize( mod.num_cells() ); - - std::vector<double> next_Ex; next_Ex.resize( mod.num_cells() ); - std::vector<double> next_Ey; next_Ey.resize( mod.num_cells() ); - std::vector<double> next_Ez; next_Ez.resize( mod.num_cells() ); - std::vector<double> next_Hx; next_Hx.resize( mod.num_cells() ); - std::vector<double> next_Hy; next_Hy.resize( mod.num_cells() ); - std::vector<double> next_Hz; next_Hz.resize( mod.num_cells() ); - - field emf_curr; - emf_curr.resize( state.emf_curr.num_dofs ); - state.emf_curr.copyout(emf_curr); - - field emf_next; - emf_next.resize( state.emf_next.num_dofs ); - state.emf_next.copyout(emf_next); - - for (size_t iE = 0; iE < mod.num_entities(); iE++) + maxwell::field f; + f.resize( mod.num_dofs() ); + state.emf_next.copyout(f); + + switch( mpl.postpro_fieldExportMode("E") ) { - auto& e = mod.entity_at(iE); - for (size_t iT = 0; iT < e.num_cells(); iT++) - { - auto& pe = e.cell(iT); - auto& re = e.cell_refelem(pe); - vecxd phi_bar = re.basis_functions({1./3., 1./3., 1./3.}); - auto num_bf = re.num_basis_functions(); - auto ofs = e.cell_global_dof_offset(iT); - auto gi = e.cell_global_index_by_gmsh(iT); - curr_Ex[gi] = emf_curr.Ex.segment(ofs, num_bf).dot(phi_bar); - curr_Ey[gi] = emf_curr.Ey.segment(ofs, num_bf).dot(phi_bar); - curr_Ez[gi] = emf_curr.Ez.segment(ofs, num_bf).dot(phi_bar); - curr_Hx[gi] = emf_curr.Hx.segment(ofs, num_bf).dot(phi_bar); - curr_Hy[gi] = emf_curr.Hy.segment(ofs, num_bf).dot(phi_bar); - curr_Hz[gi] = emf_curr.Hz.segment(ofs, num_bf).dot(phi_bar); - next_Ex[gi] = emf_next.Ex.segment(ofs, num_bf).dot(phi_bar); - next_Ey[gi] = emf_next.Ey.segment(ofs, num_bf).dot(phi_bar); - next_Ez[gi] = emf_next.Ez.segment(ofs, num_bf).dot(phi_bar); - next_Hx[gi] = emf_next.Hx.segment(ofs, num_bf).dot(phi_bar); - next_Hy[gi] = emf_next.Hy.segment(ofs, num_bf).dot(phi_bar); - next_Hz[gi] = emf_next.Hz.segment(ofs, num_bf).dot(phi_bar); - } + case field_export_mode::NONE: + break; + + case field_export_mode::NODAL: + export_E_field_nodal(mod, sdb, f.Ex, f.Ey, f.Ez, mpl); + break; + + case field_export_mode::ZONAL: + export_E_field_zonal(mod, sdb, f.Ex, f.Ey, f.Ez, mpl); + break; } - sdb.write_zonal_variable("curr_Ex", curr_Ex); - sdb.write_zonal_variable("curr_Ey", curr_Ey); - sdb.write_zonal_variable("curr_Ez", curr_Ez); - sdb.write_zonal_variable("curr_Hx", curr_Hx); - sdb.write_zonal_variable("curr_Hy", curr_Hy); - sdb.write_zonal_variable("curr_Hz", curr_Hz); - sdb.write_vector_expression("curr_E", "{curr_Ex, curr_Ey, curr_Ez}"); - sdb.write_vector_expression("curr_H", "{curr_Hx, curr_Hy, curr_Hz}"); - sdb.write_zonal_variable("next_Ex", next_Ex); - sdb.write_zonal_variable("next_Ey", next_Ey); - sdb.write_zonal_variable("next_Ez", next_Ez); - sdb.write_zonal_variable("next_Hx", next_Hx); - sdb.write_zonal_variable("next_Hy", next_Hy); - sdb.write_zonal_variable("next_Hz", next_Hz); - sdb.write_vector_expression("next_E", "{next_Ex, next_Ey, next_Ez}"); - sdb.write_vector_expression("next_H", "{next_Hx, next_Hy, next_Hz}"); + switch( mpl.postpro_fieldExportMode("H") ) + { + case field_export_mode::NONE: + break; + + case field_export_mode::NODAL: + export_H_field_nodal(mod, sdb, f.Hx, f.Hy, f.Hz, mpl); + break; + + case field_export_mode::ZONAL: + export_H_field_zonal(mod, sdb, f.Hx, f.Hy, f.Hz, mpl); + break; + } + + switch( mpl.postpro_fieldExportMode("J") ) + { + case field_export_mode::NONE: + break; + + case field_export_mode::NODAL: + 
export_J_field_nodal(mod, sdb, f.Ex, f.Ey, f.Ez, mpl);
+        break;
+
+    case field_export_mode::ZONAL:
+        export_J_field_zonal(mod, sdb, f.Ex, f.Ey, f.Ez, mpl);
+        break;
+    }
 }
 
 } // namespace maxwell
diff --git a/src/maxwell_interface.cpp b/src/maxwell_interface.cpp
index 4a72a54ad01c5fdc3eb170af0af422752302ead2..eebd90025687f00b7eb71af2ad15131d6944333c 100644
--- a/src/maxwell_interface.cpp
+++ b/src/maxwell_interface.cpp
@@ -47,30 +47,32 @@ pinned_field::zero()
 void
 pinned_field::resize(size_t sz)
 {
-    if (Ex and sz != num_dofs) cudaFreeHost(Ex);
-    if (Ey and sz != num_dofs) cudaFreeHost(Ey);
-    if (Ez and sz != num_dofs) cudaFreeHost(Ez);
-    if (Ex and sz != num_dofs) cudaFreeHost(Ex);
-    if (Ey and sz != num_dofs) cudaFreeHost(Ey);
-    if (Ez and sz != num_dofs) cudaFreeHost(Ez);
-
-    checkGPU( cudaMallocHost((void**)&Ex, sz*sizeof(double)) );
-    checkGPU( cudaMallocHost((void**)&Ey, sz*sizeof(double)) );
-    checkGPU( cudaMallocHost((void**)&Ez, sz*sizeof(double)) );
-    checkGPU( cudaMallocHost((void**)&Hx, sz*sizeof(double)) );
-    checkGPU( cudaMallocHost((void**)&Hy, sz*sizeof(double)) );
-    checkGPU( cudaMallocHost((void**)&Hz, sz*sizeof(double)) );
+    if (Ex and sz != num_dofs) checkGPU( gpuFreeHost(Ex) );
+    if (Ey and sz != num_dofs) checkGPU( gpuFreeHost(Ey) );
+    if (Ez and sz != num_dofs) checkGPU( gpuFreeHost(Ez) );
+    if (Hx and sz != num_dofs) checkGPU( gpuFreeHost(Hx) );
+    if (Hy and sz != num_dofs) checkGPU( gpuFreeHost(Hy) );
+    if (Hz and sz != num_dofs) checkGPU( gpuFreeHost(Hz) );
+
+    checkGPU( gpuMallocHost((void**)&Ex, sz*sizeof(double)) );
+    checkGPU( gpuMallocHost((void**)&Ey, sz*sizeof(double)) );
+    checkGPU( gpuMallocHost((void**)&Ez, sz*sizeof(double)) );
+    checkGPU( gpuMallocHost((void**)&Hx, sz*sizeof(double)) );
+    checkGPU( gpuMallocHost((void**)&Hy, sz*sizeof(double)) );
+    checkGPU( gpuMallocHost((void**)&Hz, sz*sizeof(double)) );
 
     num_dofs = sz;
+
+    zero();
 }
 
 pinned_field::~pinned_field()
 {
-    if (Ex) cudaFreeHost(Ex);
-    if (Ey) cudaFreeHost(Ey);
-    if (Ez) cudaFreeHost(Ez);
-    if (Ex) cudaFreeHost(Ex);
-    if (Ey) cudaFreeHost(Ey);
-    if (Ez) cudaFreeHost(Ez);
+    if (Ex) checkGPU( gpuFreeHost(Ex) );
+    if (Ey) checkGPU( gpuFreeHost(Ey) );
+    if (Ez) checkGPU( gpuFreeHost(Ez) );
+    if (Hx) checkGPU( gpuFreeHost(Hx) );
+    if (Hy) checkGPU( gpuFreeHost(Hy) );
+    if (Hz) checkGPU( gpuFreeHost(Hz) );
 }
 
 void
@@ -92,6 +94,7 @@ field_gpu::resize(size_t p_num_dofs)
     Hx.resize(p_num_dofs);
     Hy.resize(p_num_dofs);
     Hz.resize(p_num_dofs);
+    zero();
     num_dofs = p_num_dofs;
 }
 
@@ -231,4 +234,375 @@ material_params_gpu::copyin(const material_params& mp)
 
 #endif /* ENABLE_GPU_SOLVER */
 
+
+
+
+
+#define SEC_BCONDS "bndconds"
+#define BC_KIND "kind"
+#define BC_SOURCE "source"
+#define BCOND_TYPE_PEC "pec"
+#define BCOND_TYPE_PMC "pmc"
+#define BCOND_TYPE_IMPEDANCE "impedance"
+#define BCOND_TYPE_EPLANEW "plane_wave_E"
+#define BCOND_TYPE_HPLANEW "plane_wave_H"
+#define BCOND_TYPE_EFIELD "E_field"
+#define BCOND_TYPE_HFIELD "H_field"
+#define BCOND_TYPE_SURFCURR "surface_current"
+
+#define SEC_IFCONDS "ifaceconds"
+#define IFC_KIND "kind"
+#define IFC_SOURCE "source"
+#define IFCOND_TYPE_EFIELD "E_field"
+#define IFCOND_TYPE_HFIELD "H_field"
+#define IFCOND_TYPE_SURFCURR "surface_current"
+
+
+#define SEC_MATERIALS "materials"
+#define MAT_EPS "epsilon"
+#define MAT_EPS0 "eps0"
+#define MAT_MU "mu"
+#define MAT_MU0 "mu0"
+#define MAT_SIGMA "sigma"
+
+
+
+parameter_loader::parameter_loader()
+{
+    lua["const"][MAT_EPS0] = 8.8541878128e-12;
+    lua["const"][MAT_MU0] = 4e-7*M_PI;
+    lua["postpro"]["E"] = lua.create_table();
+    lua["postpro"]["E"]["silo_mode"] = "nodal";
+    lua["postpro"]["H"] = lua.create_table();
+    lua["postpro"]["H"]["silo_mode"] = "nodal";
+    lua["postpro"]["J"] = lua.create_table();
+    lua["postpro"]["J"]["silo_mode"] = "nodal";
+}
+
+bool
+parameter_loader::validate_materials(const std::string& mpn, int tag) const
+{
+    auto mfun = lua[SEC_MATERIALS][mpn];
+    if ( mfun.valid() )
+        return true;
+
+    auto mparams = lua[SEC_MATERIALS][tag];
+    if ( mparams.valid() )
+    {
+        auto matparams_mpn = lua[SEC_MATERIALS][tag][mpn];
+        if ( matparams_mpn.valid() )
+            return true;
+    }
+
+    std::cout << "[CONFIG] '" SEC_MATERIALS ".[" << tag << "]." << mpn;
+    std::cout << "' not defined and '" SEC_MATERIALS "." << mpn;
+    std::cout << "(tag,x,y,z)' not present." << std::endl;
+    return false;
+}
+
+bool
+parameter_loader::validate_boundary_condition(const model&, int tag) const
+{
+    auto bc = lua[SEC_BCONDS][tag];
+    if (not bc.valid())
+        return true;
+
+    if ( not bc[BC_KIND].valid() )
+    {
+        std::cout << "[CONFIG] '" BC_KIND "' not specified on boundary ";
+        std::cout << tag << std::endl;
+        return false;
+    }
+
+    std::string bc_kind = bc[BC_KIND];
+
+    if (bc_kind == BCOND_TYPE_PEC)
+        return true;
+
+    if (bc_kind == BCOND_TYPE_PMC)
+        return true;
+
+    if (bc_kind == BCOND_TYPE_IMPEDANCE)
+        return true;
+
+    if (bc_kind == BCOND_TYPE_EPLANEW)
+    {
+        if (bc[BC_SOURCE].valid())
+            return true;
+
+        std::cout << "[CONFIG] '" BC_SOURCE "' not specified for plane ";
+        std::cout << "wave condition on surface " << tag << std::endl;
+        return false;
+    }
+
+    std::cout << "[CONFIG] boundary condition not implemented on ";
+    std::cout << "surface " << tag << std::endl;
+    return false;
+}
+
+bool
+parameter_loader::validate_interface_condition(const model&, int tag) const
+{
+    auto ic = lua[SEC_IFCONDS][tag];
+    if (not ic.valid())
+        return true;
+
+    if ( not ic[IFC_KIND].valid() )
+    {
+        std::cout << "[CONFIG] '" IFC_KIND "' not specified on interface ";
+        std::cout << tag << std::endl;
+        return false;
+    }
+
+    std::string ic_kind = ic[IFC_KIND];
+
+    if (ic_kind == IFCOND_TYPE_SURFCURR)
+    {
+        if (ic[IFC_SOURCE].valid())
+            return true;
+
+        std::cout << "[CONFIG] '" IFC_SOURCE "' not specified for surface ";
+        std::cout << "current condition on surface " << tag << std::endl;
+        return false;
+    }
+
+    std::cout << "[CONFIG] interface condition not implemented on ";
+    std::cout << "surface " << tag << std::endl;
+    return false;
+}
+
+bool
+parameter_loader::validate_conditions(const model& mod) const
+{
+    bool success = true;
+    auto bds = mod.boundary_descriptors();
+    std::sort(bds.begin(), bds.end());
+    bds.erase( std::unique(bds.begin(), bds.end()), bds.end() );
+
+    auto ft_none = [](const boundary_descriptor& bd) -> bool {
+        return bd.type == face_type::NONE;
+    };
+
+    bds.erase( std::remove_if(bds.begin(), bds.end(), ft_none), bds.end() );
+
+    for (auto& bd : bds)
+    {
+        switch (bd.type)
+        {
+        case face_type::NONE:
+            assert(false);
+            break;
+
+        case face_type::BOUNDARY:
+            if (not validate_boundary_condition(mod, bd.gmsh_entity) )
+                success = false;
+            break;
+
+        case face_type::INTERFACE:
+            if (not validate_interface_condition(mod, bd.gmsh_entity) )
+                success = false;
+            break;
+        }
+    }
+
+    return success;
+}
+
+bool
+parameter_loader::validate_model_params(const model& mod) const
+{
+    bool success = true;
+    for (auto& e : mod)
+    {
+        auto tag = e.gmsh_tag();
+        bool eps_valid   = validate_materials(MAT_EPS, tag);
+        bool mu_valid    = validate_materials(MAT_MU, tag);
+        bool sigma_valid = validate_materials(MAT_SIGMA, tag);
+        if (
(not eps_valid) or (not mu_valid) or (not sigma_valid) ) + success = false; + } + + if ( not validate_conditions(mod) ) + success = false; + + return success; +} + +bool +parameter_loader::initial_Efield_defined(void) const +{ + auto eic = lua["electric_initial_condition"]; + return eic.valid(); +} + +vec3d +parameter_loader::initial_Efield(const point_3d& pt) const +{ + vec3d ret; + sol::tie(ret(0), ret(1), ret(2)) = + lua["electric_initial_condition"](pt.x(), pt.y(), pt.z()); + return ret; +} + +bool +parameter_loader::initial_Hfield_defined(void) const +{ + auto mic = lua["magnetic_initial_condition"]; + return mic.valid(); +} + +vec3d +parameter_loader::initial_Hfield(const point_3d& pt) const +{ + vec3d ret; + sol::tie(ret(0), ret(1), ret(2)) = + lua["magnetic_initial_condition"](pt.x(), pt.y(), pt.z()); + return ret; +} + +double +parameter_loader::epsilon(int tag, const point_3d& pt) const +{ + double eps0 = lua["const"]["eps0"]; + auto eps_dom = lua[SEC_MATERIALS][tag][MAT_EPS]; + if (eps_dom.valid()) + { + double ret = eps_dom; + return eps0*ret; + } + + double ret = lua[SEC_MATERIALS][MAT_EPS](tag, pt.x(), pt.y(), pt.z()); + return eps0*ret; +} + +double +parameter_loader::mu(int tag, const point_3d& pt) const +{ + double mu0 = lua["const"]["mu0"]; + auto mu_dom = lua[SEC_MATERIALS][tag][MAT_MU]; + if (mu_dom.valid()) + { + double ret = mu_dom; + return mu0*ret; + } + + double ret = lua[SEC_MATERIALS][MAT_MU](tag, pt.x(), pt.y(), pt.z()); + return mu0*ret; +} + +double +parameter_loader::sigma(int tag, const point_3d& pt) const +{ + auto sigma_dom = lua[SEC_MATERIALS][tag][MAT_SIGMA]; + if (sigma_dom.valid()) + return sigma_dom; + + return lua[SEC_MATERIALS][MAT_SIGMA](tag, pt.x(), pt.y(), pt.z()); +} + +boundary_condition +parameter_loader::boundary_type(int tag) const +{ + auto bnd_data = lua[SEC_BCONDS][tag]; + if (not bnd_data.valid()) + return boundary_condition::UNSPECIFIED; + + std::string kind_str = bnd_data[BC_KIND]; + + if (kind_str == BCOND_TYPE_PEC) + return boundary_condition::PEC; + + if (kind_str == BCOND_TYPE_PMC) + return boundary_condition::PMC; + + if (kind_str == BCOND_TYPE_IMPEDANCE) + return boundary_condition::IMPEDANCE; + + if (kind_str == BCOND_TYPE_EPLANEW) + return boundary_condition::PLANE_WAVE_E; + + if (kind_str == BCOND_TYPE_HPLANEW) + return boundary_condition::PLANE_WAVE_H; + + if (kind_str == BCOND_TYPE_EFIELD) + return boundary_condition::E_FIELD; + + if (kind_str == BCOND_TYPE_HFIELD) + return boundary_condition::H_FIELD; + + if (kind_str == BCOND_TYPE_SURFCURR) + return boundary_condition::SURFACE_CURRENT; + + return boundary_condition::UNSPECIFIED; +} + +interface_condition +parameter_loader::interface_type(int tag) const +{ + auto if_data = lua[SEC_IFCONDS][tag]; + if (not if_data.valid()) + return interface_condition::UNSPECIFIED; + + auto kind = if_data[IFC_KIND]; + + std::string kind_str = kind; + + if (kind_str == IFCOND_TYPE_EFIELD) + return interface_condition::E_FIELD; + + if (kind_str == IFCOND_TYPE_HFIELD) + return interface_condition::H_FIELD; + + if (kind_str == IFCOND_TYPE_SURFCURR) + return interface_condition::SURFACE_CURRENT; + + return interface_condition::UNSPECIFIED; +} + +vec3d +parameter_loader::eval_boundary_source(int tag, const point_3d& pt, double t) const +{ + vec3d ret; + sol::tie(ret(0), ret(1), ret(2)) = + lua[SEC_BCONDS][tag][BC_SOURCE](tag, pt.x(), pt.y(), pt.z(), t); + return ret; +} + +vec3d +parameter_loader::eval_interface_source(int tag, const point_3d& pt, double t) const +{ + vec3d ret; + sol::tie(ret(0), 
ret(1), ret(2)) = + lua[SEC_IFCONDS][tag][IFC_SOURCE](tag, pt.x(), pt.y(), pt.z(), t); + return ret; +} + +field_export_mode +parameter_loader::postpro_fieldExportMode(const std::string& field) const +{ + auto pp = lua["postpro"][field]; + if (not pp.valid()) + { + std::cout << "[CONFIG] warning: invalid field '" << field; + std::cout << "' in 'postpro' table" << std::endl; + return field_export_mode::NONE; + } + + std::string silo_mode = pp["silo_mode"]; + + if (silo_mode == "nodal") + return field_export_mode::NODAL; + + if (silo_mode == "zonal") + return field_export_mode::ZONAL; + + if (silo_mode != "none") + { + std::cout << "[CONFIG] warning: invalid export mode '" << silo_mode; + std::cout << "'" << std::endl; + } + + return field_export_mode::NONE; +} + } // namespace maxwell diff --git a/src/maxwell_kernels_cuda.cu b/src/maxwell_kernels_cuda.cu index 384983b1a063f6d0780c11058810d6ab5b673772..7365e927a56e267686e42e6b4c59ffbd8c2ad7c7 100644 --- a/src/maxwell_kernels_cuda.cu +++ b/src/maxwell_kernels_cuda.cu @@ -86,6 +86,70 @@ gpu_compute_jumps(const entity_data_gpu& edg, const field_gpu& in, field_gpu& ju checkGPU(cudaPeekAtLastError()); } +void +gpu_compute_jumps_E(const entity_data_gpu& edg, const field_gpu& in, field_gpu& jumps, + double *bc_coeffs, cudaStream_t stream) +{ + static const size_t JUMP_THREADS = 256; + + auto num_all_fluxes = edg.num_all_elems*edg.num_fluxes*edg.num_faces_per_elem; + + auto gs = num_all_fluxes/JUMP_THREADS; + if (num_all_fluxes % JUMP_THREADS != 0) + gs += 1; + + dim3 grid_size(gs); + dim3 threads_per_block(JUMP_THREADS); + + /* Compute E-field jumps */ + gpu_compute_jumps_kernel<true><<<gs, threads_per_block, 0, stream>>>(in.Ex.data(), + edg.fluxdofs_mine.data(), edg.fluxdofs_neigh.data(), jumps.Ex.data(), + bc_coeffs, edg.flux_base, num_all_fluxes); + checkGPU(cudaPeekAtLastError()); + + gpu_compute_jumps_kernel<true><<<gs, threads_per_block, 0, stream>>>(in.Ey.data(), + edg.fluxdofs_mine.data(), edg.fluxdofs_neigh.data(), jumps.Ey.data(), + bc_coeffs, edg.flux_base, num_all_fluxes); + checkGPU(cudaPeekAtLastError()); + + gpu_compute_jumps_kernel<true><<<gs, threads_per_block, 0, stream>>>(in.Ez.data(), + edg.fluxdofs_mine.data(), edg.fluxdofs_neigh.data(), jumps.Ez.data(), + bc_coeffs, edg.flux_base, num_all_fluxes); + checkGPU(cudaPeekAtLastError()); +} + +void +gpu_compute_jumps_H(const entity_data_gpu& edg, const field_gpu& in, field_gpu& jumps, + double *bc_coeffs, cudaStream_t stream) +{ + static const size_t JUMP_THREADS = 256; + + auto num_all_fluxes = edg.num_all_elems*edg.num_fluxes*edg.num_faces_per_elem; + + auto gs = num_all_fluxes/JUMP_THREADS; + if (num_all_fluxes % JUMP_THREADS != 0) + gs += 1; + + dim3 grid_size(gs); + dim3 threads_per_block(JUMP_THREADS); + + /* Compute H-field jumps */ + gpu_compute_jumps_kernel<false><<<gs, threads_per_block, 0, stream>>>(in.Hx.data(), + edg.fluxdofs_mine.data(), edg.fluxdofs_neigh.data(), jumps.Hx.data(), + bc_coeffs, edg.flux_base, num_all_fluxes); + checkGPU(cudaPeekAtLastError()); + + gpu_compute_jumps_kernel<false><<<gs, threads_per_block, 0, stream>>>(in.Hy.data(), + edg.fluxdofs_mine.data(), edg.fluxdofs_neigh.data(), jumps.Hy.data(), + bc_coeffs, edg.flux_base, num_all_fluxes); + checkGPU(cudaPeekAtLastError()); + + gpu_compute_jumps_kernel<false><<<gs, threads_per_block, 0, stream>>>(in.Hz.data(), + edg.fluxdofs_mine.data(), edg.fluxdofs_neigh.data(), jumps.Hz.data(), + bc_coeffs, edg.flux_base, num_all_fluxes); + checkGPU(cudaPeekAtLastError()); +} + template<size_t K> __global__ void 
gpu_compute_fluxes_kernel_planar(field_gpu::const_raw_ptrs jump, field_gpu::raw_ptrs flux, @@ -131,8 +195,84 @@ gpu_compute_fluxes_kernel_planar(field_gpu::const_raw_ptrs jump, field_gpu::raw_ } template<size_t K> +__global__ void +gpu_compute_fluxes_kernel_planar_E(field_gpu::const_raw_ptrs jump, field_gpu::raw_ptrs flux, + field_gpu::const_raw_ptrs bndsrc, material_params_gpu::const_raw_ptrs fcp, + const double * __restrict__ face_dets, const double * __restrict__ face_normals, + size_t flux_base, size_t num_fluxes) +{ + using KS = kernel_gpu_sizes<K>; + auto local_dof_pos = blockIdx.x * blockDim.x + threadIdx.x; + auto global_dof_pos = flux_base + local_dof_pos; + + auto entry_num = local_dof_pos/KS::num_fluxes; + + if (local_dof_pos >= num_fluxes) + return; + + auto face_det = face_dets[entry_num]; + auto nx = face_normals[3*entry_num + 0]; + auto ny = face_normals[3*entry_num + 1]; + auto nz = face_normals[3*entry_num + 2]; + + auto jEx = jump.Ex[global_dof_pos] - bndsrc.Ex[global_dof_pos]; + auto jEy = jump.Ey[global_dof_pos] - bndsrc.Ey[global_dof_pos]; + auto jEz = jump.Ez[global_dof_pos] - bndsrc.Ez[global_dof_pos]; + auto jHx = jump.Hx[global_dof_pos] + bndsrc.Hx[global_dof_pos]; + auto jHy = jump.Hy[global_dof_pos] + bndsrc.Hy[global_dof_pos]; + auto jHz = jump.Hz[global_dof_pos] + bndsrc.Hz[global_dof_pos]; + + auto ndotE = nx*jEx + ny*jEy + nz*jEz; + + auto aE = face_det * fcp.aE[global_dof_pos]; + auto bE = face_det * fcp.bE[global_dof_pos]; + + flux.Ex[global_dof_pos] = aE*(nz*jHy - ny*jHz) + bE*(ndotE*nx - jEx); + flux.Ey[global_dof_pos] = aE*(nx*jHz - nz*jHx) + bE*(ndotE*ny - jEy); + flux.Ez[global_dof_pos] = aE*(ny*jHx - nx*jHy) + bE*(ndotE*nz - jEz); +} + +template<size_t K> +__global__ void +gpu_compute_fluxes_kernel_planar_H(field_gpu::const_raw_ptrs jump, field_gpu::raw_ptrs flux, + field_gpu::const_raw_ptrs bndsrc, material_params_gpu::const_raw_ptrs fcp, + const double * __restrict__ face_dets, const double * __restrict__ face_normals, + size_t flux_base, size_t num_fluxes) +{ + using KS = kernel_gpu_sizes<K>; + auto local_dof_pos = blockIdx.x * blockDim.x + threadIdx.x; + auto global_dof_pos = flux_base + local_dof_pos; + + auto entry_num = local_dof_pos/KS::num_fluxes; + + if (local_dof_pos >= num_fluxes) + return; + + auto face_det = face_dets[entry_num]; + auto nx = face_normals[3*entry_num + 0]; + auto ny = face_normals[3*entry_num + 1]; + auto nz = face_normals[3*entry_num + 2]; + + auto jEx = jump.Ex[global_dof_pos] - bndsrc.Ex[global_dof_pos]; + auto jEy = jump.Ey[global_dof_pos] - bndsrc.Ey[global_dof_pos]; + auto jEz = jump.Ez[global_dof_pos] - bndsrc.Ez[global_dof_pos]; + auto jHx = jump.Hx[global_dof_pos] + bndsrc.Hx[global_dof_pos]; + auto jHy = jump.Hy[global_dof_pos] + bndsrc.Hy[global_dof_pos]; + auto jHz = jump.Hz[global_dof_pos] + bndsrc.Hz[global_dof_pos]; + + auto ndotH = nx*jHx + ny*jHy + nz*jHz; + + auto aH = face_det * fcp.aH[global_dof_pos]; + auto bH = face_det * fcp.bH[global_dof_pos]; + + flux.Hx[global_dof_pos] = aH*(ny*jEz - nz*jEy) + bH*(ndotH*nx - jHx); + flux.Hy[global_dof_pos] = aH*(nz*jEx - nx*jEz) + bH*(ndotH*ny - jHy); + flux.Hz[global_dof_pos] = aH*(nx*jEy - ny*jEx) + bH*(ndotH*nz - jHz); +} + +template<size_t K, typename Kernel> void -launch_compute_fluxes_kernel(const entity_data_gpu& edg, const field_gpu& jumps, +launch_compute_fluxes_kernel(Kernel& kernel, const entity_data_gpu& edg, const field_gpu& jumps, field_gpu& fluxes, const field_gpu& bndsrc, const material_params_gpu& fcp, cudaStream_t stream) { @@ -150,7 +290,7 @@ 
launch_compute_fluxes_kernel(const entity_data_gpu& edg, const field_gpu& jumps, auto pp = fcp.data(); auto bs = bndsrc.data(); - gpu_compute_fluxes_kernel_planar<K><<<gs, FLUX_THREADS, 0, stream>>>(jp, fp, bs, pp, + kernel<<<gs, FLUX_THREADS, 0, stream>>>(jp, fp, bs, pp, edg.face_determinants.data(), edg.normals.data(), edg.flux_base, num_all_fluxes); } @@ -162,14 +302,136 @@ gpu_compute_fluxes(const entity_data_gpu& edg, const field_gpu& jumps, { switch( edg.a_order ) { - case 1: launch_compute_fluxes_kernel<1>(edg, jumps, fluxes, bndsrc, fcp, stream); break; - case 2: launch_compute_fluxes_kernel<2>(edg, jumps, fluxes, bndsrc, fcp, stream); break; - case 3: launch_compute_fluxes_kernel<3>(edg, jumps, fluxes, bndsrc, fcp, stream); break; - case 4: launch_compute_fluxes_kernel<4>(edg, jumps, fluxes, bndsrc, fcp, stream); break; - case 5: launch_compute_fluxes_kernel<5>(edg, jumps, fluxes, bndsrc, fcp, stream); break; - case 6: launch_compute_fluxes_kernel<6>(edg, jumps, fluxes, bndsrc, fcp, stream); break; + case 1: launch_compute_fluxes_kernel<1>(gpu_compute_fluxes_kernel_planar<1>, + edg, jumps, fluxes, bndsrc, fcp, stream); + break; + + case 2: launch_compute_fluxes_kernel<2>(gpu_compute_fluxes_kernel_planar<2>, + edg, jumps, fluxes, bndsrc, fcp, stream); + break; + + case 3: launch_compute_fluxes_kernel<3>(gpu_compute_fluxes_kernel_planar<3>, + edg, jumps, fluxes, bndsrc, fcp, stream); + break; + + case 4: launch_compute_fluxes_kernel<4>(gpu_compute_fluxes_kernel_planar<4>, + edg, jumps, fluxes, bndsrc, fcp, stream); + break; + + case 5: launch_compute_fluxes_kernel<5>(gpu_compute_fluxes_kernel_planar<5>, + edg, jumps, fluxes, bndsrc, fcp, stream); + break; + + case 6: launch_compute_fluxes_kernel<6>(gpu_compute_fluxes_kernel_planar<6>, + edg, jumps, fluxes, bndsrc, fcp, stream); + break; + default: throw std::invalid_argument("unsupported order"); } } +void +gpu_compute_fluxes_E(const entity_data_gpu& edg, const field_gpu& jumps, + field_gpu& fluxes, const field_gpu& bndsrc, const material_params_gpu& fcp, + cudaStream_t stream) +{ + switch( edg.a_order ) + { + case 1: launch_compute_fluxes_kernel<1>(gpu_compute_fluxes_kernel_planar_E<1>, + edg, jumps, fluxes, bndsrc, fcp, stream); + break; + + case 2: launch_compute_fluxes_kernel<2>(gpu_compute_fluxes_kernel_planar_E<2>, + edg, jumps, fluxes, bndsrc, fcp, stream); + break; + + case 3: launch_compute_fluxes_kernel<3>(gpu_compute_fluxes_kernel_planar_E<3>, + edg, jumps, fluxes, bndsrc, fcp, stream); + break; + + case 4: launch_compute_fluxes_kernel<4>(gpu_compute_fluxes_kernel_planar_E<4>, + edg, jumps, fluxes, bndsrc, fcp, stream); + break; + + case 5: launch_compute_fluxes_kernel<5>(gpu_compute_fluxes_kernel_planar_E<5>, + edg, jumps, fluxes, bndsrc, fcp, stream); + break; + + case 6: launch_compute_fluxes_kernel<6>(gpu_compute_fluxes_kernel_planar_E<6>, + edg, jumps, fluxes, bndsrc, fcp, stream); + break; + + default: throw std::invalid_argument("unsupported order"); + } +} + +void +gpu_compute_fluxes_H(const entity_data_gpu& edg, const field_gpu& jumps, + field_gpu& fluxes, const field_gpu& bndsrc, const material_params_gpu& fcp, + cudaStream_t stream) +{ + switch( edg.a_order ) + { + case 1: launch_compute_fluxes_kernel<1>(gpu_compute_fluxes_kernel_planar_H<1>, + edg, jumps, fluxes, bndsrc, fcp, stream); + break; + + case 2: launch_compute_fluxes_kernel<2>(gpu_compute_fluxes_kernel_planar_H<2>, + edg, jumps, fluxes, bndsrc, fcp, stream); + break; + + case 3: launch_compute_fluxes_kernel<3>(gpu_compute_fluxes_kernel_planar_H<3>, + 
edg, jumps, fluxes, bndsrc, fcp, stream);
+            break;
+
+        case 4: launch_compute_fluxes_kernel<4>(gpu_compute_fluxes_kernel_planar_H<4>,
+            edg, jumps, fluxes, bndsrc, fcp, stream);
+            break;
+
+        case 5: launch_compute_fluxes_kernel<5>(gpu_compute_fluxes_kernel_planar_H<5>,
+            edg, jumps, fluxes, bndsrc, fcp, stream);
+            break;
+
+        case 6: launch_compute_fluxes_kernel<6>(gpu_compute_fluxes_kernel_planar_H<6>,
+            edg, jumps, fluxes, bndsrc, fcp, stream);
+            break;
+
+        default: throw std::invalid_argument("unsupported order");
+    }
+}
+
+__global__ void
+gpu_bndsrcs_decompress_kernel(const size_t *dtable, const field_gpu::const_raw_ptrs csrcs,
+    field_gpu::raw_ptrs srcs)
+{
+    auto cdof = blockIdx.x * blockDim.x + threadIdx.x;
+
+    if (cdof >= csrcs.num_dofs)
+        return;
+
+    auto ddof = dtable[cdof];
+    srcs.Ex[ddof] = csrcs.Ex[cdof];
+    srcs.Ey[ddof] = csrcs.Ey[cdof];
+    srcs.Ez[ddof] = csrcs.Ez[cdof];
+    srcs.Hx[ddof] = csrcs.Hx[cdof];
+    srcs.Hy[ddof] = csrcs.Hy[cdof];
+    srcs.Hz[ddof] = csrcs.Hz[cdof];
+}
+
+void
+decompress_bndsrc(const solver_state_gpu& state, const field_gpu& csrcs, field_gpu& srcs)
+{
+    static const size_t DECOMPRESS_THREADS = 256;
+
+    auto num_cdofs = csrcs.num_dofs;
+    auto gs = num_cdofs/DECOMPRESS_THREADS;
+    if (num_cdofs % DECOMPRESS_THREADS != 0)
+        gs += 1;
+
+    dim3 grid_size(gs);
+
+    gpu_bndsrcs_decompress_kernel<<<gs, DECOMPRESS_THREADS, 0, state.compute_stream>>>(
+        state.bndsrcs_decomp_table.data(), csrcs.data(), srcs.data());
+}
+
 } // namespace maxwell
diff --git a/src/maxwell_solver.cpp b/src/maxwell_solver.cpp
index a8b323d94027e8f23f23646e3507013fd12c460c..69ee2b95ce30a2e91c9bebd66be8b562e1874b52 100644
--- a/src/maxwell_solver.cpp
+++ b/src/maxwell_solver.cpp
@@ -1,4 +1,9 @@
+#ifdef DONT_USE_STD_FILESYSTEM
+#include <sys/stat.h>
+#include <sys/types.h>
+#else
 #include <filesystem>
+#endif
 
 #ifdef _OPENMP
 #include <omp.h>
@@ -7,6 +12,7 @@
 #include "gmsh.h"
 #include "silo_output.hpp"
 #include "maxwell_interface.h"
+#include "maxwell_common.h"
 #include "param_loader.h"
 
 #if 0
@@ -69,22 +75,27 @@ void test_it(const model& mod, State& state, maxwell::parameter_loader& mpl)
     auto num_timesteps = mpl.sim_timesteps();
     auto silo_output_rate = mpl.postpro_siloOutputRate();
     auto cycle_print_rate = mpl.postpro_cyclePrintRate();
+    auto ti = mpl.sim_timeIntegrator();
 
 #ifdef _OPENMP
     omp_set_num_threads(4);
 #endif
 
+    std::cout << " BEGINNING SIMULATION" << std::endl;
+    std::cout << "I will do " << num_timesteps << " timesteps of " << state.delta_t;
+    std::cout << "s each." << std::endl;
+    std::cout << "Time integrator: " << mpl.sim_timeIntegratorName() << std::endl;
+
     prepare_sources(mod, state, mpl);
     for(size_t i = 0; i < num_timesteps; i++)
     {
         mpl.call_timestep_callback(i);
-        timestep(state);
+        timestep(state, ti);
         do_sources(mod, state, mpl);
 
-        std::stringstream ss;
-        ss << mpl.sim_name() << "/timestep_" << i << ".silo";
+
         if (i%silo_output_rate == 0)
-            maxwell::export_to_silo(mod, state, ss.str());
+            export_fields_to_silo(mod, state, mpl);
 
         if (i%cycle_print_rate == 0)
         {
@@ -115,7 +126,11 @@ int main(int argc, const char *argv[])
         return 1;
     }
 
+#ifdef DONT_USE_STD_FILESYSTEM
+    mkdir(mpl.sim_name().c_str(), 0755);
+#else
     std::filesystem::create_directory( mpl.sim_name() );
+#endif
 
     gmsh::open( mpl.sim_gmshmodel() );
 
diff --git a/src/param_loader.cpp b/src/param_loader.cpp
index 034b0b728580a8e956b89a111dda61f783384ca4..b89bf4f963f5b30f57019bfbc527e99dfd30a070 100644
--- a/src/param_loader.cpp
+++ b/src/param_loader.cpp
@@ -22,6 +22,7 @@ parameter_loader_base::init(void)
     //lua["sim"]["dt"];
     //lua["sim"]["timesteps"];
     //lua["sim"]["gmsh_model"];
+    lua["sim"]["time_integrator"] = "rk4";
 
     lua["materials"] = lua.create_table();
     lua["bndconds"] = lua.create_table();
@@ -210,3 +211,34 @@ parameter_loader_base::source_was_cleared(void)
 {
     m_source_changed_state = false;
 }
+
+time_integrator_type
+parameter_loader_base::sim_timeIntegrator(void) const
+{
+    std::string ti = lua["sim"]["time_integrator"];
+
+    if (ti == "rk4")
+        return time_integrator_type::RK4;
+
+    if (ti == "leapfrog")
+        return time_integrator_type::LEAPFROG;
+
+    if (ti == "euler")
+    {
+        std::cout << "[CONFIG] warning: Euler time integrator is only ";
+        std::cout << "for testing purposes" << std::endl;
+        return time_integrator_type::EULER;
+    }
+
+    std::cout << "[CONFIG] warning: invalid time integrator '";
+    std::cout << ti << "'" << std::endl;
+    return time_integrator_type::RK4;
+}
+
+std::string
+parameter_loader_base::sim_timeIntegratorName(void) const
+{
+    std::string ti = lua["sim"]["time_integrator"];
+    return ti;
+}
+
diff --git a/src/silo_io.cpp b/src/silo_io.cpp
index 738952c659148b27c0224f5b89930f821ae7228a..c9ffea23e4b7275505fffb47083f092ea039f8b4 100644
--- a/src/silo_io.cpp
+++ b/src/silo_io.cpp
@@ -8,7 +8,7 @@
 #define INVALID_OFS ((size_t) (~0))
 
 silo::silo()
-    : defvar_seq(0)
+    : m_siloDb(nullptr), defvar_seq(0)
 {}
 
 silo::~silo()
@@ -19,7 +19,6 @@
 void
 silo::gmsh_get_nodes(void)
 {
-    std::vector<size_t> nodeTags;
     std::vector<double> coords;
     std::vector<double> paraCoords;
@@ -171,6 +170,21 @@ silo::write_zonal_variable(const std::string& name, const std::vector<double>& d
     return true;
 }
 
+bool
+silo::write_nodal_variable(const std::string& name, const std::vector<double>& data)
+{
+    if (data.size() != num_nodes())
+    {
+        std::cout << "Invalid number of nodes" << std::endl;
+        return false;
+    }
+
+    DBPutUcdvar1(m_siloDb, name.c_str(), "mesh", data.data(), data.size(), NULL,
+        0, DB_DOUBLE, DB_NODECENT, NULL);
+
+    return true;
+}
+
 bool
 silo::write_zonal_variable(const std::string& name, const std::vector<float>& data)
 {
@@ -210,11 +224,25 @@ silo::write_vector_expression(const std::string& name, const std::string& expres
     return true;
 }
 
+size_t
+silo::num_nodes(void) const
+{
+    return nodeTags.size();
+}
+
+size_t
+silo::node_tag_offset(size_t tag) const
+{
+    assert(tag < node_tag2ofs.size());
+    return node_tag2ofs[tag];
+}
+
 void
 silo::close_db()
 {
     if (m_siloDb)
+    {
         DBClose(m_siloDb);
-
-    m_siloDb = nullptr;
+        m_siloDb = nullptr;
+    }
 }
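
A note on the leapfrog integrator introduced above: the conduction term `sigma*E` is treated semi-implicitly, averaged between time levels `n` and `n+1`, which is what removes the stability restriction on `sigma` that the explicit `1 - dt*sigma/epsilon` factor of the Euler and RK4 updates suffers from. Below is a minimal standalone sketch of the per-DOF E-field update (the helper name `leapfrog_E_update` is hypothetical; `rhs` stands for the curl(H)-plus-lifted-flux term the solver accumulates in `state.tmp`):

```cpp
#include <cstddef>
#include <vector>

// Sketch of the per-DOF leapfrog E-field update (CPU path). The arrays
// mirror the matparams members sigma_over_epsilon and inv_epsilon; 'rhs'
// stands for the curl(H) + lifted-flux contribution held in state.tmp.
static void
leapfrog_E_update(const std::vector<double>& E_curr,
                  const std::vector<double>& rhs,
                  const std::vector<double>& sigma_over_epsilon,
                  const std::vector<double>& inv_epsilon,
                  double dt,
                  std::vector<double>& E_next)
{
    for (size_t i = 0; i < E_curr.size(); i++)
    {
        // sigma*E is averaged between levels n and n+1, so the decay factor
        // CRM/CRP satisfies |CRM/CRP| <= 1 for any sigma >= 0.
        double CRM = 1.0 - 0.5*dt*sigma_over_epsilon[i];
        double CRP = 1.0 + 0.5*dt*sigma_over_epsilon[i];
        E_next[i] = E_curr[i]*(CRM/CRP) + rhs[i]*(dt*inv_epsilon[i]/CRP);
    }
}
```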
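The RK4 branch of `timestep()` keeps the classical four-stage structure, but folds the conduction factor into every stage through `compute_euler_update()` and into the final combination through `compute_rk4_weighted_sum()`. A scalar sketch of one step, under the assumption that `apply_op` stands in for `apply_operator()` (curls plus fluxes) at a single DOF; the function name `rk4_step` is hypothetical:

```cpp
#include <functional>

// Scalar sketch of the RK4 stage structure in timestep(). 'sr' plays the
// role of sigma/epsilon and 'ie' of 1/epsilon at one DOF.
static double
rk4_step(double y, double dt, double sr, double ie,
         const std::function<double(double)>& apply_op)
{
    // mirrors compute_euler_update: out = y*(1 - h*sr) + k*(h*ie)
    auto euler = [&](double yy, double k, double h) {
        return yy*(1.0 - h*sr) + k*(h*ie);
    };

    double k1 = apply_op(y);
    double k2 = apply_op(euler(y, k1, 0.5*dt));
    double k3 = apply_op(euler(y, k2, 0.5*dt));
    double k4 = apply_op(euler(y, k3, dt));

    // mirrors compute_rk4_weighted_sum
    return y*(1.0 - dt*sr) + (dt*ie)*(k1 + 2.0*k2 + 2.0*k3 + k4)/6.0;
}
```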
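The split E/H flux kernels evaluate the same upwind flux as the combined kernel, one family at a time. Written out for a single face DOF, the E-side flux of `gpu_compute_fluxes_kernel_planar_E` looks as follows (a sketch; the helper name `flux_E` is hypothetical, `aE`/`bE` are the material coefficients already scaled by the face determinant, and `jE`/`jH` are the source-adjusted field jumps):

```cpp
#include <array>

// Per-face-DOF E-side upwind flux, as computed by the planar_E kernel:
// flux_E = aE * (-n x jH) + bE * ((n.jE) n - jE), component by component.
static std::array<double,3>
flux_E(const std::array<double,3>& n,   // face normal
       const std::array<double,3>& jE,  // jump.E - bndsrc.E
       const std::array<double,3>& jH,  // jump.H + bndsrc.H
       double aE, double bE)
{
    double ndotE = n[0]*jE[0] + n[1]*jE[1] + n[2]*jE[2];
    return {
        aE*(n[2]*jH[1] - n[1]*jH[2]) + bE*(ndotE*n[0] - jE[0]),
        aE*(n[0]*jH[2] - n[2]*jH[0]) + bE*(ndotE*n[1] - jE[1]),
        aE*(n[1]*jH[0] - n[0]*jH[1]) + bE*(ndotE*n[2] - jE[2]),
    };
}
```

The H-side kernel is the mirror image, with `aH`/`bH` and the roles of the E and H jumps exchanged.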
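The reworked boundary-source path uploads only the flux DOFs that sit on `BOUNDARY` or `INTERFACE` faces: `compress_bndsrc()` gathers them into a pinned staging buffer through the index table built in `init_from_model()`, and `gpu_bndsrcs_decompress_kernel` scatters them back to their global positions on the device. A host-side sketch of the gather/scatter pair for one field component (the helper names `compress`/`decompress` are hypothetical):

```cpp
#include <cstddef>
#include <vector>

// table[i] = global flux DOF that compressed slot i maps to
// (bndsrcs_decomp_table_cpu in the solver state).
static void
compress(const std::vector<size_t>& table,
         const std::vector<double>& full, std::vector<double>& packed)
{
    for (size_t i = 0; i < table.size(); i++)
        packed[i] = full[table[i]];     // host-side gather before copyin()
}

static void
decompress(const std::vector<size_t>& table,
           const std::vector<double>& packed, std::vector<double>& full)
{
    // on the GPU this loop body runs as one thread per compressed DOF
    for (size_t i = 0; i < table.size(); i++)
        full[table[i]] = packed[i];
}
```

Only `table.size()` doubles per component cross the host-to-device link instead of `num_fluxes`, which is where the upload saving comes from.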
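All the new kernel launch sites size their grids with the same divide-then-fixup pattern (`gs = n/T; if (n % T != 0) gs += 1;`). This is equivalent to the usual round-up integer division, shown here as a self-contained helper (a stylistic note only; the name `blocks_for` is hypothetical):

```cpp
#include <cstddef>

// Round-up integer division: smallest g with g*threads_per_block >= n.
// Equivalent to the gs/if pattern used at the kernel launch sites.
constexpr size_t blocks_for(size_t n, size_t threads_per_block)
{
    return (n + threads_per_block - 1) / threads_per_block;
}

static_assert(blocks_for(1000, 256) == 4, "4 blocks of 256 cover 1000 items");
static_assert(blocks_for(1024, 256) == 4, "exact multiples need no extra block");
```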