diff --git a/CMakeLists.txt b/CMakeLists.txt index e1ece6ee8f84ea181a7dd770c7abd5d61eb98a3e..5e0068e5d4189093c358bf34c9987706d5815f1a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -147,7 +147,7 @@ endif () if (COMPILER_IS_CLANG) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Weverything -Wno-c++98-compat -Wno-c++98-compat-pedantic -Wno-zero-as-null-pointer-constant -Wno-global-constructors -Wno-padded -Wno-shorten-64-to-32 -Wno-sign-conversion -Wno-old-style-cast -Wno-sign-compare -Wno-c99-extensions -Wno-extra-semi-stmt") else() - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wno-sign-compare") endif() set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g -fpermissive") set(CMAKE_CXX_FLAGS_RELEASE "-O3 -g -DNDEBUG -fpermissive") @@ -243,6 +243,7 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include) set(COMMON_SOURCES gmsh_io.cpp reference_element.cpp physical_element.cpp entity.cpp kernels_cpu.cpp connectivity.cpp entity_data.cpp) set(LIBGMSHDG_SOURCES src/gmsh_io.cpp + src/silo_io.cpp src/entity.cpp src/reference_element.cpp src/physical_element.cpp @@ -251,11 +252,15 @@ set(LIBGMSHDG_SOURCES src/gmsh_io.cpp src/kernels_cpu.cpp src/kernels_gpu_mi.cpp src/kernels_cuda.cu + src/maxwell_interface.cpp ) add_library(gmshdg SHARED ${LIBGMSHDG_SOURCES}) target_link_libraries(gmshdg ${LINK_LIBS}) +add_executable(maxwell_solver src/maxwell_solver.cpp) +target_link_libraries(maxwell_solver gmshdg) + add_executable(test_basics tests/test_basics.cpp) target_link_libraries(test_basics gmshdg) diff --git a/include/entity.h b/include/entity.h index ebf3997f849c6670d7c8ff4911b72d725ee77bda..46fa446a725b9819f6af2c2f70bd2772c161828e 100644 --- a/include/entity.h +++ b/include/entity.h @@ -8,6 +8,7 @@ #include "entity_data.h" using scalar_function = std::function<double(const point_3d&)>; +using vector_function = std::function<vec3d(const point_3d&)>; class entity { @@ -59,8 +60,12 @@ public: /* add method to adjust offset in system!! */ double measure(void) const; + vecxd project(const scalar_function&) const; + //matxd project(const vector_function&) const; + void project(const scalar_function&, vecxd&) const; + void project(const vector_function&, vecxd&, vecxd&, vecxd&) const; size_t num_cells(void) const; size_t num_cell_orientations(void) const; diff --git a/include/gmsh_io.h b/include/gmsh_io.h index 12d730b222624bec51a4ce607484b8037f8b2dd5..60b0194ecdcd280e9a1bf595261412d5f371817e 100644 --- a/include/gmsh_io.h +++ b/include/gmsh_io.h @@ -60,14 +60,15 @@ public: size_t num_dofs() const; size_t num_fluxes() const; size_t num_cells() const; + size_t num_entities() const; std::vector<entity>::const_iterator begin() const; std::vector<entity>::const_iterator end() const; std::vector<entity>::iterator begin(); std::vector<entity>::iterator end(); - entity& entity_at(size_t); - entity entity_at(size_t) const; + entity& entity_at(size_t); + const entity& entity_at(size_t) const; }; diff --git a/include/gpu_support.hpp b/include/gpu_support.hpp index 052243d3dec8382d010ae9c0378a2eae187ebc29..658c7a3315fd389297956ee2698742a246ef597b 100644 --- a/include/gpu_support.hpp +++ b/include/gpu_support.hpp @@ -148,82 +148,109 @@ fetch_tex(cudaTextureObject_t tex, int32_t ofs) template<typename T> class device_vector { - T *d_vptr; - size_t vsize; + T * m_devptr; + size_t m_size; public: device_vector() - : d_vptr(nullptr), vsize(0) + : m_devptr(nullptr), m_size(0) {} device_vector(size_t size) - : d_vptr(nullptr), vsize(0) + : m_devptr(nullptr), m_size(0) { - (void)checkGPU( gpuMalloc((void**)&d_vptr, size*sizeof(T)) ); - (void)checkGPU( gpuMemset(d_vptr, 0, size*sizeof(T)) ); - vsize = size; + (void)checkGPU( gpuMalloc((void**)&m_devptr, size*sizeof(T)) ); + (void)checkGPU( gpuMemset(m_devptr, 0, size*sizeof(T)) ); + m_size = size; } - /* allocate memory and copyin data at ptr */ device_vector(const T *src, size_t size) - : d_vptr(nullptr), vsize(0) + : m_devptr(nullptr), m_size(0) { copyin(src, size); } + device_vector(const device_vector& other) + { + (void)checkGPU( gpuMalloc((void**)&m_devptr, other.m_size*sizeof(T)) ); + (void)checkGPU( gpuMemcpy(m_devptr, other.m_devptr, other.m_size*sizeof(T), gpuMemcpyDeviceToDevice) ); + m_size = other.m_size; + } + + device_vector(device_vector&& other) + { + m_devptr = other.m_devptr; + m_size = other.m_size; + other.m_devptr = nullptr; + other.m_size = 0; + } + void copyin(const T *src, size_t size) { - if (d_vptr != nullptr) - (void) gpuFree(d_vptr); + if (m_devptr != nullptr) + (void) gpuFree(m_devptr); - (void)checkGPU( gpuMalloc((void**)&d_vptr, size*sizeof(T)) ); - (void)checkGPU( gpuMemcpy(d_vptr, src, size*sizeof(T), gpuMemcpyHostToDevice) ); - vsize = size; + (void)checkGPU( gpuMalloc((void**)&m_devptr, size*sizeof(T)) ); + (void)checkGPU( gpuMemcpy(m_devptr, src, size*sizeof(T), gpuMemcpyHostToDevice) ); + m_size = size; } void copyout(T *dst) { - if (d_vptr == nullptr) + if (m_devptr == nullptr) return; - (void)checkGPU( gpuMemcpy(dst, d_vptr, vsize*sizeof(T), gpuMemcpyDeviceToHost) ); + (void)checkGPU( gpuMemcpy(dst, m_devptr, m_size*sizeof(T), gpuMemcpyDeviceToHost) ); } void resize(size_t size) { - if (d_vptr != nullptr) - (void) gpuFree(d_vptr); + if (size == m_size) + return; - (void)checkGPU( gpuMalloc((void**)&d_vptr, size*sizeof(T)) ); - vsize = size; - } + if (m_devptr != nullptr) + (void) gpuFree(m_devptr); - device_vector(const device_vector&) = delete; + (void)checkGPU( gpuMalloc((void**)&m_devptr, size*sizeof(T)) ); + m_size = size; + } - device_vector(device_vector&& other) + device_vector& operator=(const device_vector& other) { - d_vptr = other.d_vptr; - vsize = other.vsize; - other.d_vptr = nullptr; - other.vsize = 0; + if ( (m_devptr != nullptr) or (m_size != other.m_size) ) + { + (void) gpuFree(m_devptr); + (void) checkGPU( gpuMalloc((void**)&m_devptr, other.m_size*sizeof(T)) ); + } + + (void) checkGPU( gpuMemcpy(m_devptr, other.m_devptr, other.m_size*sizeof(T), gpuMemcpyDeviceToDevice) ); + m_size = other.m_size; } - device_vector& operator=(const device_vector&) = delete; + void zero(void) + { + (void) checkGPU( gpuMemset(m_devptr, 0, m_size*sizeof(T)) ); + } T* data() { - return d_vptr; + return m_devptr; + } + + const T* data() const + { + return m_devptr; } size_t size() const { - return vsize; + return m_size; } ~device_vector() { - if (d_vptr) - (void)checkGPU(gpuFree(d_vptr)); + if (m_devptr) + (void)checkGPU(gpuFree(m_devptr)); } }; diff --git a/include/kernels_gpu.h b/include/kernels_gpu.h index d8df2bfb2d16977367fe4b57b0f1b8da83ba0735..1c81c6d51719278d8f16cea65646973cb69b006f 100644 --- a/include/kernels_gpu.h +++ b/include/kernels_gpu.h @@ -143,8 +143,9 @@ void reshape_dofs(const entity_data_cpu&, const entity_data_gpu&, const vecxd&, void -gpu_compute_field_derivatives(entity_data_gpu& edg, - gpuTextureObject_t F, double *dF_dx, double* dF_dy, double* dF_dz); +gpu_compute_field_derivatives(const entity_data_gpu& edg, const double *F, + double *dF_dx, double* dF_dy, double* dF_dz, double alpha); -void -gpu_compute_flux_lifting(entity_data_gpu& edg, const double *f, double *out); \ No newline at end of file +void gpu_compute_flux_lifting(entity_data_gpu&, const double *, double *); + +void gpu_subtract(double *, const double *, const double *, size_t); \ No newline at end of file diff --git a/include/maxwell_interface.h b/include/maxwell_interface.h new file mode 100644 index 0000000000000000000000000000000000000000..2481d886eec061ed99bb2d9e921520fd30497b4e --- /dev/null +++ b/include/maxwell_interface.h @@ -0,0 +1,89 @@ +#pragma once + +#include "types.h" +#include "gpu_support.hpp" + +struct electromagnetic_field +{ + vecxd Ex; + vecxd Ey; + vecxd Ez; + vecxd Hx; + vecxd Hy; + vecxd Hz; + + size_t num_dofs; + + void resize(size_t); +}; + +struct electromagnetic_material_params +{ + size_t num_dofs; + size_t num_fluxes; + + vecxd inv_epsilon; + vecxd inv_mu; + vecxd sigma; + vecxd sigma_over_epsilon; + + /* Lax-Milgram flux coefficients */ + vecxd aE; + vecxd bE; + vecxd aH; + vecxd bH; + + /* Boundary conditions coefficients */ + vecxd bc_coeffs; +}; + +struct electromagnetic_material_params_gpu +{ + size_t num_dofs; + size_t num_fluxes; + + device_vector<double> inv_epsilon; + device_vector<double> inv_mu; + device_vector<double> sigma; + device_vector<double> sigma_over_epsilon; + + /* Lax-Milgram flux coefficients */ + device_vector<double> aE; + device_vector<double> bE; + device_vector<double> aH; + device_vector<double> bH; + + /* Boundary conditions coefficients */ + device_vector<double> bc_coeffs; +}; + +struct electromagnetic_field_gpu +{ + device_vector<double> Ex; + device_vector<double> Ey; + device_vector<double> Ez; + device_vector<double> Hx; + device_vector<double> Hy; + device_vector<double> Hz; + + size_t num_dofs; + + struct raw_ptrs + { + size_t num_dofs; + + double *Ex; + double *Ey; + double *Ez; + double *Hx; + double *Hy; + double *Hz; + }; + + void zero(void); + void resize(size_t); + raw_ptrs data(void); + void copyin(const electromagnetic_field&); + void copyout(electromagnetic_field&); + +}; \ No newline at end of file diff --git a/include/silo_output.hpp b/include/silo_output.hpp index 52fac0cb7db4497ab51a09240b53a3eed0d9f139..092f443b8fae5d26f879e497b115e503a2c7dd4e 100644 --- a/include/silo_output.hpp +++ b/include/silo_output.hpp @@ -1,10 +1,7 @@ #pragma once -#include <cassert> +#include <vector> #include <silo.h> -#include "gmsh.h" - -#define INVALID_OFS ((size_t) (~0)) class silo { @@ -16,171 +13,21 @@ class silo std::vector<int> nodelist; int num_cells; - DBfile *m_siloDb; - - void gmsh_get_nodes() - { - std::vector<size_t> nodeTags; - std::vector<double> coords; - std::vector<double> paraCoords; - - gmsh::model::mesh::getNodes(nodeTags, coords, paraCoords, -1, -1, true, false); - - auto maxtag_pos = std::max_element(nodeTags.begin(), nodeTags.end()); - - size_t maxtag = 0; - if (maxtag_pos != nodeTags.end()) - maxtag = (*maxtag_pos)+1; - - node_coords_x.resize( nodeTags.size() ); - node_coords_y.resize( nodeTags.size() ); - node_coords_z.resize( nodeTags.size() ); - - for (size_t i = 0; i < nodeTags.size(); i++) - { - node_coords_x[i] = coords[3*i+0]; - node_coords_y[i] = coords[3*i+1]; - node_coords_z[i] = coords[3*i+2]; - } - - node_tag2ofs.resize(maxtag, INVALID_OFS); - for (size_t i = 0; i < nodeTags.size(); i++) - node_tag2ofs.at( nodeTags[i] ) = i; - } - - void gmsh_get_elements() - { - gmsh::vectorpair entities; - num_cells = 0; - gmsh::model::getEntities(entities, 3); - for (auto [dim, tag] : entities) - { - std::vector<int> elemTypes; - gmsh::model::mesh::getElementTypes(elemTypes, dim, tag); - for (auto& elemType : elemTypes) - { - std::vector<size_t> elemTags; - std::vector<size_t> elemNodeTags; - gmsh::model::mesh::getElementsByType(elemType, elemTags, elemNodeTags, tag); - auto nodesPerElem = elemNodeTags.size()/elemTags.size(); - assert( elemTags.size() * nodesPerElem == elemNodeTags.size() ); - - for (size_t i = 0; i < elemTags.size(); i++) - { - auto base = nodesPerElem * i; + DBfile * m_siloDb; - auto node0_tag = elemNodeTags[base + 0]; - assert(node0_tag < node_tag2ofs.size()); - auto node0_ofs = node_tag2ofs[node0_tag]; - assert(node0_ofs != INVALID_OFS); - nodelist.push_back(node0_ofs + 1); - - auto node1_tag = elemNodeTags[base + 1]; - assert(node1_tag < node_tag2ofs.size()); - auto node1_ofs = node_tag2ofs[node1_tag]; - assert(node1_ofs != INVALID_OFS); - nodelist.push_back(node1_ofs + 1); - - auto node2_tag = elemNodeTags[base + 2]; - assert(node2_tag < node_tag2ofs.size()); - auto node2_ofs = node_tag2ofs[node2_tag]; - assert(node2_ofs != INVALID_OFS); - nodelist.push_back(node2_ofs + 1); - - auto node3_tag = elemNodeTags[base + 3]; - assert(node3_tag < node_tag2ofs.size()); - auto node3_ofs = node_tag2ofs[node3_tag]; - assert(node3_ofs != INVALID_OFS); - nodelist.push_back(node3_ofs + 1); - } - - num_cells += elemTags.size(); - } - } - - assert(nodelist.size() == 4*num_cells); - } + void gmsh_get_nodes(void); + void gmsh_get_elements(void); public: - silo() - {} - - void import_mesh_from_gmsh() - { - gmsh_get_nodes(); - gmsh_get_elements(); - } - - bool create_db(const std::string& dbname) - { - m_siloDb = DBCreate(dbname.c_str(), DB_CLOBBER, DB_LOCAL, NULL, DB_HDF5); - if (m_siloDb) - return true; - - std::cout << "SILO: Error creating database" << std::endl; - return false; - } - - bool write_mesh(double time = -1.0, int cycle = 0) - { - DBoptlist *optlist = nullptr; - - if (time >= 0) - { - optlist = DBMakeOptlist(2); - DBAddOption(optlist, DBOPT_CYCLE, &cycle); - DBAddOption(optlist, DBOPT_DTIME, &time); - } - - double *coords[] = {node_coords_x.data(), node_coords_y.data(), node_coords_z.data()}; - - int lnodelist = nodelist.size(); - - int shapetype[] = { DB_ZONETYPE_TET }; - int shapesize[] = {4}; - int shapecounts[] = { num_cells }; - int nshapetypes = 1; - int nnodes = node_coords_x.size(); - int nzones = num_cells; - int ndims = 3; - - DBPutZonelist2(m_siloDb, "zonelist", nzones, ndims, - nodelist.data(), lnodelist, 1, 0, 0, shapetype, shapesize, - shapecounts, nshapetypes, NULL); - - DBPutUcdmesh(m_siloDb, "mesh", ndims, NULL, coords, nnodes, nzones, - "zonelist", NULL, DB_DOUBLE, optlist); - - if (optlist) - DBFreeOptlist(optlist); - - return true; - } - - bool write_zonal_variable(const std::string& name, const std::vector<double>& data) - { - if (data.size() != num_cells) - { - std::cout << "Invalid number of cells" << std::endl; - return false; - } - - DBPutUcdvar1(m_siloDb, name.c_str(), "mesh", data.data(), data.size(), NULL, - 0, DB_DOUBLE, DB_ZONECENT, NULL); - - return true; - } - - void close_db() - { - if (m_siloDb) - DBClose(m_siloDb); - - m_siloDb = nullptr; - } - - ~silo() - { - close_db(); - } + silo(); + ~silo(); + + void import_mesh_from_gmsh(); + bool create_db(const std::string&); + bool write_mesh(double time = -1.0, int cycle = 0); + bool write_zonal_variable(const std::string&, const std::vector<double>&); + bool write_zonal_variable(const std::string&, const std::vector<float>&); + bool write_scalar_expression(const std::string& name, const std::string& expression); + bool write_vector_expression(const std::string& name, const std::string& expression); + void close_db(); }; diff --git a/src/entity.cpp b/src/entity.cpp index d6801d4006661d498773a8fcf4a763611f6eb895..aa68961024c99036c7317db5785cc65b9a17a632 100644 --- a/src/entity.cpp +++ b/src/entity.cpp @@ -126,6 +126,45 @@ entity::project(const scalar_function& function, vecxd& pf) const } } +/* Project a function on this entity and store the partial result in + * the global vector pf at the offset corresponding to this entity. */ +void +entity::project(const vector_function& function, vecxd& pfx, vecxd& pfy, vecxd& pfz) const +{ + auto num_bf = reference_cells[0].num_basis_functions(); + + for (size_t iT = 0; iT < physical_cells.size(); iT++) + { + const auto& pe = physical_cells[iT]; + assert( pe.orientation() < reference_cells.size() ); + const auto& re = reference_cells[pe.orientation()]; + const auto pqps = pe.integration_points(); + const auto dets = pe.determinants(); + + assert(pqps.size() == dets.size()); + + matxd mass = matxd::Zero(num_bf, num_bf); + vecxd rhs_x = vecxd::Zero(num_bf); + vecxd rhs_y = vecxd::Zero(num_bf); + vecxd rhs_z = vecxd::Zero(num_bf); + for (size_t iQp = 0; iQp < pqps.size(); iQp++) + { + const auto& pqp = pqps[iQp]; + const vecxd phi = re.basis_functions(iQp); + mass += pqp.weight() * phi * phi.transpose(); + vec3d fval = function(pqp.point()); + rhs_x += pqp.weight() * fval(0) * phi; + rhs_y += pqp.weight() * fval(1) * phi; + rhs_z += pqp.weight() * fval(2) * phi; + } + + Eigen::LDLT<matxd> mass_ldlt(mass); + pfx.segment(m_dof_base + iT*num_bf, num_bf) = mass_ldlt.solve(rhs_x); + pfy.segment(m_dof_base + iT*num_bf, num_bf) = mass_ldlt.solve(rhs_y); + pfz.segment(m_dof_base + iT*num_bf, num_bf) = mass_ldlt.solve(rhs_z); + } +} + const reference_element& entity::cell_refelem(size_t iO) const { diff --git a/src/gmsh_io.cpp b/src/gmsh_io.cpp index 7009681dce42cb49332b9db161adfa02e29dfee3..e6022c2e17bbaa1b23b7edc02942c08f1dedb18c 100644 --- a/src/gmsh_io.cpp +++ b/src/gmsh_io.cpp @@ -117,6 +117,12 @@ model::entity_at(size_t pos) return entities.at(pos); } +const entity& +model::entity_at(size_t pos) const +{ + return entities.at(pos); +} + size_t model::num_dofs(void) const { @@ -147,6 +153,12 @@ model::num_cells(void) const return ret; } +size_t +model::num_entities(void) const +{ + return entities.size(); +} + void model::update_connectivity(const entity& e, size_t entity_index) { diff --git a/src/kernels_cuda.cu b/src/kernels_cuda.cu index 667d480c5b764d2ae0c24da934457e9a2ffe34cc..da11465929dad3538f57be27107f8072254f8461 100644 --- a/src/kernels_cuda.cu +++ b/src/kernels_cuda.cu @@ -1,56 +1,59 @@ #include "entity_data.h" #include "kernels_gpu.h" +/* compute α∇F */ template<size_t K> __global__ void -gpu_deriv_planar(gpuTextureObject_t F, const double * __restrict__ J, +gpu_deriv_planar(const double *F, const double * __restrict__ J, gpuTextureObject_t DM_tex, double * __restrict__ dF_dx, double * __restrict__ dF_dy, double * __restrict__ dF_dz, - int32_t num_all_elems, int32_t* orients, int32_t dof_base) + double alpha, int32_t num_all_elems, const int32_t* orients, + int32_t dof_base) { using KS = kernel_gpu_sizes<K>; - int ofs_in_entity = (blockIdx.x * blockDim.x + threadIdx.x); + const int32_t ofs_in_entity = (blockIdx.x * blockDim.x + threadIdx.x); if (ofs_in_entity >= num_all_elems*KS::num_bf) return; - int32_t output_dof_offset = dof_base + ofs_in_entity; + const int32_t output_dof_offset = dof_base + ofs_in_entity; - int32_t iT = ofs_in_entity / KS::num_bf; - int32_t elem_dof_base = dof_base + iT*KS::num_bf; - int32_t iO = orients[iT]; - int32_t DM_row = ofs_in_entity % KS::num_bf; - int32_t DM_orient = 3*KS::num_bf*KS::num_bf*iO; + const int32_t iT = ofs_in_entity / KS::num_bf; + const int32_t elem_dof_base = dof_base + iT*KS::num_bf; + const int32_t iO = orients[iT]; + const int32_t DM_row = ofs_in_entity % KS::num_bf; + const int32_t DM_orient = 3*KS::num_bf*KS::num_bf*iO; + const int32_t DM_base = DM_orient + DM_row; + const int32_t jac_ofs = 9*iT; double accm_dF_dx = 0.0; double accm_dF_dy = 0.0; double accm_dF_dz = 0.0; - int32_t jac_ofs = 9*iT; for (int32_t dof = 0; dof < KS::num_bf; dof++) { - int32_t d_ofs = DM_orient + DM_row + 3*KS::num_bf*dof; + int32_t d_ofs = DM_base + 3*KS::num_bf*dof; int32_t f_ofs = elem_dof_base + dof; - double v = fetch_tex(DM_tex, d_ofs) * fetch_tex(F, f_ofs); + double v = fetch_tex(DM_tex, d_ofs) * F[f_ofs];//fetch_tex(F, f_ofs); accm_dF_dx += J[jac_ofs+0] * v; accm_dF_dy += J[jac_ofs+3] * v; accm_dF_dz += J[jac_ofs+6] * v; - d_ofs = DM_orient + DM_row + 3*KS::num_bf*dof + KS::num_bf; - v = fetch_tex(DM_tex, d_ofs) * fetch_tex(F, f_ofs); + d_ofs = DM_base + 3*KS::num_bf*dof + KS::num_bf; + v = fetch_tex(DM_tex, d_ofs) * F[f_ofs];//fetch_tex(F, f_ofs); accm_dF_dx += J[jac_ofs+1] * v; accm_dF_dy += J[jac_ofs+4] * v; accm_dF_dz += J[jac_ofs+7] * v; - d_ofs = DM_orient + DM_row + 3*KS::num_bf*dof + 2*KS::num_bf; - v = fetch_tex(DM_tex, d_ofs) * fetch_tex(F, f_ofs); + d_ofs = DM_base + 3*KS::num_bf*dof + 2*KS::num_bf; + v = fetch_tex(DM_tex, d_ofs) * F[f_ofs];//fetch_tex(F, f_ofs); accm_dF_dx += J[jac_ofs+2] * v; accm_dF_dy += J[jac_ofs+5] * v; accm_dF_dz += J[jac_ofs+8] * v; } - dF_dx[output_dof_offset] = accm_dF_dx; - dF_dy[output_dof_offset] = accm_dF_dy; - dF_dz[output_dof_offset] = accm_dF_dz; + dF_dx[output_dof_offset] = alpha*accm_dF_dx; + dF_dy[output_dof_offset] = alpha*accm_dF_dy; + dF_dz[output_dof_offset] = alpha*accm_dF_dz; } template<size_t K> @@ -64,43 +67,44 @@ gpu_deriv_planar_blocked(gpuTextureObject_t F, const double * __restrict__ J, if (threadIdx.x >= KS::dofs_per_dblock) return; - - int32_t elem_base = (blockIdx.y * blockDim.y + threadIdx.y) * KS::cells_per_dblock; - int32_t elem_ofs = threadIdx.x / KS::num_bf; - int32_t iT = elem_base + elem_ofs; + const int32_t y_idx = (blockIdx.y * blockDim.y + threadIdx.y); + const int32_t elem_base = y_idx * KS::cells_per_dblock; + const int32_t elem_ofs = threadIdx.x / KS::num_bf; + const int32_t iT = elem_base + elem_ofs; if (iT >= num_all_elems) return; - int32_t block_dof_base = (blockIdx.y * blockDim.y + threadIdx.y) * KS::dblock_size; + const int32_t block_dof_base = y_idx * KS::dblock_size; - int32_t output_dof_offset = dof_base + block_dof_base + threadIdx.x; + const int32_t output_dof_offset = dof_base + block_dof_base + threadIdx.x; - int32_t elem_dof_base = dof_base + block_dof_base + elem_ofs*KS::num_bf; - int32_t iO = orients[iT]; - int32_t DM_row = threadIdx.x % KS::num_bf; - int32_t DM_orient = 3*KS::num_bf*KS::num_bf*iO; + const int32_t elem_dof_base = dof_base + block_dof_base + elem_ofs*KS::num_bf; + const int32_t iO = orients[iT]; + const int32_t DM_row = threadIdx.x % KS::num_bf; + const int32_t DM_orient = 3*KS::num_bf*KS::num_bf*iO; + const int32_t DM_base = DM_orient + DM_row; + const int32_t jac_ofs = 9*iT; double accm_dF_dx = 0.0; double accm_dF_dy = 0.0; double accm_dF_dz = 0.0; - int32_t jac_ofs = 9*iT; for (int32_t dof = 0; dof < KS::num_bf; dof++) { - int32_t d_ofs = DM_orient + DM_row + 3*KS::num_bf*dof; + int32_t d_ofs = DM_base + 3*KS::num_bf*dof; int32_t f_ofs = elem_dof_base + dof; double v = fetch_tex(DM_tex, d_ofs) * fetch_tex(F, f_ofs); accm_dF_dx += J[jac_ofs+0] * v; accm_dF_dy += J[jac_ofs+3] * v; accm_dF_dz += J[jac_ofs+6] * v; - d_ofs = DM_orient + DM_row + 3*KS::num_bf*dof + KS::num_bf; + d_ofs = DM_base + 3*KS::num_bf*dof + KS::num_bf; v = fetch_tex(DM_tex, d_ofs) * fetch_tex(F, f_ofs); accm_dF_dx += J[jac_ofs+1] * v; accm_dF_dy += J[jac_ofs+4] * v; accm_dF_dz += J[jac_ofs+7] * v; - d_ofs = DM_orient + DM_row + 3*KS::num_bf*dof + 2*KS::num_bf; + d_ofs = DM_base + 3*KS::num_bf*dof + 2*KS::num_bf; v = fetch_tex(DM_tex, d_ofs) * fetch_tex(F, f_ofs); accm_dF_dx += J[jac_ofs+2] * v; accm_dF_dy += J[jac_ofs+5] * v; @@ -114,8 +118,8 @@ gpu_deriv_planar_blocked(gpuTextureObject_t F, const double * __restrict__ J, template<size_t K> void -launch_deriv_kernel(entity_data_gpu& edg, - gpuTextureObject_t f, double *df_dx, double* df_dy, double* df_dz) +launch_deriv_kernel(const entity_data_gpu& edg, + const double *f, double *df_dx, double* df_dy, double* df_dz, double alpha) { auto J = edg.jacobians.data(); auto Dtex = edg.differentiation_matrices.get_texture(); @@ -144,42 +148,42 @@ launch_deriv_kernel(entity_data_gpu& edg, if (edg.g_order == 1) gpu_deriv_planar<K><<<num_blocks, KS::deriv_threads>>>(f, J, - Dtex, df_dx, df_dy, df_dz, num_elems, orients, edg.dof_base); + Dtex, df_dx, df_dy, df_dz, alpha, num_elems, orients, edg.dof_base); //else // compute_field_derivatives_kernel_curved<1>(ed, f, df_dx, df_dy, df_dz); #endif } void -gpu_compute_field_derivatives(entity_data_gpu& edg, - gpuTextureObject_t f, double *df_dx, double* df_dy, double* df_dz) +gpu_compute_field_derivatives(const entity_data_gpu& edg, + const double *f, double *df_dx, double* df_dy, double* df_dz, double alpha) { switch (edg.a_order) { case 1: - launch_deriv_kernel<1>(edg, f, df_dx, df_dy, df_dz); + launch_deriv_kernel<1>(edg, f, df_dx, df_dy, df_dz, alpha); break; case 2: - launch_deriv_kernel<2>(edg, f, df_dx, df_dy, df_dz); + launch_deriv_kernel<2>(edg, f, df_dx, df_dy, df_dz, alpha); break; case 3: - launch_deriv_kernel<3>(edg, f, df_dx, df_dy, df_dz); + launch_deriv_kernel<3>(edg, f, df_dx, df_dy, df_dz, alpha); break; case 4: - launch_deriv_kernel<4>(edg, f, df_dx, df_dy, df_dz); + launch_deriv_kernel<4>(edg, f, df_dx, df_dy, df_dz, alpha); break; case 5: - launch_deriv_kernel<5>(edg, f, df_dx, df_dy, df_dz); + launch_deriv_kernel<5>(edg, f, df_dx, df_dy, df_dz, alpha); break; case 6: - launch_deriv_kernel<6>(edg, f, df_dx, df_dy, df_dz); + launch_deriv_kernel<6>(edg, f, df_dx, df_dy, df_dz, alpha); break; default: @@ -193,6 +197,12 @@ gpu_lift_planar(const double *flux, gpuTextureObject_t LM_tex, const double * __restrict__ dets, double * __restrict__ lifted_flux, int32_t num_all_elems, int32_t* orients, int32_t dof_base, int32_t flux_base) { + /* This kernel saturates the texture cache bandwidth (~1 TB/s on K20x!!) + * and thus it does not achieve good performance. This happens because + * the lifting matrix entries are not reused sufficiently. Sufficient + * reuse should be achieved by using a single entry to multiply different + * rhs instead of only one at a time. Orientations however make the + * implementation complicated, so for now the kernel remains slow. */ using KS = kernel_gpu_sizes<K>; /* One thread per *output* dof */ @@ -277,3 +287,23 @@ gpu_compute_flux_lifting(entity_data_gpu& edg, const double *f, double *out) throw std::invalid_argument("compute_field_derivatives: invalid order"); } } + +void __global__ +gpu_subtract_kernel(double * __restrict__ dst, const double * __restrict__ add, + const double * __restrict__ sub, size_t num_dofs) +{ + int32_t dof = blockIdx.x * blockDim.x + threadIdx.x; + if (dof < num_dofs) + dst[dof] = add[dof] - sub[dof]; +} + +void +gpu_subtract(double *dst, const double *add, const double *sub, size_t num_dofs) +{ + static const size_t SUBTRACT_THREADS = 256; + auto gs = num_dofs/SUBTRACT_THREADS; + if (num_dofs % SUBTRACT_THREADS != 0) + gs += 1; + + gpu_subtract_kernel<<<gs, SUBTRACT_THREADS>>>(dst, add, sub, num_dofs); +} \ No newline at end of file diff --git a/src/maxwell_interface.cpp b/src/maxwell_interface.cpp new file mode 100644 index 0000000000000000000000000000000000000000..76b732ca6389bff7943f8edbbf8a4f1e95c115f0 --- /dev/null +++ b/src/maxwell_interface.cpp @@ -0,0 +1,77 @@ +#include "maxwell_interface.h" + +void +electromagnetic_field::resize(size_t p_num_dofs) +{ + Ex = vecxd::Zero(p_num_dofs); + Ey = vecxd::Zero(p_num_dofs); + Ez = vecxd::Zero(p_num_dofs); + Hx = vecxd::Zero(p_num_dofs); + Hy = vecxd::Zero(p_num_dofs); + Hz = vecxd::Zero(p_num_dofs); + num_dofs = p_num_dofs; +} + +void +electromagnetic_field_gpu::zero() +{ + Ex.zero(); + Ey.zero(); + Ez.zero(); + Hx.zero(); + Hy.zero(); + Hz.zero(); +} + +void +electromagnetic_field_gpu::resize(size_t p_num_dofs) +{ + Ex.resize(p_num_dofs); + Ey.resize(p_num_dofs); + Ez.resize(p_num_dofs); + Hx.resize(p_num_dofs); + Hy.resize(p_num_dofs); + Hz.resize(p_num_dofs); + num_dofs = p_num_dofs; +} + +electromagnetic_field_gpu::raw_ptrs +electromagnetic_field_gpu::data(void) +{ + raw_ptrs ret; + + ret.num_dofs = num_dofs; + ret.Ex = Ex.data(); + ret.Ey = Ey.data(); + ret.Ez = Ez.data(); + ret.Hx = Hx.data(); + ret.Hy = Hy.data(); + ret.Hz = Hz.data(); + + return ret; +} + +void +electromagnetic_field_gpu::copyin(const electromagnetic_field& emf) +{ + Ex.copyin(emf.Ex.data(), emf.Ex.size()); + Ey.copyin(emf.Ey.data(), emf.Ey.size()); + Ez.copyin(emf.Ez.data(), emf.Ez.size()); + Hx.copyin(emf.Hx.data(), emf.Hx.size()); + Hy.copyin(emf.Hy.data(), emf.Hy.size()); + Hz.copyin(emf.Hz.data(), emf.Hz.size()); +} + +void +electromagnetic_field_gpu::copyout(electromagnetic_field& emf) +{ + if (num_dofs != emf.num_dofs) + emf.resize(num_dofs); + + Ex.copyout(emf.Ex.data()); + Ey.copyout(emf.Ey.data()); + Ez.copyout(emf.Ez.data()); + Hx.copyout(emf.Hx.data()); + Hy.copyout(emf.Hy.data()); + Hz.copyout(emf.Hz.data()); +} \ No newline at end of file diff --git a/src/maxwell_solver.cpp b/src/maxwell_solver.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ca3b15b78c7cc7012948a9a623131214b0ca3350 --- /dev/null +++ b/src/maxwell_solver.cpp @@ -0,0 +1,317 @@ +#include <iostream> + +#include "gmsh_io.h" +#include "maxwell_interface.h" +#include "kernels_cpu.h" +#include "kernels_gpu.h" +#include "silo_output.hpp" + +static void +make_geometry(int order, double mesh_h) +{ + gm::add("difftest"); + + std::vector<std::pair<int,int>> objects; + + objects.push_back( + std::make_pair(3, gmo::addBox(0.0, 0.0, 0.0, 1.0, 1.0, 0.5) ) + ); + objects.push_back( + std::make_pair(3, gmo::addBox(0.0, 0.0, 0.5, 0.5, 0.5, 0.5) ) + ); + + std::vector<std::pair<int, int>> tools; + gmsh::vectorpair odt; + std::vector<gmsh::vectorpair> odtm; + gmo::fragment(objects, tools, odt, odtm); + + gmo::synchronize(); + + gvp_t vp; + gm::getEntities(vp); + gmm::setSize(vp, mesh_h); +} + +struct solver_state +{ + std::vector<entity_data_cpu> eds; + electromagnetic_field emf_curr; + electromagnetic_field emf_next; + electromagnetic_material_params matparams; + + double delta_t; + double curr_time; + size_t curr_timestep; + + electromagnetic_field dx; + electromagnetic_field dy; + electromagnetic_field dz; +}; + +struct solver_state_gpu +{ + std::vector<entity_data_cpu> eds; + std::vector<entity_data_gpu> edgs; + electromagnetic_field_gpu emf_curr; + electromagnetic_field_gpu emf_next; + electromagnetic_material_params matparams; + + double delta_t; + double curr_time; + size_t curr_timestep; + + electromagnetic_field_gpu dx; + electromagnetic_field_gpu dy; + electromagnetic_field_gpu dz; +}; + +void init_from_model(const model& mod, solver_state& state) +{ + state.emf_curr.resize( mod.num_dofs() ); + state.emf_next.resize( mod.num_dofs() ); + state.dx.resize( mod.num_dofs() ); + state.dy.resize( mod.num_dofs() ); + state.dz.resize( mod.num_dofs() ); + + auto E = [](const point_3d& pt) -> vec3d { + vec3d ret; + ret(0) = std::sin(M_PI * pt.y()); + ret(1) = 0; + ret(2) = 0; + return ret; + }; + + for (auto& e : mod) + { + entity_data_cpu ed; + e.populate_entity_data(ed); + state.eds.push_back( std::move(ed) ); + e.project(E, state.emf_curr.Ex, state.emf_curr.Ey, state.emf_curr.Ez); + } +} + +void init_from_model(const model& mod, solver_state_gpu& state) +{ + state.emf_curr.resize( mod.num_dofs() ); + state.emf_next.resize( mod.num_dofs() ); + state.dx.resize( mod.num_dofs() ); + state.dy.resize( mod.num_dofs() ); + state.dz.resize( mod.num_dofs() ); + + electromagnetic_field emf_io; + emf_io.resize( mod.num_dofs() ); + + auto E = [](const point_3d& pt) -> vec3d { + vec3d ret; + ret(0) = std::sin(M_PI * pt.y()); + ret(1) = 0; + ret(2) = 0; + return ret; + }; + + + for (auto& e : mod) + { + entity_data_cpu ed; + e.populate_entity_data(ed); + e.project(E, emf_io.Ex, emf_io.Ey, emf_io.Ez); + entity_data_gpu edg(ed); + state.eds.push_back( std::move(ed) ); + state.edgs.push_back( std::move(edg) ); + } + + state.emf_curr.copyin(emf_io); +} + +void compute_curls(solver_state& state, const electromagnetic_field& curr, + electromagnetic_field& next) +{ + for (const auto& ed : state.eds) + { + compute_field_derivatives(ed, curr.Ex, state.dx.Ex, state.dy.Ex, state.dz.Ex); + compute_field_derivatives(ed, curr.Ey, state.dx.Ey, state.dy.Ey, state.dz.Ey); + compute_field_derivatives(ed, curr.Ez, state.dx.Ez, state.dy.Ez, state.dz.Ez); + compute_field_derivatives(ed, curr.Hx, state.dx.Hx, state.dy.Hx, state.dz.Hx); + compute_field_derivatives(ed, curr.Hy, state.dx.Hy, state.dy.Hy, state.dz.Hy); + compute_field_derivatives(ed, curr.Hz, state.dx.Hz, state.dy.Hz, state.dz.Hz); + } + + next.Ex = state.dy.Hz - state.dz.Hy; + next.Ey = state.dz.Hx - state.dx.Hz; + next.Ez = state.dx.Hy - state.dy.Hx; + next.Hx = state.dz.Ey - state.dy.Ez; + next.Hy = state.dx.Ez - state.dz.Ex; + next.Hz = state.dy.Ex - state.dx.Ey; +} + +void compute_curls(solver_state_gpu& state, const electromagnetic_field_gpu& curr, + electromagnetic_field_gpu& next) +{ + auto currp = curr.data(); + auto nextp = next.data(); + auto dxp = state.dx.data(); + auto dyp = state.dy.data(); + auto dzp = state.dz.data(); + + for (const auto& edg : state.edgs) + { + gpu_compute_field_derivatives(edg, currp.Ex, dxp.Ex, dyp.Ex, dzp.Ex, 1.0); + gpu_compute_field_derivatives(edg, currp.Ey, dxp.Ey, dyp.Ey, dzp.Ey, 1.0); + gpu_compute_field_derivatives(edg, currp.Ez, dxp.Ez, dyp.Ez, dzp.Ez, 1.0); + gpu_compute_field_derivatives(edg, currp.Hx, dxp.Hx, dyp.Hx, dzp.Hx, 1.0); + gpu_compute_field_derivatives(edg, currp.Hy, dxp.Hy, dyp.Hy, dzp.Hy, 1.0); + gpu_compute_field_derivatives(edg, currp.Hz, dxp.Hz, dyp.Hz, dzp.Hz, 1.0); + } + + + gpu_subtract(nextp.Ex, dyp.Hz, dzp.Hy, nextp.num_dofs); + gpu_subtract(nextp.Ey, dzp.Hx, dxp.Hz, nextp.num_dofs); + gpu_subtract(nextp.Ez, dxp.Hy, dyp.Hx, nextp.num_dofs); + gpu_subtract(nextp.Hx, dzp.Ey, dyp.Ez, nextp.num_dofs); + gpu_subtract(nextp.Hy, dxp.Ez, dzp.Ex, nextp.num_dofs); + gpu_subtract(nextp.Hz, dyp.Ex, dxp.Ey, nextp.num_dofs); +} + +void apply_operator(solver_state& state, const electromagnetic_field& curr, + electromagnetic_field& next) +{ + compute_curls(state, curr, next); +} + +void apply_operator(solver_state_gpu& state, const electromagnetic_field_gpu& curr, + electromagnetic_field_gpu& next) +{ + compute_curls(state, curr, next); +} + +void +export_to_silo(const model& mod, const solver_state& state, const std::string& filename) +{ + silo sdb; + sdb.create_db(filename); + sdb.import_mesh_from_gmsh(); + sdb.write_mesh(); + + std::vector<double> curr_Ex; curr_Ex.resize( mod.num_cells() ); + std::vector<double> curr_Ey; curr_Ey.resize( mod.num_cells() ); + std::vector<double> curr_Ez; curr_Ez.resize( mod.num_cells() ); + std::vector<double> curr_Hx; curr_Hx.resize( mod.num_cells() ); + std::vector<double> curr_Hy; curr_Hy.resize( mod.num_cells() ); + std::vector<double> curr_Hz; curr_Hz.resize( mod.num_cells() ); + + std::vector<double> next_Ex; next_Ex.resize( mod.num_cells() ); + std::vector<double> next_Ey; next_Ey.resize( mod.num_cells() ); + std::vector<double> next_Ez; next_Ez.resize( mod.num_cells() ); + std::vector<double> next_Hx; next_Hx.resize( mod.num_cells() ); + std::vector<double> next_Hy; next_Hy.resize( mod.num_cells() ); + std::vector<double> next_Hz; next_Hz.resize( mod.num_cells() ); + + for (size_t iE = 0; iE < mod.num_entities(); iE++) + { + auto& e = mod.entity_at(iE); + for (size_t iT = 0; iT < e.num_cells(); iT++) + { + auto& pe = e.cell(iT); + auto& re = e.cell_refelem(pe); + vecxd phi_bar = re.basis_functions({1./3., 1./3., 1./3.}); + auto num_bf = re.num_basis_functions(); + auto ofs = e.cell_global_dof_offset(iT); + auto gi = e.cell_global_index_by_gmsh(iT); + curr_Ex[gi] = state.emf_curr.Ex.segment(ofs, num_bf).dot(phi_bar); + curr_Ey[gi] = state.emf_curr.Ey.segment(ofs, num_bf).dot(phi_bar); + curr_Ez[gi] = state.emf_curr.Ez.segment(ofs, num_bf).dot(phi_bar); + curr_Hx[gi] = state.emf_curr.Hx.segment(ofs, num_bf).dot(phi_bar); + curr_Hy[gi] = state.emf_curr.Hy.segment(ofs, num_bf).dot(phi_bar); + curr_Hz[gi] = state.emf_curr.Hz.segment(ofs, num_bf).dot(phi_bar); + next_Ex[gi] = state.emf_next.Ex.segment(ofs, num_bf).dot(phi_bar); + next_Ey[gi] = state.emf_next.Ey.segment(ofs, num_bf).dot(phi_bar); + next_Ez[gi] = state.emf_next.Ez.segment(ofs, num_bf).dot(phi_bar); + next_Hx[gi] = state.emf_next.Hx.segment(ofs, num_bf).dot(phi_bar); + next_Hy[gi] = state.emf_next.Hy.segment(ofs, num_bf).dot(phi_bar); + next_Hz[gi] = state.emf_next.Hz.segment(ofs, num_bf).dot(phi_bar); + } + } + + sdb.write_zonal_variable("curr_Ex", curr_Ex); + sdb.write_zonal_variable("curr_Ey", curr_Ey); + sdb.write_zonal_variable("curr_Ez", curr_Ez); + sdb.write_zonal_variable("curr_Hx", curr_Hx); + sdb.write_zonal_variable("curr_Hy", curr_Hy); + sdb.write_zonal_variable("curr_Hz", curr_Hz); + //sdb.write_vector_expression("curr_E", "{curr_Ex, curr_Ey, curr_Ez}"); + //sdb.write_vector_expression("curr_H", "{curr_Hx, curr_Hy, curr_Hz}"); + sdb.write_zonal_variable("next_Ex", next_Ex); + sdb.write_zonal_variable("next_Ey", next_Ey); + sdb.write_zonal_variable("next_Ez", next_Ez); + sdb.write_zonal_variable("next_Hx", next_Hx); + sdb.write_zonal_variable("next_Hy", next_Hy); + sdb.write_zonal_variable("next_Hz", next_Hz); + //sdb.write_vector_expression("next_E", "{next_Ex, next_Ey, next_Ez}"); + //sdb.write_vector_expression("next_H", "{next_Hx, next_Hy, next_Hz}"); +} + +void +export_to_silo(const model& mod, const electromagnetic_field& emf, + const std::string& filename) +{ + silo sdb; + sdb.create_db(filename); + sdb.import_mesh_from_gmsh(); + sdb.write_mesh(); + + std::vector<double> Ex; Ex.resize( mod.num_cells() ); + std::vector<double> Ey; Ey.resize( mod.num_cells() ); + std::vector<double> Ez; Ez.resize( mod.num_cells() ); + std::vector<double> Hx; Hx.resize( mod.num_cells() ); + std::vector<double> Hy; Hy.resize( mod.num_cells() ); + std::vector<double> Hz; Hz.resize( mod.num_cells() ); + + for (size_t iE = 0; iE < mod.num_entities(); iE++) + { + auto& e = mod.entity_at(iE); + for (size_t iT = 0; iT < e.num_cells(); iT++) + { + auto& pe = e.cell(iT); + auto& re = e.cell_refelem(pe); + vecxd phi_bar = re.basis_functions({1./3., 1./3., 1./3.}); + auto num_bf = re.num_basis_functions(); + auto ofs = e.cell_global_dof_offset(iT); + auto gi = e.cell_global_index_by_gmsh(iT); + Ex[gi] = emf.Ex.segment(ofs, num_bf).dot(phi_bar); + Ey[gi] = emf.Ey.segment(ofs, num_bf).dot(phi_bar); + Ez[gi] = emf.Ez.segment(ofs, num_bf).dot(phi_bar); + Hx[gi] = emf.Hx.segment(ofs, num_bf).dot(phi_bar); + Hy[gi] = emf.Hy.segment(ofs, num_bf).dot(phi_bar); + Hz[gi] = emf.Hz.segment(ofs, num_bf).dot(phi_bar); + } + } + + sdb.write_zonal_variable("Ex", Ex); + sdb.write_zonal_variable("Ey", Ey); + sdb.write_zonal_variable("Ez", Ez); + sdb.write_zonal_variable("Hx", Hx); + sdb.write_zonal_variable("Hy", Hy); + sdb.write_zonal_variable("Hz", Hz); +} + +int main(int argc, const char *argv[]) +{ + gmsh::initialize(); + + make_geometry(1, 0.05); + + model mod(1,2); + solver_state_gpu state; + init_from_model(mod, state); + + apply_operator(state, state.emf_curr, state.emf_next); + + electromagnetic_field res; + state.emf_next.copyout(res); + + export_to_silo(mod, res, "test.silo"); + + //gmsh::finalize(); + + return 0; +} \ No newline at end of file diff --git a/src/silo_io.cpp b/src/silo_io.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b4ab382767cf083be49f65bcd6626ce4c7629066 --- /dev/null +++ b/src/silo_io.cpp @@ -0,0 +1,212 @@ +#include <iostream> +#include <cassert> + +#include "gmsh.h" +#include "silo_output.hpp" + +#define INVALID_OFS ((size_t) (~0)) + +silo::silo() +{} + +silo::~silo() +{ + close_db(); +} + +void +silo::gmsh_get_nodes(void) +{ + std::vector<size_t> nodeTags; + std::vector<double> coords; + std::vector<double> paraCoords; + + gmsh::model::mesh::getNodes(nodeTags, coords, paraCoords, -1, -1, true, false); + + auto maxtag_pos = std::max_element(nodeTags.begin(), nodeTags.end()); + + size_t maxtag = 0; + if (maxtag_pos != nodeTags.end()) + maxtag = (*maxtag_pos)+1; + + node_coords_x.resize( nodeTags.size() ); + node_coords_y.resize( nodeTags.size() ); + node_coords_z.resize( nodeTags.size() ); + + for (size_t i = 0; i < nodeTags.size(); i++) + { + node_coords_x[i] = coords[3*i+0]; + node_coords_y[i] = coords[3*i+1]; + node_coords_z[i] = coords[3*i+2]; + } + + node_tag2ofs.resize(maxtag, INVALID_OFS); + for (size_t i = 0; i < nodeTags.size(); i++) + node_tag2ofs.at( nodeTags[i] ) = i; +} + +void +silo::gmsh_get_elements() +{ + gmsh::vectorpair entities; + num_cells = 0; + gmsh::model::getEntities(entities, 3); + for (auto [dim, tag] : entities) + { + std::vector<int> elemTypes; + gmsh::model::mesh::getElementTypes(elemTypes, dim, tag); + for (auto& elemType : elemTypes) + { + std::vector<size_t> elemTags; + std::vector<size_t> elemNodeTags; + gmsh::model::mesh::getElementsByType(elemType, elemTags, elemNodeTags, tag); + auto nodesPerElem = elemNodeTags.size()/elemTags.size(); + assert( elemTags.size() * nodesPerElem == elemNodeTags.size() ); + + for (size_t i = 0; i < elemTags.size(); i++) + { + auto base = nodesPerElem * i; + + auto node0_tag = elemNodeTags[base + 0]; + assert(node0_tag < node_tag2ofs.size()); + auto node0_ofs = node_tag2ofs[node0_tag]; + assert(node0_ofs != INVALID_OFS); + nodelist.push_back(node0_ofs + 1); + + auto node1_tag = elemNodeTags[base + 1]; + assert(node1_tag < node_tag2ofs.size()); + auto node1_ofs = node_tag2ofs[node1_tag]; + assert(node1_ofs != INVALID_OFS); + nodelist.push_back(node1_ofs + 1); + + auto node2_tag = elemNodeTags[base + 2]; + assert(node2_tag < node_tag2ofs.size()); + auto node2_ofs = node_tag2ofs[node2_tag]; + assert(node2_ofs != INVALID_OFS); + nodelist.push_back(node2_ofs + 1); + + auto node3_tag = elemNodeTags[base + 3]; + assert(node3_tag < node_tag2ofs.size()); + auto node3_ofs = node_tag2ofs[node3_tag]; + assert(node3_ofs != INVALID_OFS); + nodelist.push_back(node3_ofs + 1); + } + + num_cells += elemTags.size(); + } + } + + assert(nodelist.size() == 4*num_cells); +} + +void +silo::import_mesh_from_gmsh() +{ + gmsh_get_nodes(); + gmsh_get_elements(); +} + +bool +silo::create_db(const std::string& dbname) +{ + m_siloDb = DBCreate(dbname.c_str(), DB_CLOBBER, DB_LOCAL, NULL, DB_HDF5); + if (m_siloDb) + return true; + + std::cout << "SILO: Error creating database" << std::endl; + return false; +} + +bool +silo::write_mesh(double time, int cycle) +{ + DBoptlist *optlist = nullptr; + + if (time >= 0) + { + optlist = DBMakeOptlist(2); + DBAddOption(optlist, DBOPT_CYCLE, &cycle); + DBAddOption(optlist, DBOPT_DTIME, &time); + } + + double *coords[] = {node_coords_x.data(), node_coords_y.data(), node_coords_z.data()}; + + int lnodelist = nodelist.size(); + + int shapetype[] = { DB_ZONETYPE_TET }; + int shapesize[] = {4}; + int shapecounts[] = { num_cells }; + int nshapetypes = 1; + int nnodes = node_coords_x.size(); + int nzones = num_cells; + int ndims = 3; + + DBPutZonelist2(m_siloDb, "zonelist", nzones, ndims, + nodelist.data(), lnodelist, 1, 0, 0, shapetype, shapesize, + shapecounts, nshapetypes, NULL); + + DBPutUcdmesh(m_siloDb, "mesh", ndims, NULL, coords, nnodes, nzones, + "zonelist", NULL, DB_DOUBLE, optlist); + + if (optlist) + DBFreeOptlist(optlist); + + return true; +} + +bool +silo::write_zonal_variable(const std::string& name, const std::vector<double>& data) +{ + if (data.size() != num_cells) + { + std::cout << "Invalid number of cells" << std::endl; + return false; + } + + DBPutUcdvar1(m_siloDb, name.c_str(), "mesh", data.data(), data.size(), NULL, + 0, DB_DOUBLE, DB_ZONECENT, NULL); + + return true; +} + +bool +silo::write_zonal_variable(const std::string& name, const std::vector<float>& data) +{ + if (data.size() != num_cells) + { + std::cout << "Invalid number of cells" << std::endl; + return false; + } + + DBPutUcdvar1(m_siloDb, name.c_str(), "mesh", data.data(), data.size(), NULL, + 0, DB_FLOAT, DB_ZONECENT, NULL); + + return true; +} + +bool +silo::write_scalar_expression(const std::string& name, const std::string& expression) +{ + const char *names[] = { name.c_str() }; + const char *defs[] = { expression.c_str() }; + int type = DB_VARTYPE_SCALAR; + DBPutDefvars(m_siloDb, "defvars", 1, names, &type, defs, nullptr); +} + +bool +silo::write_vector_expression(const std::string& name, const std::string& expression) +{ + const char *names[] = { name.c_str() }; + const char *defs[] = { expression.c_str() }; + int type = DB_VARTYPE_VECTOR; + DBPutDefvars(m_siloDb, "defvars", 1, names, &type, defs, nullptr); +} + +void +silo::close_db() +{ + if (m_siloDb) + DBClose(m_siloDb); + + m_siloDb = nullptr; +} \ No newline at end of file diff --git a/tests/profile_differentiation_gpu.cpp b/tests/profile_differentiation_gpu.cpp index 4ace3de2ac94e8afd3c2ea505ae795d9bd71cd5a..522e8824486592ad1eea6cadae3f293d2e260853 100644 --- a/tests/profile_differentiation_gpu.cpp +++ b/tests/profile_differentiation_gpu.cpp @@ -68,7 +68,7 @@ int profile_differentiation(int geometric_order, int approximation_order) timecounter_gpu tc; tc.tic(); gpu_compute_field_derivatives(edg, Pf_gpu.get_texture(), df_dx_gpu.data(), - df_dy_gpu.data(), df_dz_gpu.data()); + df_dy_gpu.data(), df_dz_gpu.data(), 1.0); double time = tc.toc(); vecxd df_dx_cpu = vecxd::Zero( Pf_cpu.size() ); @@ -82,7 +82,7 @@ int profile_differentiation(int geometric_order, int approximation_order) if (geometric_order == 1) { std::cout << "Kernel runtime: " << time << " seconds. Estimated performance: "; - double flops = 21*ed.num_bf*ed.num_bf*e0.num_cells(); + double flops = (21*ed.num_bf + 3)*ed.num_bf*e0.num_cells(); std::cout << flops/(1e9*time) << " GFlops/s" << std::endl; } else diff --git a/tests/test_differentiation_gpu.cpp b/tests/test_differentiation_gpu.cpp index 4431c0a1297518700015d0e60ba3dd22f25bad81..b848773d8bfa704c6d9b93508df56c63e3ce6e87 100644 --- a/tests/test_differentiation_gpu.cpp +++ b/tests/test_differentiation_gpu.cpp @@ -123,7 +123,7 @@ int test_differentiation_convergence(int geometric_order, int approximation_orde reshape_dofs(eds[i], edgs[i], Pf, Pf_reshaped, true); texture_allocator<double> Pf_gpu(Pf_reshaped.data(), Pf_reshaped.size()); #else - texture_allocator<double> Pf_gpu(Pf.data(), Pf.size()); + device_vector<double> Pf_gpu(Pf.data(), Pf.size()); #endif device_vector<double> df_dx_gpu(model_num_dofs_gpu); device_vector<double> df_dy_gpu(model_num_dofs_gpu); @@ -133,15 +133,15 @@ int test_differentiation_convergence(int geometric_order, int approximation_orde { timecounter_gpu tc; tc.tic(); - gpu_compute_field_derivatives(edg, Pf_gpu.get_texture(), - df_dx_gpu.data(), df_dy_gpu.data(), df_dz_gpu.data()); + gpu_compute_field_derivatives(edg, Pf_gpu.data(), + df_dx_gpu.data(), df_dy_gpu.data(), df_dz_gpu.data(), 1.0); double time = tc.toc(); auto num_cells = edg.num_all_elems; if (geometric_order == 1) { std::cout << "Kernel runtime: " << time << " seconds. Estimated performance: "; - double flops = 21*edg.num_bf*edg.num_bf*num_cells; + double flops = (21*edg.num_bf + 3)*edg.num_bf*num_cells; std::cout << flops/(1e9*time) << " GFlops/s" << std::endl; } else @@ -270,7 +270,7 @@ int main(void) std::cout << Bmagentafg << " *** TESTING: DIFFERENTIATION ***" << reset << std::endl; for (size_t go = 1; go < 2; go++) - for (size_t ao = go; ao < 5; ao++) + for (size_t ao = 1; ao < 7; ao++) failed_tests += test_differentiation_convergence(go, ao); return failed_tests;