Skip to content
Snippets Groups Projects
Commit a4cb746a authored by Matteo Cicuttin's avatar Matteo Cicuttin
Browse files

Maxwell almost there.

parent d7deccc9
No related branches found
No related tags found
No related merge requests found
......@@ -147,7 +147,7 @@ endif ()
if (COMPILER_IS_CLANG)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Weverything -Wno-c++98-compat -Wno-c++98-compat-pedantic -Wno-zero-as-null-pointer-constant -Wno-global-constructors -Wno-padded -Wno-shorten-64-to-32 -Wno-sign-conversion -Wno-old-style-cast -Wno-sign-compare -Wno-c99-extensions -Wno-extra-semi-stmt")
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wno-sign-compare")
endif()
set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g -fpermissive")
set(CMAKE_CXX_FLAGS_RELEASE "-O3 -g -DNDEBUG -fpermissive")
......@@ -243,6 +243,7 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include)
set(COMMON_SOURCES gmsh_io.cpp reference_element.cpp physical_element.cpp entity.cpp kernels_cpu.cpp connectivity.cpp entity_data.cpp)
set(LIBGMSHDG_SOURCES src/gmsh_io.cpp
src/silo_io.cpp
src/entity.cpp
src/reference_element.cpp
src/physical_element.cpp
......@@ -251,11 +252,15 @@ set(LIBGMSHDG_SOURCES src/gmsh_io.cpp
src/kernels_cpu.cpp
src/kernels_gpu_mi.cpp
src/kernels_cuda.cu
src/maxwell_interface.cpp
)
add_library(gmshdg SHARED ${LIBGMSHDG_SOURCES})
target_link_libraries(gmshdg ${LINK_LIBS})
add_executable(maxwell_solver src/maxwell_solver.cpp)
target_link_libraries(maxwell_solver gmshdg)
add_executable(test_basics tests/test_basics.cpp)
target_link_libraries(test_basics gmshdg)
......
......@@ -8,6 +8,7 @@
#include "entity_data.h"
using scalar_function = std::function<double(const point_3d&)>;
using vector_function = std::function<vec3d(const point_3d&)>;
class entity
{
......@@ -59,8 +60,12 @@ public:
/* add method to adjust offset in system!! */
double measure(void) const;
vecxd project(const scalar_function&) const;
//matxd project(const vector_function&) const;
void project(const scalar_function&, vecxd&) const;
void project(const vector_function&, vecxd&, vecxd&, vecxd&) const;
size_t num_cells(void) const;
size_t num_cell_orientations(void) const;
......
......@@ -60,6 +60,7 @@ public:
size_t num_dofs() const;
size_t num_fluxes() const;
size_t num_cells() const;
size_t num_entities() const;
std::vector<entity>::const_iterator begin() const;
std::vector<entity>::const_iterator end() const;
......@@ -67,7 +68,7 @@ public:
std::vector<entity>::iterator end();
entity& entity_at(size_t);
entity entity_at(size_t) const;
const entity& entity_at(size_t) const;
};
......@@ -148,82 +148,109 @@ fetch_tex(cudaTextureObject_t tex, int32_t ofs)
template<typename T>
class device_vector
{
T *d_vptr;
size_t vsize;
T * m_devptr;
size_t m_size;
public:
device_vector()
: d_vptr(nullptr), vsize(0)
: m_devptr(nullptr), m_size(0)
{}
device_vector(size_t size)
: d_vptr(nullptr), vsize(0)
: m_devptr(nullptr), m_size(0)
{
(void)checkGPU( gpuMalloc((void**)&d_vptr, size*sizeof(T)) );
(void)checkGPU( gpuMemset(d_vptr, 0, size*sizeof(T)) );
vsize = size;
(void)checkGPU( gpuMalloc((void**)&m_devptr, size*sizeof(T)) );
(void)checkGPU( gpuMemset(m_devptr, 0, size*sizeof(T)) );
m_size = size;
}
/* allocate memory and copyin data at ptr */
device_vector(const T *src, size_t size)
: d_vptr(nullptr), vsize(0)
: m_devptr(nullptr), m_size(0)
{
copyin(src, size);
}
device_vector(const device_vector& other)
{
(void)checkGPU( gpuMalloc((void**)&m_devptr, other.m_size*sizeof(T)) );
(void)checkGPU( gpuMemcpy(m_devptr, other.m_devptr, other.m_size*sizeof(T), gpuMemcpyDeviceToDevice) );
m_size = other.m_size;
}
device_vector(device_vector&& other)
{
m_devptr = other.m_devptr;
m_size = other.m_size;
other.m_devptr = nullptr;
other.m_size = 0;
}
void copyin(const T *src, size_t size)
{
if (d_vptr != nullptr)
(void) gpuFree(d_vptr);
if (m_devptr != nullptr)
(void) gpuFree(m_devptr);
(void)checkGPU( gpuMalloc((void**)&d_vptr, size*sizeof(T)) );
(void)checkGPU( gpuMemcpy(d_vptr, src, size*sizeof(T), gpuMemcpyHostToDevice) );
vsize = size;
(void)checkGPU( gpuMalloc((void**)&m_devptr, size*sizeof(T)) );
(void)checkGPU( gpuMemcpy(m_devptr, src, size*sizeof(T), gpuMemcpyHostToDevice) );
m_size = size;
}
void copyout(T *dst)
{
if (d_vptr == nullptr)
if (m_devptr == nullptr)
return;
(void)checkGPU( gpuMemcpy(dst, d_vptr, vsize*sizeof(T), gpuMemcpyDeviceToHost) );
(void)checkGPU( gpuMemcpy(dst, m_devptr, m_size*sizeof(T), gpuMemcpyDeviceToHost) );
}
void resize(size_t size)
{
if (d_vptr != nullptr)
(void) gpuFree(d_vptr);
if (size == m_size)
return;
(void)checkGPU( gpuMalloc((void**)&d_vptr, size*sizeof(T)) );
vsize = size;
}
if (m_devptr != nullptr)
(void) gpuFree(m_devptr);
device_vector(const device_vector&) = delete;
(void)checkGPU( gpuMalloc((void**)&m_devptr, size*sizeof(T)) );
m_size = size;
}
device_vector(device_vector&& other)
device_vector& operator=(const device_vector& other)
{
if ( (m_devptr != nullptr) or (m_size != other.m_size) )
{
d_vptr = other.d_vptr;
vsize = other.vsize;
other.d_vptr = nullptr;
other.vsize = 0;
(void) gpuFree(m_devptr);
(void) checkGPU( gpuMalloc((void**)&m_devptr, other.m_size*sizeof(T)) );
}
device_vector& operator=(const device_vector&) = delete;
(void) checkGPU( gpuMemcpy(m_devptr, other.m_devptr, other.m_size*sizeof(T), gpuMemcpyDeviceToDevice) );
m_size = other.m_size;
}
void zero(void)
{
(void) checkGPU( gpuMemset(m_devptr, 0, m_size*sizeof(T)) );
}
T* data()
{
return d_vptr;
return m_devptr;
}
const T* data() const
{
return m_devptr;
}
size_t size() const
{
return vsize;
return m_size;
}
~device_vector()
{
if (d_vptr)
(void)checkGPU(gpuFree(d_vptr));
if (m_devptr)
(void)checkGPU(gpuFree(m_devptr));
}
};
......
......@@ -143,8 +143,9 @@ void reshape_dofs(const entity_data_cpu&, const entity_data_gpu&, const vecxd&,
void
gpu_compute_field_derivatives(entity_data_gpu& edg,
gpuTextureObject_t F, double *dF_dx, double* dF_dy, double* dF_dz);
gpu_compute_field_derivatives(const entity_data_gpu& edg, const double *F,
double *dF_dx, double* dF_dy, double* dF_dz, double alpha);
void
gpu_compute_flux_lifting(entity_data_gpu& edg, const double *f, double *out);
\ No newline at end of file
void gpu_compute_flux_lifting(entity_data_gpu&, const double *, double *);
void gpu_subtract(double *, const double *, const double *, size_t);
\ No newline at end of file
#pragma once
#include "types.h"
#include "gpu_support.hpp"
struct electromagnetic_field
{
vecxd Ex;
vecxd Ey;
vecxd Ez;
vecxd Hx;
vecxd Hy;
vecxd Hz;
size_t num_dofs;
void resize(size_t);
};
struct electromagnetic_material_params
{
size_t num_dofs;
size_t num_fluxes;
vecxd inv_epsilon;
vecxd inv_mu;
vecxd sigma;
vecxd sigma_over_epsilon;
/* Lax-Milgram flux coefficients */
vecxd aE;
vecxd bE;
vecxd aH;
vecxd bH;
/* Boundary conditions coefficients */
vecxd bc_coeffs;
};
struct electromagnetic_material_params_gpu
{
size_t num_dofs;
size_t num_fluxes;
device_vector<double> inv_epsilon;
device_vector<double> inv_mu;
device_vector<double> sigma;
device_vector<double> sigma_over_epsilon;
/* Lax-Milgram flux coefficients */
device_vector<double> aE;
device_vector<double> bE;
device_vector<double> aH;
device_vector<double> bH;
/* Boundary conditions coefficients */
device_vector<double> bc_coeffs;
};
struct electromagnetic_field_gpu
{
device_vector<double> Ex;
device_vector<double> Ey;
device_vector<double> Ez;
device_vector<double> Hx;
device_vector<double> Hy;
device_vector<double> Hz;
size_t num_dofs;
struct raw_ptrs
{
size_t num_dofs;
double *Ex;
double *Ey;
double *Ez;
double *Hx;
double *Hy;
double *Hz;
};
void zero(void);
void resize(size_t);
raw_ptrs data(void);
void copyin(const electromagnetic_field&);
void copyout(electromagnetic_field&);
};
\ No newline at end of file
#pragma once
#include <cassert>
#include <vector>
#include <silo.h>
#include "gmsh.h"
#define INVALID_OFS ((size_t) (~0))
class silo
{
......@@ -18,169 +15,19 @@ class silo
DBfile * m_siloDb;
void gmsh_get_nodes()
{
std::vector<size_t> nodeTags;
std::vector<double> coords;
std::vector<double> paraCoords;
gmsh::model::mesh::getNodes(nodeTags, coords, paraCoords, -1, -1, true, false);
auto maxtag_pos = std::max_element(nodeTags.begin(), nodeTags.end());
size_t maxtag = 0;
if (maxtag_pos != nodeTags.end())
maxtag = (*maxtag_pos)+1;
node_coords_x.resize( nodeTags.size() );
node_coords_y.resize( nodeTags.size() );
node_coords_z.resize( nodeTags.size() );
for (size_t i = 0; i < nodeTags.size(); i++)
{
node_coords_x[i] = coords[3*i+0];
node_coords_y[i] = coords[3*i+1];
node_coords_z[i] = coords[3*i+2];
}
node_tag2ofs.resize(maxtag, INVALID_OFS);
for (size_t i = 0; i < nodeTags.size(); i++)
node_tag2ofs.at( nodeTags[i] ) = i;
}
void gmsh_get_elements()
{
gmsh::vectorpair entities;
num_cells = 0;
gmsh::model::getEntities(entities, 3);
for (auto [dim, tag] : entities)
{
std::vector<int> elemTypes;
gmsh::model::mesh::getElementTypes(elemTypes, dim, tag);
for (auto& elemType : elemTypes)
{
std::vector<size_t> elemTags;
std::vector<size_t> elemNodeTags;
gmsh::model::mesh::getElementsByType(elemType, elemTags, elemNodeTags, tag);
auto nodesPerElem = elemNodeTags.size()/elemTags.size();
assert( elemTags.size() * nodesPerElem == elemNodeTags.size() );
for (size_t i = 0; i < elemTags.size(); i++)
{
auto base = nodesPerElem * i;
auto node0_tag = elemNodeTags[base + 0];
assert(node0_tag < node_tag2ofs.size());
auto node0_ofs = node_tag2ofs[node0_tag];
assert(node0_ofs != INVALID_OFS);
nodelist.push_back(node0_ofs + 1);
auto node1_tag = elemNodeTags[base + 1];
assert(node1_tag < node_tag2ofs.size());
auto node1_ofs = node_tag2ofs[node1_tag];
assert(node1_ofs != INVALID_OFS);
nodelist.push_back(node1_ofs + 1);
auto node2_tag = elemNodeTags[base + 2];
assert(node2_tag < node_tag2ofs.size());
auto node2_ofs = node_tag2ofs[node2_tag];
assert(node2_ofs != INVALID_OFS);
nodelist.push_back(node2_ofs + 1);
auto node3_tag = elemNodeTags[base + 3];
assert(node3_tag < node_tag2ofs.size());
auto node3_ofs = node_tag2ofs[node3_tag];
assert(node3_ofs != INVALID_OFS);
nodelist.push_back(node3_ofs + 1);
}
num_cells += elemTags.size();
}
}
assert(nodelist.size() == 4*num_cells);
}
void gmsh_get_nodes(void);
void gmsh_get_elements(void);
public:
silo()
{}
void import_mesh_from_gmsh()
{
gmsh_get_nodes();
gmsh_get_elements();
}
bool create_db(const std::string& dbname)
{
m_siloDb = DBCreate(dbname.c_str(), DB_CLOBBER, DB_LOCAL, NULL, DB_HDF5);
if (m_siloDb)
return true;
std::cout << "SILO: Error creating database" << std::endl;
return false;
}
bool write_mesh(double time = -1.0, int cycle = 0)
{
DBoptlist *optlist = nullptr;
if (time >= 0)
{
optlist = DBMakeOptlist(2);
DBAddOption(optlist, DBOPT_CYCLE, &cycle);
DBAddOption(optlist, DBOPT_DTIME, &time);
}
double *coords[] = {node_coords_x.data(), node_coords_y.data(), node_coords_z.data()};
int lnodelist = nodelist.size();
int shapetype[] = { DB_ZONETYPE_TET };
int shapesize[] = {4};
int shapecounts[] = { num_cells };
int nshapetypes = 1;
int nnodes = node_coords_x.size();
int nzones = num_cells;
int ndims = 3;
DBPutZonelist2(m_siloDb, "zonelist", nzones, ndims,
nodelist.data(), lnodelist, 1, 0, 0, shapetype, shapesize,
shapecounts, nshapetypes, NULL);
DBPutUcdmesh(m_siloDb, "mesh", ndims, NULL, coords, nnodes, nzones,
"zonelist", NULL, DB_DOUBLE, optlist);
if (optlist)
DBFreeOptlist(optlist);
return true;
}
bool write_zonal_variable(const std::string& name, const std::vector<double>& data)
{
if (data.size() != num_cells)
{
std::cout << "Invalid number of cells" << std::endl;
return false;
}
DBPutUcdvar1(m_siloDb, name.c_str(), "mesh", data.data(), data.size(), NULL,
0, DB_DOUBLE, DB_ZONECENT, NULL);
return true;
}
void close_db()
{
if (m_siloDb)
DBClose(m_siloDb);
m_siloDb = nullptr;
}
~silo()
{
close_db();
}
silo();
~silo();
void import_mesh_from_gmsh();
bool create_db(const std::string&);
bool write_mesh(double time = -1.0, int cycle = 0);
bool write_zonal_variable(const std::string&, const std::vector<double>&);
bool write_zonal_variable(const std::string&, const std::vector<float>&);
bool write_scalar_expression(const std::string& name, const std::string& expression);
bool write_vector_expression(const std::string& name, const std::string& expression);
void close_db();
};
......@@ -126,6 +126,45 @@ entity::project(const scalar_function& function, vecxd& pf) const
}
}
/* Project a function on this entity and store the partial result in
* the global vector pf at the offset corresponding to this entity. */
void
entity::project(const vector_function& function, vecxd& pfx, vecxd& pfy, vecxd& pfz) const
{
auto num_bf = reference_cells[0].num_basis_functions();
for (size_t iT = 0; iT < physical_cells.size(); iT++)
{
const auto& pe = physical_cells[iT];
assert( pe.orientation() < reference_cells.size() );
const auto& re = reference_cells[pe.orientation()];
const auto pqps = pe.integration_points();
const auto dets = pe.determinants();
assert(pqps.size() == dets.size());
matxd mass = matxd::Zero(num_bf, num_bf);
vecxd rhs_x = vecxd::Zero(num_bf);
vecxd rhs_y = vecxd::Zero(num_bf);
vecxd rhs_z = vecxd::Zero(num_bf);
for (size_t iQp = 0; iQp < pqps.size(); iQp++)
{
const auto& pqp = pqps[iQp];
const vecxd phi = re.basis_functions(iQp);
mass += pqp.weight() * phi * phi.transpose();
vec3d fval = function(pqp.point());
rhs_x += pqp.weight() * fval(0) * phi;
rhs_y += pqp.weight() * fval(1) * phi;
rhs_z += pqp.weight() * fval(2) * phi;
}
Eigen::LDLT<matxd> mass_ldlt(mass);
pfx.segment(m_dof_base + iT*num_bf, num_bf) = mass_ldlt.solve(rhs_x);
pfy.segment(m_dof_base + iT*num_bf, num_bf) = mass_ldlt.solve(rhs_y);
pfz.segment(m_dof_base + iT*num_bf, num_bf) = mass_ldlt.solve(rhs_z);
}
}
const reference_element&
entity::cell_refelem(size_t iO) const
{
......
......@@ -117,6 +117,12 @@ model::entity_at(size_t pos)
return entities.at(pos);
}
const entity&
model::entity_at(size_t pos) const
{
return entities.at(pos);
}
size_t
model::num_dofs(void) const
{
......@@ -147,6 +153,12 @@ model::num_cells(void) const
return ret;
}
size_t
model::num_entities(void) const
{
return entities.size();
}
void
model::update_connectivity(const entity& e, size_t entity_index)
{
......
#include "entity_data.h"
#include "kernels_gpu.h"
/* compute α∇F */
template<size_t K>
__global__ void
gpu_deriv_planar(gpuTextureObject_t F, const double * __restrict__ J,
gpu_deriv_planar(const double *F, const double * __restrict__ J,
gpuTextureObject_t DM_tex, double * __restrict__ dF_dx,
double * __restrict__ dF_dy, double * __restrict__ dF_dz,
int32_t num_all_elems, int32_t* orients, int32_t dof_base)
double alpha, int32_t num_all_elems, const int32_t* orients,
int32_t dof_base)
{
using KS = kernel_gpu_sizes<K>;
int ofs_in_entity = (blockIdx.x * blockDim.x + threadIdx.x);
const int32_t ofs_in_entity = (blockIdx.x * blockDim.x + threadIdx.x);
if (ofs_in_entity >= num_all_elems*KS::num_bf)
return;
int32_t output_dof_offset = dof_base + ofs_in_entity;
const int32_t output_dof_offset = dof_base + ofs_in_entity;
int32_t iT = ofs_in_entity / KS::num_bf;
int32_t elem_dof_base = dof_base + iT*KS::num_bf;
int32_t iO = orients[iT];
int32_t DM_row = ofs_in_entity % KS::num_bf;
int32_t DM_orient = 3*KS::num_bf*KS::num_bf*iO;
const int32_t iT = ofs_in_entity / KS::num_bf;
const int32_t elem_dof_base = dof_base + iT*KS::num_bf;
const int32_t iO = orients[iT];
const int32_t DM_row = ofs_in_entity % KS::num_bf;
const int32_t DM_orient = 3*KS::num_bf*KS::num_bf*iO;
const int32_t DM_base = DM_orient + DM_row;
const int32_t jac_ofs = 9*iT;
double accm_dF_dx = 0.0;
double accm_dF_dy = 0.0;
double accm_dF_dz = 0.0;
int32_t jac_ofs = 9*iT;
for (int32_t dof = 0; dof < KS::num_bf; dof++)
{
int32_t d_ofs = DM_orient + DM_row + 3*KS::num_bf*dof;
int32_t d_ofs = DM_base + 3*KS::num_bf*dof;
int32_t f_ofs = elem_dof_base + dof;
double v = fetch_tex(DM_tex, d_ofs) * fetch_tex(F, f_ofs);
double v = fetch_tex(DM_tex, d_ofs) * F[f_ofs];//fetch_tex(F, f_ofs);
accm_dF_dx += J[jac_ofs+0] * v;
accm_dF_dy += J[jac_ofs+3] * v;
accm_dF_dz += J[jac_ofs+6] * v;
d_ofs = DM_orient + DM_row + 3*KS::num_bf*dof + KS::num_bf;
v = fetch_tex(DM_tex, d_ofs) * fetch_tex(F, f_ofs);
d_ofs = DM_base + 3*KS::num_bf*dof + KS::num_bf;
v = fetch_tex(DM_tex, d_ofs) * F[f_ofs];//fetch_tex(F, f_ofs);
accm_dF_dx += J[jac_ofs+1] * v;
accm_dF_dy += J[jac_ofs+4] * v;
accm_dF_dz += J[jac_ofs+7] * v;
d_ofs = DM_orient + DM_row + 3*KS::num_bf*dof + 2*KS::num_bf;
v = fetch_tex(DM_tex, d_ofs) * fetch_tex(F, f_ofs);
d_ofs = DM_base + 3*KS::num_bf*dof + 2*KS::num_bf;
v = fetch_tex(DM_tex, d_ofs) * F[f_ofs];//fetch_tex(F, f_ofs);
accm_dF_dx += J[jac_ofs+2] * v;
accm_dF_dy += J[jac_ofs+5] * v;
accm_dF_dz += J[jac_ofs+8] * v;
}
dF_dx[output_dof_offset] = accm_dF_dx;
dF_dy[output_dof_offset] = accm_dF_dy;
dF_dz[output_dof_offset] = accm_dF_dz;
dF_dx[output_dof_offset] = alpha*accm_dF_dx;
dF_dy[output_dof_offset] = alpha*accm_dF_dy;
dF_dz[output_dof_offset] = alpha*accm_dF_dz;
}
template<size_t K>
......@@ -64,43 +67,44 @@ gpu_deriv_planar_blocked(gpuTextureObject_t F, const double * __restrict__ J,
if (threadIdx.x >= KS::dofs_per_dblock)
return;
int32_t elem_base = (blockIdx.y * blockDim.y + threadIdx.y) * KS::cells_per_dblock;
int32_t elem_ofs = threadIdx.x / KS::num_bf;
int32_t iT = elem_base + elem_ofs;
const int32_t y_idx = (blockIdx.y * blockDim.y + threadIdx.y);
const int32_t elem_base = y_idx * KS::cells_per_dblock;
const int32_t elem_ofs = threadIdx.x / KS::num_bf;
const int32_t iT = elem_base + elem_ofs;
if (iT >= num_all_elems)
return;
int32_t block_dof_base = (blockIdx.y * blockDim.y + threadIdx.y) * KS::dblock_size;
const int32_t block_dof_base = y_idx * KS::dblock_size;
int32_t output_dof_offset = dof_base + block_dof_base + threadIdx.x;
const int32_t output_dof_offset = dof_base + block_dof_base + threadIdx.x;
int32_t elem_dof_base = dof_base + block_dof_base + elem_ofs*KS::num_bf;
int32_t iO = orients[iT];
int32_t DM_row = threadIdx.x % KS::num_bf;
int32_t DM_orient = 3*KS::num_bf*KS::num_bf*iO;
const int32_t elem_dof_base = dof_base + block_dof_base + elem_ofs*KS::num_bf;
const int32_t iO = orients[iT];
const int32_t DM_row = threadIdx.x % KS::num_bf;
const int32_t DM_orient = 3*KS::num_bf*KS::num_bf*iO;
const int32_t DM_base = DM_orient + DM_row;
const int32_t jac_ofs = 9*iT;
double accm_dF_dx = 0.0;
double accm_dF_dy = 0.0;
double accm_dF_dz = 0.0;
int32_t jac_ofs = 9*iT;
for (int32_t dof = 0; dof < KS::num_bf; dof++)
{
int32_t d_ofs = DM_orient + DM_row + 3*KS::num_bf*dof;
int32_t d_ofs = DM_base + 3*KS::num_bf*dof;
int32_t f_ofs = elem_dof_base + dof;
double v = fetch_tex(DM_tex, d_ofs) * fetch_tex(F, f_ofs);
accm_dF_dx += J[jac_ofs+0] * v;
accm_dF_dy += J[jac_ofs+3] * v;
accm_dF_dz += J[jac_ofs+6] * v;
d_ofs = DM_orient + DM_row + 3*KS::num_bf*dof + KS::num_bf;
d_ofs = DM_base + 3*KS::num_bf*dof + KS::num_bf;
v = fetch_tex(DM_tex, d_ofs) * fetch_tex(F, f_ofs);
accm_dF_dx += J[jac_ofs+1] * v;
accm_dF_dy += J[jac_ofs+4] * v;
accm_dF_dz += J[jac_ofs+7] * v;
d_ofs = DM_orient + DM_row + 3*KS::num_bf*dof + 2*KS::num_bf;
d_ofs = DM_base + 3*KS::num_bf*dof + 2*KS::num_bf;
v = fetch_tex(DM_tex, d_ofs) * fetch_tex(F, f_ofs);
accm_dF_dx += J[jac_ofs+2] * v;
accm_dF_dy += J[jac_ofs+5] * v;
......@@ -114,8 +118,8 @@ gpu_deriv_planar_blocked(gpuTextureObject_t F, const double * __restrict__ J,
template<size_t K>
void
launch_deriv_kernel(entity_data_gpu& edg,
gpuTextureObject_t f, double *df_dx, double* df_dy, double* df_dz)
launch_deriv_kernel(const entity_data_gpu& edg,
const double *f, double *df_dx, double* df_dy, double* df_dz, double alpha)
{
auto J = edg.jacobians.data();
auto Dtex = edg.differentiation_matrices.get_texture();
......@@ -144,42 +148,42 @@ launch_deriv_kernel(entity_data_gpu& edg,
if (edg.g_order == 1)
gpu_deriv_planar<K><<<num_blocks, KS::deriv_threads>>>(f, J,
Dtex, df_dx, df_dy, df_dz, num_elems, orients, edg.dof_base);
Dtex, df_dx, df_dy, df_dz, alpha, num_elems, orients, edg.dof_base);
//else
// compute_field_derivatives_kernel_curved<1>(ed, f, df_dx, df_dy, df_dz);
#endif
}
void
gpu_compute_field_derivatives(entity_data_gpu& edg,
gpuTextureObject_t f, double *df_dx, double* df_dy, double* df_dz)
gpu_compute_field_derivatives(const entity_data_gpu& edg,
const double *f, double *df_dx, double* df_dy, double* df_dz, double alpha)
{
switch (edg.a_order)
{
case 1:
launch_deriv_kernel<1>(edg, f, df_dx, df_dy, df_dz);
launch_deriv_kernel<1>(edg, f, df_dx, df_dy, df_dz, alpha);
break;
case 2:
launch_deriv_kernel<2>(edg, f, df_dx, df_dy, df_dz);
launch_deriv_kernel<2>(edg, f, df_dx, df_dy, df_dz, alpha);
break;
case 3:
launch_deriv_kernel<3>(edg, f, df_dx, df_dy, df_dz);
launch_deriv_kernel<3>(edg, f, df_dx, df_dy, df_dz, alpha);
break;
case 4:
launch_deriv_kernel<4>(edg, f, df_dx, df_dy, df_dz);
launch_deriv_kernel<4>(edg, f, df_dx, df_dy, df_dz, alpha);
break;
case 5:
launch_deriv_kernel<5>(edg, f, df_dx, df_dy, df_dz);
launch_deriv_kernel<5>(edg, f, df_dx, df_dy, df_dz, alpha);
break;
case 6:
launch_deriv_kernel<6>(edg, f, df_dx, df_dy, df_dz);
launch_deriv_kernel<6>(edg, f, df_dx, df_dy, df_dz, alpha);
break;
default:
......@@ -193,6 +197,12 @@ gpu_lift_planar(const double *flux, gpuTextureObject_t LM_tex,
const double * __restrict__ dets, double * __restrict__ lifted_flux,
int32_t num_all_elems, int32_t* orients, int32_t dof_base, int32_t flux_base)
{
/* This kernel saturates the texture cache bandwidth (~1 TB/s on K20x!!)
* and thus it does not achieve good performance. This happens because
* the lifting matrix entries are not reused sufficiently. Sufficient
* reuse should be achieved by using a single entry to multiply different
* rhs instead of only one at a time. Orientations however make the
* implementation complicated, so for now the kernel remains slow. */
using KS = kernel_gpu_sizes<K>;
/* One thread per *output* dof */
......@@ -277,3 +287,23 @@ gpu_compute_flux_lifting(entity_data_gpu& edg, const double *f, double *out)
throw std::invalid_argument("compute_field_derivatives: invalid order");
}
}
void __global__
gpu_subtract_kernel(double * __restrict__ dst, const double * __restrict__ add,
const double * __restrict__ sub, size_t num_dofs)
{
int32_t dof = blockIdx.x * blockDim.x + threadIdx.x;
if (dof < num_dofs)
dst[dof] = add[dof] - sub[dof];
}
void
gpu_subtract(double *dst, const double *add, const double *sub, size_t num_dofs)
{
static const size_t SUBTRACT_THREADS = 256;
auto gs = num_dofs/SUBTRACT_THREADS;
if (num_dofs % SUBTRACT_THREADS != 0)
gs += 1;
gpu_subtract_kernel<<<gs, SUBTRACT_THREADS>>>(dst, add, sub, num_dofs);
}
\ No newline at end of file
#include "maxwell_interface.h"
void
electromagnetic_field::resize(size_t p_num_dofs)
{
Ex = vecxd::Zero(p_num_dofs);
Ey = vecxd::Zero(p_num_dofs);
Ez = vecxd::Zero(p_num_dofs);
Hx = vecxd::Zero(p_num_dofs);
Hy = vecxd::Zero(p_num_dofs);
Hz = vecxd::Zero(p_num_dofs);
num_dofs = p_num_dofs;
}
void
electromagnetic_field_gpu::zero()
{
Ex.zero();
Ey.zero();
Ez.zero();
Hx.zero();
Hy.zero();
Hz.zero();
}
void
electromagnetic_field_gpu::resize(size_t p_num_dofs)
{
Ex.resize(p_num_dofs);
Ey.resize(p_num_dofs);
Ez.resize(p_num_dofs);
Hx.resize(p_num_dofs);
Hy.resize(p_num_dofs);
Hz.resize(p_num_dofs);
num_dofs = p_num_dofs;
}
electromagnetic_field_gpu::raw_ptrs
electromagnetic_field_gpu::data(void)
{
raw_ptrs ret;
ret.num_dofs = num_dofs;
ret.Ex = Ex.data();
ret.Ey = Ey.data();
ret.Ez = Ez.data();
ret.Hx = Hx.data();
ret.Hy = Hy.data();
ret.Hz = Hz.data();
return ret;
}
void
electromagnetic_field_gpu::copyin(const electromagnetic_field& emf)
{
Ex.copyin(emf.Ex.data(), emf.Ex.size());
Ey.copyin(emf.Ey.data(), emf.Ey.size());
Ez.copyin(emf.Ez.data(), emf.Ez.size());
Hx.copyin(emf.Hx.data(), emf.Hx.size());
Hy.copyin(emf.Hy.data(), emf.Hy.size());
Hz.copyin(emf.Hz.data(), emf.Hz.size());
}
void
electromagnetic_field_gpu::copyout(electromagnetic_field& emf)
{
if (num_dofs != emf.num_dofs)
emf.resize(num_dofs);
Ex.copyout(emf.Ex.data());
Ey.copyout(emf.Ey.data());
Ez.copyout(emf.Ez.data());
Hx.copyout(emf.Hx.data());
Hy.copyout(emf.Hy.data());
Hz.copyout(emf.Hz.data());
}
\ No newline at end of file
#include <iostream>
#include "gmsh_io.h"
#include "maxwell_interface.h"
#include "kernels_cpu.h"
#include "kernels_gpu.h"
#include "silo_output.hpp"
static void
make_geometry(int order, double mesh_h)
{
gm::add("difftest");
std::vector<std::pair<int,int>> objects;
objects.push_back(
std::make_pair(3, gmo::addBox(0.0, 0.0, 0.0, 1.0, 1.0, 0.5) )
);
objects.push_back(
std::make_pair(3, gmo::addBox(0.0, 0.0, 0.5, 0.5, 0.5, 0.5) )
);
std::vector<std::pair<int, int>> tools;
gmsh::vectorpair odt;
std::vector<gmsh::vectorpair> odtm;
gmo::fragment(objects, tools, odt, odtm);
gmo::synchronize();
gvp_t vp;
gm::getEntities(vp);
gmm::setSize(vp, mesh_h);
}
struct solver_state
{
std::vector<entity_data_cpu> eds;
electromagnetic_field emf_curr;
electromagnetic_field emf_next;
electromagnetic_material_params matparams;
double delta_t;
double curr_time;
size_t curr_timestep;
electromagnetic_field dx;
electromagnetic_field dy;
electromagnetic_field dz;
};
struct solver_state_gpu
{
std::vector<entity_data_cpu> eds;
std::vector<entity_data_gpu> edgs;
electromagnetic_field_gpu emf_curr;
electromagnetic_field_gpu emf_next;
electromagnetic_material_params matparams;
double delta_t;
double curr_time;
size_t curr_timestep;
electromagnetic_field_gpu dx;
electromagnetic_field_gpu dy;
electromagnetic_field_gpu dz;
};
void init_from_model(const model& mod, solver_state& state)
{
state.emf_curr.resize( mod.num_dofs() );
state.emf_next.resize( mod.num_dofs() );
state.dx.resize( mod.num_dofs() );
state.dy.resize( mod.num_dofs() );
state.dz.resize( mod.num_dofs() );
auto E = [](const point_3d& pt) -> vec3d {
vec3d ret;
ret(0) = std::sin(M_PI * pt.y());
ret(1) = 0;
ret(2) = 0;
return ret;
};
for (auto& e : mod)
{
entity_data_cpu ed;
e.populate_entity_data(ed);
state.eds.push_back( std::move(ed) );
e.project(E, state.emf_curr.Ex, state.emf_curr.Ey, state.emf_curr.Ez);
}
}
void init_from_model(const model& mod, solver_state_gpu& state)
{
state.emf_curr.resize( mod.num_dofs() );
state.emf_next.resize( mod.num_dofs() );
state.dx.resize( mod.num_dofs() );
state.dy.resize( mod.num_dofs() );
state.dz.resize( mod.num_dofs() );
electromagnetic_field emf_io;
emf_io.resize( mod.num_dofs() );
auto E = [](const point_3d& pt) -> vec3d {
vec3d ret;
ret(0) = std::sin(M_PI * pt.y());
ret(1) = 0;
ret(2) = 0;
return ret;
};
for (auto& e : mod)
{
entity_data_cpu ed;
e.populate_entity_data(ed);
e.project(E, emf_io.Ex, emf_io.Ey, emf_io.Ez);
entity_data_gpu edg(ed);
state.eds.push_back( std::move(ed) );
state.edgs.push_back( std::move(edg) );
}
state.emf_curr.copyin(emf_io);
}
void compute_curls(solver_state& state, const electromagnetic_field& curr,
electromagnetic_field& next)
{
for (const auto& ed : state.eds)
{
compute_field_derivatives(ed, curr.Ex, state.dx.Ex, state.dy.Ex, state.dz.Ex);
compute_field_derivatives(ed, curr.Ey, state.dx.Ey, state.dy.Ey, state.dz.Ey);
compute_field_derivatives(ed, curr.Ez, state.dx.Ez, state.dy.Ez, state.dz.Ez);
compute_field_derivatives(ed, curr.Hx, state.dx.Hx, state.dy.Hx, state.dz.Hx);
compute_field_derivatives(ed, curr.Hy, state.dx.Hy, state.dy.Hy, state.dz.Hy);
compute_field_derivatives(ed, curr.Hz, state.dx.Hz, state.dy.Hz, state.dz.Hz);
}
next.Ex = state.dy.Hz - state.dz.Hy;
next.Ey = state.dz.Hx - state.dx.Hz;
next.Ez = state.dx.Hy - state.dy.Hx;
next.Hx = state.dz.Ey - state.dy.Ez;
next.Hy = state.dx.Ez - state.dz.Ex;
next.Hz = state.dy.Ex - state.dx.Ey;
}
void compute_curls(solver_state_gpu& state, const electromagnetic_field_gpu& curr,
electromagnetic_field_gpu& next)
{
auto currp = curr.data();
auto nextp = next.data();
auto dxp = state.dx.data();
auto dyp = state.dy.data();
auto dzp = state.dz.data();
for (const auto& edg : state.edgs)
{
gpu_compute_field_derivatives(edg, currp.Ex, dxp.Ex, dyp.Ex, dzp.Ex, 1.0);
gpu_compute_field_derivatives(edg, currp.Ey, dxp.Ey, dyp.Ey, dzp.Ey, 1.0);
gpu_compute_field_derivatives(edg, currp.Ez, dxp.Ez, dyp.Ez, dzp.Ez, 1.0);
gpu_compute_field_derivatives(edg, currp.Hx, dxp.Hx, dyp.Hx, dzp.Hx, 1.0);
gpu_compute_field_derivatives(edg, currp.Hy, dxp.Hy, dyp.Hy, dzp.Hy, 1.0);
gpu_compute_field_derivatives(edg, currp.Hz, dxp.Hz, dyp.Hz, dzp.Hz, 1.0);
}
gpu_subtract(nextp.Ex, dyp.Hz, dzp.Hy, nextp.num_dofs);
gpu_subtract(nextp.Ey, dzp.Hx, dxp.Hz, nextp.num_dofs);
gpu_subtract(nextp.Ez, dxp.Hy, dyp.Hx, nextp.num_dofs);
gpu_subtract(nextp.Hx, dzp.Ey, dyp.Ez, nextp.num_dofs);
gpu_subtract(nextp.Hy, dxp.Ez, dzp.Ex, nextp.num_dofs);
gpu_subtract(nextp.Hz, dyp.Ex, dxp.Ey, nextp.num_dofs);
}
void apply_operator(solver_state& state, const electromagnetic_field& curr,
electromagnetic_field& next)
{
compute_curls(state, curr, next);
}
void apply_operator(solver_state_gpu& state, const electromagnetic_field_gpu& curr,
electromagnetic_field_gpu& next)
{
compute_curls(state, curr, next);
}
void
export_to_silo(const model& mod, const solver_state& state, const std::string& filename)
{
silo sdb;
sdb.create_db(filename);
sdb.import_mesh_from_gmsh();
sdb.write_mesh();
std::vector<double> curr_Ex; curr_Ex.resize( mod.num_cells() );
std::vector<double> curr_Ey; curr_Ey.resize( mod.num_cells() );
std::vector<double> curr_Ez; curr_Ez.resize( mod.num_cells() );
std::vector<double> curr_Hx; curr_Hx.resize( mod.num_cells() );
std::vector<double> curr_Hy; curr_Hy.resize( mod.num_cells() );
std::vector<double> curr_Hz; curr_Hz.resize( mod.num_cells() );
std::vector<double> next_Ex; next_Ex.resize( mod.num_cells() );
std::vector<double> next_Ey; next_Ey.resize( mod.num_cells() );
std::vector<double> next_Ez; next_Ez.resize( mod.num_cells() );
std::vector<double> next_Hx; next_Hx.resize( mod.num_cells() );
std::vector<double> next_Hy; next_Hy.resize( mod.num_cells() );
std::vector<double> next_Hz; next_Hz.resize( mod.num_cells() );
for (size_t iE = 0; iE < mod.num_entities(); iE++)
{
auto& e = mod.entity_at(iE);
for (size_t iT = 0; iT < e.num_cells(); iT++)
{
auto& pe = e.cell(iT);
auto& re = e.cell_refelem(pe);
vecxd phi_bar = re.basis_functions({1./3., 1./3., 1./3.});
auto num_bf = re.num_basis_functions();
auto ofs = e.cell_global_dof_offset(iT);
auto gi = e.cell_global_index_by_gmsh(iT);
curr_Ex[gi] = state.emf_curr.Ex.segment(ofs, num_bf).dot(phi_bar);
curr_Ey[gi] = state.emf_curr.Ey.segment(ofs, num_bf).dot(phi_bar);
curr_Ez[gi] = state.emf_curr.Ez.segment(ofs, num_bf).dot(phi_bar);
curr_Hx[gi] = state.emf_curr.Hx.segment(ofs, num_bf).dot(phi_bar);
curr_Hy[gi] = state.emf_curr.Hy.segment(ofs, num_bf).dot(phi_bar);
curr_Hz[gi] = state.emf_curr.Hz.segment(ofs, num_bf).dot(phi_bar);
next_Ex[gi] = state.emf_next.Ex.segment(ofs, num_bf).dot(phi_bar);
next_Ey[gi] = state.emf_next.Ey.segment(ofs, num_bf).dot(phi_bar);
next_Ez[gi] = state.emf_next.Ez.segment(ofs, num_bf).dot(phi_bar);
next_Hx[gi] = state.emf_next.Hx.segment(ofs, num_bf).dot(phi_bar);
next_Hy[gi] = state.emf_next.Hy.segment(ofs, num_bf).dot(phi_bar);
next_Hz[gi] = state.emf_next.Hz.segment(ofs, num_bf).dot(phi_bar);
}
}
sdb.write_zonal_variable("curr_Ex", curr_Ex);
sdb.write_zonal_variable("curr_Ey", curr_Ey);
sdb.write_zonal_variable("curr_Ez", curr_Ez);
sdb.write_zonal_variable("curr_Hx", curr_Hx);
sdb.write_zonal_variable("curr_Hy", curr_Hy);
sdb.write_zonal_variable("curr_Hz", curr_Hz);
//sdb.write_vector_expression("curr_E", "{curr_Ex, curr_Ey, curr_Ez}");
//sdb.write_vector_expression("curr_H", "{curr_Hx, curr_Hy, curr_Hz}");
sdb.write_zonal_variable("next_Ex", next_Ex);
sdb.write_zonal_variable("next_Ey", next_Ey);
sdb.write_zonal_variable("next_Ez", next_Ez);
sdb.write_zonal_variable("next_Hx", next_Hx);
sdb.write_zonal_variable("next_Hy", next_Hy);
sdb.write_zonal_variable("next_Hz", next_Hz);
//sdb.write_vector_expression("next_E", "{next_Ex, next_Ey, next_Ez}");
//sdb.write_vector_expression("next_H", "{next_Hx, next_Hy, next_Hz}");
}
void
export_to_silo(const model& mod, const electromagnetic_field& emf,
const std::string& filename)
{
silo sdb;
sdb.create_db(filename);
sdb.import_mesh_from_gmsh();
sdb.write_mesh();
std::vector<double> Ex; Ex.resize( mod.num_cells() );
std::vector<double> Ey; Ey.resize( mod.num_cells() );
std::vector<double> Ez; Ez.resize( mod.num_cells() );
std::vector<double> Hx; Hx.resize( mod.num_cells() );
std::vector<double> Hy; Hy.resize( mod.num_cells() );
std::vector<double> Hz; Hz.resize( mod.num_cells() );
for (size_t iE = 0; iE < mod.num_entities(); iE++)
{
auto& e = mod.entity_at(iE);
for (size_t iT = 0; iT < e.num_cells(); iT++)
{
auto& pe = e.cell(iT);
auto& re = e.cell_refelem(pe);
vecxd phi_bar = re.basis_functions({1./3., 1./3., 1./3.});
auto num_bf = re.num_basis_functions();
auto ofs = e.cell_global_dof_offset(iT);
auto gi = e.cell_global_index_by_gmsh(iT);
Ex[gi] = emf.Ex.segment(ofs, num_bf).dot(phi_bar);
Ey[gi] = emf.Ey.segment(ofs, num_bf).dot(phi_bar);
Ez[gi] = emf.Ez.segment(ofs, num_bf).dot(phi_bar);
Hx[gi] = emf.Hx.segment(ofs, num_bf).dot(phi_bar);
Hy[gi] = emf.Hy.segment(ofs, num_bf).dot(phi_bar);
Hz[gi] = emf.Hz.segment(ofs, num_bf).dot(phi_bar);
}
}
sdb.write_zonal_variable("Ex", Ex);
sdb.write_zonal_variable("Ey", Ey);
sdb.write_zonal_variable("Ez", Ez);
sdb.write_zonal_variable("Hx", Hx);
sdb.write_zonal_variable("Hy", Hy);
sdb.write_zonal_variable("Hz", Hz);
}
int main(int argc, const char *argv[])
{
gmsh::initialize();
make_geometry(1, 0.05);
model mod(1,2);
solver_state_gpu state;
init_from_model(mod, state);
apply_operator(state, state.emf_curr, state.emf_next);
electromagnetic_field res;
state.emf_next.copyout(res);
export_to_silo(mod, res, "test.silo");
//gmsh::finalize();
return 0;
}
\ No newline at end of file
#include <iostream>
#include <cassert>
#include "gmsh.h"
#include "silo_output.hpp"
#define INVALID_OFS ((size_t) (~0))
silo::silo()
{}
silo::~silo()
{
close_db();
}
void
silo::gmsh_get_nodes(void)
{
std::vector<size_t> nodeTags;
std::vector<double> coords;
std::vector<double> paraCoords;
gmsh::model::mesh::getNodes(nodeTags, coords, paraCoords, -1, -1, true, false);
auto maxtag_pos = std::max_element(nodeTags.begin(), nodeTags.end());
size_t maxtag = 0;
if (maxtag_pos != nodeTags.end())
maxtag = (*maxtag_pos)+1;
node_coords_x.resize( nodeTags.size() );
node_coords_y.resize( nodeTags.size() );
node_coords_z.resize( nodeTags.size() );
for (size_t i = 0; i < nodeTags.size(); i++)
{
node_coords_x[i] = coords[3*i+0];
node_coords_y[i] = coords[3*i+1];
node_coords_z[i] = coords[3*i+2];
}
node_tag2ofs.resize(maxtag, INVALID_OFS);
for (size_t i = 0; i < nodeTags.size(); i++)
node_tag2ofs.at( nodeTags[i] ) = i;
}
void
silo::gmsh_get_elements()
{
gmsh::vectorpair entities;
num_cells = 0;
gmsh::model::getEntities(entities, 3);
for (auto [dim, tag] : entities)
{
std::vector<int> elemTypes;
gmsh::model::mesh::getElementTypes(elemTypes, dim, tag);
for (auto& elemType : elemTypes)
{
std::vector<size_t> elemTags;
std::vector<size_t> elemNodeTags;
gmsh::model::mesh::getElementsByType(elemType, elemTags, elemNodeTags, tag);
auto nodesPerElem = elemNodeTags.size()/elemTags.size();
assert( elemTags.size() * nodesPerElem == elemNodeTags.size() );
for (size_t i = 0; i < elemTags.size(); i++)
{
auto base = nodesPerElem * i;
auto node0_tag = elemNodeTags[base + 0];
assert(node0_tag < node_tag2ofs.size());
auto node0_ofs = node_tag2ofs[node0_tag];
assert(node0_ofs != INVALID_OFS);
nodelist.push_back(node0_ofs + 1);
auto node1_tag = elemNodeTags[base + 1];
assert(node1_tag < node_tag2ofs.size());
auto node1_ofs = node_tag2ofs[node1_tag];
assert(node1_ofs != INVALID_OFS);
nodelist.push_back(node1_ofs + 1);
auto node2_tag = elemNodeTags[base + 2];
assert(node2_tag < node_tag2ofs.size());
auto node2_ofs = node_tag2ofs[node2_tag];
assert(node2_ofs != INVALID_OFS);
nodelist.push_back(node2_ofs + 1);
auto node3_tag = elemNodeTags[base + 3];
assert(node3_tag < node_tag2ofs.size());
auto node3_ofs = node_tag2ofs[node3_tag];
assert(node3_ofs != INVALID_OFS);
nodelist.push_back(node3_ofs + 1);
}
num_cells += elemTags.size();
}
}
assert(nodelist.size() == 4*num_cells);
}
void
silo::import_mesh_from_gmsh()
{
gmsh_get_nodes();
gmsh_get_elements();
}
bool
silo::create_db(const std::string& dbname)
{
m_siloDb = DBCreate(dbname.c_str(), DB_CLOBBER, DB_LOCAL, NULL, DB_HDF5);
if (m_siloDb)
return true;
std::cout << "SILO: Error creating database" << std::endl;
return false;
}
bool
silo::write_mesh(double time, int cycle)
{
DBoptlist *optlist = nullptr;
if (time >= 0)
{
optlist = DBMakeOptlist(2);
DBAddOption(optlist, DBOPT_CYCLE, &cycle);
DBAddOption(optlist, DBOPT_DTIME, &time);
}
double *coords[] = {node_coords_x.data(), node_coords_y.data(), node_coords_z.data()};
int lnodelist = nodelist.size();
int shapetype[] = { DB_ZONETYPE_TET };
int shapesize[] = {4};
int shapecounts[] = { num_cells };
int nshapetypes = 1;
int nnodes = node_coords_x.size();
int nzones = num_cells;
int ndims = 3;
DBPutZonelist2(m_siloDb, "zonelist", nzones, ndims,
nodelist.data(), lnodelist, 1, 0, 0, shapetype, shapesize,
shapecounts, nshapetypes, NULL);
DBPutUcdmesh(m_siloDb, "mesh", ndims, NULL, coords, nnodes, nzones,
"zonelist", NULL, DB_DOUBLE, optlist);
if (optlist)
DBFreeOptlist(optlist);
return true;
}
bool
silo::write_zonal_variable(const std::string& name, const std::vector<double>& data)
{
if (data.size() != num_cells)
{
std::cout << "Invalid number of cells" << std::endl;
return false;
}
DBPutUcdvar1(m_siloDb, name.c_str(), "mesh", data.data(), data.size(), NULL,
0, DB_DOUBLE, DB_ZONECENT, NULL);
return true;
}
bool
silo::write_zonal_variable(const std::string& name, const std::vector<float>& data)
{
if (data.size() != num_cells)
{
std::cout << "Invalid number of cells" << std::endl;
return false;
}
DBPutUcdvar1(m_siloDb, name.c_str(), "mesh", data.data(), data.size(), NULL,
0, DB_FLOAT, DB_ZONECENT, NULL);
return true;
}
bool
silo::write_scalar_expression(const std::string& name, const std::string& expression)
{
const char *names[] = { name.c_str() };
const char *defs[] = { expression.c_str() };
int type = DB_VARTYPE_SCALAR;
DBPutDefvars(m_siloDb, "defvars", 1, names, &type, defs, nullptr);
}
bool
silo::write_vector_expression(const std::string& name, const std::string& expression)
{
const char *names[] = { name.c_str() };
const char *defs[] = { expression.c_str() };
int type = DB_VARTYPE_VECTOR;
DBPutDefvars(m_siloDb, "defvars", 1, names, &type, defs, nullptr);
}
void
silo::close_db()
{
if (m_siloDb)
DBClose(m_siloDb);
m_siloDb = nullptr;
}
\ No newline at end of file
......@@ -68,7 +68,7 @@ int profile_differentiation(int geometric_order, int approximation_order)
timecounter_gpu tc;
tc.tic();
gpu_compute_field_derivatives(edg, Pf_gpu.get_texture(), df_dx_gpu.data(),
df_dy_gpu.data(), df_dz_gpu.data());
df_dy_gpu.data(), df_dz_gpu.data(), 1.0);
double time = tc.toc();
vecxd df_dx_cpu = vecxd::Zero( Pf_cpu.size() );
......@@ -82,7 +82,7 @@ int profile_differentiation(int geometric_order, int approximation_order)
if (geometric_order == 1)
{
std::cout << "Kernel runtime: " << time << " seconds. Estimated performance: ";
double flops = 21*ed.num_bf*ed.num_bf*e0.num_cells();
double flops = (21*ed.num_bf + 3)*ed.num_bf*e0.num_cells();
std::cout << flops/(1e9*time) << " GFlops/s" << std::endl;
}
else
......
......@@ -123,7 +123,7 @@ int test_differentiation_convergence(int geometric_order, int approximation_orde
reshape_dofs(eds[i], edgs[i], Pf, Pf_reshaped, true);
texture_allocator<double> Pf_gpu(Pf_reshaped.data(), Pf_reshaped.size());
#else
texture_allocator<double> Pf_gpu(Pf.data(), Pf.size());
device_vector<double> Pf_gpu(Pf.data(), Pf.size());
#endif
device_vector<double> df_dx_gpu(model_num_dofs_gpu);
device_vector<double> df_dy_gpu(model_num_dofs_gpu);
......@@ -133,15 +133,15 @@ int test_differentiation_convergence(int geometric_order, int approximation_orde
{
timecounter_gpu tc;
tc.tic();
gpu_compute_field_derivatives(edg, Pf_gpu.get_texture(),
df_dx_gpu.data(), df_dy_gpu.data(), df_dz_gpu.data());
gpu_compute_field_derivatives(edg, Pf_gpu.data(),
df_dx_gpu.data(), df_dy_gpu.data(), df_dz_gpu.data(), 1.0);
double time = tc.toc();
auto num_cells = edg.num_all_elems;
if (geometric_order == 1)
{
std::cout << "Kernel runtime: " << time << " seconds. Estimated performance: ";
double flops = 21*edg.num_bf*edg.num_bf*num_cells;
double flops = (21*edg.num_bf + 3)*edg.num_bf*num_cells;
std::cout << flops/(1e9*time) << " GFlops/s" << std::endl;
}
else
......@@ -270,7 +270,7 @@ int main(void)
std::cout << Bmagentafg << " *** TESTING: DIFFERENTIATION ***" << reset << std::endl;
for (size_t go = 1; go < 2; go++)
for (size_t ao = go; ao < 5; ao++)
for (size_t ao = 1; ao < 7; ao++)
failed_tests += test_differentiation_convergence(go, ao);
return failed_tests;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment