2#ifndef PRECICE_NO_KOKKOS_KERNELS
4#include <Kokkos_Core.hpp>
33template <
typename RADIAL_BASIS_FUNCTION_T>
36 using RBF_T = RADIAL_BASIS_FUNCTION_T;
43 const std::vector<mesh::Vertex> ¢ers,
46 bool computeEvaluationOffline,
55 template <
typename... Args>
99template <
typename RADIAL_BASIS_FUNCTION_T>
103 const std::vector<mesh::Vertex> ¢ers,
104 double clusterRadius,
106 bool computeEvaluationOffline,
113 PRECICE_CHECK(RADIAL_BASIS_FUNCTION_T::isStrictlyPositiveDefinite(),
"batched solver is only available for positive definite basis functions, i.e., compact-polynomial functions and Gaussian.");
115 PRECICE_CHECK(!(inMesh->
vertices().empty() || outMesh->
vertices().empty()),
"One of the meshes in the batched solvers is empty, which is invalid.");
123 PRECICE_DEBUG(
"Using batched PU-RBF solver on executor \"{}\" for \"{}\" PU-RBF clusters in execution mode {}.", ginkgoParameter.
executor, centers.size(),
_computeEvaluationOffline ?
"\"minimal-compute\" (evaluation offline)" :
"\"minimal-memory\" (evaluation online)");
129 for (
int i = 0; i < inMesh->
nVertices(); ++i) {
132 for (
int i = 0; i < outMesh->
nVertices(); ++i) {
148 auto hostIn = Kokkos::create_mirror_view(
_inOffsets);
149 auto hostOut = Kokkos::create_mirror_view(
_outOffsets);
154 std::vector<VertexID> globalInIDs;
155 std::vector<VertexID> globalOutIDs;
162 std::uint64_t inCheck = 0;
163 std::uint64_t outCheck = 0;
166 const auto ¢er = centers[i];
169 auto inIDs = inMesh->
index().getVerticesInsideBox(center, clusterRadius);
171 std::uint64_t tmpIn = hostIn(i) + inIDs.size();
174 if constexpr (std::numeric_limits<offset_1d_type>::digits < std::numeric_limits<std::uint64_t>::digits) {
175 PRECICE_CHECK(tmpIn < std::numeric_limits<offset_1d_type>::max(),
176 "The selected integer precision for the (input) vector offsets (\"offset_1d_type\") overflow. You might want to change the precision specified in \"device/KokkosTypes.hpp\"");
178 if constexpr (std::numeric_limits<offset_2d_type>::digits < std::numeric_limits<std::uint64_t>::digits) {
179 inCheck +=
static_cast<std::uint64_t
>(inIDs.size() * inIDs.size());
180 PRECICE_CHECK(inCheck < std::numeric_limits<offset_2d_type>::max(),
181 "The selected integer precision for the (input) matrix offsets (\"offset_2d_type\") overflow. You might want to change the precision specified in \"device/KokkosTypes.hpp\"");
184 std::copy(inIDs.begin(), inIDs.end(), std::back_inserter(globalInIDs));
189 std::uint64_t tmpOut = hostOut(i) + outIDs.size();
192 if constexpr (std::numeric_limits<offset_1d_type>::digits < std::numeric_limits<std::uint64_t>::digits) {
193 PRECICE_CHECK(tmpOut < std::numeric_limits<offset_1d_type>::max(),
194 "The selected integer precision for the (output) vector offsets (\"offset_1d_type\") overflow. You might want to change the precision specified in \"device/KokkosTypes.hpp\"");
197 if constexpr (std::numeric_limits<offset_2d_type>::digits < std::numeric_limits<std::uint64_t>::digits) {
198 outCheck +=
static_cast<std::uint64_t
>(outIDs.size() * inIDs.size());
199 PRECICE_CHECK(outCheck < std::numeric_limits<offset_2d_type>::max(),
200 "The selected integer precision for the (output) matrix offsets (\"offset_2d_type\") overflow. You might want to change the precision specified in \"device/KokkosTypes.hpp\"");
204 std::copy(outIDs.begin(), outIDs.end(), std::back_inserter(globalOutIDs));
219 Kokkos::View<VertexID *, Kokkos::HostSpace, UnmanagedMemory>
220 tmpIn(globalInIDs.data(), globalInIDs.size());
221 Kokkos::View<VertexID *, Kokkos::HostSpace, UnmanagedMemory>
222 tmpOut(globalOutIDs.data(), globalOutIDs.size());
228 eNearestNeighbors.
stop();
253 auto hostInMesh = Kokkos::create_mirror_view(
_inMesh);
254 auto hostOutMesh = Kokkos::create_mirror_view(
_outMesh);
256 for (
int i = 0; i < inMesh->
nVertices(); ++i) {
257 const auto &v = inMesh->
vertex(i);
258 for (
int d = 0; d <
_dim; ++d) {
259 hostInMesh(i, d) = v.rawCoords()[d];
262 for (
int i = 0; i < outMesh->
nVertices(); ++i) {
263 const auto &v = outMesh->
vertex(i);
264 for (
int d = 0; d <
_dim; ++d) {
265 hostOutMesh(i, d) = v.rawCoords()[d];
269 Kokkos::deep_copy(
_inMesh, hostInMesh);
270 Kokkos::deep_copy(
_outMesh, hostOutMesh);
280 auto hostCenterMesh = Kokkos::create_mirror_view(centerMesh);
282 const auto &v = centers[i];
283 for (
int d = 0; d <
_dim; ++d) {
284 hostCenterMesh(i, d) = v.rawCoords()[d];
287 Kokkos::deep_copy(centerMesh, hostCenterMesh);
295 PRECICE_CHECK(success,
"Clustering resulted in unassigned vertices for the output mesh \"{}\".", outMesh->
getName());
315 Kokkos::deep_copy(unrolledSize, last_elem_view);
329 Kokkos::deep_copy(evalSize, last_elem_view2);
352template <
typename RADIAL_BASIS_FUNCTION_T>
355 auto solve_component =
356 [&](
const double *inPtr, Eigen::Index inSize,
double *outPtr, Eigen::Index outSize) {
358 Kokkos::View<const double *, Kokkos::HostSpace, UnmanagedMemory>
359 inView(inPtr, inSize);
363 Kokkos::deep_copy(
_inData, inView);
382 Kokkos::View<double *, Kokkos::HostSpace, UnmanagedMemory>
383 outView(outPtr, outSize);
384 Kokkos::deep_copy(outView,
_outData);
389 const int nComponents = globalIn.
dataDims;
392 if (nComponents == 1) {
393 solve_component(globalIn.
values.data(), globalIn.
values.size(), globalOut.data(), globalOut.size());
396 Eigen::Map<const Eigen::MatrixXd> inMatrix(globalIn.
values.data(), nComponents, globalIn.
values.size() / nComponents);
397 Eigen::Map<Eigen::MatrixXd> outMatrix(globalOut.data(), nComponents, globalOut.size() / nComponents);
400 Eigen::VectorXd tmpIn(inMatrix.cols());
401 Eigen::VectorXd tmpOut(outMatrix.cols());
402 for (
int c = 0; c < nComponents; ++c) {
403 tmpIn = inMatrix.row(c);
404 solve_component(tmpIn.data(), tmpIn.size(), tmpOut.data(), tmpOut.size());
405 outMatrix.row(c) = tmpOut;
411template <
typename RADIAL_BASIS_FUNCTION_T>
412template <
typename... Args>
417 if (evaluation_op_available)
422 if (evaluation_op_available)
436template <
typename RADIAL_BASIS_FUNCTION_T>
437class BatchedRBFSolver {
439 BatchedRBFSolver(RADIAL_BASIS_FUNCTION_T,
442 const std::vector<mesh::Vertex> &,
446 MappingConfiguration::GinkgoParameter) {}
448 void solveConsistent(
const time::Sample &, Eigen::VectorXd &) {}
#define PRECICE_DEBUG(...)
#define PRECICE_TRACE(...)
#define PRECICE_CHECK(check,...)
#define PRECICE_ASSERT(...)
int getDimensions() const
VertexContainer & vertices()
Returns modifieable container holding all vertices.
const std::string & getName() const
Returns the name of the mesh, as set in the config file.
std::size_t nVertices() const
Returns the number of vertices.
Vertex & vertex(VertexID id)
Mutable access to a vertex by VertexID.
const query::Index & index() const
Call preprocess() before index() to ensure correct projection handling.
static void initialize(int *argc, char ***argv)
This class provides a lightweight logger.
RADIAL_BASIS_FUNCTION_T RBF_T
precice::logging::Logger _log
VectorView _kernelMatrices
MatrixOffsetView _evaluationOffsets
MatrixOffsetView _kernelOffsets
void _dispatch_solve_kernel(bool polynomial, bool evaluation_op_available, Args &&...args)
VectorView _normalizedWeights
void solveConsistent(const time::Sample &globalIn, Eigen::VectorXd &globalOut)
BatchedRBFSolver(RBF_T basisFunction, mesh::PtrMesh inMesh, mesh::PtrMesh outMesh, const std::vector< mesh::Vertex > ¢ers, double clusterRadius, Polynomial polynomial, bool computeEvaluationOffline, MappingConfiguration::GinkgoParameter ginkgoParameter)
const bool _computeEvaluationOffline
VectorOffsetView _inOffsets
VectorOffsetView _outOffsets
GlobalIDView _globalInIDs
GlobalIDView _globalOutIDs
Wendland radial basis function with compact support.
void stop()
Stops a running event.
void do_batched_qr(int nCluster, int dim, int avgClusterSize, int maxClusterSize, VectorOffsetView< MemorySpace > inOffsets, GlobalIDView< MemorySpace > globalInIDs, MeshView< MemorySpace > inMesh, VectorView< MemorySpace > qrMatrix, VectorView< MemorySpace > qrTau, PivotView< MemorySpace > qrP)
void do_batched_assembly(int nCluster, int dim, int avgClusterSize, EvalFunctionType f, const VectorOffsetView< MemorySpace > &inOffsets, const GlobalIDView< MemorySpace > &globalInIDs, const MeshView< MemorySpace > &inCoords, const VectorOffsetView< MemorySpace > &targetOffsets, const GlobalIDView< MemorySpace > &globalTargetIDs, const MeshView< MemorySpace > &targetCoords, const MatrixOffsetView< MemorySpace > &matrixOffsets, VectorView< MemorySpace > matrices)
bool compute_weights(const int nCluster, const int avgOutClusterSize, const offset_1d_type nWeights, const int nMeshVertices, const int dim, VectorOffsetView< MemorySpace > offsets, MeshView< MemorySpace > centers, GlobalIDView< MemorySpace > globalIDs, MeshView< MemorySpace > mesh, const CompactPolynomialC2 &w, VectorView< MemorySpace > normalizedWeights)
void do_input_assembly(int nCluster, int dim, int avgClusterSize, int maxInClusterSize, EvalFunctionType f, const VectorOffsetView< MemorySpace > &inOffsets, const GlobalIDView< MemorySpace > &globalInIDs, const MeshView< MemorySpace > &inCoords, const MatrixOffsetView< MemorySpace > &matrixOffsets, VectorView< MemorySpace > matrices)
void do_batched_solve(int nCluster, int dim, int avgInClusterSize, int maxInClusterSize, int maxOutClusterSize, EvalFunctionType f, const VectorOffsetView< MemorySpace > &rhsOffsets, const GlobalIDView< MemorySpace > &globalRhsIDs, VectorView< MemorySpace > rhs, const MatrixOffsetView< MemorySpace > &matrixOffsets, const VectorView< MemorySpace > &matrices, const VectorView< MemorySpace > &normalizedWeights, const MatrixOffsetView< MemorySpace > &evalOffsets, const VectorView< MemorySpace > &evalMat, const VectorOffsetView< MemorySpace > &outOffsets, const GlobalIDView< MemorySpace > &globalOutIDs, VectorView< MemorySpace > out, const MeshView< MemorySpace > &inMesh, const MeshView< MemorySpace > &outMesh, const VectorView< MemorySpace > &qrMatrix, const VectorView< MemorySpace > &qrTau, const PivotView< MemorySpace > &qrP)
void compute_offsets(const VectorOffsetView< MemorySpace > src1, const VectorOffsetView< MemorySpace > src2, MatrixOffsetView< MemorySpace > dst, int nCluster)
void do_batched_lu(int nCluster, int avgClusterSize, const MatrixOffsetView< MemorySpace > &matrixOffsets, VectorView< MemorySpace > matrices)
contains data mapping from points to meshes.
ExecutionSpace::size_type offset_2d_type
Kokkos::View< int *, MemorySpace > PivotView
Kokkos::View< offset_2d_type *, MemorySpace > MatrixOffsetView
Kokkos::View< offset_1d_type *, MemorySpace > VectorOffsetView
Kokkos::View< VertexID *, MemorySpace > GlobalIDView
ExecutionSpace::size_type offset_1d_type
Kokkos::View< double *, MemorySpace > VectorView
Polynomial
How to handle the polynomial?
Kokkos::View< double **, Kokkos::LayoutRight, MemorySpace > MeshView
constexpr double NUMERICAL_ZERO_DIFFERENCE
std::shared_ptr< Mesh > PtrMesh
Wrapper struct that is used to transfer RBF-specific parameters to the GPU.
int dataDims
The dimensionality of the data.