cpp/latest/BatchedRBFSolver_8hpp_source.html

#pragma once

#ifndef PRECICE_NO_KOKKOS_KERNELS


#include <Kokkos_Core.hpp>

#include <array>

#include <cmath>

#include <functional>

#include <numeric>

#include "mapping/RadialBasisFctSolver.hpp"

#include "mapping/config/MappingConfiguration.hpp"

#include "mapping/device/Device.hpp"

#include "mapping/device/KokkosPUMKernels.hpp"

#include "mapping/device/KokkosTypes.hpp"

#include "mapping/impl/BasisFunctions.hpp"

#include "mesh/Mesh.hpp"

#include "precice/impl/Types.hpp"

#include "profiling/Event.hpp"


// Makes precice::mapping::RadialBasisParameters available at global scope.
// NOTE(review): a using-declaration at global scope in a header leaks the name into every
// including translation unit; the name is not referenced in the visible part of this header —
// confirm whether any includer relies on it before changing this.
using precice::mapping::RadialBasisParameters;


namespace precice::mapping {


/**
 * @brief Batched partition-of-unity RBF solver running on a Kokkos execution space.
 *
 * All clusters are stored in flattened ("unrolled") device views. The per-cluster ranges
 * inside these flat views are described by offset views holding one entry per cluster
 * plus a trailing total.
 *
 * @tparam RADIAL_BASIS_FUNCTION_T basis function type; the constructor requires
 *         RADIAL_BASIS_FUNCTION_T::isStrictlyPositiveDefinite() to hold.
 */
template <typename RADIAL_BASIS_FUNCTION_T>
class BatchedRBFSolver {
public:
  using RBF_T = RADIAL_BASIS_FUNCTION_T;

  /**
   * @brief Builds all device data (cluster association, weights, kernel matrices and their LU factorization).
   *
   * @param basisFunction            basis function used to assemble the kernel (and evaluation) matrices
   * @param inMesh                   input mesh; must be non-empty
   * @param outMesh                  output mesh; must be non-empty and of the same dimension as @p inMesh
   * @param centers                  cluster centers, one cluster per entry
   * @param clusterRadius            radius used to collect the vertices of each cluster
   * @param polynomial               polynomial treatment; Polynomial::ON is rejected
   * @param computeEvaluationOffline if true, the evaluation matrices are assembled once in the constructor
   * @param ginkgoParameter          provides nThreads and deviceId for the device initialization
   */
  BatchedRBFSolver(RBF_T                                 basisFunction,
                   mesh::PtrMesh                         inMesh,
                   mesh::PtrMesh                         outMesh,
                   const std::vector<mesh::Vertex>      &centers,
                   double                                clusterRadius,
                   Polynomial                            polynomial,
                   bool                                  computeEvaluationOffline,
                   MappingConfiguration::GinkgoParameter ginkgoParameter);

  /**
   * @brief Maps @p globalIn onto the output mesh and writes the result into @p globalOut.
   *
   * Samples with more than one data component are processed component by component.
   * @p globalOut is written in place and is not resized by this function.
   */
  void solveConsistent(const time::Sample &globalIn, Eigen::VectorXd &globalOut);

private:
  mutable precice::logging::Logger _log{"mapping::BatchedRBFSolver"};

  // Helper to dispatch the actual kernel. Needed to help with the template parameters
  template <typename... Args>
  void _dispatch_solve_kernel(bool polynomial, bool evaluation_op_available, Args &&...args);

  // Linear offsets for each cluster, i.e., the running sum of all cluster sizes (nCluster + 1 entries)
  VectorOffsetView<> _inOffsets;
  VectorOffsetView<> _outOffsets;

  // Stores for each cluster the VertexIDs (concatenated over all clusters)
  GlobalIDView<> _globalInIDs;
  GlobalIDView<> _globalOutIDs;

  // Offsets into the flattened matrices (nCluster + 1 entries each).
  // _evaluationOffsets is only allocated if the evaluation is computed offline.
  MatrixOffsetView<> _kernelOffsets;
  MatrixOffsetView<> _evaluationOffsets;

  // Vertex coordinates on the device (nVertices x dim)
  MeshView<> _inMesh;
  MeshView<> _outMesh;

  // Polynomial QR data, only allocated for Polynomial::SEPARATE
  VectorView<> _qrMatrix; // flat view of (nCluster x verticesPerCluster_i x (dim + 1) = nCluster x verticesPerCluster_i x polyParams)
  VectorView<> _qrTau;    // flat view of Householder tau (nCluster x (dim + 1) = nCluster x polyParams)
  PivotView<>  _qrP;      // flat view of Permutation and rank (nCluster x (dim + 2) = nCluster x (polyParams + rank))

  // Flattened kernel matrices of all clusters (factorized in place by the batched LU)
  VectorView<> _kernelMatrices;

  // Flattened evaluation matrices (only if computed offline) and one weight per output cluster entry
  VectorView<> _evalMatrices;
  VectorView<> _normalizedWeights;

  // Currently only scalar data
  VectorView<> _inData;
  // Kokkos::View<double *>::HostMirror                    _inDataMirror;
  VectorView<> _outData;
  // Kokkos::View<double *>::HostMirror                    _outDataMirror;

  // Largest number of vertices found in a single cluster. Value-initialized for
  // consistency with _avgClusterSize, so the members never hold indeterminate values.
  int _maxInClusterSize{};
  int _maxOutClusterSize{};

  RBF_T      _basisFunction;
  Polynomial _polynomial;
  const int  _nCluster;
  const int  _dim; // Mesh dimension
  int        _avgClusterSize{};
  const bool _computeEvaluationOffline;
  // MappingConfiguration::GinkgoParameter _ginkgoParameter;
};


/**
 * @brief Prepares all device-side data for the batched PU-RBF solve.
 *
 * The constructor performs the following steps:
 *  - validates the configuration and initializes the device,
 *  - Step 1: collects, for every cluster center, the input and output vertices inside the
 *    cluster radius and stores them as flat ID lists together with per-cluster offsets,
 *  - Step 2: computes the offsets into the flattened kernel (and evaluation) matrices,
 *  - Step 3: copies the vertex coordinates of both meshes onto the device,
 *  - Step 4: computes the normalized partition-of-unity weights,
 *  - Step 5: computes the polynomial QR decomposition (only for Polynomial::SEPARATE),
 *  - Step 6: assembles the kernel matrices (and the evaluation matrices if requested),
 *  - Step 7: computes the batched LU decomposition of the kernel matrices,
 *  - Step 8: allocates the device buffers for input and output data.
 *
 * @param basisFunction            basis function used for the matrix assembly
 * @param inMesh                   input mesh; must be non-empty
 * @param outMesh                  output mesh; must be non-empty and have the dimension of @p inMesh
 * @param centers                  cluster centers; must contain at least one entry
 * @param clusterRadius            radius used to query the vertices of each cluster
 * @param polynomial               polynomial treatment; Polynomial::ON is not supported
 * @param computeEvaluationOffline if true, the evaluation matrices are assembled here
 * @param ginkgoParameter          provides nThreads and deviceId for the device initialization
 */
template <typename RADIAL_BASIS_FUNCTION_T>
BatchedRBFSolver<RADIAL_BASIS_FUNCTION_T>::BatchedRBFSolver(RBF_T                                 basisFunction,
                                                            mesh::PtrMesh                         inMesh,
                                                            mesh::PtrMesh                         outMesh,
                                                            const std::vector<mesh::Vertex>      &centers,
                                                            double                                clusterRadius,
                                                            Polynomial                            polynomial,
                                                            bool                                  computeEvaluationOffline,
                                                            MappingConfiguration::GinkgoParameter ginkgoParameter)
    : _basisFunction(basisFunction), _polynomial(polynomial), _nCluster(static_cast<int>(centers.size())), _dim(inMesh->getDimensions()), _computeEvaluationOffline(computeEvaluationOffline)
{
  PRECICE_TRACE();
  PRECICE_CHECK(_polynomial != Polynomial::ON, "Setting polynomial to \"on\" for the mapping between \"{}\" and \"{}\" is not supported", inMesh->getName(), outMesh->getName());
  // The LU decomposition uses no pivoting, which leads to divisions by zero if the diagonal contains zero entries, which is the case for our basis functions
  PRECICE_CHECK(RADIAL_BASIS_FUNCTION_T::isStrictlyPositiveDefinite(), "batched solver is only available for positive definite basis functions, i.e., compact-polynomial functions and Gaussian.");

  PRECICE_CHECK(!(inMesh->vertices().empty() || outMesh->vertices().empty()), "One of the meshes in the batched solvers is empty, which is invalid.");
  PRECICE_CHECK(inMesh->getDimensions() == outMesh->getDimensions(), "Incompatible dimensions passed to the batched solver.");
  // The average cluster sizes computed below divide by the number of clusters
  PRECICE_CHECK(_nCluster > 0, "The batched solver requires at least one cluster center, but none was provided for the mapping between \"{}\" and \"{}\".", inMesh->getName(), outMesh->getName());

  // Vertex counts as int, matching the VertexID-based indexing used in the loops below
  const int nInVertices  = static_cast<int>(inMesh->nVertices());
  const int nOutVertices = static_cast<int>(outMesh->nVertices());

  precice::profiling::Event eInit("solver.initializeKokkos");
  // We have to initialize Kokkos and Ginkgo here, as the initialization call allocates memory
  // in the current setup, this will only initialize the device (and allocate memory) on the primary rank
  // TODO: Document restriction: all mappings must use the same executor configuration within one participant
  device::Device::initialize(ginkgoParameter.nThreads, ginkgoParameter.deviceId);
  PRECICE_DEBUG("Using batched PU-RBF solver on executor \"{}\" for \"{}\" PU-RBF clusters in execution mode {}.", ginkgoParameter.executor, centers.size(), _computeEvaluationOffline ? "\"minimal-compute\" (evaluation offline)" : "\"minimal-memory\" (evaluation online)");
  Kokkos::fence();
  eInit.stop();

// General assumption of the algorithm: the vertex IDs coincide with the vertex positions in the mesh
#ifndef NDEBUG
  for (int i = 0; i < nInVertices; ++i) {
    PRECICE_ASSERT(inMesh->vertices()[i].getID() == i);
  }
  for (int i = 0; i < nOutVertices; ++i) {
    PRECICE_ASSERT(outMesh->vertices()[i].getID() == i);
  }
#endif

  precice::profiling::Event eNearestNeighbors("solver.queryVertices");

  // Step 1:  Query n-nearest neighbors and compute offsets, which hold the range for each cluster

  PRECICE_DEBUG("Computing cluster association on the GPU");

  // Initialize the view for the GPU offsets
  _inOffsets  = VectorOffsetView<>("inOffsets", _nCluster + 1);
  _outOffsets = VectorOffsetView<>("outOffsets", _nCluster + 1);

  // we fill this view on the host side
  auto hostIn  = Kokkos::create_mirror_view(_inOffsets);
  auto hostOut = Kokkos::create_mirror_view(_outOffsets);

  // for the global IDs, we use now a std::vector which we append to
  // we need at least contiguous memory here
  // TODO: Check the performance of reallocations
  std::vector<VertexID> globalInIDs;
  std::vector<VertexID> globalOutIDs;

  // has to be a separate loop, as we first need to gather knowledge about
  // the shape for the meshes
  hostIn(0)  = 0;
  hostOut(0) = 0;
  // To detect overflows
  std::uint64_t inCheck  = 0;
  std::uint64_t outCheck = 0;
  _maxInClusterSize = _maxOutClusterSize = 0;
  for (int i = 0; i < _nCluster; ++i) {
    const auto &center = centers[i];

    // First we handle the input side
    auto inIDs          = inMesh->index().getVerticesInsideBox(center, clusterRadius);
    _maxInClusterSize   = std::max(_maxInClusterSize, static_cast<int>(inIDs.size()));
    std::uint64_t tmpIn = hostIn(i) + inIDs.size();

    // Check overflows
    if constexpr (std::numeric_limits<offset_1d_type>::digits < std::numeric_limits<std::uint64_t>::digits) {
      PRECICE_CHECK(tmpIn < std::numeric_limits<offset_1d_type>::max(),
                    "The selected integer precision for the (input) vector offsets (\"offset_1d_type\") overflow. You might want to change the precision specified in \"device/KokkosTypes.hpp\"");
    }
    if constexpr (std::numeric_limits<offset_2d_type>::digits < std::numeric_limits<std::uint64_t>::digits) {
      // Widen before multiplying, such that the product itself cannot wrap around in a narrower size type
      inCheck += static_cast<std::uint64_t>(inIDs.size()) * static_cast<std::uint64_t>(inIDs.size());
      PRECICE_CHECK(inCheck < std::numeric_limits<offset_2d_type>::max(),
                    "The selected integer precision for the (input) matrix offsets (\"offset_2d_type\") overflow. You might want to change the precision specified in \"device/KokkosTypes.hpp\"");
    }
    hostIn(i + 1) = static_cast<offset_1d_type>(tmpIn);
    globalInIDs.insert(globalInIDs.end(), inIDs.begin(), inIDs.end());

    // ... and the same for the output side
    auto outIDs          = outMesh->index().getVerticesInsideBox(center, clusterRadius - math::NUMERICAL_ZERO_DIFFERENCE);
    _maxOutClusterSize   = std::max(_maxOutClusterSize, static_cast<int>(outIDs.size()));
    std::uint64_t tmpOut = hostOut(i) + outIDs.size();

    // Check overflows
    if constexpr (std::numeric_limits<offset_1d_type>::digits < std::numeric_limits<std::uint64_t>::digits) {
      PRECICE_CHECK(tmpOut < std::numeric_limits<offset_1d_type>::max(),
                    "The selected integer precision for the (output) vector offsets (\"offset_1d_type\") overflow. You might want to change the precision specified in \"device/KokkosTypes.hpp\"");
    }
    if (_computeEvaluationOffline) {
      if constexpr (std::numeric_limits<offset_2d_type>::digits < std::numeric_limits<std::uint64_t>::digits) {
        // is in x out; widened before multiplying (see above)
        outCheck += static_cast<std::uint64_t>(outIDs.size()) * static_cast<std::uint64_t>(inIDs.size());
        PRECICE_CHECK(outCheck < std::numeric_limits<offset_2d_type>::max(),
                      "The selected integer precision for the (output) matrix offsets (\"offset_2d_type\") overflow. You might want to change the precision specified in \"device/KokkosTypes.hpp\"");
      }
    }
    hostOut(i + 1) = static_cast<offset_1d_type>(tmpOut);
    globalOutIDs.insert(globalOutIDs.end(), outIDs.begin(), outIDs.end());
  }

  // _nCluster > 0 is guaranteed by the check at the top of the constructor
  _avgClusterSize = static_cast<int>(hostIn(_nCluster /* = hostIn.extent(0) - 1 */) / _nCluster);
  PRECICE_DEBUG("Average cluster size used to find a good team size of the kernel execution: {}", _avgClusterSize);

  // Copy offsets onto the device
  Kokkos::deep_copy(_inOffsets, hostIn);
  Kokkos::deep_copy(_outOffsets, hostOut);

  // ... now that we have the sizes, we transfer the map onto the device
  _globalInIDs  = GlobalIDView<>("globalInIDs", globalInIDs.size());
  _globalOutIDs = GlobalIDView<>("globalOutIDs", globalOutIDs.size());

  // Wrap in a view to perform deep copies further down
  Kokkos::View<VertexID *, Kokkos::HostSpace, UnmanagedMemory>
      tmpIn(globalInIDs.data(), globalInIDs.size());
  Kokkos::View<VertexID *, Kokkos::HostSpace, UnmanagedMemory>
      tmpOut(globalOutIDs.data(), globalOutIDs.size());

  Kokkos::deep_copy(_globalInIDs, tmpIn);
  Kokkos::deep_copy(_globalOutIDs, tmpOut);

  Kokkos::fence();
  eNearestNeighbors.stop();

  precice::profiling::Event eOff2d("solver.kernel.compute2DOffsets");

  // Step 2: Compute the matrix offsets on the device
  PRECICE_DEBUG("Computing matrix offsets");
  // We use a parallel scan for that
  _kernelOffsets = MatrixOffsetView<>("kernelOffsets", _nCluster + 1);
  Kokkos::deep_copy(_kernelOffsets, 0);
  kernel::compute_offsets(_inOffsets, _inOffsets, _kernelOffsets, _nCluster);

  if (_computeEvaluationOffline) {
    _evaluationOffsets = MatrixOffsetView<>("evaluationOffsets", _nCluster + 1);
    Kokkos::deep_copy(_evaluationOffsets, 0);
    kernel::compute_offsets(_inOffsets, _outOffsets, _evaluationOffsets, _nCluster);
  }
  Kokkos::fence();
  eOff2d.stop();
  precice::profiling::Event eMesh("solver.copyMeshes");
  // Step 3: Handle the mesh data structure and copy over to the device
  PRECICE_DEBUG("Computing mesh data on the device");

  _inMesh  = MeshView<>("inMesh", inMesh->nVertices(), _dim);
  _outMesh = MeshView<>("outMesh", outMesh->nVertices(), _dim);

  auto hostInMesh  = Kokkos::create_mirror_view(_inMesh);
  auto hostOutMesh = Kokkos::create_mirror_view(_outMesh);

  for (int i = 0; i < nInVertices; ++i) {
    const auto &v = inMesh->vertex(i);
    for (int d = 0; d < _dim; ++d) {
      hostInMesh(i, d) = v.rawCoords()[d];
    }
  }
  for (int i = 0; i < nOutVertices; ++i) {
    const auto &v = outMesh->vertex(i);
    for (int d = 0; d < _dim; ++d) {
      hostOutMesh(i, d) = v.rawCoords()[d];
    }
  }
  // Copy to device
  Kokkos::deep_copy(_inMesh, hostInMesh);
  Kokkos::deep_copy(_outMesh, hostOutMesh);
  Kokkos::fence();
  eMesh.stop();
  {
    PRECICE_DEBUG("Computing PU-RBF weights");
    precice::profiling::Event eWeights("solver.kernel.computeWeights");

    // Step 4: Compute the weights for each vertex
    // we first need to transfer the center coordinates and the meshes onto the device
    MeshView<> centerMesh("centerMesh", _nCluster, _dim);
    auto       hostCenterMesh = Kokkos::create_mirror_view(centerMesh);
    for (int i = 0; i < _nCluster; ++i) {
      const auto &v = centers[i];
      for (int d = 0; d < _dim; ++d) {
        hostCenterMesh(i, d) = v.rawCoords()[d];
      }
    }
    Kokkos::deep_copy(centerMesh, hostCenterMesh);

    _normalizedWeights = VectorView<>("normalizedWeights", globalOutIDs.size());
    CompactPolynomialC2 weightingFunction(clusterRadius);
    // Computing the weights parallelizes over the number of output mesh vertices
    const int  avgOutClusterSize = static_cast<int>(hostOut(_nCluster /* = hostOut.extent(0) - 1 */) / _nCluster);
    const bool success           = kernel::compute_weights(_nCluster, avgOutClusterSize, globalOutIDs.size(), outMesh->nVertices(), _dim, _outOffsets,
                                                           centerMesh, _globalOutIDs, _outMesh, weightingFunction, _normalizedWeights);
    PRECICE_CHECK(success, "Clustering resulted in unassigned vertices for the output mesh \"{}\".", outMesh->getName());
    Kokkos::fence();
  }

  PRECICE_ASSERT(_avgClusterSize > 0);
  if (_polynomial == Polynomial::SEPARATE) {
    // Step 5: Compute the QR decomposition of the polynomial matrices
    PRECICE_DEBUG("Computing polynomial QR");
    precice::profiling::Event ePoly("solver.kernel.computePolynomialQR");
    _qrMatrix = VectorView<>("qrMatrix", globalInIDs.size() * (_dim + 1)); // = nCluster x verticesPerCluster_i x polyParams
    _qrTau    = VectorView<>("qrTau", _nCluster * (_dim + 1));             // = nCluster x polyParams
    _qrP      = PivotView<>("qrP", _nCluster * (_dim + 2));                //  = nCluster x (polyParams + rank)
    kernel::do_batched_qr(_nCluster, _dim, _avgClusterSize, _maxInClusterSize, _inOffsets, _globalInIDs, _inMesh, _qrMatrix, _qrTau, _qrP);
    Kokkos::fence();
  }
  precice::profiling::Event eMatr("solver.kernel.assembleInputMatrices");
  // Step 6: Launch the parallel kernel to assemble the kernel matrices
  PRECICE_DEBUG("Assemble batched matrices");
  // The kernel matrices /////////////
  // The last offset entry holds the total size of all flattened kernel matrices
  offset_2d_type unrolledSize   = 0;
  auto           last_elem_view = Kokkos::subview(_kernelOffsets, _nCluster);
  Kokkos::deep_copy(unrolledSize, last_elem_view);
  _kernelMatrices = VectorView<>("kernelMatrices", unrolledSize);

  kernel::do_input_assembly(_nCluster, _dim, _avgClusterSize, _maxInClusterSize, basisFunction,
                            _inOffsets, _globalInIDs, _inMesh, _kernelOffsets, _kernelMatrices);

  Kokkos::fence();
  eMatr.stop();

  if (_computeEvaluationOffline) {
    // The eval matrices ///////////////
    precice::profiling::Event eMatrOut("solver.kernel.assembleOutputMatrices");
    offset_2d_type            evalSize        = 0;
    auto                      last_elem_view2 = Kokkos::subview(_evaluationOffsets, _nCluster);
    Kokkos::deep_copy(evalSize, last_elem_view2);
    _evalMatrices = VectorView<>("evalMatrices", evalSize);

    kernel::do_batched_assembly(_nCluster, _dim, _avgClusterSize, basisFunction,
                                _inOffsets, _globalInIDs, _inMesh, _outOffsets, _globalOutIDs, _outMesh, _evaluationOffsets, _evalMatrices);
    Kokkos::fence();
  }

  precice::profiling::Event eLU("solver.kernel.lu");
  // Step 7: Compute batched lu
  PRECICE_DEBUG("Compute batched lu");
  kernel::do_batched_lu(_nCluster, _avgClusterSize, _kernelOffsets, _kernelMatrices);
  Kokkos::fence();
  eLU.stop();
  precice::profiling::Event eAllo("solver.allocateData");
  // Step 8: Allocate memory for data transfer
  PRECICE_DEBUG("Allocate data containers for data transfer");

  _inData  = VectorView<>("inData", inMesh->nVertices());
  _outData = VectorView<>("outData", outMesh->nVertices());
  Kokkos::fence();
}


/**
 * @brief Maps the sample @p globalIn and stores the result in @p globalOut.
 *
 * Each data component is processed on its own: the component is copied to the device,
 * the batched solve kernel is launched, and the result is copied back to the host.
 * @p globalOut is written in place; it is not resized here.
 */
template <typename RADIAL_BASIS_FUNCTION_T>
void BatchedRBFSolver<RADIAL_BASIS_FUNCTION_T>::solveConsistent(const time::Sample &globalIn, Eigen::VectorXd &globalOut)
{
  using HostSourceView = Kokkos::View<const double *, Kokkos::HostSpace, UnmanagedMemory>;
  using HostTargetView = Kokkos::View<double *, Kokkos::HostSpace, UnmanagedMemory>;

  // Maps one contiguous component from host memory (source) to host memory (target) via the device
  auto mapComponent = [&](const double *source, Eigen::Index nSource, double *target, Eigen::Index nTarget) {
    // Non-owning wrapper around the host input
    HostSourceView hostSource(source, nSource);

    // Upload the input and clear the device output buffer
    precice::profiling::Event uploadEvent("solver.copyHostToDevice");
    Kokkos::deep_copy(_inData, hostSource);
    Kokkos::deep_copy(_outData, 0.0);
    Kokkos::fence();
    uploadEvent.stop();

    // Run the batched solve for all clusters
    precice::profiling::Event solveEvent("solver.kernel.batchedSolve");
    const bool                separatePolynomial = (_polynomial == Polynomial::SEPARATE);
    _dispatch_solve_kernel(separatePolynomial, _computeEvaluationOffline,
                           _nCluster, _dim, _avgClusterSize, _maxInClusterSize, _maxOutClusterSize, _basisFunction,
                           _inOffsets, _globalInIDs, _inData, _kernelOffsets, _kernelMatrices, _normalizedWeights,
                           _evaluationOffsets, _evalMatrices, _outOffsets, _globalOutIDs, _outData,
                           _inMesh, _outMesh, _qrMatrix, _qrTau, _qrP);
    Kokkos::fence();
    solveEvent.stop();

    // Download the result into the non-owning host output wrapper
    precice::profiling::Event downloadEvent("solver.copyDeviceToHost");
    HostTargetView            hostTarget(target, nTarget);
    Kokkos::deep_copy(hostTarget, _outData);
    Kokkos::fence();
    downloadEvent.stop();
  };

  const int nComponents = globalIn.dataDims;

  // Scalar data is already contiguous and can be handed over directly
  if (nComponents == 1) {
    mapComponent(globalIn.values.data(), globalIn.values.size(), globalOut.data(), globalOut.size());
    return;
  }

  // Vector data: view the buffers as (component x vertex) matrices ...
  Eigen::Map<const Eigen::MatrixXd> sourceMatrix(globalIn.values.data(), nComponents, globalIn.values.size() / nComponents);
  Eigen::Map<Eigen::MatrixXd>       targetMatrix(globalOut.data(), nComponents, globalOut.size() / nComponents);

  // ... and stage every component in contiguous scratch vectors, as required by the copies above
  Eigen::VectorXd componentIn(sourceMatrix.cols());
  Eigen::VectorXd componentOut(targetMatrix.cols());
  for (int component = 0; component < nComponents; ++component) {
    componentIn = sourceMatrix.row(component);
    mapComponent(componentIn.data(), componentIn.size(), componentOut.data(), componentOut.size());
    targetMatrix.row(component) = componentOut;
  }
}


/**
 * @brief Translates the two runtime flags into the compile-time template arguments of
 *        kernel::do_batched_solve and forwards all remaining arguments unchanged.
 *
 * @param polynomial              selects the first template argument of the kernel
 * @param evaluation_op_available selects the second template argument of the kernel
 * @param args                    arguments forwarded to kernel::do_batched_solve
 */
template <typename RADIAL_BASIS_FUNCTION_T>
template <typename... Args>
void BatchedRBFSolver<RADIAL_BASIS_FUNCTION_T>::_dispatch_solve_kernel(bool polynomial, bool evaluation_op_available,
                                                                       Args &&...args)
{
  // Exactly one branch is taken, hence the arguments are forwarded only once
  if (polynomial && evaluation_op_available) {
    kernel::do_batched_solve<true, true>(std::forward<Args>(args)...);
  } else if (polynomial) {
    kernel::do_batched_solve<true, false>(std::forward<Args>(args)...);
  } else if (evaluation_op_available) {
    kernel::do_batched_solve<false, true>(std::forward<Args>(args)...);
  } else {
    kernel::do_batched_solve<false, false>(std::forward<Args>(args)...);
  }
}


} // namespace precice::mapping


#else


#include "mapping/config/MappingConfiguration.hpp"

namespace precice::mapping {


/**
 * @brief Placeholder used when PRECICE_NO_KOKKOS_KERNELS is defined.
 *
 * Offers the same public interface as the Kokkos-based implementation, such that code
 * referring to the solver still compiles. The constructor and solveConsistent() are empty:
 * no data is stored and the output vector is left untouched.
 */
template <typename RADIAL_BASIS_FUNCTION_T>
class BatchedRBFSolver {
public:
  // Same public alias as in the Kokkos-based implementation
  using RBF_T = RADIAL_BASIS_FUNCTION_T;

  /// Accepts and ignores all arguments of the Kokkos-based constructor.
  BatchedRBFSolver(RBF_T /*basisFunction*/,
                   mesh::PtrMesh /*inMesh*/,
                   mesh::PtrMesh /*outMesh*/,
                   const std::vector<mesh::Vertex> & /*centers*/,
                   double /*clusterRadius*/,
                   Polynomial /*polynomial*/,
                   bool /*computeEvaluationOffline*/,
                   MappingConfiguration::GinkgoParameter /*ginkgoParameter*/) {}

  /// Does nothing; the output vector is not modified.
  void solveConsistent(const time::Sample & /*globalIn*/, Eigen::VectorXd & /*globalOut*/) {}
};

} // namespace precice::mapping

#endif // PRECICE_NO_KOKKOS_KERNELS

BasisFunctions.hpp

Device.hpp

Event.hpp

KokkosPUMKernels.hpp

KokkosTypes.hpp

PRECICE_DEBUG
#define PRECICE_DEBUG(...)
Definition LogMacros.hpp:61

PRECICE_TRACE
#define PRECICE_TRACE(...)
Definition LogMacros.hpp:92

PRECICE_CHECK
#define PRECICE_CHECK(check,...)
Definition LogMacros.hpp:32

MappingConfiguration.hpp

Mesh.hpp

RadialBasisFctSolver.hpp

PRECICE_ASSERT
#define PRECICE_ASSERT(...)
Definition assertion.hpp:85

Mesh::getDimensions
int getDimensions() const
Definition Mesh.cpp:99

Mesh::vertices
VertexContainer & vertices()
Returns modifiable container holding all vertices.
Definition Mesh.cpp:54

Mesh::getName
const std::string & getName() const
Returns the name of the mesh, as set in the config file.
Definition Mesh.cpp:242

Mesh::nVertices
std::size_t nVertices() const
Returns the number of vertices.
Definition Mesh.cpp:64

Mesh::vertex
Vertex & vertex(VertexID id)
Mutable access to a vertex by VertexID.
Definition Mesh.cpp:42

Mesh::index
const query::Index & index() const
Call preprocess() before index() to ensure correct projection handling.
Definition Mesh.hpp:329

precice::device::Device::initialize
static void initialize(int *argc, char ***argv)
Definition Device.cpp:12

precice::logging::Logger
This class provides a lightweight logger.
Definition Logger.hpp:17

precice::mapping::BatchedRBFSolver::_avgClusterSize
int _avgClusterSize
Definition BatchedRBFSolver.hpp:94

precice::mapping::BatchedRBFSolver::RBF_T
RADIAL_BASIS_FUNCTION_T RBF_T
Definition BatchedRBFSolver.hpp:36

precice::mapping::BatchedRBFSolver::_log
precice::logging::Logger _log
Definition BatchedRBFSolver.hpp:52

precice::mapping::BatchedRBFSolver::_kernelMatrices
VectorView _kernelMatrices
Definition BatchedRBFSolver.hpp:76

precice::mapping::BatchedRBFSolver::_evaluationOffsets
MatrixOffsetView _evaluationOffsets
Definition BatchedRBFSolver.hpp:67

precice::mapping::BatchedRBFSolver::_maxOutClusterSize
int _maxOutClusterSize
Definition BatchedRBFSolver.hpp:88

precice::mapping::BatchedRBFSolver::_dim
const int _dim
Definition BatchedRBFSolver.hpp:93

precice::mapping::BatchedRBFSolver::_qrMatrix
VectorView _qrMatrix
Definition BatchedRBFSolver.hpp:72

precice::mapping::BatchedRBFSolver::_qrP
PivotView _qrP
Definition BatchedRBFSolver.hpp:74

precice::mapping::BatchedRBFSolver::_inMesh
MeshView _inMesh
Definition BatchedRBFSolver.hpp:69

precice::mapping::BatchedRBFSolver::_kernelOffsets
MatrixOffsetView _kernelOffsets
Definition BatchedRBFSolver.hpp:66

precice::mapping::BatchedRBFSolver::_evalMatrices
VectorView _evalMatrices
Definition BatchedRBFSolver.hpp:78

precice::mapping::BatchedRBFSolver::_dispatch_solve_kernel
void _dispatch_solve_kernel(bool polynomial, bool evaluation_op_available, Args &&...args)
Definition BatchedRBFSolver.hpp:413

precice::mapping::BatchedRBFSolver::_normalizedWeights
VectorView _normalizedWeights
Definition BatchedRBFSolver.hpp:79

precice::mapping::BatchedRBFSolver::_qrTau
VectorView _qrTau
Definition BatchedRBFSolver.hpp:73

precice::mapping::BatchedRBFSolver::_basisFunction
RBF_T _basisFunction
Definition BatchedRBFSolver.hpp:90

precice::mapping::BatchedRBFSolver::_polynomial
Polynomial _polynomial
Definition BatchedRBFSolver.hpp:91

precice::mapping::BatchedRBFSolver::_outData
VectorView _outData
Definition BatchedRBFSolver.hpp:84

precice::mapping::BatchedRBFSolver::solveConsistent
void solveConsistent(const time::Sample &globalIn, Eigen::VectorXd &globalOut)
Definition BatchedRBFSolver.hpp:353

precice::mapping::BatchedRBFSolver::_nCluster
const int _nCluster
Definition BatchedRBFSolver.hpp:92

precice::mapping::BatchedRBFSolver::_outMesh
MeshView _outMesh
Definition BatchedRBFSolver.hpp:70

precice::mapping::BatchedRBFSolver::BatchedRBFSolver
BatchedRBFSolver(RBF_T basisFunction, mesh::PtrMesh inMesh, mesh::PtrMesh outMesh, const std::vector< mesh::Vertex > &centers, double clusterRadius, Polynomial polynomial, bool computeEvaluationOffline, MappingConfiguration::GinkgoParameter ginkgoParameter)
Definition BatchedRBFSolver.hpp:100

precice::mapping::BatchedRBFSolver::_maxInClusterSize
int _maxInClusterSize
Definition BatchedRBFSolver.hpp:87

precice::mapping::BatchedRBFSolver::_computeEvaluationOffline
const bool _computeEvaluationOffline
Definition BatchedRBFSolver.hpp:95

precice::mapping::BatchedRBFSolver::_inOffsets
VectorOffsetView _inOffsets
Definition BatchedRBFSolver.hpp:59

precice::mapping::BatchedRBFSolver::_outOffsets
VectorOffsetView _outOffsets
Definition BatchedRBFSolver.hpp:60

precice::mapping::BatchedRBFSolver::_globalInIDs
GlobalIDView _globalInIDs
Definition BatchedRBFSolver.hpp:63

precice::mapping::BatchedRBFSolver::_globalOutIDs
GlobalIDView _globalOutIDs
Definition BatchedRBFSolver.hpp:64

precice::mapping::BatchedRBFSolver::_inData
VectorView _inData
Definition BatchedRBFSolver.hpp:82

precice::mapping::CompactPolynomialC2
Wendland radial basis function with compact support.
Definition BasisFunctions.hpp:421

precice::profiling::Event
Definition Event.hpp:40

precice::profiling::Event::stop
void stop()
Stops a running event.
Definition Event.cpp:51

Types.hpp

precice::mapping::kernel::do_batched_qr
void do_batched_qr(int nCluster, int dim, int avgClusterSize, int maxClusterSize, VectorOffsetView< MemorySpace > inOffsets, GlobalIDView< MemorySpace > globalInIDs, MeshView< MemorySpace > inMesh, VectorView< MemorySpace > qrMatrix, VectorView< MemorySpace > qrTau, PivotView< MemorySpace > qrP)
Definition KokkosPUMKernels_Impl.hpp:179

precice::mapping::kernel::do_batched_assembly
void do_batched_assembly(int nCluster, int dim, int avgClusterSize, EvalFunctionType f, const VectorOffsetView< MemorySpace > &inOffsets, const GlobalIDView< MemorySpace > &globalInIDs, const MeshView< MemorySpace > &inCoords, const VectorOffsetView< MemorySpace > &targetOffsets, const GlobalIDView< MemorySpace > &globalTargetIDs, const MeshView< MemorySpace > &targetCoords, const MatrixOffsetView< MemorySpace > &matrixOffsets, VectorView< MemorySpace > matrices)
Definition KokkosPUMKernels_Impl.hpp:389

precice::mapping::kernel::compute_weights
bool compute_weights(const int nCluster, const int avgOutClusterSize, const offset_1d_type nWeights, const int nMeshVertices, const int dim, VectorOffsetView< MemorySpace > offsets, MeshView< MemorySpace > centers, GlobalIDView< MemorySpace > globalIDs, MeshView< MemorySpace > mesh, const CompactPolynomialC2 &w, VectorView< MemorySpace > normalizedWeights)
Definition KokkosPUMKernels_Impl.hpp:100

precice::mapping::kernel::do_input_assembly
void do_input_assembly(int nCluster, int dim, int avgClusterSize, int maxInClusterSize, EvalFunctionType f, const VectorOffsetView< MemorySpace > &inOffsets, const GlobalIDView< MemorySpace > &globalInIDs, const MeshView< MemorySpace > &inCoords, const MatrixOffsetView< MemorySpace > &matrixOffsets, VectorView< MemorySpace > matrices)
Definition KokkosPUMKernels_Impl.hpp:307

precice::mapping::kernel::do_batched_solve
void do_batched_solve(int nCluster, int dim, int avgInClusterSize, int maxInClusterSize, int maxOutClusterSize, EvalFunctionType f, const VectorOffsetView< MemorySpace > &rhsOffsets, const GlobalIDView< MemorySpace > &globalRhsIDs, VectorView< MemorySpace > rhs, const MatrixOffsetView< MemorySpace > &matrixOffsets, const VectorView< MemorySpace > &matrices, const VectorView< MemorySpace > &normalizedWeights, const MatrixOffsetView< MemorySpace > &evalOffsets, const VectorView< MemorySpace > &evalMat, const VectorOffsetView< MemorySpace > &outOffsets, const GlobalIDView< MemorySpace > &globalOutIDs, VectorView< MemorySpace > out, const MeshView< MemorySpace > &inMesh, const MeshView< MemorySpace > &outMesh, const VectorView< MemorySpace > &qrMatrix, const VectorView< MemorySpace > &qrTau, const PivotView< MemorySpace > &qrP)
Definition KokkosPUMKernels_Impl.hpp:493

precice::mapping::kernel::compute_offsets
void compute_offsets(const VectorOffsetView< MemorySpace > src1, const VectorOffsetView< MemorySpace > src2, MatrixOffsetView< MemorySpace > dst, int nCluster)
Definition KokkosPUMKernels_Impl.hpp:280

precice::mapping::kernel::do_batched_lu
void do_batched_lu(int nCluster, int avgClusterSize, const MatrixOffsetView< MemorySpace > &matrixOffsets, VectorView< MemorySpace > matrices)
Definition KokkosPUMKernels_Impl.hpp:460

precice::mapping
contains data mapping from points to meshes.
Definition AxialGeoMultiscaleMapping.cpp:5

precice::mapping::offset_2d_type
ExecutionSpace::size_type offset_2d_type
Definition KokkosTypes.hpp:14

precice::mapping::PivotView
Kokkos::View< int *, MemorySpace > PivotView
Definition KokkosTypes.hpp:26

precice::mapping::MatrixOffsetView
Kokkos::View< offset_2d_type *, MemorySpace > MatrixOffsetView
Definition KokkosTypes.hpp:38

precice::mapping::VectorOffsetView
Kokkos::View< offset_1d_type *, MemorySpace > VectorOffsetView
Definition KokkosTypes.hpp:34

precice::mapping::GlobalIDView
Kokkos::View< VertexID *, MemorySpace > GlobalIDView
Definition KokkosTypes.hpp:30

precice::mapping::offset_1d_type
ExecutionSpace::size_type offset_1d_type
Definition KokkosTypes.hpp:13

precice::mapping::VectorView
Kokkos::View< double *, MemorySpace > VectorView
Definition KokkosTypes.hpp:23

precice::mapping::Polynomial
Polynomial
How to handle the polynomial?
Definition MappingConfigurationTypes.hpp:11

precice::mapping::Polynomial::SEPARATE
@ SEPARATE
Definition MappingConfigurationTypes.hpp:14

precice::mapping::Polynomial::ON
@ ON
Definition MappingConfigurationTypes.hpp:12

precice::mapping::MeshView
Kokkos::View< double **, Kokkos::LayoutRight, MemorySpace > MeshView
Definition KokkosTypes.hpp:19

precice::math::NUMERICAL_ZERO_DIFFERENCE
constexpr double NUMERICAL_ZERO_DIFFERENCE
Definition differences.hpp:8

precice::mesh::PtrMesh
std::shared_ptr< Mesh > PtrMesh
Definition SharedPointer.hpp:15

precice::mapping::MappingConfiguration::GinkgoParameter
Definition MappingConfiguration.hpp:40

precice::mapping::MappingConfiguration::GinkgoParameter::deviceId
unsigned int deviceId
Definition MappingConfiguration.hpp:48

precice::mapping::MappingConfiguration::GinkgoParameter::nThreads
unsigned int nThreads
Definition MappingConfiguration.hpp:49

precice::mapping::MappingConfiguration::GinkgoParameter::executor
std::string executor
Definition MappingConfiguration.hpp:41

precice::mapping::RadialBasisParameters
Wrapper struct that is used to transfer RBF-specific parameters to the GPU.
Definition BasisFunctions.hpp:51

precice::time::Sample
Definition Sample.hpp:15

precice::time::Sample::dataDims
int dataDims
The dimensionality of the data.
Definition Sample.hpp:60

precice::time::Sample::values
Eigen::VectorXd values
Definition Sample.hpp:64