68 template <
typename IndexContainer>
70 const mesh::Mesh &outputMesh,
const IndexContainer &outputIDs, std::vector<bool> deadAxis,
Polynomial polynomial,
91 std::shared_ptr<gko::Executor>
_hostExecutor = gko::ReferenceExecutor::create();
171 const mesh::Mesh &outputMesh,
const IndexContainer &outputIDs, std::vector<bool> deadAxis,
Polynomial polynomial,
179 PRECICE_INFO(
"Using Ginkgo solver {} on executor {} with max. iterations {} and residual reduction {}",
185#ifdef PRECICE_WITH_OPENMP
192 PRECICE_CHECK(!(RADIAL_BASIS_FUNCTION_T::isStrictlyPositiveDefinite() && polynomial ==
Polynomial::ON),
"The integrated polynomial (polynomial=\"on\") is not supported for the selected radial-basis function. Please select another radial-basis function or change the polynomial configuration.");
194 std::array<bool, 3> activeAxis({{
false,
false,
false}});
195 std::transform(deadAxis.begin(), deadAxis.end(), activeAxis.begin(), [](
const auto ax) { return !ax; });
197 const std::size_t deadDimensions = std::count(activeAxis.begin(), activeAxis.end(),
false);
198 const std::size_t dimensions = 3;
199 const std::size_t polyparams = polynomial ==
Polynomial::ON ? 1 + dimensions - deadDimensions : 0;
202 const auto inputSize = inputIDs.size();
203 const auto outputSize = outputIDs.size();
204 const auto n = inputSize + polyparams;
209 const std::size_t inputMeshSize = inputMesh.
nVertices();
210 const std::size_t outputMeshSize = outputMesh.
nVertices();
219 _allocCopyEvent.stop();
226 std::size_t inputVerticesM, inputVerticesN, outputVerticesM, outputVerticesN;
228 if (
"cuda-executor" == ginkgoParameter.
executor ||
"hip-executor" == ginkgoParameter.
executor) {
229 inputVerticesM = meshDim;
230 inputVerticesN = inputMeshSize;
231 outputVerticesM = meshDim;
232 outputVerticesN = outputMeshSize;
234 inputVerticesM = inputMeshSize;
235 inputVerticesN = meshDim;
236 outputVerticesM = outputMeshSize;
237 outputVerticesN = meshDim;
240 auto inputVertices = gko::share(GinkgoMatrix::create(
_hostExecutor, gko::dim<2>{inputVerticesM, inputVerticesN}));
241 auto outputVertices = gko::share(GinkgoMatrix::create(
_hostExecutor, gko::dim<2>{outputVerticesM, outputVerticesN}));
242 for (std::size_t i = 0; i < inputMeshSize; ++i) {
243 for (std::size_t j = 0; j < meshDim; ++j) {
244 if (
"cuda-executor" == ginkgoParameter.
executor ||
"hip-executor" == ginkgoParameter.
executor) {
245 inputVertices->at(j, i) = inputMesh.
vertex(i).
coord(j);
247 inputVertices->at(i, j) = inputMesh.
vertex(i).
coord(j);
251 for (std::size_t i = 0; i < outputMeshSize; ++i) {
252 for (std::size_t j = 0; j < meshDim; ++j) {
253 if (
"cuda-executor" == ginkgoParameter.
executor ||
"hip-executor" == ginkgoParameter.
executor) {
254 outputVertices->at(j, i) = outputMesh.
vertex(i).
coord(j);
256 outputVertices->at(i, j) = outputMesh.
vertex(i).
coord(j);
261 _allocCopyEvent.start();
265 inputVertices->clear();
266 outputVertices->clear();
273 _allocCopyEvent.stop();
276 const unsigned int separatePolyParams = 4 - std::count(activeAxis.begin(), activeAxis.end(),
false);
277 _allocCopyEvent.start();
280 _allocCopyEvent.stop();
288 _assemblyEvent.
stop();
294 _allocCopyEvent.start();
299 _allocCopyEvent.stop();
303 auto polynomialSolverFactory = cg::build()
304 .with_criteria(gko::stop::Iteration::build()
305 .with_max_iters(
static_cast<std::size_t
>(40))
307 gko::stop::ResidualNorm<>::build()
308 .with_reduction_factor(1e-6)
309 .with_baseline(gko::stop::mode::initial_resnorm)
320 basisFunction.getFunctionParameters(),
Polynomial::ON == polynomial,
323 systemMatrixAssemblyEvent.
stop();
327 basisFunction.getFunctionParameters(),
Polynomial::ON == polynomial, polyparams);
331 outputMatrixAssemblyEvent.
stop();
332 _assemblyEvent.
stop();
334 dInputVertices->clear();
335 dOutputVertices->clear();
343 .with_baseline(gko::stop::mode::initial_resnorm)
348 .with_reduction_factor(1e-30)
349 .with_baseline(gko::stop::mode::absolute)
357 return cg::build().with_preconditioner(jacobi::build().with_max_block_size(ginkgoParameter.
jacobiBlockSize).on(executor));
359 return cg::build().with_preconditioner(cholesky::build().on(executor));
363 auto solverFactory = solverFactoryWithPreconditioner
369 auto solverFactory = cg::build()
381 return gmres::build().with_preconditioner(jacobi::build().with_max_block_size(ginkgoParameter.
jacobiBlockSize).on(executor));
383 return gmres::build().with_preconditioner(cholesky::build().on(executor));
387 auto solverFactory = solverFactoryWithPreconditioner
393 auto solverFactory = gmres::build()
405 if (
"cuda-executor" == ginkgoParameter.
executor) {
406#ifdef PRECICE_WITH_CUDA
410 }
else if (
"hip-executor" == ginkgoParameter.
executor) {
411#ifdef PRECICE_WITH_HIP
538 Eigen::MatrixXd outmatrix(
getInputSize(), rhsValues.cols());
540 for (
int col = 0; col < rhsValues.cols(); col++) {
541 auto rhs = gko::share(GinkgoVector::create(
_hostExecutor, gko::dim<2>{
static_cast<unsigned long>(rhsValues.rows()), 1}));
543 for (Eigen::Index i = 0; i < rhsValues.rows(); ++i) {
544 rhs->at(i, 0) = rhsValues(i, col);
550 _allocCopyEvent.
stop();
552 auto dAu = gko::share(GinkgoVector::create(
_deviceExecutor, gko::dim<2>{
_matrixA->get_size()[1], dRhs->get_size()[1]}));
554 _matrixA->transpose()->apply(dRhs, dAu);
558#ifdef PRECICE_WITH_CUDA
562#ifdef PRECICE_WITH_HIP
575 auto dEpsilon = gko::share(GinkgoVector::create(
_deviceExecutor, gko::dim<2>{
_matrixV->get_size()[1], dRhs->get_size()[1]}));
576 _matrixV->transpose()->apply(dRhs, dEpsilon);
579 _matrixQ->transpose()->apply(dOutput, dTmp);
592 auto polynomialSolverFactory = cg::build()
593 .with_criteria(gko::stop::Iteration::build()
594 .with_max_iters(
static_cast<std::size_t
>(40))
596 gko::stop::ResidualNorm<>::build()
597 .with_reduction_factor(1e-6)
598 .with_baseline(gko::stop::mode::initial_resnorm)
623 _allocCopyEvent.
start();
625 _allocCopyEvent.
stop();
627 for (Eigen::Index i = 0; i < outmatrix.rows(); ++i) {
628 outmatrix(i, col) = output->at(i, 0);