opm-simulators
Loading...
Searching...
No Matches
ISTLSolverGPUISTL.hpp
1/*
2 Copyright 2025 Equinor ASA
3
4 This file is part of the Open Porous Media project (OPM).
5 OPM is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
9 OPM is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13 You should have received a copy of the GNU General Public License
14 along with OPM. If not, see <http://www.gnu.org/licenses/>.
15*/
16
17#ifndef OPM_ISTLSOLVERGPUISTL_HEADER_INCLUDED
18#define OPM_ISTLSOLVERGPUISTL_HEADER_INCLUDED
19
20#include <dune/istl/operators.hh>
21#include <memory>
22#include <optional>
23#include <opm/grid/utility/ElementChunks.hpp>
24#include <opm/simulators/linalg/AbstractISTLSolver.hpp>
25#include <opm/simulators/linalg/getQuasiImpesWeights.hpp>
26#include <opm/simulators/linalg/ISTLSolver.hpp>
27
28#if USE_HIP
29#include <opm/simulators/linalg/gpuistl_hip/GpuSparseMatrixWrapper.hpp>
30#include <opm/simulators/linalg/gpuistl_hip/GpuVector.hpp>
31#include <opm/simulators/linalg/gpuistl_hip/PinnedMemoryHolder.hpp>
32#else
33#include <opm/simulators/linalg/gpuistl/GpuSparseMatrixWrapper.hpp>
34#include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
35#include <opm/simulators/linalg/gpuistl/PinnedMemoryHolder.hpp>
36#endif
37
38#include <opm/simulators/linalg/ExtractParallelGridInformationToISTL.hpp>
39#include <opm/simulators/linalg/ParallelIstlInformation.hpp>
40#include <opm/simulators/linalg/findOverlapRowsAndColumns.hpp>
41#include <opm/simulators/linalg/gpuistl/detail/FlexibleSolverWrapper.hpp>
42#include <opm/simulators/linalg/printlinearsolverparameter.hpp>
43
44namespace Opm::gpuistl
45{
46
/// \brief GPU-backed linear solver implementing the AbstractISTLSolver
///        interface on top of the gpuistl (CUDA/HIP) backend.
///
/// NOTE(review): this extract elides several original lines; in particular
/// the alias declarations for SparseMatrixAdapter, GridView, Vector,
/// Simulator, ElementContext, ElementMapper, ThreadManager, GPUMatrix and
/// SolverType that are referenced below are not visible here -- consult the
/// full header before editing.
template <class TypeTag>
class ISTLSolverGPUISTL : public AbstractISTLSolver<GetPropType<TypeTag, Properties::SparseMatrixAdapter>,
                                                    GetPropType<TypeTag, Properties::GlobalEqVector>>
{
public:
    // Host-side (Dune/ISTL) matrix type taken from the adapter.
    using Matrix = typename SparseMatrixAdapter::IstlMatrix;
    // Chunked view over all grid elements, used for threaded weight computation.
    using ElementChunksType = Opm::ElementChunks<GridView, Dune::Partitions::All>;

    using real_type = typename Vector::field_type;

    // Device-side vector types.
    using GPUVector = Opm::gpuistl::GpuVector<real_type>;
    using GPUVectorInt = Opm::gpuistl::GpuVector<int>;

    // Index of the pressure variable within each unknown block (used by CPR).
    constexpr static std::size_t pressureIndex = GetPropType<TypeTag, Properties::Indices>::pressureSwitchIdx;


#if HAVE_MPI
    using CommunicationType = Dune::OwnerOverlapCopyCommunication<int, int>;
#else
    using CommunicationType = Dune::Communication<int>;
#endif

90
    /// \brief Construct a system solver.
    ///
    /// Sets up the MPI communication object, extracts parallel grid
    /// information and the overlap/interior row partition (parallel runs),
    /// then initializes the linear solver parameters and property tree.
    ///
    /// \param simulator   the simulator providing grid, state and model access.
    /// \param parameters  linear solver parameters (copied; init() may adjust them).
    /// \param forceSerial if true, behave as a serial run even under MPI.
    ///
    /// NOTE(review): several statements of this constructor appear to be
    /// missing from this extract (the setupPropertyTree() call is visibly
    /// truncated and a condition guarding the OPM_THROW below is elided);
    /// consult the original header before editing this body.
    ISTLSolverGPUISTL(const Simulator& simulator,
                      const FlowLinearSolverParameters& parameters,
                      bool forceSerial = false)
        : m_parameters(parameters)
        , m_simulator(simulator)
        , m_forceSerial(forceSerial)
        , m_element_chunks(simulator.vanguard().gridView(), Dune::Partitions::all, ThreadManager::maxThreads())
    {
#if HAVE_MPI
        m_comm = std::make_shared<CommunicationType>(simulator.vanguard().grid().comm());
        // Extract parallel grid information to populate index sets
        extractParallelGridInformationToISTL(simulator.vanguard().grid(), m_parallelInformation);
        // Set up element mapper manually
        ElementMapper elemMapper(simulator.vanguard().gridView(), Dune::mcmgElementLayout());
        // Overlap rows are needed for making overlap rows invalid in parallel mode
        Opm::detail::findOverlapAndInterior(simulator.vanguard().grid(), elemMapper, m_overlapRows, m_interiorRows);
        if (isParallel()) {
            const std::size_t size = simulator.vanguard().grid().leafGridView().size(0);
            // Copy parallel information to communication object (index sets and remote indices)
            Opm::detail::copyParValues(m_parallelInformation, size, *m_comm);
        }

#else
        m_comm = std::make_shared<CommunicationType>(simulator.gridView().comm());
#endif
        m_parameters.init(simulator.vanguard().eclState().getSimulationConfig().useCPR());
        m_propertyTree = setupPropertyTree(m_parameters,
            OPM_THROW(std::logic_error, "Well operators are currently not supported for the GPU backend. "
                "Use --matrix-add-well-contributions=true to add well contributions to the matrix instead.");
        }

        Opm::detail::printLinearSolverParameters(m_parameters, m_propertyTree, simulator.gridView().comm());
    }
134
137 explicit ISTLSolverGPUISTL(const Simulator& simulator)
138 : ISTLSolverGPUISTL(simulator, FlowLinearSolverParameters(), false)
139 {
140 }
141
147 void eraseMatrix() override
148 {
149 // Nothing, this is the same as the ISTLSolver
150 }
151
158 void setActiveSolver(int num) override
159 {
160 if (num != 0) {
161 OPM_THROW(std::logic_error, "Only one solver available for the GPU backend.");
162 }
163 }
164
170 int numAvailableSolvers() const override
171 {
172 return 1;
173 }
174
184 void prepare(const SparseMatrixAdapter& M, Vector& b) override
185 {
186 prepare(M.istlMatrix(), b);
187 }
188
    /// \brief Upload the assembled linear system to the GPU.
    ///
    /// On the first call this allocates the device-side matrix, vectors and
    /// solver (see updateMatrix()/updateRhs()); later calls only refresh the
    /// numerical values.
    ///
    /// \param M assembled sparse matrix (host side).
    /// \param b right-hand side vector (host side).
    void prepare(const Matrix& M, Vector& b) override
    {
        try {
            // In parallel runs, rows owned by other ranks ("overlap" rows)
            // are replaced by trivial equations so the local solve ignores them.
            // NOTE(review): M is received as const but is mutated here via
            // const_cast -- callers must not rely on M being left unchanged.
            if (isParallel() && !m_overlapRows.empty()) {
                Opm::detail::makeOverlapRowsInvalid(const_cast<Matrix&>(M), m_overlapRows);
            }
            updateMatrix(M);
            updateRhs(b);
        }
        // Macro closes the try block and rethrows any failure as a critical error.
        OPM_CATCH_AND_RETHROW_AS_CRITICAL_ERROR("This is likely due to a faulty linear solver JSON specification. "
                                                "Check for errors related to missing nodes.");
    }
210
216 void setResidual(Vector&) override
217 {
218 // Should be handled in prepare() instead.
219 }
220
229 void getResidual(Vector& b) const override
230 {
231 if (!m_rhs) {
232 OPM_THROW(std::runtime_error, "m_rhs not initialized, prepare(matrix, rhs); needs to be called");
233 }
234 m_rhs->copyToHost(b);
235 }
236
242 void setMatrix(const SparseMatrixAdapter&) override
243 {
244 // Should be handled in prepare() instead.
245 }
246
258 bool solve(Vector& x) override
259 {
260 // TODO: Write matrix to disk if needed
261 Dune::InverseOperatorResult result;
262 if (!m_matrix) {
263 OPM_THROW(std::runtime_error, "m_matrix not initialized, prepare(matrix, rhs); needs to be called");
264 }
265 if (!m_rhs) {
266 OPM_THROW(std::runtime_error, "m_rhs not initialized, prepare(matrix, rhs); needs to be called");
267 }
268 if (!m_gpuSolver) {
269 OPM_THROW(std::runtime_error,
270 "m_gpuFlexibleSolver not initialized, prepare(matrix, rhs); needs to be called");
271 }
272
273 if (!m_x) {
274 m_x = std::make_unique<GPUVector>(x);
275 m_pinnedXMemory = std::make_unique<PinnedMemoryHolder<real_type>>(
276 const_cast<real_type*>(&x[0][0]),
277 x.dim()
278 );
279 } else {
280 // copy from host to device using main stream and asynchronous transfer
281 m_x->copyFromHostAsync(x);
282 }
283 m_gpuSolver->apply(*m_x, *m_rhs, result);
284
285 m_x->copyToHost(x);
286
287 ++m_solveCount;
288
289 m_lastSeenIterations = result.iterations;
290 return checkConvergence(result);
291 }
292
298 int iterations() const override
299 {
300 return m_lastSeenIterations;
301 }
302
306 const CommunicationType* comm() const override
307 {
308 return m_comm.get();
309 }
310
316 bool isParallel() const
317 {
318 #if HAVE_MPI
319 return !m_forceSerial && m_comm->communicator().size() > 1;
320 #else
321 return false;
322 #endif
323 }
324
328 int getSolveCount() const override
329 {
330 return m_solveCount;
331 }
332
333private:
334 bool checkConvergence(const Dune::InverseOperatorResult& result) const
335 {
337 }
338
    /// \brief Build the callable that (re)computes CPR pressure weights on demand.
    ///
    /// Only meaningful when the configured preconditioner is one of the CPR
    /// variants ("cpr", "cprt", "cprw", "cprwt"); otherwise an empty
    /// std::function is returned.
    ///
    /// Depending on the "preconditioner.weight_type" property:
    ///  - "quasiimpes": weights are computed directly on the GPU, using
    ///    diagonal indices precomputed here once.
    ///  - "trueimpes" / "trueimpesanalytic": weights are computed on the CPU
    ///    into m_cpuWeights (whose storage is pinned here) and then copied
    ///    asynchronously to the GPU.
    ///
    /// \return callable yielding a reference to the device-side weights vector.
    /// \throws std::invalid_argument for an unknown weight type.
    std::function<GPUVector&()> getWeightsCalculator()
    {
        std::function<GPUVector&()> weightsCalculator;

        using namespace std::string_literals;

        auto preconditionerType = m_propertyTree.get("preconditioner.type"s, "cpr"s);
        // Make the preconditioner type lowercase for internal canonical representation
        std::transform(preconditionerType.begin(), preconditionerType.end(), preconditionerType.begin(), ::tolower);
        if (preconditionerType == "cpr" || preconditionerType == "cprt"
            || preconditionerType == "cprw" || preconditionerType == "cprwt") {
            // The *t variants request the transposed weight computation.
            const bool transpose = preconditionerType == "cprt" || preconditionerType == "cprwt";
            const auto weightsType = m_propertyTree.get("preconditioner.weight_type"s, "quasiimpes"s);
            if (weightsType == "quasiimpes") {
                // Device-only path: allocate the GPU weight vector directly.
                m_weights.emplace(m_matrix->N() * m_matrix->blockSize());
                // Pre-compute diagonal indices once when setting up the calculator
                auto diagonalIndices = Amg::precomputeDiagonalIndices(*m_matrix);
                m_diagonalIndices.emplace(diagonalIndices);

                if (transpose) {
                    weightsCalculator = [this]() -> GPUVector& {
                        Amg::getQuasiImpesWeights<real_type, true>(
                            *m_matrix, pressureIndex, *m_weights, *m_diagonalIndices);
                        return *m_weights;
                    };
                } else {
                    weightsCalculator = [this]() -> GPUVector& {
                        Amg::getQuasiImpesWeights<real_type, false>(
                            *m_matrix, pressureIndex, *m_weights, *m_diagonalIndices);
                        return *m_weights;
                    };
                }
            } else if (weightsType == "trueimpes") {
                // Create CPU vector for the weights and initialize GPU vector
                m_cpuWeights.resize(m_matrix->N());
                // Pin the CPU weight storage so the recurring host-to-device
                // copies in the lambda below can be fast and asynchronous.
                m_pinnedWeightsMemory = std::make_unique<PinnedMemoryHolder<real_type>>(
                    const_cast<real_type*>(&m_cpuWeights[0][0]), m_cpuWeights.dim());
                m_weights.emplace(m_cpuWeights);

                const bool enableThreadParallel = m_parameters.cpr_weights_thread_parallel_;
                // CPU implementation wrapped for GPU
                weightsCalculator = [this, enableThreadParallel]() -> GPUVector& {
                    // Use the CPU implementation to calculate the weights
                    ElementContext elemCtx(m_simulator);
                    Amg::getTrueImpesWeights(pressureIndex,
                                             m_cpuWeights,
                                             elemCtx, m_simulator.model(),
                                             m_element_chunks,
                                             enableThreadParallel);

                    // Copy CPU vector to GPU vector using main stream and asynchronous transfer
                    m_weights->copyFromHostAsync(m_cpuWeights);
                    return *m_weights;
                };
            } else if (weightsType == "trueimpesanalytic") {
                // Same setup as "trueimpes" above, but using the analytic
                // variant of the weight computation.
                // Create CPU vector for the weights and initialize GPU vector
                m_cpuWeights.resize(m_matrix->N());
                m_pinnedWeightsMemory = std::make_unique<PinnedMemoryHolder<real_type>>(
                    const_cast<real_type*>(&m_cpuWeights[0][0]), m_cpuWeights.dim());
                m_weights.emplace(m_cpuWeights);
                const bool enableThreadParallel = m_parameters.cpr_weights_thread_parallel_;
                // CPU implementation wrapped for GPU
                weightsCalculator = [this, enableThreadParallel]() -> GPUVector& {
                    // Use the CPU implementation to calculate the weights
                    ElementContext elemCtx(m_simulator);
                    Amg::getTrueImpesWeightsAnalytic(pressureIndex,
                                                     m_cpuWeights,
                                                     elemCtx, m_simulator.model(),
                                                     m_element_chunks,
                                                     enableThreadParallel);

                    // Copy CPU vector to GPU vector using main stream and asynchronous transfer
                    m_weights->copyFromHostAsync(m_cpuWeights);
                    return *m_weights;
                };
            } else {
                OPM_THROW(std::invalid_argument,
                          "Weights type " + weightsType
                          + " not implemented for cpr."
                          " Please use quasiimpes, trueimpes or trueimpesanalytic.");
            }
        }
        return weightsCalculator;
    }
424
425 void updateMatrix(const Matrix& M)
426 {
427 if (!m_matrix) {
428 m_matrix.reset(new auto(GPUMatrix::fromMatrix(M)));
429 m_pinnedMatrixMemory = std::make_unique<PinnedMemoryHolder<real_type>>(
430 const_cast<real_type*>(&M[0][0][0][0]),
431 M.nonzeroes() * M[0][0].N() * M[0][0].M()
432 );
433 std::function<GPUVector&()> weightsCalculator = getWeightsCalculator();
434 m_gpuSolver = std::make_unique<SolverType>(
435 *m_matrix, isParallel(), m_propertyTree, pressureIndex, weightsCalculator, m_forceSerial, m_comm.get());
436 } else {
437 m_matrix->updateNonzeroValues(M, true);
438 m_gpuSolver->update();
439 }
440 }
441
442 void updateRhs(const Vector& b)
443 {
444 if (!m_rhs) {
445 m_rhs = std::make_unique<GPUVector>(b);
446 m_pinnedRhsMemory = std::make_unique<PinnedMemoryHolder<real_type>>(
447 const_cast<real_type*>(&b[0][0]),
448 b.dim()
449 );
450 } else {
451 // copy from host to device using main stream and asynchronous transfer
452 m_rhs->copyFromHostAsync(b);
453 }
454 }
455
    FlowLinearSolverParameters m_parameters; //!< Linear solver configuration (copy; adjusted by init() in the ctor).
    const Simulator& m_simulator; //!< Simulator providing grid, model and state access.
    const bool m_forceSerial; //!< If true, behave as a serial run even under MPI.
    PropertyTree m_propertyTree; //!< Solver/preconditioner configuration tree.
    ElementChunksType m_element_chunks; //!< Element partition used for threaded weight computation.

    int m_lastSeenIterations = 0; //!< Iteration count reported by the most recent solve().
    int m_solveCount = 0; //!< Number of completed solve() calls.

    std::unique_ptr<GPUMatrix> m_matrix; //!< Device-side system matrix.

    std::unique_ptr<SolverType> m_gpuSolver; //!< Flexible solver operating on the device data.

    std::unique_ptr<GPUVector> m_rhs; //!< Device-side right-hand side.
    std::unique_ptr<GPUVector> m_x; //!< Device-side solution vector.

    // Holders keeping the corresponding host-side buffers pinned (page-locked)
    // for fast asynchronous host<->device transfers; released on destruction.
    std::unique_ptr<PinnedMemoryHolder<real_type>> m_pinnedMatrixMemory;
    std::unique_ptr<PinnedMemoryHolder<real_type>> m_pinnedRhsMemory;
    std::unique_ptr<PinnedMemoryHolder<real_type>> m_pinnedXMemory;
    std::unique_ptr<PinnedMemoryHolder<real_type>> m_pinnedWeightsMemory;

    Vector m_cpuWeights; //!< Host-side CPR weights (trueimpes* paths only).
    std::optional<GPUVector> m_weights; //!< Device-side CPR weights.
    std::optional<GPUVectorInt> m_diagonalIndices; //!< Precomputed diagonal positions (quasiimpes path).

    std::shared_ptr<CommunicationType> m_comm; //!< Communication object (index sets, remote indices).
    std::vector<int> m_interiorRows; //!< Rows owned by this rank.
    std::vector<int> m_overlapRows; //!< Rows owned by other ranks; made trivial before solving.
    std::any m_parallelInformation; //!< Type-erased parallel grid information.

486};
487} // namespace Opm::gpuistl
488
489#endif
Abstract interface for ISTL solvers.
Definition AbstractISTLSolver.hpp:45
static bool checkConvergence(const Dune::InverseOperatorResult &result, const FlowLinearSolverParameters &parameters)
Check the convergence of the linear solver.
Definition AbstractISTLSolver.hpp:192
T get(const std::string &key) const
Retrieve property value given hierarchical property key.
Definition PropertyTree.cpp:59
The GpuSparseMatrixWrapper Checks CUDA/HIP version and dispatches a version either using the old or t...
Definition GpuSparseMatrixWrapper.hpp:62
static GpuSparseMatrixWrapper< real_type, false > fromMatrix(const MatrixType &matrix, bool copyNonZeroElementsDirectly=false)
Definition GpuSparseMatrixWrapper.hpp:183
Definition gpu_type_detection.hpp:30
bool isParallel() const
Check if we are running in parallel mode.
Definition ISTLSolverGPUISTL.hpp:316
ISTLSolverGPUISTL(const Simulator &simulator, const FlowLinearSolverParameters &parameters, bool forceSerial=false)
Construct a system solver.
Definition ISTLSolverGPUISTL.hpp:98
void setMatrix(const SparseMatrixAdapter &) override
Set the matrix for the solver.
Definition ISTLSolverGPUISTL.hpp:242
void prepare(const SparseMatrixAdapter &M, Vector &b) override
Prepare the solver with the given matrix and right-hand side vector.
Definition ISTLSolverGPUISTL.hpp:184
const CommunicationType * comm() const override
Get the communication object used by the solver.
Definition ISTLSolverGPUISTL.hpp:306
int getSolveCount() const override
Get the count of how many times the solver has been called.
Definition ISTLSolverGPUISTL.hpp:328
void setResidual(Vector &) override
Set the residual vector.
Definition ISTLSolverGPUISTL.hpp:216
void getResidual(Vector &b) const override
Get the residual vector.
Definition ISTLSolverGPUISTL.hpp:229
bool solve(Vector &x) override
Solve the system of linear equations Ax = b.
Definition ISTLSolverGPUISTL.hpp:258
ISTLSolverGPUISTL(const Simulator &simulator)
Construct a system solver.
Definition ISTLSolverGPUISTL.hpp:137
int iterations() const override
Definition ISTLSolverGPUISTL.hpp:298
void eraseMatrix() override
Signals that the memory for the matrix internally in the solver could be erased.
Definition ISTLSolverGPUISTL.hpp:147
int numAvailableSolvers() const override
Get the number of available solvers.
Definition ISTLSolverGPUISTL.hpp:170
void setActiveSolver(int num) override
Set the active solver by its index.
Definition ISTLSolverGPUISTL.hpp:158
void prepare(const Matrix &M, Vector &b) override
Prepare the solver with the given matrix and right-hand side vector.
Definition ISTLSolverGPUISTL.hpp:198
FlexibleSolverWrapper is compilational trick to reduce compile time overhead.
Definition FlexibleSolverWrapper.hpp:45
A small, fixed‑dimension MiniVector class backed by std::array that can be used in both host and CUDA...
Definition AmgxInterface.hpp:38
typename Properties::Detail::GetPropImpl< TypeTag, Property >::type::type GetPropType
get the type alias defined in the property (equivalent to old macro GET_PROP_TYPE(....
Definition propertysystem.hh:233
PropertyTree setupPropertyTree(FlowLinearSolverParameters p, bool linearSolverMaxIterSet, bool linearSolverReductionSet, bool tpsaSetup)
Set up a property tree intended for FlexibleSolver by either reading the tree from a JSON file or cre...
Definition setupPropertyTree.cpp:182
void extractParallelGridInformationToISTL(const Dune::CpGrid &, std::any &)
Extracts the information about the data decomposition from the grid for dune-istl.
Definition ExtractParallelGridInformationToISTL.cpp:46
bool IsSet(bool errorIfNotRegistered=true)
Returns true if a parameter has been specified at runtime, false otherwise.
Definition parametersystem.hpp:270
auto Get(bool errorIfNotRegistered=true)
Retrieve a runtime parameter.
Definition parametersystem.hpp:187
This class carries all parameters for the NewtonIterationBlackoilInterleaved class.
Definition FlowLinearSolverParameters.hpp:98