My Project
openclSolverBackend.hpp
1 /*
2  Copyright 2020 Equinor ASA
3 
4  This file is part of the Open Porous Media project (OPM).
5 
6  OPM is free software: you can redistribute it and/or modify
7  it under the terms of the GNU General Public License as published by
8  the Free Software Foundation, either version 3 of the License, or
9  (at your option) any later version.
10 
11  OPM is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  GNU General Public License for more details.
15 
16  You should have received a copy of the GNU General Public License
17  along with OPM. If not, see <http://www.gnu.org/licenses/>.
18 */
19 
20 #ifndef OPM_OPENCLSOLVER_BACKEND_HEADER_INCLUDED
21 #define OPM_OPENCLSOLVER_BACKEND_HEADER_INCLUDED
22 
23 #include <opm/simulators/linalg/bda/opencl.hpp>
24 #include <opm/simulators/linalg/bda/openclKernels.hpp>
25 #include <opm/simulators/linalg/bda/BdaResult.hpp>
26 #include <opm/simulators/linalg/bda/BdaSolver.hpp>
27 #include <opm/simulators/linalg/bda/ILUReorder.hpp>
28 #include <opm/simulators/linalg/bda/WellContributions.hpp>
29 #include <opm/simulators/linalg/bda/BILU0.hpp>
30 
31 #include <tuple>
32 
33 namespace bda
34 {
35 
37 template <unsigned int block_size>
38 class openclSolverBackend : public BdaSolver<block_size>
39 {
42 
43  using Base::N;
44  using Base::Nb;
45  using Base::nnz;
46  using Base::nnzb;
47  using Base::verbosity;
48  using Base::platformID;
49  using Base::deviceID;
50  using Base::maxit;
51  using Base::tolerance;
52  using Base::initialized;
53 
54 private:
55  double *rb = nullptr; // reordered b vector, if the matrix is reordered, rb is newly allocated, otherwise it just points to b
56  double *vals_contiguous = nullptr; // only used if COPY_ROW_BY_ROW is true in openclSolverBackend.cpp
57 
58  // OpenCL variables must be reusable, they are initialized in initialize()
59  cl::Buffer d_Avals, d_Acols, d_Arows; // (reordered) matrix in BSR format on GPU
60  cl::Buffer d_x, d_b, d_rb, d_r, d_rw, d_p; // vectors, used during linear solve
61  cl::Buffer d_pw, d_s, d_t, d_v; // vectors, used during linear solve
62  cl::Buffer d_tmp; // used as tmp GPU buffer for dot() and norm()
63  cl::Buffer d_toOrder; // only used when reordering is used
64  double *tmp = nullptr; // used as tmp CPU buffer for dot() and norm()
65 
66  // shared pointers are also passed to other objects
67  std::vector<cl::Device> devices;
68  std::unique_ptr<cl::KernelFunctor<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::LocalSpaceArg> > dot_k;
69  std::unique_ptr<cl::KernelFunctor<cl::Buffer&, cl::Buffer&, const unsigned int, cl::LocalSpaceArg> > norm_k;
70  std::unique_ptr<cl::KernelFunctor<cl::Buffer&, const double, cl::Buffer&, const unsigned int> > axpy_k;
71  std::unique_ptr<cl::KernelFunctor<cl::Buffer&, const double, const unsigned int> > scale_k;
72  std::unique_ptr<cl::KernelFunctor<cl::Buffer&, cl::Buffer&, cl::Buffer&, const double, const double, const unsigned int> > custom_k;
73  std::unique_ptr<spmv_kernel_type> spmv_blocked_k;
74  std::shared_ptr<ilu_apply1_kernel_type> ILU_apply1_k;
75  std::shared_ptr<ilu_apply2_kernel_type> ILU_apply2_k;
76  std::shared_ptr<stdwell_apply_kernel_type> stdwell_apply_k;
77  std::shared_ptr<stdwell_apply_no_reorder_kernel_type> stdwell_apply_no_reorder_k;
78  std::shared_ptr<ilu_decomp_kernel_type> ilu_decomp_k;
79 
80  Preconditioner *prec = nullptr; // only supported preconditioner is BILU0
81  int *toOrder = nullptr, *fromOrder = nullptr; // BILU0 reorders rows of the matrix via these mappings
82  bool analysis_done = false;
83  std::unique_ptr<BlockedMatrix<block_size> > mat = nullptr; // original matrix
84  BlockedMatrix<block_size> *rmat = nullptr; // reordered matrix (or original if no reordering), used for spmv
85  ILUReorder opencl_ilu_reorder; // reordering strategy
86  std::vector<cl::Event> events;
87  cl_int err;
88 
93  unsigned int ceilDivision(const unsigned int A, const unsigned int B);
94 
100  double dot_w(cl::Buffer in1, cl::Buffer in2, cl::Buffer out);
101 
107  double norm_w(cl::Buffer in, cl::Buffer out);
108 
113  void axpy_w(cl::Buffer in, const double a, cl::Buffer out);
114 
118  void scale_w(cl::Buffer vec, const double a);
119 
127  void custom_w(cl::Buffer p, cl::Buffer v, cl::Buffer r, const double omega, const double beta);
128 
137  void spmv_blocked_w(cl::Buffer vals, cl::Buffer cols, cl::Buffer rows, cl::Buffer x, cl::Buffer b);
138 
142  void gpu_pbicgstab(WellContributions& wellContribs, BdaResult& res);
143 
151  void initialize(int N, int nnz, int dim, double *vals, int *rows, int *cols);
152 
154  void get_opencl_kernels();
155 
157  void finalize();
158 
160  void copy_system_to_gpu();
161 
166  void update_system(double *vals, double *b, WellContributions &wellContribs);
167 
169  void update_system_on_gpu();
170 
173  bool analyse_matrix();
174 
177  bool create_preconditioner();
178 
182  void solve_system(WellContributions &wellContribs, BdaResult &res);
183 
184 public:
185  std::shared_ptr<cl::Context> context;
186  std::shared_ptr<cl::CommandQueue> queue;
187 
195  openclSolverBackend(int linear_solver_verbosity, int maxit, double tolerance, unsigned int platformID, unsigned int deviceID, ILUReorder opencl_ilu_reorder);
196 
199 
215  SolverStatus solve_system(int N, int nnz, int dim, double *vals, int *rows, int *cols, double *b, WellContributions& wellContribs, BdaResult &res) override;
216 
219  void get_result(double *x) override;
220 
221 }; // end class openclSolverBackend
222 
223 } // namespace bda
224 
225 #endif
226 
227 
This class serves to eliminate the need to include the WellContributions into the matrix (with –matri...
Definition: WellContributions.hpp:61
This class implementa a Blocked ILU0 preconditioner The decomposition is done on CPU,...
Definition: BILU0.hpp:51
This class is based on InverseOperatorResult struct from dune/istl/solver.hh It is needed to prevent ...
Definition: BdaResult.hpp:29
This class serves to simplify choosing between different backend solvers, such as cusparseSolver and ...
Definition: BdaSolver.hpp:43
This struct resembles a blocked csr matrix, like Dune::BCRSMatrix.
Definition: BlockedMatrix.hpp:36
This class implements a opencl-based ilu0-bicgstab solver on GPU.
Definition: openclSolverBackend.hpp:39
~openclSolverBackend()
Destroy a openclSolver, and free memory.
Definition: openclSolverBackend.cpp:191
openclSolverBackend(int linear_solver_verbosity, int maxit, double tolerance, unsigned int platformID, unsigned int deviceID, ILUReorder opencl_ilu_reorder)
Construct a openclSolver.
Definition: openclSolverBackend.cpp:45
void get_result(double *x) override
Get result after linear solve, and peform postprocessing if necessary.
Definition: openclSolverBackend.cpp:767