25 #include <opm/simulators/linalg/bda/BlockedMatrix.hpp>
26 #include <opm/simulators/linalg/bda/ILUReorder.hpp>
28 #include <opm/simulators/linalg/bda/opencl.hpp>
29 #include <opm/simulators/linalg/bda/openclKernels.hpp>
30 #include <opm/simulators/linalg/bda/ChowPatelIlu.hpp>
41 #define CHOW_PATEL_GPU 1
49 template <
unsigned int block_size>
// Host-side data members of the preconditioner. All smart pointers start out
// as nullptr.
// LUmat is uniquely owned by this object.
// NOTE(review): presumably holds the combined LU factorisation — confirm.
58 std::unique_ptr<BlockedMatrix<block_size> > LUmat =
nullptr;
// rmat is the only matrix held through shared ownership.
// NOTE(review): presumably the reordered input matrix — confirm.
59 std::shared_ptr<BlockedMatrix<block_size> > rmat =
nullptr;
// Lmat and Umat are declared in one statement and are both uniquely owned.
// NOTE(review): presumably the separate lower/upper factors — confirm.
61 std::unique_ptr<BlockedMatrix<block_size> > Lmat =
nullptr, Umat =
nullptr;
// Raw pointer initialised to nullptr; who allocates and frees it is not
// visible in this listing.
// NOTE(review): the same identifier is declared again as a cl::Buffer further
// down; the scope or preprocessor guard that separates the two is not visible.
63 double *invDiagVals =
nullptr;
// Integer index arrays.
// NOTE(review): meanings inferred from names only (diagonal positions, rows
// per colour/level and their prefix sums) — confirm against the implementation.
64 std::vector<int> diagIndex;
65 std::vector<int> rowsPerColor;
66 std::vector<int> rowsPerColorPrefix;
// The data() pointers of both vectors are returned by accessors later in this
// listing.
// NOTE(review): presumably index maps to/from the reordered numbering — confirm.
67 std::vector<int> toOrder, fromOrder;
// NOTE(review): presumably used with std::call_once so the sparsity pattern is
// uploaded a single time — confirm.
70 std::once_flag pattern_uploaded;
// Reordering strategy; a parameter of the same type and name is taken by the
// constructor declared below.
72 ILUReorder opencl_ilu_reorder;
// OpenCL buffer objects.
// NOTE(review): invDiagVals and rowsPerColor reuse names of host-side members
// declared earlier; the enclosing scope that keeps them apart (nested struct or
// preprocessor guard) is not visible in this listing — confirm.
75 cl::Buffer invDiagVals;
77 cl::Buffer rowsPerColor;
// Three buffers per matrix: values, column data and row data.
// NOTE(review): presumably blocked-CSR arrays for L, U and the combined LU
// matrix respectively — confirm.
79 cl::Buffer Lvals, Lcols, Lrows;
80 cl::Buffer Uvals, Ucols, Urows;
82 cl::Buffer LUvals, LUcols, LUrows;
// Raw pointers to OpenCL kernel functors. Ownership is not visible here;
// pointers with matching names appear as parameters of a function later in
// this listing, so they are presumably supplied by the caller — confirm.
// The two apply kernel types are aliases defined outside this listing.
86 ilu_apply1_kernel_type *ILU_apply1;
87 ilu_apply2_kernel_type *ILU_apply2;
// Functor taking one buffer, a double and an unsigned int.
88 cl::KernelFunctor<cl::Buffer&, const double, const unsigned int> *scale;
// Functor taking two unsigned ints, five buffers, an int and a local-memory
// argument.
89 cl::KernelFunctor<
const unsigned int,
const unsigned int, cl::Buffer&, cl::Buffer&, cl::Buffer&,
90 cl::Buffer&, cl::Buffer&,
91 const int, cl::LocalSpaceArg> *ilu_decomp;
// Raw pointer to a command queue; setOpenCLQueue() takes a pointer of the same
// type. Ownership is not visible in this listing.
95 cl::CommandQueue *queue;
// OpenCL event objects.
// NOTE(review): presumably collected from enqueued operations for later
// waiting — confirm.
96 std::vector<cl::Event> events;
// Kernel launch parameters, all initialised to 0.
// NOTE(review): these are signed ints while setKernelParameters() takes
// `const unsigned int` arguments of the same names — confirm the conversion
// is intended.
98 int work_group_size = 0;
99 int total_work_items = 0;
100 int lmem_per_work_group = 0;
// Declaration only; takes no arguments and returns nothing.
// NOTE(review): presumably runs the Chow-Patel variant of the ILU
// decomposition on the stored matrices — confirm against the definition.
104 void chow_patel_decomposition();
/// Constructor.
/// @param opencl_ilu_reorder  reordering strategy, passed by value
/// @param verbosity           integer verbosity level
///                            (NOTE(review): accepted range not visible here)
108 BILU0(ILUReorder opencl_ilu_reorder,
int verbosity);
/// Operates on two OpenCL buffers, both passed by non-const reference.
/// NOTE(review): presumably applies the preconditioner as x = prec(y), i.e. y
/// is the input and x the output — confirm against the definition.
119 void apply(cl::Buffer& y, cl::Buffer& x);
/// @param context  raw pointer to an OpenCL context
/// NOTE(review): whether the pointer is stored and must outlive this object is
/// not visible here — confirm.
121 void setOpenCLContext(cl::Context *context);
/// @param queue  raw pointer to an OpenCL command queue
/// NOTE(review): presumably stored in the `queue` member without taking
/// ownership — confirm.
122 void setOpenCLQueue(cl::CommandQueue *queue);
/// Takes three unsigned launch parameters.
/// NOTE(review): presumably assigned to the int members of the same names —
/// confirm against the definition.
/// @param work_group_size      NOTE(review): presumably work-items per work-group
/// @param total_work_items     NOTE(review): presumably the global work size
/// @param lmem_per_work_group  NOTE(review): presumably local memory per
///                             work-group; unit not visible here
123 void setKernelParameters(
const unsigned int work_group_size,
const unsigned int total_work_items,
const unsigned int lmem_per_work_group);
125 cl::KernelFunctor<cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, const unsigned int, cl::LocalSpaceArg> *ILU_apply1,
126 cl::KernelFunctor<cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, const unsigned int, cl::LocalSpaceArg> *ILU_apply2,
127 cl::KernelFunctor<cl::Buffer&, const double, const unsigned int> *scale,
128 cl::KernelFunctor<const unsigned int, const unsigned int, cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, const int, cl::LocalSpaceArg> *ilu_decomp
133 return toOrder.data();
138 return fromOrder.data();
This class implements a Blocked ILU0 preconditioner. The decomposition is done on CPU,...
Definition: BILU0.hpp:51
This struct resembles a blocked csr matrix, like Dune::BCRSMatrix.
Definition: BlockedMatrix.hpp:36
Definition: ChowPatelIlu.hpp:37