20 #ifndef OPM_FPGASOLVER_BACKEND_HEADER_INCLUDED
21 #define OPM_FPGASOLVER_BACKEND_HEADER_INCLUDED
23 #include <opm/simulators/linalg/bda/BdaSolver.hpp>
24 #include <opm/simulators/linalg/bda/FPGABILU0.hpp>
26 #include <linearalgebra/ilu0bicgstab/xilinx/src/sda_app/bicgstab_solver_config.hpp>
27 #include <linearalgebra/ilu0bicgstab/xilinx/src/sda_app/common/opencl_lib.hpp>
28 #include <linearalgebra/ilu0bicgstab/xilinx/src/sda_app/common/fpga_functions_bicgstab.hpp>
34 template <
unsigned int block_size>
// Using-declarations that make three members of Base nameable without
// qualification inside this class.
// NOTE(review): Base itself is declared outside this excerpt — presumably an
// alias for the BdaSolver<block_size> base class; confirm.
44 using Base::verbosity;
46 using Base::tolerance;
47 using Base::initialized;
// Two raw int pointers, both nullptr until assigned.
// NOTE(review): the names suggest reordering permutation arrays — confirm
// in the implementation who allocates and frees them.
52 int *fromOrder =
nullptr, *toOrder =
nullptr;
// State flags, both false by default.
// NOTE(review): presumably analysis_done is set after analyse_matrix()
// succeeds and level_scheduling selects the reordering strategy — confirm.
53 bool analysis_done =
false;
54 bool level_scheduling =
false;
// Uniquely-owned blocked matrix; empty (nullptr) until assigned.
57 std::unique_ptr<BlockedMatrix<block_size> > mat =
nullptr;
// Uniquely-owned preconditioner; empty (nullptr) until assigned.
// NOTE(review): Preconditioner is an alias/type declared outside this excerpt.
59 std::unique_ptr<Preconditioner> prec =
nullptr;
// Raw pointer tables, nullptr by default.
// NOTE(review): presumably filled with the preconditioner's processed
// arrays and their sizes — confirm against the .cpp.
62 void **processedPointers =
nullptr;
63 int *processedSizes =
nullptr;
// Counter starting at 0 and a switch that defaults to true.
// NOTE(review): presumably the number of FPGA solve calls so far and
// whether per-call performance metrics are recorded — confirm.
65 unsigned int fpga_calls = 0;
66 bool perf_call_enabled =
true;
// Fields of perf_call_metrics_t; every field has a zero/false default.
// The opening line of this struct is not visible in this excerpt.
// NOTE(review): the s_ prefix presumably means "seconds" and the n_ prefix
// a count; h2d/d2h presumably mean host-to-device / device-to-host
// transfers — confirm against the code that fills these in.
70 double s_preconditioner_create = 0.0;
71 double s_analysis = 0.0;
72 double s_reorder = 0.0;
73 double s_mem_setup = 0.0;
74 double s_mem_h2d = 0.0;
75 double s_kernel_exec = 0.0;
76 unsigned int n_kernel_exec_cycles = 0;
// Note: the iteration count is stored as a float, not an integer.
77 float n_kernel_exec_iters = 0.0;
78 double s_mem_d2h = 0.0;
80 double s_postprocess = 0.0;
// Convergence outcome of the call.
// NOTE(review): the bit layout of converged_flags is not shown here.
81 bool converged =
false;
82 unsigned int converged_flags = 0;
83 } perf_call_metrics_t;
// Fields of perf_total_metrics_t. The opening line of this struct is not
// visible in this excerpt.
// Unlike perf_call_metrics_t, none of these fields has an in-class
// initializer, so they hold indeterminate values until explicitly assigned.
// NOTE(review): presumably totals plus min/max/avg aggregates over the
// per-call metrics, filled by generate_statistics() — confirm.
86 double s_initialization;
87 double s_preconditioner_setup;
88 double s_preconditioner_create;
89 double s_preconditioner_create_min,s_preconditioner_create_max,s_preconditioner_create_avg;
91 double s_analysis_min,s_analysis_max,s_analysis_avg;
93 double s_reorder_min,s_reorder_max,s_reorder_avg;
95 double s_mem_setup_min,s_mem_setup_max,s_mem_setup_avg;
97 double s_mem_h2d_min,s_mem_h2d_max,s_mem_h2d_avg;
99 double s_kernel_exec_min,s_kernel_exec_max,s_kernel_exec_avg;
// The cycle counters here are unsigned long, wider than or equal to the
// unsigned int used for the per-call cycle count.
100 unsigned long n_kernel_exec_cycles;
101 unsigned long n_kernel_exec_cycles_min,n_kernel_exec_cycles_max,n_kernel_exec_cycles_avg;
102 float n_kernel_exec_iters;
103 float n_kernel_exec_iters_min,n_kernel_exec_iters_max,n_kernel_exec_iters_avg;
105 double s_mem_d2h_min,s_mem_d2h_max,s_mem_d2h_avg;
107 double s_solve_min,s_solve_max,s_solve_avg;
108 double s_postprocess;
109 double s_postprocess_min,s_postprocess_max,s_postprocess_avg;
// NOTE(review): presumably the number of calls that converged — confirm.
110 unsigned int n_converged;
111 } perf_total_metrics_t;
// Storage for performance data: a growable list of per-call records and a
// single aggregate record.
// NOTE(review): presumably one perf_call entry is appended per solve when
// perf_call_enabled is true — confirm against the implementation.
112 std::vector<perf_call_metrics_t> perf_call;
113 perf_total_metrics_t perf_total;
// Configuration word (0 by default) and a flag that is false by default.
// NOTE(review): presumably fpga_disabled marks the FPGA path as unusable
// after a failure — confirm.
116 unsigned int fpga_config_bits = 0;
117 bool fpga_disabled =
false;
// Debug and data buffers. The pointers default to nullptr; the two size
// fields (debugbufferSize, debug_outbuf_words) have no in-class initializer.
// RW_BUF is a constant defined outside this excerpt.
119 unsigned int debugbufferSize;
120 unsigned long int *debugBuffer =
nullptr;
121 unsigned int *databufferSize =
nullptr;
122 unsigned char *dataBuffer[RW_BUF] = {
nullptr};
123 unsigned int debug_outbuf_words;
// Result-buffer bookkeeping and kernel run counters; none of these has an
// in-class initializer. RES_BUF_MAX is defined outside this excerpt.
// NOTE(review): the meaning of the 6 entries of result_offsets is not
// shown here.
125 int resultsBufferNum;
126 unsigned int resultsBufferSize[RES_BUF_MAX];
127 unsigned int result_offsets[6];
128 unsigned int kernel_cycles, kernel_iter_run;
130 unsigned char last_norm_idx;
// Kernel status flags without in-class initializers.
// NOTE(review): presumably decoded from the kernel's debug output after
// each run — confirm.
131 bool kernel_aborted, kernel_signature, kernel_overflow;
132 bool kernel_noresults;
133 bool kernel_wrafterend, kernel_dbgfifofull;
// Optional-output switches, both off by default.
134 bool use_residuals =
false;
135 bool use_LU_res =
false;
// Defaults: abort after 2,000,000,000 cycles; debug sample rate 65535.
// NOTE(review): units and rationale of these two constants are not shown.
138 unsigned int abort_cycles = 2000000000;
139 unsigned int debug_sample_rate = 65535;
// Sizes associated with the nnzValArrays tables declared further down;
// all start at 0.
140 int nnzValArrays_size = 0;
141 int L_nnzValArrays_size = 0;
142 int U_nnzValArrays_size = 0;
// Raw pointers to host-side arrays; every pointer defaults to nullptr.
// NOTE(review): ownership is not visible here — presumably these point
// into dataBuffer or are allocated in the .cpp; confirm before freeing.
144 long unsigned int *setupArray =
nullptr;
// First group of five (no prefix): value tables, column indices, row-offset
// markers, P indices and color sizes.
// NOTE(review): presumably describes the system matrix in the FPGA's own
// sparse format — confirm.
145 double **nnzValArrays =
nullptr;
146 short unsigned int *columnIndexArray =
nullptr;
147 unsigned char *newRowOffsetArray =
nullptr;
148 unsigned int *PIndexArray =
nullptr;
149 unsigned int *colorSizesArray =
nullptr;
// Same five arrays with an L_ prefix.
// NOTE(review): presumably the lower-triangular ILU0 factor — confirm.
150 double **L_nnzValArrays =
nullptr;
151 short unsigned int *L_columnIndexArray =
nullptr;
152 unsigned char *L_newRowOffsetArray =
nullptr;
153 unsigned int *L_PIndexArray =
nullptr;
154 unsigned int *L_colorSizesArray =
nullptr;
// Same five arrays with a U_ prefix.
// NOTE(review): presumably the upper-triangular ILU0 factor — confirm.
155 double **U_nnzValArrays =
nullptr;
156 short unsigned int *U_columnIndexArray =
nullptr;
157 unsigned char *U_newRowOffsetArray =
nullptr;
158 unsigned int *U_PIndexArray =
nullptr;
159 unsigned int *U_colorSizesArray =
nullptr;
// Dense double arrays.
// NOTE(review): presumably block diagonal (BLKD), solution vectors (X1/X2),
// residual vectors (R1/R2) and L/U results (Lres/Ures) — confirm.
160 double *BLKDArray =
nullptr;
161 double *X1Array =
nullptr, *X2Array =
nullptr;
162 double *R1Array =
nullptr, *R2Array =
nullptr;
163 double *LresArray =
nullptr, *UresArray =
nullptr;
// One pointer per result buffer; the initializer sets every entry to nullptr.
164 double *resultsBuffer[RES_BUF_MAX] = {
nullptr};
// OpenCL handles used to talk to the device.
// device_id and commands have no in-class initializer; the cl_mem handles
// default to nullptr.
// NOTE(review): where these are created and released is not visible here.
166 cl_device_id device_id;
168 cl_command_queue commands;
// One cl_mem per data buffer (RW_BUF entries) plus one for the debug buffer.
171 cl_mem cldata[RW_BUF] = {
nullptr};
172 cl_mem cldebug =
nullptr;
// hw_* members; none has an in-class initializer, so each must be assigned
// before it is read.
// NOTE(review): presumably hardware limits and latencies reported by the
// FPGA bitstream — confirm where they are populated.
174 unsigned int hw_x_vector_elem;
175 unsigned int hw_max_row_size;
176 unsigned int hw_max_column_size;
177 unsigned int hw_max_colors_size;
178 unsigned short hw_max_nnzs_per_row;
179 unsigned int hw_max_matrix_size;
181 bool hw_write_ilu0_results;
182 unsigned short hw_dma_data_width;
// Single-byte fields: values above 255 cannot be represented.
183 unsigned char hw_x_vector_latency;
184 unsigned char hw_add_latency;
185 unsigned char hw_mult_latency;
186 unsigned char hw_mult_num;
187 unsigned char hw_num_read_ports;
188 unsigned char hw_num_write_ports;
189 unsigned short hw_reset_cycles;
190 unsigned short hw_reset_settle;
// Debug/diagnostic switches; all are off (false / 0 / nullptr) by default.
// NOTE(review): presumably developer-only options for clearing, pre-filling
// and dumping buffers — confirm how they are set.
192 bool reset_data_buffers =
false;
193 bool fill_results_buffers =
false;
// An int rather than a bool; the meaning of non-zero values is not shown here.
194 int dump_data_buffers = 0;
195 bool dump_results =
false;
// Raw C-string pointers.
// NOTE(review): presumably output directory and file base name for dumps;
// ownership is not visible here.
196 char *data_dir =
nullptr;
197 char *basename =
nullptr;
// Reset timing values, 0 by default.
// NOTE(review): relation to hw_reset_cycles / hw_reset_settle is not shown.
198 unsigned short rst_assert_cycles = 0;
199 unsigned short rst_settle_cycles = 0;
/// Declaration only; the definition is not part of this excerpt.
/// NOTE(review): the parameter descriptions below are inferred from the
/// names — confirm against the implementation.
/// \param N     presumably the size of the linear system
/// \param nnz   presumably the number of nonzero entries
/// \param dim   presumably the block dimension
/// \param vals  presumably the matrix nonzero values
/// \param rows  presumably the CSR row pointers
/// \param cols  presumably the CSR column indices
208 void initialize(
int N,
int nnz,
int dim,
double *vals,
int *rows,
int *cols);
/// Declaration only; the definition is not part of this excerpt.
/// NOTE(review): presumably refreshes the stored matrix values and
/// right-hand side for a new solve — confirm.
/// \param vals  presumably the new matrix nonzero values
/// \param b     presumably the right-hand-side vector
213 void update_system(
double *vals,
double *b);
/// Declaration only; the definition is not part of this excerpt.
/// \return a bool; NOTE(review): presumably true when the analysis
///         succeeded — confirm against the implementation.
217 bool analyse_matrix();
/// Declaration only; the definition is not part of this excerpt.
/// \return a bool; NOTE(review): presumably true when the preconditioner
///         was created successfully — confirm against the implementation.
221 bool create_preconditioner();
/// Declaration only; takes no arguments and returns nothing.
/// NOTE(review): presumably aggregates the per-call records in perf_call
/// into perf_total — confirm against the implementation.
228 void generate_statistics(
void);
/// Construct an fpgaSolver.
/// NOTE(review): the parameter descriptions below are inferred from the
/// names — confirm against the definition in FPGASolverBackend.cpp.
/// \param fpga_bitstream           presumably the FPGA bitstream to load (taken by value)
/// \param linear_solver_verbosity  presumably the verbosity level
/// \param maxit                    presumably the maximum number of iterations
/// \param tolerance                presumably the convergence tolerance
/// \param opencl_ilu_reorder       presumably the ILU reordering strategy
238 FpgaSolverBackend(std::string fpga_bitstream,
int linear_solver_verbosity,
int maxit,
double tolerance, ILUReorder opencl_ilu_reorder);
/// Overrides a virtual function of the base class (marked override).
/// Declaration only; the definition is not part of this excerpt.
/// NOTE(review): the parameter descriptions below are inferred from the
/// names — confirm against the implementation.
/// \param N             presumably the size of the linear system
/// \param nnz           presumably the number of nonzero entries
/// \param dim           presumably the block dimension
/// \param vals          presumably the matrix nonzero values
/// \param rows          presumably the CSR row pointers
/// \param cols          presumably the CSR column indices
/// \param b             presumably the right-hand-side vector
/// \param wellContribs  WellContributions object, passed by non-const reference
/// \param res           BdaResult object, passed by non-const reference;
///                      presumably receives the solve summary
/// \return a SolverStatus value (type declared outside this excerpt)
254 SolverStatus solve_system(
int N,
int nnz,
int dim,
double *vals,
int *rows,
int *cols,
double *b,
WellContributions& wellContribs,
BdaResult &res)
override;
This class serves to eliminate the need to include the WellContributions into the matrix (with --matri...
Definition: WellContributions.hpp:61
This class is based on the InverseOperatorResult struct from dune/istl/solver.hh. It is needed to prevent ...
Definition: BdaResult.hpp:29
This class serves to simplify choosing between different backend solvers, such as cusparseSolver and ...
Definition: BdaSolver.hpp:43
This struct resembles a blocked csr matrix, like Dune::BCRSMatrix.
Definition: BlockedMatrix.hpp:36
Definition: FPGABILU0.hpp:39
This class implements an ilu0-bicgstab solver on FPGA.
Definition: FPGASolverBackend.hpp:36
FpgaSolverBackend(std::string fpga_bitstream, int linear_solver_verbosity, int maxit, double tolerance, ILUReorder opencl_ilu_reorder)
Construct an fpgaSolver.
Definition: FPGASolverBackend.cpp:48
void get_result(double *x) override
Get result after linear solve, and perform postprocessing if necessary.
Definition: FPGASolverBackend.cpp:208
~FpgaSolverBackend()
Destroy an fpgaSolver, and free memory.
Definition: FPGASolverBackend.cpp:174