/* bd_lu_solver.h -- TBCI Numerical high perf. C++ Library 2.8.0 */
/* Naming conventions:
 * LU_xxxx operates on an already LU decomposed spmat
 * whereas lu_xxxx does decompose it and then does
 * the requested action (eg. determinant calculation ...)
 * THE lu_xxxx VARIANTS THEREFORE CHANGE THE MATRIX PASSED!
 */
14
/* Written 23.06.97 by KG
 * Last changed: 17.12.97 KG
 * $Id: bd_lu_solver.h,v 1.27.2.20 2022/11/03 17:28:12 garloff Exp $
 */
19
20#ifndef TBCI_SOLVER_BD_LU_SOLVER_H
21#define TBCI_SOLVER_BD_LU_SOLVER_H
22
23#include "tbci/vector.h"
24#include "tbci/band_matrix.h"
25#include "tbci/matrix.h"
26
27#ifdef PRAGMA_I
28# pragma interface "bd_lu_solver.h"
29#endif
30
/* When to warn because of diagonal zeroes */
32#ifndef BDMLU_MINVAL
33# define BDMLU_MINVAL 1e-16
34#endif
35
/* When to warn because of not converged solution */
#ifndef BDMLU_RES_WARN
38# define BDMLU_RES_WARN 1e-11
39#endif
40
42
52
53INST(template <typename T> class BdMatrix friend int lu_decomp (BdMatrix<T>&);)
54
55#if 1 || defined(USE_PLAIN_VEC_KERNELS) || (defined(UNROLL_DEPTH) && UNROLL_DEPTH == 1)
56template <typename T>
58{
59 const unsigned matr = mat.rows();
60 //BdMatrices are always quadratic in shape: Check not necessary
61
62 //check that LU decomp fits in mat
63 mat.expand();
64
65 //const unsigned od = mat.diagconf.size();
66 const unsigned maxoff = mat.getmaxoff();
67
68 // TODO: The mem access pattern might be optimized
69 for (unsigned j = 0; j < matr; ++j) {
70 unsigned i = MAX((signed)0, (signed)j - (signed)maxoff);
71 const unsigned imax = MIN(j,maxoff);
72 ALIGN2(REGISTER T tmp, MIN_ALIGN2); tmp = 0;
73 for (; i <= imax; ++i) {
74 tmp = mat.get(i, j);
75 for (REGISTER unsigned k = 0; k < i; ++k)
76 tmp -= mat.get(i, k) * mat.get(k, j);
77 mat.setval(tmp, i, j);
78 }
79 for (; i <= j; ++i) {
80 tmp = mat(i, j);
81 for (REGISTER unsigned k = j - maxoff; k < i; ++k)
82 tmp -= mat.get(i, k) * mat.get(k, j);
83 mat.setval(tmp, i, j);
84 }
85 BCHK(MATH__ fabs(tmp) == 0, BdMatrixErr, Diagonal zero encountered, j, -1);
86 T invdiag = T(1.0)/tmp;
87 //No pivoting for the first shot (OK for most FD problems)
88 // Diagonal zeros are lethal (!)
89 //#warning "We don't do pivoting, so don't supply non diagonal-dominant matrices!"
90 unsigned bound = MIN(matr, j + mat.getmaxoff() + 1);
91 for (; i < bound; ++i) {
92 tmp = mat.get(i, j);
93 for (REGISTER unsigned int k = MAX((signed)0, (signed)j - (signed)maxoff); k < j; ++k)
94 tmp -= mat.get(i, k) * mat.get(k, j);
95 // BCHK(MATH__ fabs (mat.get(j, j)) < BDMLU_MINVAL, BdMatrixErr, Diagonal zero encountered, j, -1);
96# ifdef ERRCHECK
97 if (UNLIKELY (MATH__ fabs(tmp * invdiag) > 1/BDMLU_MINVAL))
98 STD__ cerr << "\n BdMatrixWarning: BdMatrix badly conditioned, continuing ...\n" << STD__ flush;
99# endif
100 mat.setval(tmp * invdiag, i, j);
101 }
102 }
103 return 0;
104}
105
106#else
107
108template <typename T>
109int lu_decomp (BdMatrix<T>& mat)
110{
111 const unsigned matr = mat.rows();
112 //BdMatrices are always quadratic in shape: Check not necessary
113
114 //check that LU decomp fits in mat
115 mat.expand();
116
117 //const unsigned od = mat.diagconf.size();
118 const unsigned maxoff = mat.getmaxoff();
119
120 // TODO: The mem access pattern might be optimized
121 for (unsigned j = 0; j < matr; ++j) {
122 unsigned i = MAX((signed)0, (signed)j - (signed)maxoff);
123 PREFETCH_R(&mat.get(i, 0), 3);
124 PREFETCH_R(&mat.get(i, j), 3);
125 const unsigned imax = MIN(j,maxoff);
126 PREFETCH_R(&mat.get(0, j), 2);
127 PREFETCH_R(&mat.get(1, j), 2);
129 for (; i <= imax; ++i) {
130 tmp = mat.get(i, j);
131 PREFETCH_R(&mat.get(i, 0), 3);
132 PREFETCH_R(&mat.get(i, 1), 3);
133 for (REGISTER unsigned k = 0; k < i; ++k) {
134 tmp -= mat.get(i, k) * mat.get(k, j);
135 PREFETCH_R(&mat.get(i, k+2),3);
136 PREFETCH_R(&mat.get(k+1, j), 2);
137 }
138 mat.setval(tmp, i, j);
139 PREFETCH_W(&mat(i+1,j),3);
140 }
141 for (; i <= j; ++i) {
142 tmp = mat.get(i, j);
143 PREFETCH_R(&mat.get(i, j-maxoff), 3);
144 PREFETCH_R(&mat.get(i, j-maxoff+1), 3);
145 REGISTER unsigned k;
146 for (k = j - maxoff; k < (signed)i-(signed)MAX(8,4*EL_PER_CL(T)); k+=4) {
147 tmp -= mat.get(i, k) * mat.get(k, j);
148 PREFETCH_R(&mat.get(i,k+4), 3);
149 PREFETCH_R(&mat.get(i,k+5), 3);
150 tmp -= mat.get(i, k+1) * mat.get(k+1, j);
151 PREFETCH_R(&mat.get(i,k+6), 3);
152 PREFETCH_R(&mat.get(i,k+7), 3);
153 tmp -= mat.get(i, k+2) * mat.get(k+2, j);
154 PREFETCH_R(&mat.get(k+4,j), 3);
155 PREFETCH_R(&mat.get(k+5,j), 3);
156 tmp -= mat.get(i, k+3) * mat.get(k+3, j);
157 PREFETCH_R(&mat.get(k+6,j), 3);
158 PREFETCH_R(&mat.get(k+7,j), 3);
159 }
160 for (; k < i; ++k)
161 tmp -= mat.get(i, k) * mat.get(k, j);
162 mat.setval(tmp, i, j);
163 }
164 BCHK(MATH__ fabs (tmp) == 0, BdMatrixErr, Diagonal zero encountered, j, -1);
165 T invdiag = T(1.0)/tmp;
166 //No pivoting for the first shot (OK for most FD problems)
167 // Diagonal zeros are lethal (!)
168 //#warning "We don't do pivoting, so don't supply non diagonal-dominant matrices!"
169 unsigned bound = MIN(matr, j + mat.getmaxoff() + 1);
170 for (; i < bound; ++i) {
171 const unsigned kstart = MAX((signed)0, (signed)j - (signed)maxoff);
172 tmp = mat.get(i, j);
173 PREFETCH_R(&mat.get(i, kstart), 3);
174 PREFETCH_R(&mat.get(i, kstart+1), 3);
175 REGISTER unsigned int k;
176 for (k = kstart; (signed)k < (signed)j-(signed)MAX(8,4*EL_PER_CL(T)); k+=4) {
177 tmp -= mat.get(i, k) * mat.get(k, j);
178 PREFETCH_R(&mat.get(i,k+4), 3);
179 PREFETCH_R(&mat.get(i,k+5), 3);
180 tmp -= mat.get(i, k+1) * mat.get(k+1, j);
181 PREFETCH_R(&mat.get(i,k+6), 3);
182 PREFETCH_R(&mat.get(i,k+7), 3);
183 tmp -= mat.get(i, k+2) * mat.get(k+2, j);
184 PREFETCH_R(&mat.get(k+4,j), 3);
185 PREFETCH_R(&mat.get(k+5,j), 3);
186 tmp -= mat.get(i, k+3) * mat.get(k+3, j);
187 PREFETCH_R(&mat.get(k+6,j), 3);
188 PREFETCH_R(&mat.get(k+7,j), 3);
189 }
190 for (; k < j; ++k)
191 tmp -= mat.get(i, k) * mat.get(k, j);
192 // BCHK(MATH__ fabs (mat.get(j, j)) < BDMLU_MINVAL, BdMatrixErr, Diagonal zero encountered, j, -1);
193# ifdef ERRCHECK
194 if (UNLIKELY (MATH__ fabs(tmp * invdiag) > 1/BDMLU_MINVAL))
195 STD__ cerr << "\n BdMatrixWarning: BdMatrix badly conditioned, continuing ...\n" << STD__ flush;
196# endif
197 mat.setval(tmp * invdiag, i, j);
198 }
199 }
200 return 0;
201}
202#endif
203
204
205INST(template <typename T> class BdMatrix friend TVector<T> LU_fwd_subst(const BdMatrix<T>&, const Vector<T>&);)
206template <typename T>
208{
209 const unsigned ysize = y.size();
210 const unsigned maxoff = lu.getmaxoff();
211 TVector<T> x(ysize);
212 BCHK(ysize != lu.rows(), BdMatrixErr, LU fwd subst size error, ysize, x);
213 for (unsigned i = 0; i < ysize; ++i) {
214 ALIGN2(REGISTER T tmp, MIN_ALIGN2) = y(i);
215 for (REGISTER unsigned j = MAX((signed)0, (signed)i - (signed)maxoff); j < i; ++j)
216 tmp -= x.get(j) * lu.get(i, j);
217 x.set(tmp, i);
218 }
219 return x;
220}
221
222INST(template <typename T> class BdMatrix friend TVector<T> LU_bkw_subst(const BdMatrix<T>&, const Vector<T>&);)
223template <typename T>
225{
226 const unsigned ysize = y.size();
227 const unsigned maxoff = lu.getmaxoff();
228 TVector<T> x(ysize);
229 BCHK(ysize != lu.rows(), BdMatrixErr, LU bkw subst size error, ysize, x);
230 for (int i = ysize - 1; (signed)i >= 0; --i) {
231 ALIGN2(REGISTER T tmp, MIN_ALIGN2) = y(i);
232 for (REGISTER int j = MIN(ysize - 1, i + maxoff); j > i; --j)
233 tmp -= x.get(j) * lu.get(i, j);
234 x.set(tmp / lu.get(i, i), i);
235 }
236 return x;
237}
238
243
244INST(template <typename T> class BdMatrix friend TVector<T> LU_solve(const BdMatrix<T>&, const Vector<T>&);)
245template <typename T>
246inline TVector<T> LU_solve(const BdMatrix<T>& lu, const Vector<T>& b)
247{
248 Vector<T> y (LU_fwd_subst(lu, b)); //STD__ cout << y << STD__ endl;
249 return (LU_bkw_subst(lu, y));
250}
251
258INST(template <typename T> class BdMatrix friend TVector<T> lu_solve(BdMatrix<T>&, const Vector<T>&);)
259template <typename T>
261{
262#ifdef ERRCHECK
263 BdMatrix<T> m_backup (mat);
264 double res;
265#endif
266 lu_decomp (mat);
267 TVector<T> x (LU_solve (mat, b));
268#ifdef ERRCHECK
269 // No return!, Andreas
270 Vector<T> xx(b.size()); xx.copy (x);
271 if (UNLIKELY((res = MATH__ fabs(m_backup * xx - b)/MATH__ fabs(b)) > BDMLU_RES_WARN))
272 STD__ cerr << "bd_lu_solver: Inaccurate solution, rel. residuum= " << res << STD__ endl;
273#endif
274 return x;
275}
276
277INST(template <typename T> class BdMatrix friend TMatrix<T> LU_solve(const BdMatrix<T>&, const Matrix<T>&);)
278template <typename T>
280{
281 TMatrix<T> x(b.rows(), b.columns());
282 Vector<T> y(b.rows()), w(b.rows());
283 const unsigned bc = b.columns();
284 for (unsigned int i = 0; i < bc; ++i) {
285 w = b.get_col(i);
286 y = LU_fwd_subst(lu, w);
287 x.set_col(LU_bkw_subst(lu, y), i);
288 }
289 return x;
290}
291
292
293INST(template <typename T> class BdMatrix friend TMatrix<T> lu_solve(BdMatrix<T>&, const Matrix<T>&);)
294template <typename T>
296{
297 lu_decomp(mat);
298 return LU_solve(mat, b);
299}
300
#if 0
/* These may be needed for expm: BdMatrix right-hand sides are
 * handled by converting them to a dense Matrix first.
 * Currently disabled -- kept for reference only. */
INST(template <typename T> class BdMatrix friend TMatrix<T> LU_solve (const BdMatrix<T>&, const BdMatrix<T>&);)
template <typename T>
inline TMatrix<T> LU_solve (const BdMatrix<T>& lu, const BdMatrix<T>& b)
{ return LU_solve (lu, Matrix<T>(b)); }

INST(template <typename T> class BdMatrix friend TMatrix<T> lu_solve (BdMatrix<T>&, const BdMatrix<T>&);)
template <typename T>
inline TMatrix<T> lu_solve (BdMatrix<T>& mat, const BdMatrix<T>& b)
{ lu_decomp (mat); return LU_solve (mat, b); }
#endif
313
315INST(template <typename T> class BdMatrix friend T LU_det(const BdMatrix<T>&);)
316template <typename T>
318{
319 ALIGN2(REGISTER T tmp, MIN_ALIGN2) = lu.get(0, 0);
320 const unsigned lr = lu.rows();
321 for (unsigned i = 1; i < lr; ++i)
322 tmp *= lu.get(i, i);
323 return tmp;
324}
325
327INST(template <typename T> class BdMatrix friend T lu_det(BdMatrix<T>&);)
328template <typename T>
329inline T lu_det(BdMatrix<T>& mat)
330{
331 lu_decomp(mat);
332 return LU_det(mat);
333}
334
336INST(template <typename T> class BdMatrix friend TMatrix<T> LU_invert(const BdMatrix<T>&);)
337template <typename T>
339{
340 TMatrix<T> inv ((T)0, lu.columns(), lu.rows());
341 const unsigned lr = lu.rows();
342 for (unsigned int j = 0; j < lr; ++j) {
343 Vector<T> v ((T)0, lr); /* lu.columns()); */
344 v(j) = (T)1;
345 Vector<T> y (LU_solve(lu, v));
346 inv.set_col(y, j);
347 }
348 return inv;
349}
350
351
353INST(template <typename T> class BdMatrix friend TMatrix<T> lu_invert(BdMatrix<T>&);)
354template <typename T>
356{
357 lu_decomp(mat);
358 return LU_invert(mat);
359}
360
362
363#endif /* TBCI_SOLVER_BD_LU_SOLVER_H */
const Vector< T > const Vector< T > & x
Definition LM_fit.h:97
int i
Definition LM_fit.h:71
doublereal y
Definition TOMS_707.C:27
#define STD__
Definition basics.h:338
#define BCHK(cond, exc, txt, ind, rtval)
Definition basics.h:575
#define MIN(a, b)
Definition basics.h:655
#define MIN_ALIGN2
Definition basics.h:424
#define NAMESPACE_END
Definition basics.h:323
#define INST(x)
Definition basics.h:238
#define ALIGN2(v, x)
Definition basics.h:443
#define PREFETCH_R(addr, loc)
In case gcc does not yet support __builtin_prefetch(), we have handcoded assembly with gcc for a few ...
Definition basics.h:748
#define NAMESPACE_TBCI
Definition basics.h:317
#define PREFETCH_W(addr, loc)
Definition basics.h:749
#define UNLIKELY(expr)
Definition basics.h:101
#define REGISTER
Definition basics.h:108
#define MATH__
Definition basics.h:339
#define MAX(a, b)
Definition basics.h:656
TVector< T > lu_solve(BdMatrix< T > &mat, const Vector< T > &b)
Solve the equation Ax = b where A still needs to be LU decomposed.
TVector< T > LU_bkw_subst(const BdMatrix< T > &lu, const Vector< T > &y)
T lu_det(BdMatrix< T > &mat)
calculates the determinant of a BdMatrix by doing an LU decomposition
#define BDMLU_RES_WARN
TVector< T > LU_solve(const BdMatrix< T > &lu, const Vector< T > &b)
Solve the equation Ax = b where A IS already LU decomposed.
T LU_det(const BdMatrix< T > &lu)
calculates determinant of an already LU decomposed BdMatrix
#define BDMLU_MINVAL
NAMESPACE_TBCI int lu_decomp(BdMatrix< T > &mat)
LU decompose a TBCI::BdMatrix.
TMatrix< T > LU_invert(const BdMatrix< T > &lu)
return the inverse Matrix for an already LU decomposed BdMatrix
TVector< T > LU_fwd_subst(const BdMatrix< T > &lu, const Vector< T > &y)
TMatrix< T > lu_invert(BdMatrix< T > &mat)
return the inverse Matrix for a BdMatrix by doing an LU decomposition
#define T
Definition bdmatlib.cc:20
BVector< T > & copy(const BVector< T > &bv)
copy does a resize, if necessary
Definition bvector.h:484
exception class
Definition band_matrix.h:47
The class BdMatrix is an implementation to store and do operations on sparse Matrices with a band str...
tbci_traits< T >::const_refval_type get(const unsigned int, const unsigned int) const HOT
BdMatrix< T > & expand(unsigned=0)
unsigned int getmaxoff() const
max distance from main diagonal
BdMatrix< T > & setval(const T &v, const unsigned int r, const unsigned int c)
autoinsert, sparse class compatible
unsigned int rows() const
number of rows
unsigned int columns() const
number of columns
Temporary Base Class Idiom: Class TVector is used for temporary variables.
Definition vector.h:73
double fabs(const TBCI__ cplx< T > &c)
Definition cplx.h:746
F_TMatrix< T > b
Definition f_matrix.h:736
F_TMatrix< double > lu_solve(const F_Matrix< double > &A, const F_Matrix< double > &B, int overwriteA=0)
Definition lapack.cpp:156
F_TMatrix< double > inv(const F_Matrix< double > &A, int overwriteA=0)
Definition lapack.cpp:181
int lu_decomp(Matrix< T > &) HOT
LU decomposes the TBCI::Matrix mat.
Definition lu_solver.h:94
const unsigned TMatrix< T > * res
#define UNROLL_DEPTH
How many iters per loop (unrolling) Trade code bloat against speed.
Definition perf_opt.h:146
#define EL_PER_CL(T)
Definition perf_opt.h:172
#define USE_PLAIN_VEC_KERNELS
file perf_opt.h Default settings for optimum performance on different architectures and compilers.
Definition perf_opt.h:114