TBCI Numerical high perf. C++ Library 2.8.0
lu_solver.h
Go to the documentation of this file.
1
19
20#ifndef TBCI_SOLVER_LU_SOLVER_H
21#define TBCI_SOLVER_LU_SOLVER_H
22
23#include "tbci/matrix.h"
24#include "tbci/vector.h"
25
26#ifdef PRAGMA_I
27# pragma interface "lu_solver.h"
28#endif
29
30/* When to warn because of diagonal zeroes */
31#ifndef MLU_MINVAL
32# define MLU_MINVAL 5e-16
33#endif
34
35/* When to warn because of not converged solution */
36#ifndef MLU_RES_WARN
37# define MLU_RES_WARN 1e-11
38#endif
39
41
51INST(template <typename T> class Matrix friend int lu_decomp (Matrix<T>&);)
52#if defined(USE_PLAIN_VEC_KERNELS) //|| (defined(UNROLL_DEPTH) && UNROLL_DEPTH == 1)
53template <typename T>
54int lu_decomp (Matrix<T>& mat)
55{
56 const unsigned matr = mat.rows();
57 int err = 0, col = -1;
58 //Check if Matrix is quadratic in shape
59 BCHK(matr != mat.columns(), MatErr, LU only for quadratic matrices, matr, -1);
60 // Decomposition is compactly stored in one Matrix
61 // The diagonal elements of the lower Matrix are all 1
62 // (this is a feature of the algorithm)
63
64 // TODO: Maybe the mem access pattern could be optimized ...
65 for (unsigned j = 0; j < matr; ++j) {
66 unsigned i; // MSVC++ annoyance
68 for (i = 0; i <= j; ++i) {
69 tmp = mat.get(i, j);
70 for (REGISTER unsigned k = 0; k < i; ++k)
71 tmp -= mat.get(i, k) * mat.get(k, j);
72 mat.setval(tmp, i, j);
73 }
74 if (UNLIKELY(MATH__ fabs(tmp) < MLU_MINVAL)) {
75 ++err;
76 col = j;
77 }
78 T invdiag = T(1.0)/tmp;
79 //No pivoting for the first shot
80 // Diagonal zeros are lethal
81 //#warning "We don't do pivoting, so don't supply non diagonal-dominant matrices!"
82 for (i = j + 1; i < matr; ++i) {
83 tmp = mat.get(i, j);
84 for (REGISTER unsigned k = 0; k < j; ++k)
85 tmp -= mat.get(i, k) * mat.get(k, j);
86 mat.setval(tmp * invdiag, i, j);
87 }
88 }
89 BCHK(err != 0, MatErr, Diagonal zero encountered, col, err);
90 return err;
91}
92#else
93template <typename T>
95{
96 const unsigned matr = mat.rows();
97 int err = 0, col = -1;
98 //Check if Matrix is quadratic in shape
99 BCHK(matr != mat.columns(), MatErr, LU only for quadratic matrices, matr, -1);
100 // Decomposition is compactly stored in one Matrix
101 // The diagonal elements of the lower Matrix are all 1
102 // (this is a feature of the algorithm)
103 //
104 // TODO: Maybe the mem access pattern could be optimized ...
105 for (unsigned j = 0; j < matr; ++j) {
106 PREFETCH_R(&mat.getcref(0,0),3);
107 PREFETCH_R(&mat.getcref(0,j),3);
108 unsigned i; // MSVC++ annoyance
110 for (i = 0; i <= MIN(j,4); ++i) {
111 PREFETCH_R(&mat.getcref(i,j), 3);
112 PREFETCH_R(&mat.getcref(i+1,0), 3);
113 tmp = mat.get(i, j);
114 for (REGISTER unsigned k = 0; k < i; ++k)
115 tmp -= mat.get(i, k) * mat.get(k, j);
116 mat.setval (tmp, i, j);
117 }
118 PREFETCH_R(&mat.getcref(1,j),2);
119 PREFETCH_R(&mat.getcref(2,j),2);
120 PREFETCH_R(&mat.getcref(3,j),2);
121 for (; i <= j; ++i) {
122 PREFETCH_R(&mat.getcref(i,0), 3);
123 PREFETCH_R(&mat.getcref(i,EL_PER_CL(T)), 3);
124 tmp = mat.get(i, j);
125 REGISTER unsigned k;
126 for (k = 0; (signed)k < (signed)i-(signed)MAX(8,4*EL_PER_CL(T)); k+=4) {
127 tmp -= mat.get(i, k) * mat.get(k, j);
128 PREFETCH_R(&mat.getcref(i,k+2*EL_PER_CL(T)),3);
129 tmp -= mat.get(i,k+1) * mat.get(k+1,j);
130 PREFETCH_R(&mat.getcref(i,k+3*EL_PER_CL(T)),3);
131 tmp -= mat.get(i,k+2) * mat.get(k+2,j);
132 PREFETCH_R(&mat.getcref(k+4,j), 2);
133 PREFETCH_R(&mat.getcref(k+5,j), 2);
134 tmp -= mat.get(i,k+3) * mat.get(k+3,j);
135 PREFETCH_R(&mat.getcref(k+6,j), 2);
136 PREFETCH_R(&mat.getcref(k+7,j), 2);
137 }
138 for (; k < i; ++k)
139 tmp -= mat.get(i,k) * mat.get(k,j);
140 mat.setval (tmp, i, j);
141 if (LIKELY(i<matr-1))
142 PREFETCH_R(&mat.getcref(i+1,j),3);
143 }
144 if (UNLIKELY(MATH__ fabs(tmp) < MLU_MINVAL)) {
145 ++err;
146 col = j;
147 }
148 T invdiag = T(1.0)/tmp;
149 //No pivoting for the first shot
150 // Diagonal zeros are lethal
151 //#warning "We don't do pivoting, so don't supply non diagonal-dominant matrices!"
152 for (i = j + 1; i < matr; ++i)
153 {
154 PREFETCH_R(&mat.getcref(i,j), 3);
155 PREFETCH_R(&mat.getcref(i,0), 3);
156 PREFETCH_R(&mat.getcref(i,EL_PER_CL(T)), 3);
157 tmp = mat.get(i, j);
158 REGISTER unsigned k;
159 for (k = 0; (signed)k < (signed)j-(signed)MAX(8,4*EL_PER_CL(T)); k+=4) {
160 tmp -= mat.get(i, k) * mat.get(k, j);
161 PREFETCH_R(&mat.getcref(i,k+2*EL_PER_CL(T)),3);
162 tmp -= mat.get(i,k+1) * mat.get(k+1,j);
163 PREFETCH_R(&mat.getcref(i,k+3*EL_PER_CL(T)),3);
164 tmp -= mat.get(i,k+2) * mat.get(k+2,j);
165 PREFETCH_R(&mat.getcref(k+4,j), 2);
166 PREFETCH_R(&mat.getcref(k+5,j), 2);
167 tmp -= mat.get(i,k+3) * mat.get(k+3,j);
168 PREFETCH_R(&mat.getcref(k+6,j), 2);
169 PREFETCH_R(&mat.getcref(k+7,j), 2);
170 }
171 for (; k < j; ++k)
172 tmp -= mat.get(i,k) * mat.get(k,j);
173 mat.setval (tmp * invdiag, i, j);
174 }
175 }
176 BCHK(err != 0, MatErr, Diagonal zero encountered, col, err);
177 return err;
178}
179#endif
180
181INST(template <typename T> class Matrix friend TVector<T> LU_fwd_subst(const Matrix<T>&, const Vector<T>&);)
182template <typename T>
184{
185 const unsigned ysize = y.size();
186 TVector<T> x(ysize);
187 BCHK(ysize != lu.rows(), MatErr, LU fwd subst size error, ysize, x);
188 for (unsigned i = 0; i < ysize; ++i) {
189#if 0
190 ALIGN2(REGISTER T tmp, MIN_ALIGN2) = -y(i);
191 do_vec_mult<T> (i, &x.get(0), &lu(i,0), tmp);
192 x.set (-tmp, i);
193#else
194 ALIGN2(REGISTER T tmp, MIN_ALIGN2) = y(i);
195 for (REGISTER unsigned j = 0; j < i; ++j)
196 tmp -= x.get(j) * lu.get(i, j);
197 x.set (tmp, i);
198#endif
199 }
200 return x;
201}
202
203INST(template <typename T> class Matrix friend TVector<T> LU_bkw_subst(const Matrix<T>&, const Vector<T>&);)
204template <typename T>
206{
207 const unsigned ysize = y.size();
208 TVector<T> x(ysize);
209 BCHK(ysize != lu.rows(), MatErr, LU bkw subst size error, ysize, x);
210 for (unsigned i = ysize - 1; (signed)i >= 0; --i) {
211 ALIGN2(REGISTER T tmp, MIN_ALIGN2) = y(i);
212 for (REGISTER unsigned j = ysize - 1; j > i; --j)
213 tmp -= x.get(j) * lu.get(i, j);
214 x.set(tmp / lu.get(i, i), i);
215 }
216 return x;
217}
218
223INST(template <typename T> class Matrix friend TVector<T> LU_solve(const Matrix<T>&, const Vector<T>&);)
224template <typename T>
225inline TVector<T> LU_solve(const Matrix<T>& lu, const Vector<T>& b)
226{
227 Vector<T> y(LU_fwd_subst(lu, b));
228 //STD__ cout << y << STD__ endl;
229 return LU_bkw_subst(lu, y);
230}
231
238INST(template <typename T> class Matrix friend TVector<T> lu_solve(Matrix<T>&, const Vector<T>&);)
239template <typename T>
241{
242#ifdef ERRCHECK
243 Matrix<T> m_backup (mat);
244 double res;
245#endif
246 lu_decomp(mat);
247 TVector<T> x (LU_solve(mat, b));
248#ifdef ERRCHECK
249 Vector<T> xx(b.size()); xx.copy (x);
250 BCHK ((res = MATH__ fabs(m_backup * xx - b)/MATH__ fabs(b)) > MLU_RES_WARN, MatErr,
251 LU solver: Inaccurate solution, (int)(res / MLU_RES_WARN), x);
252#endif
253 return x;
254}
255
256INST(template <typename T> class Matrix friend TMatrix<T> LU_solve(const Matrix<T>&, const Matrix<T>&);)
257template <typename T>
259{
260 TMatrix<T> x(b.rows(), b.columns());
261 Vector<T> y(b.rows()), w(b.rows());
262 for (unsigned i = 0; i < b.columns(); ++i) {
263 w = b.get_col(i);
264 y = LU_fwd_subst(lu, w);
265 x.set_col(LU_bkw_subst(lu, y), i);
266 }
267 return x;
268}
269
270INST(template <typename T> class Matrix friend TMatrix<T> lu_solve(Matrix<T>&, const Matrix<T>&);)
271template <typename T>
273{
274 lu_decomp(mat);
275 return LU_solve(mat, b);
276}
277
279INST(template <typename T> class Matrix friend T LU_det(const Matrix<T>&);)
280template <typename T>
282{
283 ALIGN2(REGISTER T tmp, MIN_ALIGN2) = lu(0, 0);
284 BCHK(lu.rows() != lu.columns(), MatErr, LU determinant only for quadratic matrices, 0, 0);
285 for (REGISTER unsigned i = 1; i < lu.rows(); ++i)
286 tmp *= lu.get(i, i);
287 return tmp;
288}
289
291INST(template <typename T> class Matrix friend T lu_det(Matrix<T>&);)
292template <typename T>
293inline T lu_det(Matrix<T>& mat)
294{
295 lu_decomp(mat);
296 return LU_det(mat);
297}
298
299
301INST(template <typename T> class Matrix friend TMatrix<T> LU_invert(const Matrix<T>&);)
302template <typename T>
304{
305 TMatrix<T> inv (T(0), mat.rows(), mat.columns());
306 BCHK (mat.rows() != mat.columns(), MatErr, LU invert non quadr. Matrix, 0, inv);
307 for (unsigned j = 0; j < mat.rows(); ++j) {
308 Vector<T> v (T(0), mat.columns());
309 v.setval(j) = T(1);
310 Vector<T> y (LU_solve(mat, v));
311 inv.set_col(y, j);
312 }
313 return inv;
314}
315
316
318INST(template <typename T> class Matrix friend TMatrix<T> lu_invert(Matrix<T>&);)
319template <typename T>
321{
322 lu_decomp(mat);
323 return LU_invert(mat);
324}
325
327#endif /* TBCI_SOLVER_LU_SOLVER_H */
const Vector< T > const Vector< T > & x
Definition LM_fit.h:97
const Vector< T > const Vector< T > const Vector< T > int T T & err
Definition LM_fit.h:102
int i
Definition LM_fit.h:71
doublereal y
Definition TOMS_707.C:27
#define BCHK(cond, exc, txt, ind, rtval)
Definition basics.h:575
#define LIKELY(expr)
branch prediction note that we sometimes on purpose mark the unlikely possibility likely and vice ver...
Definition basics.h:100
#define MIN(a, b)
Definition basics.h:655
#define MIN_ALIGN2
Definition basics.h:424
#define NAMESPACE_END
Definition basics.h:323
#define INST(x)
Definition basics.h:238
#define ALIGN2(v, x)
Definition basics.h:443
#define PREFETCH_R(addr, loc)
In case gcc does not yet support __builtin_prefetch(), we have handcoded assembly with gcc for a few ...
Definition basics.h:748
#define NAMESPACE_TBCI
Definition basics.h:317
#define UNLIKELY(expr)
Definition basics.h:101
#define REGISTER
Definition basics.h:108
#define MATH__
Definition basics.h:339
#define MAX(a, b)
Definition basics.h:656
#define T
Definition bdmatlib.cc:20
BVector< T > & copy(const BVector< T > &bv)
copy does a resize, if necessary
Definition bvector.h:484
exception class
Definition matrix.h:54
const T & getcref(const unsigned r, const unsigned c) const
Definition matrix.h:292
unsigned int rows() const
number of rows
Definition matrix.h:317
tbci_traits< T >::const_refval_type get(const unsigned r, const unsigned c) const
get, set and getcref are used internally and not for public consumption
Definition matrix.h:288
unsigned int columns() const
number of columns
Definition matrix.h:315
T & setval(const T &val, const unsigned int r, const unsigned int c)
Definition matrix.h:281
Temporary Base Class Idiom: Class TVector is used for temporary variables.
Definition vector.h:73
T & setval(const unsigned long i) const
Definition vector.h:192
double fabs(const TBCI__ cplx< T > &c)
Definition cplx.h:746
F_TMatrix< T > b
Definition f_matrix.h:736
F_TMatrix< double > inv(const F_Matrix< double > &A, int overwriteA=0)
Definition lapack.cpp:181
T lu_det(Matrix< T > &mat)
calculates determinant of a Matrix by LU decomposition
Definition lu_solver.h:293
TVector< T > LU_fwd_subst(const Matrix< T > &lu, const Vector< T > &y)
Definition lu_solver.h:183
TVector< T > lu_solve(Matrix< T > &mat, const Vector< T > &b)
Definition lu_solver.h:240
#define MLU_RES_WARN
Definition lu_solver.h:37
T LU_det(const Matrix< T > &lu)
calculates determinant of an already LU decomposed Matrix
Definition lu_solver.h:281
TMatrix< T > lu_invert(Matrix< T > &mat)
Returns the inverse of a Matrix by performing a LU decomposition.
Definition lu_solver.h:320
TVector< T > LU_solve(const Matrix< T > &lu, const Vector< T > &b)
Definition lu_solver.h:225
TMatrix< T > LU_invert(const Matrix< T > &mat)
Returns the inverse of a already LU decomposed Matrix.
Definition lu_solver.h:303
TVector< T > LU_bkw_subst(const Matrix< T > &lu, const Vector< T > &y)
Definition lu_solver.h:205
#define MLU_MINVAL
Definition lu_solver.h:32
NAMESPACE_TBCI int lu_decomp(Matrix< T > &mat)
LU decomposes the TBCI::Matrix mat.
Definition lu_solver.h:94
int lu_decomp(Matrix< T > &) HOT
LU decomposes the TBCI::Matrix mat.
Definition lu_solver.h:94
const unsigned TMatrix< T > * res
#define EL_PER_CL(T)
Definition perf_opt.h:172
#define USE_PLAIN_VEC_KERNELS
file perf_opt.h Default settings for optimum performance on different architectures and compilers.
Definition perf_opt.h:114