TBCI Numerical high perf. C++ Library  2.8.0
lu_solver.h
Go to the documentation of this file.
1 
20 #ifndef TBCI_SOLVER_LU_SOLVER_H
21 #define TBCI_SOLVER_LU_SOLVER_H
22 
23 #include "tbci/matrix.h"
24 #include "tbci/vector.h"
25 
26 #ifdef PRAGMA_I
27 # pragma interface "lu_solver.h"
28 #endif
29 
30 /* When to warn because of diagonal zeroes */
31 #ifndef MLU_MINVAL
32 # define MLU_MINVAL 5e-16
33 #endif
34 
35 /* When to warn because of not converged solution */
36 #ifndef MLU_RES_WARN
37 # define MLU_RES_WARN 1e-11
38 #endif
39 
41 
51 INST(template <typename T> class Matrix friend int lu_decomp (Matrix<T>&);)
52 #if defined(USE_PLAIN_VEC_KERNELS) //|| (defined(UNROLL_DEPTH) && UNROLL_DEPTH == 1)
53 template <typename T>
54 int lu_decomp (Matrix<T>& mat)
55 {
56  const unsigned matr = mat.rows();
57  int err = 0, col = -1;
58  //Check if Matrix is quadratic in shape
59  BCHK(matr != mat.columns(), MatErr, LU only for quadratic matrices, matr, -1);
60  // Decomposition is compactly stored in one Matrix
61  // The diagonal elements of the lower Matrix are all 1
62  // (this is a feature of the algorithm)
63 
64  // TODO: Maybe the mem access pattern could be optimized ...
65  for (unsigned j = 0; j < matr; ++j) {
66  unsigned i; // MSVC++ annoyance
68  for (i = 0; i <= j; ++i) {
69  tmp = mat.get(i, j);
70  for (REGISTER unsigned k = 0; k < i; ++k)
71  tmp -= mat.get(i, k) * mat.get(k, j);
72  mat.setval(tmp, i, j);
73  }
74  if (UNLIKELY(MATH__ fabs(tmp) < MLU_MINVAL)) {
75  ++err;
76  col = j;
77  }
78  T invdiag = T(1.0)/tmp;
79  //No pivoting for the first shot
80  // Diagonal zeros are lethal
81  //#warning "We don't do pivoting, so don't supply non diagonal-dominant matrices!"
82  for (i = j + 1; i < matr; ++i) {
83  tmp = mat.get(i, j);
84  for (REGISTER unsigned k = 0; k < j; ++k)
85  tmp -= mat.get(i, k) * mat.get(k, j);
86  mat.setval(tmp * invdiag, i, j);
87  }
88  }
89  BCHK(err != 0, MatErr, Diagonal zero encountered, col, err);
90  return err;
91 }
92 #else
93 template <typename T>
94 int lu_decomp (Matrix<T>& mat)
95 {
96  const unsigned matr = mat.rows();
97  int err = 0, col = -1;
98  //Check if Matrix is quadratic in shape
99  BCHK(matr != mat.columns(), MatErr, LU only for quadratic matrices, matr, -1);
100  // Decomposition is compactly stored in one Matrix
101  // The diagonal elements of the lower Matrix are all 1
102  // (this is a feature of the algorithm)
103  //
104  // TODO: Maybe the mem access pattern could be optimized ...
105  for (unsigned j = 0; j < matr; ++j) {
106  PREFETCH_R(&mat.getcref(0,0),3);
107  PREFETCH_R(&mat.getcref(0,j),3);
108  unsigned i; // MSVC++ annoyance
109  ALIGN2(REGISTER T tmp, MIN_ALIGN2);
110  for (i = 0; i <= MIN(j,4); ++i) {
111  PREFETCH_R(&mat.getcref(i,j), 3);
112  PREFETCH_R(&mat.getcref(i+1,0), 3);
113  tmp = mat.get(i, j);
114  for (REGISTER unsigned k = 0; k < i; ++k)
115  tmp -= mat.get(i, k) * mat.get(k, j);
116  mat.setval (tmp, i, j);
117  }
118  PREFETCH_R(&mat.getcref(1,j),2);
119  PREFETCH_R(&mat.getcref(2,j),2);
120  PREFETCH_R(&mat.getcref(3,j),2);
121  for (; i <= j; ++i) {
122  PREFETCH_R(&mat.getcref(i,0), 3);
123  PREFETCH_R(&mat.getcref(i,EL_PER_CL(T)), 3);
124  tmp = mat.get(i, j);
125  REGISTER unsigned k;
126  for (k = 0; (signed)k < (signed)i-(signed)MAX(8,4*EL_PER_CL(T)); k+=4) {
127  tmp -= mat.get(i, k) * mat.get(k, j);
128  PREFETCH_R(&mat.getcref(i,k+2*EL_PER_CL(T)),3);
129  tmp -= mat.get(i,k+1) * mat.get(k+1,j);
130  PREFETCH_R(&mat.getcref(i,k+3*EL_PER_CL(T)),3);
131  tmp -= mat.get(i,k+2) * mat.get(k+2,j);
132  PREFETCH_R(&mat.getcref(k+4,j), 2);
133  PREFETCH_R(&mat.getcref(k+5,j), 2);
134  tmp -= mat.get(i,k+3) * mat.get(k+3,j);
135  PREFETCH_R(&mat.getcref(k+6,j), 2);
136  PREFETCH_R(&mat.getcref(k+7,j), 2);
137  }
138  for (; k < i; ++k)
139  tmp -= mat.get(i,k) * mat.get(k,j);
140  mat.setval (tmp, i, j);
141  if (LIKELY(i<matr-1))
142  PREFETCH_R(&mat.getcref(i+1,j),3);
143  }
144  if (UNLIKELY(MATH__ fabs(tmp) < MLU_MINVAL)) {
145  ++err;
146  col = j;
147  }
148  T invdiag = T(1.0)/tmp;
149  //No pivoting for the first shot
150  // Diagonal zeros are lethal
151  //#warning "We don't do pivoting, so don't supply non diagonal-dominant matrices!"
152  for (i = j + 1; i < matr; ++i)
153  {
154  PREFETCH_R(&mat.getcref(i,j), 3);
155  PREFETCH_R(&mat.getcref(i,0), 3);
156  PREFETCH_R(&mat.getcref(i,EL_PER_CL(T)), 3);
157  tmp = mat.get(i, j);
158  REGISTER unsigned k;
159  for (k = 0; (signed)k < (signed)j-(signed)MAX(8,4*EL_PER_CL(T)); k+=4) {
160  tmp -= mat.get(i, k) * mat.get(k, j);
161  PREFETCH_R(&mat.getcref(i,k+2*EL_PER_CL(T)),3);
162  tmp -= mat.get(i,k+1) * mat.get(k+1,j);
163  PREFETCH_R(&mat.getcref(i,k+3*EL_PER_CL(T)),3);
164  tmp -= mat.get(i,k+2) * mat.get(k+2,j);
165  PREFETCH_R(&mat.getcref(k+4,j), 2);
166  PREFETCH_R(&mat.getcref(k+5,j), 2);
167  tmp -= mat.get(i,k+3) * mat.get(k+3,j);
168  PREFETCH_R(&mat.getcref(k+6,j), 2);
169  PREFETCH_R(&mat.getcref(k+7,j), 2);
170  }
171  for (; k < j; ++k)
172  tmp -= mat.get(i,k) * mat.get(k,j);
173  mat.setval (tmp * invdiag, i, j);
174  }
175  }
176  BCHK(err != 0, MatErr, Diagonal zero encountered, col, err);
177  return err;
178 }
179 #endif
180 
181 INST(template <typename T> class Matrix friend TVector<T> LU_fwd_subst(const Matrix<T>&, const Vector<T>&);)
182 template <typename T>
184 {
185  const unsigned ysize = y.size();
186  TVector<T> x(ysize);
187  BCHK(ysize != lu.rows(), MatErr, LU fwd subst size error, ysize, x);
188  for (unsigned i = 0; i < ysize; ++i) {
189 #if 0
190  ALIGN2(REGISTER T tmp, MIN_ALIGN2) = -y(i);
191  do_vec_mult<T> (i, &x.get(0), &lu(i,0), tmp);
192  x.set (-tmp, i);
193 #else
194  ALIGN2(REGISTER T tmp, MIN_ALIGN2) = y(i);
195  for (REGISTER unsigned j = 0; j < i; ++j)
196  tmp -= x.get(j) * lu.get(i, j);
197  x.set (tmp, i);
198 #endif
199  }
200  return x;
201 }
202 
203 INST(template <typename T> class Matrix friend TVector<T> LU_bkw_subst(const Matrix<T>&, const Vector<T>&);)
204 template <typename T>
206 {
207  const unsigned ysize = y.size();
208  TVector<T> x(ysize);
209  BCHK(ysize != lu.rows(), MatErr, LU bkw subst size error, ysize, x);
210  for (unsigned i = ysize - 1; (signed)i >= 0; --i) {
211  ALIGN2(REGISTER T tmp, MIN_ALIGN2) = y(i);
212  for (REGISTER unsigned j = ysize - 1; j > i; --j)
213  tmp -= x.get(j) * lu.get(i, j);
214  x.set(tmp / lu.get(i, i), i);
215  }
216  return x;
217 }
218 
223 INST(template <typename T> class Matrix friend TVector<T> LU_solve(const Matrix<T>&, const Vector<T>&);)
224 template <typename T>
225 inline TVector<T> LU_solve(const Matrix<T>& lu, const Vector<T>& b)
226 {
227  Vector<T> y(LU_fwd_subst(lu, b));
228  //STD__ cout << y << STD__ endl;
229  return LU_bkw_subst(lu, y);
230 }
231 
238 INST(template <typename T> class Matrix friend TVector<T> lu_solve(Matrix<T>&, const Vector<T>&);)
239 template <typename T>
241 {
242 #ifdef ERRCHECK
243  Matrix<T> m_backup (mat);
244  double res;
245 #endif
246  lu_decomp(mat);
247  TVector<T> x (LU_solve(mat, b));
248 #ifdef ERRCHECK
249  Vector<T> xx(b.size()); xx.copy (x);
250  BCHK ((res = MATH__ fabs(m_backup * xx - b)/MATH__ fabs(b)) > MLU_RES_WARN, MatErr,
251  LU solver: Inaccurate solution, (int)(res / MLU_RES_WARN), x);
252 #endif
253  return x;
254 }
255 
256 INST(template <typename T> class Matrix friend TMatrix<T> LU_solve(const Matrix<T>&, const Matrix<T>&);)
257 template <typename T>
259 {
260  TMatrix<T> x(b.rows(), b.columns());
261  Vector<T> y(b.rows()), w(b.rows());
262  for (unsigned i = 0; i < b.columns(); ++i) {
263  w = b.get_col(i);
264  y = LU_fwd_subst(lu, w);
265  x.set_col(LU_bkw_subst(lu, y), i);
266  }
267  return x;
268 }
269 
270 INST(template <typename T> class Matrix friend TMatrix<T> lu_solve(Matrix<T>&, const Matrix<T>&);)
271 template <typename T>
272 inline TMatrix<T> lu_solve(Matrix<T>& mat, const Matrix<T>& b)
273 {
274  lu_decomp(mat);
275  return LU_solve(mat, b);
276 }
277 
279 INST(template <typename T> class Matrix friend T LU_det(const Matrix<T>&);)
280 template <typename T>
281 T LU_det(const Matrix<T>& lu)
282 {
283  ALIGN2(REGISTER T tmp, MIN_ALIGN2) = lu(0, 0);
284  BCHK(lu.rows() != lu.columns(), MatErr, LU determinant only for quadratic matrices, 0, 0);
285  for (REGISTER unsigned i = 1; i < lu.rows(); ++i)
286  tmp *= lu.get(i, i);
287  return tmp;
288 }
289 
291 INST(template <typename T> class Matrix friend T lu_det(Matrix<T>&);)
292 template <typename T>
293 inline T lu_det(Matrix<T>& mat)
294 {
295  lu_decomp(mat);
296  return LU_det(mat);
297 }
298 
299 
301 INST(template <typename T> class Matrix friend TMatrix<T> LU_invert(const Matrix<T>&);)
302 template <typename T>
304 {
305  TMatrix<T> inv (T(0), mat.rows(), mat.columns());
306  BCHK (mat.rows() != mat.columns(), MatErr, LU invert non quadr. Matrix, 0, inv);
307  for (unsigned j = 0; j < mat.rows(); ++j) {
308  Vector<T> v (T(0), mat.columns());
309  v.setval(j) = T(1);
310  Vector<T> y (LU_solve(mat, v));
311  inv.set_col(y, j);
312  }
313  return inv;
314 }
315 
316 
318 INST(template <typename T> class Matrix friend TMatrix<T> lu_invert(Matrix<T>&);)
319 template <typename T>
321 {
322  lu_decomp(mat);
323  return LU_invert(mat);
324 }
325 
327 #endif /* TBCI_SOLVER_LU_SOLVER_H */
const Vector< T > const Vector< T > & x
Definition: LM_fit.h:97
TVector< T > LU_fwd_subst(const BdMatrix< T > &lu, const Vector< T > &y)
Definition: bd_lu_solver.h:207
#define REGISTER
Definition: basics.h:108
double fabs(const int a)
Definition: basics.h:1215
TVector< T > get_col(const unsigned int) const
Column vector.
Definition: matrix.h:627
#define MIN(a, b)
Definition: basics.h:655
F_TMatrix< double > inv(const F_Matrix< double > &A, int overwriteA=0)
Definition: lapack.cpp:181
#define NAMESPACE_TBCI
Definition: basics.h:317
const T & getcref(const unsigned r, const unsigned c) const
Definition: matrix.h:292
F_TMatrix< double > lu_solve(const F_Matrix< double > &A, const F_Matrix< double > &B, int overwriteA=0)
Definition: lapack.cpp:156
tbci_traits< T >::const_refval_type get(const unsigned long i) const
Definition: vector.h:190
#define MIN_ALIGN2
Definition: basics.h:424
#define EL_PER_CL(T)
Definition: perf_opt.h:172
#define BCHK(cond, exc, txt, ind, rtval)
Definition: basics.h:575
#define UNLIKELY(expr)
Definition: basics.h:101
TVector< T > LU_bkw_subst(const BdMatrix< T > &lu, const Vector< T > &y)
Definition: bd_lu_solver.h:224
TMatrix< T > LU_invert(const BdMatrix< T > &lu)
return the inverse Matrix for an already LU decomposed BdMatrix
Definition: bd_lu_solver.h:338
T & set(const T &val, const unsigned long i) const
Definition: vector.h:194
T & setval(const T &val, const unsigned int r, const unsigned int c)
Definition: matrix.h:281
int lu_decomp(Matrix< T > &) HOT
LU decomposes the TBCI::Matrix mat.
Definition: lu_solver.h:94
#define PREFETCH_R(addr, loc)
In case gcc does not yet support __builtin_prefetch(), we have handcoded assembly with gcc for a few ...
Definition: basics.h:748
unsigned int columns() const
number of columns
Definition: matrix.h:315
#define MLU_RES_WARN
Definition: lu_solver.h:37
T LU_det(const BdMatrix< T > &lu)
calculates determinant of an already LU decomposed BdMatrix
Definition: bd_lu_solver.h:317
F_TMatrix< T > b
Definition: f_matrix.h:736
exception class
Definition: matrix.h:53
#define USE_PLAIN_VEC_KERNELS
file perf_opt.h Default settings for optimum performance on different architectures and compilers...
Definition: perf_opt.h:114
unsigned long size() const
Definition: vector.h:104
Definition: bvector.h:49
#define MLU_MINVAL
Definition: lu_solver.h:32
const Vector< T > const Vector< T > const Vector< T > int T T & err
Definition: LM_fit.h:102
#define ALIGN2(v, x)
Definition: basics.h:443
#define INST(x)
Definition: basics.h:238
int i
Definition: LM_fit.h:71
Temporary Base Class Idiom: Class TVector is used for temporary variables.
Definition: bvector.h:52
#define MAX(a, b)
Definition: basics.h:656
#define NAMESPACE_END
Definition: basics.h:323
BVector< T > & copy(const BVector< T > &bv)
copy does a resize, if necessary
Definition: bvector.h:484
Definition: bvector.h:54
TVector< T > LU_solve(const BdMatrix< T > &lu, const Vector< T > &b)
Solve the equation Ax = b where A IS already LU decomposed.
Definition: bd_lu_solver.h:246
const Vector< T > Vector< T > Vector< T > Vector< T > & y
Definition: LM_fit.h:172
const Vector< T > Vector< T > Vector< T > Vector< T > Vector< T > Vector< T > Vector< T > Vector< T > long int res
Definition: LM_fit.h:199
#define T
Definition: bdmatlib.cc:20
const Vector< T > Vector< T > Vector< T > Vector< T > Vector< T > Vector< T > Vector< T > Vector< T > long int int char v
&lt; find minimun of func on grid with resolution res
Definition: LM_fit.h:205
TMatrix< T > lu_invert(BdMatrix< T > &mat)
return the inverse Matrix for a BdMatrix by doing an LU decomposition
Definition: bd_lu_solver.h:355
#define MATH__
Definition: basics.h:339
#define LIKELY(expr)
branch prediction note that we sometimes on purpose mark the unlikely possibility likely and vice ver...
Definition: basics.h:100
tbci_traits< T >::const_refval_type get(const unsigned r, const unsigned c) const
get, set and getcref are used internally and not for public consumption
Definition: matrix.h:288
unsigned int rows() const
number of rows
Definition: matrix.h:317
T lu_det(BdMatrix< T > &mat)
calculates the determinant of a BdMatrix by doing an LU decomposition
Definition: bd_lu_solver.h:329