20 #ifndef TBCI_SOLVER_LU_SOLVER_H
21 #define TBCI_SOLVER_LU_SOLVER_H
23 #include "tbci/matrix.h"
24 #include "tbci/vector.h"
27 # pragma interface "lu_solver.h"
32 # define MLU_MINVAL 5e-16
37 # define MLU_RES_WARN 1e-11
56 const unsigned matr = mat.rows();
57 int err = 0, col = -1;
59 BCHK(matr != mat.columns(),
MatErr, LU only
for quadratic matrices, matr, -1);
65 for (
unsigned j = 0; j < matr; ++j) {
68 for (i = 0; i <= j; ++
i) {
71 tmp -= mat.get(i, k) * mat.get(k, j);
72 mat.setval(tmp, i, j);
78 T invdiag =
T(1.0)/tmp;
82 for (i = j + 1; i < matr; ++
i) {
84 for (
REGISTER unsigned k = 0; k < j; ++k)
85 tmp -= mat.get(i, k) * mat.get(k, j);
86 mat.setval(tmp * invdiag, i, j);
89 BCHK(err != 0,
MatErr, Diagonal zero encountered, col, err);
96 const unsigned matr = mat.
rows();
97 int err = 0, col = -1;
105 for (
unsigned j = 0; j < matr; ++j) {
110 for (i = 0; i <=
MIN(j,4); ++
i) {
114 for (
REGISTER unsigned k = 0; k <
i; ++k)
115 tmp -= mat.
get(i, k) * mat.
get(k, j);
121 for (; i <= j; ++
i) {
126 for (k = 0; (signed)k < (
signed)i-(signed)
MAX(8,4*
EL_PER_CL(
T)); k+=4) {
127 tmp -= mat.
get(i, k) * mat.
get(k, j);
129 tmp -= mat.
get(i,k+1) * mat.
get(k+1,j);
131 tmp -= mat.
get(i,k+2) * mat.
get(k+2,j);
134 tmp -= mat.
get(i,k+3) * mat.
get(k+3,j);
139 tmp -= mat.
get(i,k) * mat.
get(k,j);
148 T invdiag =
T(1.0)/tmp;
152 for (i = j + 1; i < matr; ++
i)
159 for (k = 0; (signed)k < (
signed)j-(signed)
MAX(8,4*
EL_PER_CL(
T)); k+=4) {
160 tmp -= mat.
get(i, k) * mat.
get(k, j);
162 tmp -= mat.
get(i,k+1) * mat.
get(k+1,j);
164 tmp -= mat.
get(i,k+2) * mat.
get(k+2,j);
167 tmp -= mat.
get(i,k+3) * mat.
get(k+3,j);
172 tmp -= mat.
get(i,k) * mat.
get(k,j);
173 mat.
setval (tmp * invdiag, i, j);
176 BCHK(err != 0,
MatErr, Diagonal zero encountered, col, err);
182 template <typename T>
185 const unsigned ysize = y.
size();
188 for (
unsigned i = 0;
i < ysize; ++
i) {
191 do_vec_mult<T> (
i, &x.
get(0), &lu(i,0), tmp);
195 for (
REGISTER unsigned j = 0; j <
i; ++j)
196 tmp -= x.
get(j) * lu.
get(i, j);
204 template <typename T>
207 const unsigned ysize = y.
size();
210 for (
unsigned i = ysize - 1; (signed)
i >= 0; --
i) {
212 for (
REGISTER unsigned j = ysize - 1; j >
i; --j)
213 tmp -= x.
get(j) * lu.
get(i, j);
224 template <typename T>
239 template <typename T>
251 LU solver: Inaccurate solution, (
int)(res /
MLU_RES_WARN), x);
257 template <typename T>
271 template <typename T>
280 template <typename T>
292 template <typename T>
302 template <typename T>
307 for (
unsigned j = 0; j < mat.
rows(); ++j) {
319 template <typename T>
const Vector< T > const Vector< T > & x
TVector< T > LU_fwd_subst(const BdMatrix< T > &lu, const Vector< T > &y)
TVector< T > get_col(const unsigned int) const
Column vector.
F_TMatrix< double > inv(const F_Matrix< double > &A, int overwriteA=0)
const T & getcref(const unsigned r, const unsigned c) const
F_TMatrix< double > lu_solve(const F_Matrix< double > &A, const F_Matrix< double > &B, int overwriteA=0)
tbci_traits< T >::const_refval_type get(const unsigned long i) const
#define BCHK(cond, exc, txt, ind, rtval)
TVector< T > LU_bkw_subst(const BdMatrix< T > &lu, const Vector< T > &y)
TMatrix< T > LU_invert(const BdMatrix< T > &lu)
return the inverse Matrix for an already LU decomposed BdMatrix
T & set(const T &val, const unsigned long i) const
T & setval(const T &val, const unsigned int r, const unsigned int c)
int lu_decomp(Matrix< T > &) HOT
LU decomposes the TBCI::Matrix mat.
#define PREFETCH_R(addr, loc)
In case gcc does not yet support __builtin_prefetch(), we have handcoded assembly with gcc for a few ...
unsigned int columns() const
number of columns
T LU_det(const BdMatrix< T > &lu)
calculates determinant of an already LU decomposed BdMatrix
#define USE_PLAIN_VEC_KERNELS
file perf_opt.h Default settings for optimum performance on different architectures and compilers...
unsigned long size() const
const Vector< T > const Vector< T > const Vector< T > int T T & err
Temporary Base Class Idiom: Class TVector is used for temporary variables.
BVector< T > & copy(const BVector< T > &bv)
copy does a resize, if necessary
TVector< T > LU_solve(const BdMatrix< T > &lu, const Vector< T > &b)
Solve the equation Ax = b where A IS already LU decomposed.
const Vector< T > Vector< T > Vector< T > Vector< T > & y
const Vector< T > Vector< T > Vector< T > Vector< T > Vector< T > Vector< T > Vector< T > Vector< T > long int res
const Vector< T > Vector< T > Vector< T > Vector< T > Vector< T > Vector< T > Vector< T > Vector< T > long int int char v
< find minimun of func on grid with resolution res
TMatrix< T > lu_invert(BdMatrix< T > &mat)
return the inverse Matrix for a BdMatrix by doing an LU decomposition
#define LIKELY(expr)
branch prediction note that we sometimes on purpose mark the unlikely possibility likely and vice ver...
tbci_traits< T >::const_refval_type get(const unsigned r, const unsigned c) const
get, set and getcref are used internally and not for public consumption
unsigned int rows() const
number of rows
T lu_det(BdMatrix< T > &mat)
calculates the determinant of a BdMatrix by doing an LU decomposition