20#ifndef TBCI_SOLVER_LU_SOLVER_H
21#define TBCI_SOLVER_LU_SOLVER_H
23#include "tbci/matrix.h"
24#include "tbci/vector.h"
27# pragma interface "lu_solver.h"
32# define MLU_MINVAL 5e-16
37# define MLU_RES_WARN 1e-11
56 const unsigned matr = mat.
rows();
57 int err = 0, col = -1;
65 for (
unsigned j = 0; j < matr; ++j) {
68 for (
i = 0;
i <= j; ++
i) {
71 tmp -= mat.
get(
i, k) * mat.
get(k, j);
78 T invdiag =
T(1.0)/tmp;
82 for (
i = j + 1;
i < matr; ++
i) {
84 for (
REGISTER unsigned k = 0; k < j; ++k)
85 tmp -= mat.
get(
i, k) * mat.
get(k, j);
86 mat.
setval(tmp * invdiag,
i, j);
96 const unsigned matr = mat.
rows();
97 int err = 0, col = -1;
105 for (
unsigned j = 0; j < matr; ++j) {
110 for (
i = 0;
i <=
MIN(j,4); ++
i) {
114 for (
REGISTER unsigned k = 0; k <
i; ++k)
115 tmp -= mat.
get(
i, k) * mat.
get(k, j);
121 for (;
i <= j; ++
i) {
126 for (k = 0; (signed)k < (signed)
i-(signed)
MAX(8,4*
EL_PER_CL(
T)); k+=4) {
127 tmp -= mat.
get(
i, k) * mat.
get(k, j);
129 tmp -= mat.
get(
i,k+1) * mat.
get(k+1,j);
131 tmp -= mat.
get(
i,k+2) * mat.
get(k+2,j);
134 tmp -= mat.
get(
i,k+3) * mat.
get(k+3,j);
139 tmp -= mat.
get(
i,k) * mat.
get(k,j);
148 T invdiag =
T(1.0)/tmp;
152 for (
i = j + 1;
i < matr; ++
i)
159 for (k = 0; (signed)k < (signed)j-(signed)
MAX(8,4*
EL_PER_CL(
T)); k+=4) {
160 tmp -= mat.
get(
i, k) * mat.
get(k, j);
162 tmp -= mat.
get(
i,k+1) * mat.
get(k+1,j);
164 tmp -= mat.
get(
i,k+2) * mat.
get(k+2,j);
167 tmp -= mat.
get(
i,k+3) * mat.
get(k+3,j);
172 tmp -= mat.
get(
i,k) * mat.
get(k,j);
173 mat.
setval (tmp * invdiag,
i, j);
185 const unsigned ysize =
y.size();
188 for (
unsigned i = 0;
i < ysize; ++
i) {
191 do_vec_mult<T> (
i, &
x.get(0), &lu(
i,0), tmp);
195 for (
REGISTER unsigned j = 0; j <
i; ++j)
196 tmp -=
x.get(j) * lu.
get(
i, j);
207 const unsigned ysize =
y.size();
210 for (
unsigned i = ysize - 1; (signed)
i >= 0; --
i) {
212 for (
REGISTER unsigned j = ysize - 1; j >
i; --j)
213 tmp -=
x.get(j) * lu.
get(
i, j);
262 for (
unsigned i = 0;
i <
b.columns(); ++
i) {
307 for (
unsigned j = 0; j < mat.
rows(); ++j) {
const Vector< T > const Vector< T > & x
const Vector< T > const Vector< T > const Vector< T > int T T & err
#define BCHK(cond, exc, txt, ind, rtval)
#define LIKELY(expr)
branch prediction note that we sometimes on purpose mark the unlikely possibility likely and vice ver...
#define PREFETCH_R(addr, loc)
In case gcc does not yet support __builtin_prefetch(), we have handcoded assembly with gcc for a few ...
BVector< T > & copy(const BVector< T > &bv)
copy does a resize, if necessary
const T & getcref(const unsigned r, const unsigned c) const
unsigned int rows() const
number of rows
tbci_traits< T >::const_refval_type get(const unsigned r, const unsigned c) const
get, set and getcref are used internally and not for public consumption
unsigned int columns() const
number of columns
T & setval(const T &val, const unsigned int r, const unsigned int c)
Temporary Base Class Idiom: Class TVector is used for temporary variables.
T & setval(const unsigned long i) const
double fabs(const TBCI__ cplx< T > &c)
F_TMatrix< double > inv(const F_Matrix< double > &A, int overwriteA=0)
T lu_det(Matrix< T > &mat)
calculates determinant of a Matrix by LU decomposition
TVector< T > LU_fwd_subst(const Matrix< T > &lu, const Vector< T > &y)
TVector< T > lu_solve(Matrix< T > &mat, const Vector< T > &b)
T LU_det(const Matrix< T > &lu)
calculates determinant of an already LU decomposed Matrix
TMatrix< T > lu_invert(Matrix< T > &mat)
Returns the inverse of a Matrix by performing a LU decomposition.
TVector< T > LU_solve(const Matrix< T > &lu, const Vector< T > &b)
TMatrix< T > LU_invert(const Matrix< T > &mat)
Returns the inverse of a already LU decomposed Matrix.
TVector< T > LU_bkw_subst(const Matrix< T > &lu, const Vector< T > &y)
NAMESPACE_TBCI int lu_decomp(Matrix< T > &mat)
LU decomposes the TBCI::Matrix mat.
int lu_decomp(Matrix< T > &) HOT
LU decomposes the TBCI::Matrix mat.
const unsigned TMatrix< T > * res
#define USE_PLAIN_VEC_KERNELS
file perf_opt.h Default settings for optimum performance on different architectures and compilers.