20 #ifndef TBCI_SOLVER_BD_LU_SOLVER_H
21 #define TBCI_SOLVER_BD_LU_SOLVER_H
23 #include "tbci/vector.h"
24 #include "tbci/band_matrix.h"
25 #include "tbci/matrix.h"
28 # pragma interface "bd_lu_solver.h"
33 # define BDMLU_MINVAL 1e-16
37 #ifndef BDMLU_RES_WARN
38 # define BDMLU_RES_WARN 1e-11
59 const unsigned matr = mat.
rows();
69 for (
unsigned j = 0; j < matr; ++j) {
70 unsigned i =
MAX((
signed)0, (
signed)j - (
signed)maxoff);
71 const unsigned imax =
MIN(j,maxoff);
73 for (; i <= imax; ++
i) {
76 tmp -= mat.
get(i, k) * mat.
get(k, j);
81 for (
REGISTER unsigned k = j - maxoff; k <
i; ++k)
82 tmp -= mat.
get(i, k) * mat.
get(k, j);
86 T invdiag =
T(1.0)/tmp;
91 for (; i < bound; ++
i) {
93 for (
REGISTER unsigned int k =
MAX((
signed)0, (
signed)j - (
signed)maxoff); k < j; ++k)
94 tmp -= mat.
get(i, k) * mat.
get(k, j);
98 STD__ cerr <<
"\n BdMatrixWarning: BdMatrix badly conditioned, continuing ...\n" <<
STD__ flush;
100 mat.
setval(tmp * invdiag, i, j);
108 template <
typename T>
111 const unsigned matr = mat.
rows();
121 for (
unsigned j = 0; j < matr; ++j) {
122 unsigned i =
MAX((
signed)0, (
signed)j - (
signed)maxoff);
125 const unsigned imax =
MIN(j,maxoff);
129 for (; i <= imax; ++
i) {
133 for (
REGISTER unsigned k = 0; k <
i; ++k) {
134 tmp -= mat.
get(i, k) * mat.
get(k, j);
141 for (; i <= j; ++
i) {
146 for (k = j - maxoff; k < (signed)i-(
signed)
MAX(8,4*
EL_PER_CL(
T)); k+=4) {
147 tmp -= mat.
get(i, k) * mat.
get(k, j);
150 tmp -= mat.
get(i, k+1) * mat.
get(k+1, j);
153 tmp -= mat.
get(i, k+2) * mat.
get(k+2, j);
156 tmp -= mat.
get(i, k+3) * mat.
get(k+3, j);
161 tmp -= mat.
get(i, k) * mat.
get(k, j);
165 T invdiag =
T(1.0)/tmp;
170 for (; i < bound; ++
i) {
171 const unsigned kstart =
MAX((
signed)0, (
signed)j - (
signed)maxoff);
176 for (k = kstart; (signed)k < (
signed)j-(signed)
MAX(8,4*
EL_PER_CL(T)); k+=4) {
177 tmp -= mat.
get(i, k) * mat.
get(k, j);
180 tmp -= mat.
get(i, k+1) * mat.
get(k+1, j);
183 tmp -= mat.
get(i, k+2) * mat.
get(k+2, j);
186 tmp -= mat.
get(i, k+3) * mat.
get(k+3, j);
191 tmp -= mat.
get(i, k) * mat.
get(k, j);
195 STD__ cerr <<
"\n BdMatrixWarning: BdMatrix badly conditioned, continuing ...\n" <<
STD__ flush;
197 mat.
setval(tmp * invdiag, i, j);
206 template <typename T>
209 const unsigned ysize = y.
size();
213 for (
unsigned i = 0; i < ysize; ++
i) {
215 for (
REGISTER unsigned j =
MAX((
signed)0, (
signed)i - (
signed)maxoff); j <
i; ++j)
216 tmp -= x.
get(j) * lu.
get(i, j);
223 template <typename T>
226 const unsigned ysize = y.
size();
230 for (
int i = ysize - 1; (signed)i >= 0; --
i) {
232 for (
REGISTER int j =
MIN(ysize - 1, i + maxoff); j >
i; --j)
233 tmp -= x.
get(j) * lu.
get(i, j);
245 template <typename T>
259 template <typename T>
272 STD__ cerr <<
"bd_lu_solver: Inaccurate solution, rel. residuum= " << res <<
STD__ endl;
278 template <typename T>
283 const unsigned bc = b.
columns();
284 for (
unsigned int i = 0; i < bc; ++
i) {
294 template <typename T>
304 template <typename T>
309 template <typename T>
316 template <typename T>
320 const unsigned lr = lu.
rows();
321 for (
unsigned i = 1; i < lr; ++
i)
328 template <typename T>
337 template <typename T>
341 const unsigned lr = lu.
rows();
342 for (
unsigned int j = 0; j < lr; ++j) {
354 template <typename T>
const Vector< T > const Vector< T > & x
BdMatrix< T > & expand(unsigned=0)
TVector< T > LU_fwd_subst(const BdMatrix< T > &lu, const Vector< T > &y)
The class BdMatrix is an implementation to store and do operations on sparse Matrices with a band str...
TVector< T > get_col(const unsigned int) const
Column vector.
F_TMatrix< double > inv(const F_Matrix< double > &A, int overwriteA=0)
F_TMatrix< double > lu_solve(const F_Matrix< double > &A, const F_Matrix< double > &B, int overwriteA=0)
tbci_traits< T >::const_refval_type get(const unsigned int, const unsigned int) const HOT
tbci_traits< T >::const_refval_type get(const unsigned long i) const
unsigned int getmaxoff() const
max distance from main diagonal
unsigned int columns() const
number of columns
#define BCHK(cond, exc, txt, ind, rtval)
TVector< T > LU_bkw_subst(const BdMatrix< T > &lu, const Vector< T > &y)
TMatrix< T > LU_invert(const BdMatrix< T > &lu)
return the inverse Matrix for an already LU decomposed BdMatrix
T & set(const T &val, const unsigned long i) const
int lu_decomp(Matrix< T > &) HOT
LU decomposes the TBCI::Matrix mat.
#define PREFETCH_R(addr, loc)
In case gcc does not yet support __builtin_prefetch(), we have handcoded assembly with gcc for a few ...
unsigned int columns() const
number of columns
#define UNROLL_DEPTH
How many iters per loop (unrolling) Trade code bloat against speed.
T LU_det(const BdMatrix< T > &lu)
calculates determinant of an already LU decomposed BdMatrix
#define USE_PLAIN_VEC_KERNELS
file perf_opt.h Default settings for optimum performance on different architectures and compilers...
unsigned int rows() const
number of rows
unsigned long size() const
#define PREFETCH_W(addr, loc)
Temporary Base Class Idiom: Class TVector is used for temporary variables.
BVector< T > & copy(const BVector< T > &bv)
copy does a resize, if necessary
TVector< T > LU_solve(const BdMatrix< T > &lu, const Vector< T > &b)
Solve the equation Ax = b where A IS already LU decomposed.
const Vector< T > Vector< T > Vector< T > Vector< T > & y
const Vector< T > Vector< T > Vector< T > Vector< T > Vector< T > Vector< T > Vector< T > Vector< T > long int res
BdMatrix< T > & setval(const T &v, const unsigned int r, const unsigned int c)
autoinsert, sparse class compatible
const Vector< T > Vector< T > Vector< T > Vector< T > Vector< T > Vector< T > Vector< T > Vector< T > long int int char v
< find minimun of func on grid with resolution res
TMatrix< T > lu_invert(BdMatrix< T > &mat)
return the inverse Matrix for a BdMatrix by doing an LU decomposition
unsigned int rows() const
number of rows
T lu_det(BdMatrix< T > &mat)
calculates the determinant of a BdMatrix by doing an LU decomposition