20#ifndef TBCI_SOLVER_BD_LU_SOLVER_H
21#define TBCI_SOLVER_BD_LU_SOLVER_H
23#include "tbci/vector.h"
24#include "tbci/band_matrix.h"
25#include "tbci/matrix.h"
28# pragma interface "bd_lu_solver.h"
33# define BDMLU_MINVAL 1e-16
38# define BDMLU_RES_WARN 1e-11
59 const unsigned matr = mat.
rows();
69 for (
unsigned j = 0; j < matr; ++j) {
70 unsigned i =
MAX((
signed)0, (
signed)j - (
signed)maxoff);
71 const unsigned imax =
MIN(j,maxoff);
73 for (;
i <= imax; ++
i) {
76 tmp -= mat.
get(
i, k) * mat.
get(k, j);
81 for (
REGISTER unsigned k = j - maxoff; k <
i; ++k)
82 tmp -= mat.
get(
i, k) * mat.
get(k, j);
86 T invdiag =
T(1.0)/tmp;
91 for (;
i < bound; ++
i) {
93 for (
REGISTER unsigned int k =
MAX((
signed)0, (
signed)j - (
signed)maxoff); k < j; ++k)
94 tmp -= mat.
get(
i, k) * mat.
get(k, j);
98 STD__ cerr <<
"\n BdMatrixWarning: BdMatrix badly conditioned, continuing ...\n" <<
STD__ flush;
100 mat.
setval(tmp * invdiag,
i, j);
111 const unsigned matr = mat.
rows();
121 for (
unsigned j = 0; j < matr; ++j) {
122 unsigned i =
MAX((
signed)0, (
signed)j - (
signed)maxoff);
125 const unsigned imax =
MIN(j,maxoff);
129 for (;
i <= imax; ++
i) {
133 for (
REGISTER unsigned k = 0; k <
i; ++k) {
134 tmp -= mat.
get(
i, k) * mat.
get(k, j);
141 for (;
i <= j; ++
i) {
146 for (k = j - maxoff; k < (signed)
i-(signed)
MAX(8,4*
EL_PER_CL(
T)); k+=4) {
147 tmp -= mat.
get(
i, k) * mat.
get(k, j);
150 tmp -= mat.
get(
i, k+1) * mat.
get(k+1, j);
153 tmp -= mat.
get(
i, k+2) * mat.
get(k+2, j);
156 tmp -= mat.
get(
i, k+3) * mat.
get(k+3, j);
161 tmp -= mat.
get(
i, k) * mat.
get(k, j);
165 T invdiag =
T(1.0)/tmp;
170 for (;
i < bound; ++
i) {
171 const unsigned kstart =
MAX((
signed)0, (
signed)j - (
signed)maxoff);
176 for (k = kstart; (signed)k < (signed)j-(signed)
MAX(8,4*
EL_PER_CL(
T)); k+=4) {
177 tmp -= mat.
get(
i, k) * mat.
get(k, j);
180 tmp -= mat.
get(
i, k+1) * mat.
get(k+1, j);
183 tmp -= mat.
get(
i, k+2) * mat.
get(k+2, j);
186 tmp -= mat.
get(
i, k+3) * mat.
get(k+3, j);
191 tmp -= mat.
get(
i, k) * mat.
get(k, j);
195 STD__ cerr <<
"\n BdMatrixWarning: BdMatrix badly conditioned, continuing ...\n" <<
STD__ flush;
197 mat.
setval(tmp * invdiag,
i, j);
209 const unsigned ysize =
y.size();
213 for (
unsigned i = 0;
i < ysize; ++
i) {
215 for (
REGISTER unsigned j =
MAX((
signed)0, (
signed)
i - (
signed)maxoff); j <
i; ++j)
216 tmp -=
x.get(j) * lu.
get(
i, j);
226 const unsigned ysize =
y.size();
230 for (
int i = ysize - 1; (signed)
i >= 0; --
i) {
232 for (
REGISTER int j =
MIN(ysize - 1,
i + maxoff); j >
i; --j)
233 tmp -=
x.get(j) * lu.
get(
i, j);
272 STD__ cerr <<
"bd_lu_solver: Inaccurate solution, rel. residuum= " <<
res <<
STD__ endl;
283 const unsigned bc =
b.columns();
284 for (
unsigned int i = 0;
i < bc; ++
i) {
320 const unsigned lr = lu.
rows();
321 for (
unsigned i = 1;
i < lr; ++
i)
341 const unsigned lr = lu.
rows();
342 for (
unsigned int j = 0; j < lr; ++j) {
const Vector< T > const Vector< T > & x
#define BCHK(cond, exc, txt, ind, rtval)
#define PREFETCH_R(addr, loc)
In case gcc does not yet support __builtin_prefetch(), we have handcoded assembly with gcc for a few ...
#define PREFETCH_W(addr, loc)
TVector< T > lu_solve(BdMatrix< T > &mat, const Vector< T > &b)
Solve the equation Ax = b where A still needs to be LU decomposed.
TVector< T > LU_bkw_subst(const BdMatrix< T > &lu, const Vector< T > &y)
T lu_det(BdMatrix< T > &mat)
calculates the determinant of a BdMatrix by doing an LU decomposition
TVector< T > LU_solve(const BdMatrix< T > &lu, const Vector< T > &b)
Solve the equation Ax = b where A IS already LU decomposed.
T LU_det(const BdMatrix< T > &lu)
calculates determinant of an already LU decomposed BdMatrix
NAMESPACE_TBCI int lu_decomp(BdMatrix< T > &mat)
LU decompose a TBCI::BdMatrix.
TMatrix< T > LU_invert(const BdMatrix< T > &lu)
return the inverse Matrix for an already LU decomposed BdMatrix
TVector< T > LU_fwd_subst(const BdMatrix< T > &lu, const Vector< T > &y)
TMatrix< T > lu_invert(BdMatrix< T > &mat)
return the inverse Matrix for a BdMatrix by doing an LU decomposition
BVector< T > & copy(const BVector< T > &bv)
copy does a resize, if necessary
The class BdMatrix is an implementation to store and do operations on sparse Matrices with a band str...
tbci_traits< T >::const_refval_type get(const unsigned int, const unsigned int) const HOT
BdMatrix< T > & expand(unsigned=0)
unsigned int getmaxoff() const
max distance from main diagonal
BdMatrix< T > & setval(const T &v, const unsigned int r, const unsigned int c)
autoinsert, sparse class compatible
unsigned int rows() const
number of rows
unsigned int columns() const
number of columns
Temporary Base Class Idiom: Class TVector is used for temporary variables.
double fabs(const TBCI__ cplx< T > &c)
F_TMatrix< double > lu_solve(const F_Matrix< double > &A, const F_Matrix< double > &B, int overwriteA=0)
F_TMatrix< double > inv(const F_Matrix< double > &A, int overwriteA=0)
int lu_decomp(Matrix< T > &) HOT
LU decomposes the TBCI::Matrix mat.
const unsigned TMatrix< T > * res
#define UNROLL_DEPTH
How many iters per loop (unrolling) Trade code bloat against speed.
#define USE_PLAIN_VEC_KERNELS
file perf_opt.h Default settings for optimum performance on different architectures and compilers.