9 #ifndef TBCI_BAND_MATRIX_H 10 #define TBCI_BAND_MATRIX_H 14 #include "tbci/basics.h" 15 #include "tbci/vector.h" 16 #include "tbci/matrix.h" 21 # define BD_MINVAL 1e-16 25 #ifndef BDMAT_NEW_ALLOC 26 # define BDMAT_ONE_BLOCK 29 #ifdef BDMAT_MEM_TRACE 30 # define BDMATDBG(x) x 36 #if !defined(NO_GD) && !defined(AUTO_DECL) 37 # include "tbci/band_matrix_gd.h" 42 #ifndef TBCI_DISABLE_EXCEPT 50 :
NumErr(
"Error in var conf Band_Matrix class") {}
60 # pragma interface "band_matrix.h" 102 template <
typename T>
117 void constructor(
const unsigned int,
const bool =
true);
118 T*
check(
const unsigned r,
const unsigned c)
const HOT;
125 mutable int outw, outp, outf;
128 inline bool search2(
unsigned int&,
const unsigned int,
const unsigned int)
HOT;
138 explicit BdMatrix(
const unsigned int = 0);
140 explicit BdMatrix(
const unsigned int,
const unsigned int);
149 BdMatrix(
const T&,
const unsigned int,
unsigned int);
165 {
return "BdMatrix"; }
167 #ifndef HAVE_PROMOTION_BUG 169 # ifndef HAVE_GCC295_TMPLFRNDCLS_BUG 175 for (
unsigned k=0; k<
dim; ++k)
179 for (
unsigned j=0; j<
dim-di; ++j) {
185 outw = bm.outw; outp = bm.outp;
186 outf = bm.outf; outset =
true;
199 typename tbci_traits<T>::const_refval_type
200 operator() (
const unsigned int,
const unsigned int)
const HOT;
201 typename tbci_traits<T>::const_refval_type
202 get(
const unsigned int,
const unsigned int)
const HOT;
206 const unsigned int c)
208 T&
setval(
const unsigned int r,
const unsigned int c);
243 inline unsigned int size()
const {
return dim; }
247 inline unsigned int rows()
const {
return dim; }
277 #ifdef BDMAT_TRANS_FRIENDS 289 #ifdef BDMAT_OP_FRIENDS 320 friend STD__ ostream& operator << FGD (STD__ ostream&, const BdMatrix<T>&);
322 { outw = w; outp =
p; outf = f; outset =
true;
return *
this; }
355 #ifdef BDMAT_ONE_BLOCK 357 template <
typename T>
365 template <
typename T>
377 template <
typename T>
384 unsigned long sz =
dim;
int t;
386 for (
REGISTER unsigned j = 0; j < dcs; ++j)
393 for (
unsigned i = 0;
i < dcs; ++
i) {
395 int s = (signed)
dim - (
signed)di;
397 STD__ cerr <<
"Wrong config. Vector supplied!" <<
STD__ endl;
408 STD__ cerr <<
"BdMat ctor: malloc failed to get " << sz <<
" bytes!" <<
STD__ endl;
416 arsz = sz; dummy = (
T)0;
425 template <
typename T>
428 for (
int i = u-1;
i >= 0; --
i) {
440 template <
typename T>
451 template <
typename T>
454 unsigned long sz = 0;
unsigned i = 0;
474 int s = (signed)
dim - (
signed)di;
480 T* ptr =
NEW(
T, 2*s);
490 arsz = sz; dummy = (
T)0;
498 STD__ cerr <<
"BdMat ctor: mem alloc (" << sz <<
") failed!" <<
STD__ endl;
505 template <
typename T>
514 template <
typename T>
524 template <
typename T>
532 template <
typename T>
539 outw = mat.outw; outp = mat.outp;
540 outf = mat.outf; outset =
true;
544 template <
typename T>
554 template <
typename T>
561 for (
REGISTER unsigned int t = 0; t < mo; ++t)
569 template <
typename T>
579 template <
typename T>
587 for (
unsigned int l = 1; l < mat.
columns(); ++l) {
589 (signed)j < ((
signed)(mat.
rows() - l)); ++j) {
590 if (mat(j, j+l) !=
T(0) || mat(j+l, j) !=
T(0)) {
607 for (
REGISTER unsigned int j = 0; j <
dim - dci; ++j) {
608 adiag.
get(dci)[j] = mat(j, j + dci);
609 bdiag.
get(dci)[j] = mat(dci + j, j);
614 template <
typename T>
622 template <
typename T>
624 const unsigned int j)
635 template <
typename T>
658 template <
typename T>
667 template <
typename T>
675 return (dummy = (
T)0);
679 template <
typename T>
680 inline typename tbci_traits<T>::const_refval_type
688 return (dummy = (
T)0);
692 template <
typename T>
693 inline typename tbci_traits<T>::const_refval_type
701 return (dummy = (
T)0);
706 template <
typename T>
723 #ifdef BDMAT_ONE_BLOCK 724 template <
typename T>
734 template <
typename T>
748 template <
typename T>
754 T* oldd(
diag);
unsigned int od =
dim;
755 #ifdef BDMAT_ONE_BLOCK 756 unsigned int oldar =
arsz;
762 if (od <= 1 && nd > 1)
786 #ifndef BDMAT_ONE_BLOCK 787 for (
unsigned i = 0;
i < oldconf.
size(); ++
i)
789 2*(od-oldconf.
get(
i)));
800 template <
typename T>
807 if (dim <= 1 && nd > 1)
817 template <
typename T>
831 template <
typename T>
838 for (
unsigned i = 0;
i <
dim; ++
i)
845 template <
typename T>
851 #ifdef BDMAT_ONE_BLOCK 852 unsigned int oldar =
arsz;
854 unsigned int olddim =
dim;
873 #ifndef BDMAT_ONE_BLOCK 874 for (
unsigned i = 0;
i < oldconf.
size(); ++
i)
876 2*(olddim-oldconf.
get(
i)));
886 #ifdef BDMAT_ONE_BLOCK 887 template <
typename T>
890 const int newsz =
dim - ndg;
900 STD__ cerr <<
"Allocation error" <<
STD__ endl;
913 long diff = (long)
diag - (
long)olddiag;
921 for (
unsigned i = 0;
i < dcs; ++
i) {
944 template <
typename T>
947 const int newsz =
dim - ndg;
952 T* ptr =
NEW(
T, 2*newsz);
955 STD__ cerr <<
"BdMat adddiag allocation failed!" <<
STD__ endl;
972 template <
typename T>
977 #ifdef BDMAT_ONE_BLOCK 995 template <
typename T>
1013 template <
typename T>
1016 for (
unsigned i=0;
i<val.
size(); ++
i)
1021 template <
typename T>
1037 template <
typename T>
1053 #ifdef BDMAT_ONE_BLOCK 1054 template <
typename T>
1061 template <
typename T>
1070 template <
typename T>
1082 template <
typename T>
1096 template <
typename T>
1104 #ifdef BDMAT_ONE_BLOCK 1106 for (
unsigned i = 0;
i < m; ++
i)
1107 newconf.
set(
i) =
i+1;
1110 for (
unsigned i=1;
i<=m; ++
i)
1119 template <
typename T>
1124 int sz = arow.
size();
1139 int r = bestrow(level++);
1140 int rw = 0;
int mul;
1144 mul = (
i%2? -1: 1); rw--;
1149 for (
REGISTER int j = 0; j < sz; j++) {
1156 for (
REGISTER int t = 0; t < sz; t++) {
1165 TBCICOPY(&(
ac(j)), &(acol(j+1)),
int, (sz-j-1));
1168 tmp += (j%2? -1: 1) * sdet(level, ar,
ac) * el;
1173 template <
typename T>
1177 #ifndef TBCI_DISABLE_EXCEPT 1178 throw (
BdMatrixErr(
"Unequal # of lines and rows to ignore!"));
1180 STD__ cerr <<
"Unequal # of lines and rows to ignore!" <<
STD__ endl;
1188 bestrow.resize(
dim);
1190 unsigned int rw = 0;
unsigned int cl = 0;
1206 matmem<T>* el = offdiag.setfirst();
int elemi = (el? el->row: -1);
1208 for (
int i = 0;
i <
dim;
i++) {
1209 if (!(actrow.contains(
i))) {
1210 rowfill(
i) =
dim + 1;
continue;
1214 if (actcol.contains(
i))
1216 if (actcol.contains(
i+1) &&
i+1 <
dim)
1218 if (actcol.contains(
i-1) &&
i-1 >= 0)
1228 while (el && elemi ==
i) {
1229 if (actcol.contains(el->col))
1231 el = offdiag.setnext(); elemi = (el? el->row: -1);
1238 for (
int k = 0; k <
dim; k++) {
1239 int min =
dim + 2;
int minr = -1;
1240 for (
int s = 0; s <
dim; s++)
1241 if (rowfill(s) <
min) {
1242 min = rowfill(s); minr = s;
1246 rowfill(minr) =
dim + 1;
1248 return sdet(0, actrow, actcol);
1253 template <
typename T>
1260 STD__ cout <<
"Warning: Automatic reconfiguring BdMatrix in assignment!" <<
STD__ endl;
1275 template <
typename T>
1283 #define SFORALL_S(op) \ 1284 template <typename T> \ 1285 BdMatrix<T>& BdMatrix<T>::operator op (const BdMatrix<T>& mat) \ 1287 BCHK(mat.dim != dim, BdMatrixErr, Adding wrong size BdMatrix, mat.dim, *this); \ 1289 for (i = 0; i < dim; ++i) \ 1290 diag[i] op mat.diag[i]; \ 1291 for (j = 0; j < diagconf.size(); ++j) { \ 1292 const REGISTER unsigned di = diagconf.get(j); \ 1293 if (LIKELY(mat.adiag.get(di))) \ 1294 for (i = 0; i < (dim-di); ++i) { \ 1295 adiag.set(di)[i] op mat.adiag.get(di)[i]; \ 1296 bdiag.set(di)[i] op mat.bdiag.get(di)[i]; \ 1299 if (diagconf == mat.diagconf) \ 1301 for (j = 0; j < mat.diagconf.size(); ++j) { \ 1302 const REGISTER unsigned di = mat.diagconf.get(j); \ 1303 if (!diagconf.contains(di)) { \ 1305 for (i = 0; i < (dim-di); ++i) { \ 1306 adiag.set(di)[i] op mat.adiag.get(di)[i]; \ 1307 bdiag.set(di)[i] op mat.bdiag.get(di)[i]; \ 1318 #define SFORALL_T(op) \ 1319 template <typename T> \ 1320 BdMatrix<T>& BdMatrix<T>::operator op (const T& mult) \ 1322 for (REGISTER unsigned int i = 0; i < dim; ++i) \ 1324 for (unsigned j = 0; j < diagconf.size(); ++j) { \ 1325 const REGISTER unsigned di = diagconf.get(j); \ 1326 for (unsigned k = 0; k < (dim-di); ++k) { \ 1327 adiag.set(di)[k] op mult; \ 1328 bdiag.set(di)[k] op mult; \ 1339 template <typename
T>
1348 for (
unsigned k = 0; k < (
dim-di); ++k) {
1356 #define INTDIVEQ(T) \ 1358 inline BdMatrix < T >& BdMatrix< T >::operator /= (const T & div) \ 1360 BCHK((div==( T )0), BdMatrixErr, "Divide BdMatrix by zero", 0, *this); \ 1361 for (REGISTER unsigned i = 0; i < dim; ++i) \ 1363 for (unsigned j = 0; j < diagconf.size(); ++j) { \ 1364 const REGISTER unsigned di = diagconf.get(j); \ 1365 for (unsigned k = 0; k < (dim-di); ++k) { \ 1366 adiag.set(di)[k] /= div; \ 1367 bdiag.set(di)[k] /= div; \ 1376 template <typename
T>
1379 #if 0 //def BDMAT_ONE_BLOCK 1392 template <typename T>
1398 #define STDDEF_ST(op) \ 1399 template <typename T> \ 1400 inline BdMatrix<T> BdMatrix<T>::operator op (const T& val) const \ 1402 return (BdMatrix<T> (*this)) op##= val; \ 1410 #define STDDEF_SS(op) \ 1411 template <typename T> \ 1412 BdMatrix<T> BdMatrix<T>::operator op (const BdMatrix<T>& m2) const \ 1414 return (BdMatrix<T> (*this)) op##= m2; \ 1421 template <typename
T>
1424 const unsigned int newoff =
maxoff + mat2.maxoff;
1429 for (
int i = 0;
i < (signed)
dim; ++
i) {
1431 const int jend =
MIN(
dim,
i+newoff+1);
1435 for (
int j =
MAX(0,
i-(
signed)newoff); j < jend; ++j) {
1437 for (
REGISTER int k = kstart; k < kend; ++k)
1438 el += this->
get(i, k) * mat2.get(k, j);
1448 template <typename T>
1454 template <
typename T>
1459 #ifndef TBCI_DISABLE_EXCEPT 1460 throw (
BdMatrixErr(
"Multiplying Matrix with Matrix: wrong sizes!"));
1462 STD__ cerr <<
"Multiplying Matrix with Matrix: wrong sizes!" <<
STD__ endl;
1468 for (
int c = 0;
c < tm.columns();
c++)
1469 for (
int i = 0;
i < mat.
rows();
i++) {
1472 val += this->
operator()(
i, j) * mat(j,
c);
1480 # define MAY_PREFETCH_R(adr,loc) PREFETCH_R(adr,loc) 1481 # define MAY_PREFETCH_W(adr,loc) PREFETCH_W(adr,loc) 1483 # define MAY_PREFETCH_R(adr,loc) do {} while (0) 1484 # define MAY_PREFETCH_W(adr,loc) do {} while (0) 1488 template <
typename T>
1507 #if !defined(BDMATVEC_LNWISE) && !defined(BDMATVEC_LNWISE_OPT) && !defined(BDMATVEC_DIAGWISE) 1508 # define BDMATVEC_LNWISE_OPT 1512 #define COST_BDMATVEC_LN(d,o,m) (d*(COST_MULT+2*COST_UNIT_LOAD+COST_UNIT_STORE+COST_LOOP\ 1513 +o*(COST_LOOP+2*COST_BRANCH+COST_UNIT_LOAD))+(2*d-m)*o*(3*COST_NU_LOAD+COST_ADD+COST_MULT)) 1514 #define COST_BDMATVEC_LNO(d,o,m) (d*(COST_MULT+2*COST_UNIT_LOAD+COST_UNIT_STORE+COST_LOOP\ 1515 +o*(COST_LOOP+COST_UNIT_LOAD))+(2*d-m)*o*(3*COST_NU_LOAD+COST_ADD+COST_MULT+COST_BRANCH/2)) 1516 #define COST_BDMATVEC_DIAG(d,o,m) (d*(2*COST_UNIT_LOAD+COST_MULT+COST_UNIT_STORE+COST_LOOP)\ 1517 +o*(6*COST_NU_LOAD+COST_LOOP+3*COST_UNIT_LOAD+2*COST_NU_STORE\ 1518 +(2*d-m)*(3*COST_UNIT_LOAD+COST_LOOP+COST_MULT+COST_ADD+COST_UNIT_STORE))) 1520 template <
typename T>
1527 const unsigned int dim = mat->
dim;
1531 for (
unsigned i = start;
i <
end; ++
i) {
1536 for (
unsigned j = 0; j < od; ++j) {
1541 comp += (t-val) -
y;
1547 comp += (t-val) -
y;
1551 res->set(val-comp,
i);
1554 for (
unsigned i = start;
i <
end; ++
i) {
1558 for (
unsigned j = 0; j < od; ++j) {
1570 template <
typename T>
1577 const unsigned int dim = mat->
dim;
1589 for (
unsigned int _j = 0; _j < od; ++_j) {
1604 for (
i = start;
i < iend; ++
i) {
1611 for (
unsigned int j = 0; j < od; ++j) {
1615 const T t = val +
y;
1618 comp += (t-val) -
y;
1624 const T t = val +
y;
1627 comp += (t-val) -
y;
1630 res->set(val-comp,
i);
1636 for (;
i < iend; ++
i) {
1643 for (
unsigned int j = 0; j < od; ++j) {
1649 comp += (t-val) -
y;
1655 comp += (t-val) -
y;
1658 res->set(val-comp,
i);
1665 for (;
i < iend; ++
i) {
1672 for (
unsigned int j = 0; j < od; ++j) {
1676 const T t = val +
y;
1679 comp += (t-val) -
y;
1682 if (s < (
signed)
dim -
i) {
1684 const T t = val +
y;
1687 comp += (t-val) -
y;
1691 res->set(val-comp,
i);
1698 for (;
i <
end; ++
i) {
1705 for (
unsigned int j = 0; j < od; ++j) {
1712 comp += (t-val) -
y;
1714 if (s < (
signed)
dim -
i) {
1719 comp += (t-val) -
y;
1723 res->set(val-comp,
i);
1730 for (
i = start;
i < iend; ++
i) {
1736 for (
unsigned int j = 0; j < od; ++j) {
1755 for (;
i < iend; ++
i) {
1761 for (
unsigned int j = 0; j < od; ++j) {
1776 for (;
i < iend; ++
i) {
1782 for (
unsigned int j = 0; j < od; ++j) {
1789 if (s < (
signed)
dim -
i) {
1802 for (;
i <
end; ++
i) {
1808 for (
unsigned int j = 0; j < od; ++j) {
1814 if (s < (
signed)
dim -
i) {
1826 template <
typename T>
1833 const unsigned int dim = mat->
dim;
1843 do_vec_vec_mul(
end-start, &
res->setval(start),
1846 for (
unsigned j = 0; j < od; ++j) {
1850 const unsigned iend2 =
MAX(0,(
int)(
end-s));
1851 const unsigned istart2 =
MAX((
int)(start-s),0);
1854 for (
unsigned off = start; off < iend; ++off) {
1857 corr.
setval(off-start) += (t -
res->get(off)) -
y;
1858 res->setval(off) = t;
1862 for (
unsigned off = istart2; off < iend2; ++off) {
1865 corr.
setval(s+off-start) += (t -
res->get(s+off)) -
y;
1866 res->setval(s+off) = t;
1870 for (
unsigned off = start; off <
end; ++off)
1871 res->setval(off) -= corr.
get(off-start);
1874 template <
typename T>
1880 const unsigned int dim = mat->
dim;
1894 do_vec_vec_mul(
end-start, &
res->setval(start),
1897 for (
unsigned j = 0; j < od; ++j) {
1901 const unsigned iend2 =
MAX(0,(
int)(
end-s));
1902 const unsigned istart2 =
MAX((
int)(start-s),0);
1903 if (
LIKELY(iend > start))
1904 do_add_vec_vec_mul(iend-start, &
res->setval(start),
1906 if (
LIKELY(iend2 > istart2))
1907 do_add_vec_vec_mul(iend2-istart2, &
res->setval(istart2+s),
1912 #if defined(BDMATVEC_LNWISE) 1913 # define COST_BDMATVEC(d,o,m) COST_BDMATVEC_LN(d,o,m) 1914 #elif defined(BDMATVEC_LNWISE_OPT) 1915 # define COST_BDMATVEC(d,o,m) COST_BDMATVEC_LNO(d,o,m) 1916 #elif defined(BDMATVEC_DIAGWISE) 1917 # define COST_BDMATVEC(d,o,m) COST_BDMATVEC_DIAG(d,o,m) 1920 #define COST_BDMATVEC_TRANS(d,o,m) COST_BDMATVEC(d,o,m) 1921 template <
typename T>
1927 const unsigned int dim = mat->
dim;
1929 for (
unsigned i = start;
i <
end; ++
i) {
1934 for (
unsigned int j = 0; j < od; ++j) {
1938 const T t = val +
y;
1939 comp += (t-val) -
y;
1944 const T t = val +
y;
1945 comp += (t-val) -
y;
1949 res->set(val-comp,
i);
1952 for (
unsigned i = start;
i <
end; ++
i) {
1956 for (
unsigned int j = 0; j < od; ++j) {
1968 template <
typename T>
1974 const unsigned int dim = mat->
dim;
1986 for (
unsigned int _j = 0; _j < od; ++_j) {
2001 for (
i = start;
i < iend; ++
i) {
2008 for (
unsigned int j = 0; j < od; ++j) {
2012 const T t = val +
y;
2015 comp += (t-val) -
y;
2020 const T t = val +
y;
2023 comp += (t-val) -
y;
2026 res->set(val-comp,
i);
2033 for (;
i < iend; ++
i) {
2040 for (
unsigned int j = 0; j < od; ++j) {
2046 comp += (t-val) -
y;
2052 comp += (t-val) -
y;
2055 res->set(val-comp,
i);
2062 for (;
i < iend; ++
i) {
2069 for (
unsigned int j = 0; j < od; ++j) {
2073 const T t = val +
y;
2076 comp += (t-val) -
y;
2079 if (s < (
signed)
dim -
i) {
2081 const T t = val +
y;
2084 comp += (t-val) -
y;
2088 res->set(val-comp,
i);
2094 for (;
i <
end; ++
i) {
2101 for (
unsigned int j = 0; j < od; ++j) {
2108 comp += (t-val) -
y;
2110 if (s < (
signed)
dim -
i) {
2115 comp += (t-val) -
y;
2119 res->set(val-comp,
i);
2123 for (
i = start;
i < iend; ++
i) {
2129 for (
unsigned int j = 0; j < od; ++j) {
2148 for (;
i < iend; ++
i) {
2154 for (
unsigned int j = 0; j < od; ++j) {
2170 for (;
i < iend; ++
i) {
2176 for (
unsigned int j = 0; j < od; ++j) {
2183 if (s < (
signed)
dim -
i) {
2195 for (;
i <
end; ++
i) {
2201 for (
unsigned int j = 0; j < od; ++j) {
2207 if (s < (
signed)
dim -
i) {
2219 template <
typename T>
2224 const unsigned int dim = mat->
dim;
2230 do_vec_vec_mul(
end-start, &
res->setval(start),
2233 for (
unsigned j = 0; j < od; ++j) {
2237 const int iend2 =
end-s;
2238 const int istart2 =
MAX((
int)(start-s),0);
2241 for (
int off = istart2; off < iend2; ++off) {
2244 corr.
setval(off+s-start) += (t -
res->get(off+s)) -
y;
2245 res->setval(off+s) = t;
2249 for (
int off = start; off < iend; ++off) {
2252 corr.
setval(off-start) += (t -
res->get(off)) -
y;
2253 res->setval(off) = t;
2257 for (
unsigned off = start; off <
end; ++off)
2258 res->setval(off) -= corr(off-start);
2261 template <
typename T>
2266 const unsigned int dim = mat->
dim;
2275 do_vec_vec_mul(
end-start, &
res->setval(start),
2278 for (
unsigned j = 0; j < od; ++j) {
2282 const int iend2 =
end-s;
2283 const int istart2 =
MAX((
int)(start-s),0);
2284 do_add_vec_vec_mul(iend2-istart2, &
res->setval(istart2+s),
2286 do_add_vec_vec_mul(iend-start, &
res->setval(start),
2291 #define COST_BDMATVEC_DOT(d,o,m) COST_BDMATVEC(d,o,m) 2292 template <
typename T>
2298 const unsigned int dim = mat->
dim;
2299 #ifdef BDMATVEC_LNWISE 2302 for (
unsigned i = start;
i <
end; ++
i) {
2307 for (
unsigned j = 0; j < od; ++j) {
2311 const T t = val +
y;
2312 comp += (t-val) -
y;
2317 const T t = val +
y;
2318 comp += (t-val) -
y;
2322 res->set(val-comp,
i);
2325 for (
unsigned i = start;
i <
end; ++
i) {
2329 for (
unsigned j = 0; j < od; ++j) {
2339 #elif defined(BDMATVEC_LNWISE_OPT) 2351 for (
unsigned int _j = 0; _j < od; ++_j) {
2367 for (
i = start;
i < iend; ++
i) {
2373 for (
unsigned int j = 0; j < od; ++j) {
2394 for (;
i < iend; ++
i) {
2400 for (
unsigned int j = 0; j < od; ++j) {
2416 for (;
i < iend; ++
i) {
2422 for (
unsigned int j = 0; j < od; ++j) {
2429 if (s < (
signed)
dim -
i) {
2443 for (;
i <
end; ++
i) {
2449 for (
unsigned int j = 0; j < od; ++j) {
2455 if (s < (
signed)
dim -
i) {
2464 #elif defined(BDMATVEC_DIAGWISE) 2468 do_vec_vec_cmul(
end-start, &
res->setval(start),
2471 for (
unsigned j = 0; j < od; j++) {
2475 const int iend2 =
end-s;
2476 const int istart2 =
MAX((
int)(start-s),0);
2477 do_add_vec_vec_cmul(iend-start, &
res->setval(start),
2479 do_add_vec_vec_cmul(iend2-istart2, &
res->setval(istart2+s),
2485 #if defined(SMP) && !defined(NOSMP_BDMATVEC) 2488 template <typename T>
2497 template <typename T>
2507 # if !defined(SMP_BDMATSLICE) 2508 # define SMP_BDMATSLICE 32768 2510 # define SMP_BDMATSLICE2 (SMP_BDMATSLICE/sizeof(T)) 2512 template <
typename T>
2517 update_n_thr(n_thr);
2520 do_bdmat_vec_mult<T>(0UL,
dim, &tv,
this, &vec);
2525 const unsigned long first = slice_offset(1, n_thr,
dim, (
T*)0);
2526 unsigned long st, en = first;
2527 for (
unsigned t = 0; t < n_thr-1; ++t) {
2528 st = en; en = slice_offset(t+2, n_thr,
dim, (
T*)0);
2530 st, en, &tv,
this, &vec, (
void*)0);
2532 do_bdmat_vec_mult<T>(0UL, first, &tv,
this, &vec);
2533 for (
unsigned s = 0; s < n_thr-1; ++s)
2540 template <
typename T>
2545 update_n_thr(n_thr);
2548 do_bdmat_vec_transmult<T> (0UL,
dim, &tv,
this, &vec);
2553 const unsigned long first = slice_offset(1, n_thr,
dim, (
T*)0);
2554 unsigned long st, en = first;
2555 for (
unsigned t = 0; t < n_thr-1; ++t) {
2556 st = en; en = slice_offset(t+2, n_thr,
dim, (
T*)0);
2558 st, en, &tv,
this, &vec, (
void*)0);
2560 do_bdmat_vec_transmult<T>(0UL, first, &tv,
this, &vec);
2561 for (
unsigned s = 0; s < n_thr-1; ++s)
2569 template <
typename T>
2574 do_bdmat_vec_mult<T>(0,
dim, &tv,
this, &vec);
2579 template <
typename T>
2584 do_bdmat_vec_transmult<T>(0,
dim, &tv,
this, &vec);
2591 template <
typename T>
2596 do_bdmat_vec_dotmult<T>(0,
dim, &tv,
this, &vec);
2608 template <
typename T>
2624 for (
unsigned j = 0; j <
dim-di; ++j)
2634 for (
unsigned j = 0; j <
dim-di; ++j)
2642 template <
typename T>
2647 for (
unsigned j = 0; j <
dim; ++j)
2652 for (
unsigned i = 0;
i < (
dim-off); ++
i)
2654 for (
unsigned k = 0; k < (
dim-off); ++k)
2660 template <
typename T>
2665 for (
unsigned j = 0; j <
dim; ++j)
2670 for (
unsigned i = 0;
i < (
dim-off); ++
i)
2672 for (
unsigned k = 0; k < (
dim-off); ++k)
2678 template <
typename T>
2694 template <
typename T>
2711 template <
typename T>
2712 STD__ ostream& operator << (STD__ ostream& ostr, const BdMatrix<T>& mat)
2717 for (
unsigned i = 0;
i < mat.
dim; ++
i) {
2718 for (
unsigned j = 0; j < mat.
dim; ++j) {
2720 ostr.width(mat.outw);
2721 ostr.precision(mat.outp);
2722 ostr.
fill(mat.outf);
2724 if (
i == j || mat.diagconf.contains(
CSTD__ abs((
int)
i-(
int)j)))
2725 ostr << mat.
get(
i, j) <<
' ';
2727 ostr << (
T)0 <<
' ';
2738 template <
typename T>
2742 for (
unsigned i = 0;
i < mat.
dim; ++
i) {
2743 for (
unsigned j = 0; j < mat.
dim; ++j) {
2749 STD__ cerr <<
"Element (" <<
i <<
"," << j
2750 <<
") should be zero!" <<
STD__ endl;
2756 template <
typename T>
2762 for (
unsigned i = 0;
i <
_DIM; ++
i)
2767 for (
unsigned k = 0; k <
_DIM-di; ++k) {
2776 template <
typename T>
2781 for (
i = 0;
i <
dim; ++
i)
2794 template <
typename T>
2802 #ifndef TBCI_DISABLE_EXCEPT 2803 throw (
BdMatrixErr(
"Trying to invert singular Matrix!"));
2805 STD__ cerr <<
"Trying to invert singular Matrix!" <<
STD__ endl;
2810 for (
unsigned i = 0;
i <
dim;
i++)
2811 for (
unsigned j = 0; j <
dim; j++) {
2812 ri(0) =
i; ci(0) = j;
2813 inv.setval(subdet(ri, ci) * ((
i+j)%2? -d: d), j,
i);
2819 #if defined(SMP) && defined(HAVE_LIBNUMA) 2820 template <
typename T>
2822 const int node,
bool fault_in,
2823 const unsigned first,
const unsigned last)
2826 const unsigned int sz = bm.
rows();
2829 moved = do_numa_move_pages(node, fault_in,
2830 (
unsigned long)(
diag+first),
2831 (
unsigned long)(
diag+last));
2833 for (
unsigned i = 0;
i < cfg.
size(); ++
i) {
2834 const unsigned off = cfg.
get(
i);
2835 const unsigned dln = sz - off;
2839 moved += do_numa_move_pages(node, fault_in,
2840 (
unsigned long)(
adiag+first),
2841 (
unsigned long)(
adiag+
MIN(dln,last)));
2843 moved += do_numa_move_pages(node, fault_in,
2844 (
unsigned long)(
bdiag+
MAX(0,(
signed)(first-off))),
2845 (
unsigned long)(
bdiag+last-off));
2850 template <
typename T>
2851 void numa_opt_bdmat_job(
struct thr_ctrl *tc)
2856 const int node = tc->
t_size;
2857 const bool fault_in = tc->
t_off;
2859 const unsigned st = (
unsigned long)tc->
t_par[0];
2860 const unsigned en = (
unsigned long)tc->
t_par[1];
2861 tc->
t_res_l = numa_opt_bdmat_helper(bm, bmcfg, node, fault_in, st, en);
2864 template <
typename T>
2870 const unsigned sz = bm.
rows();
2885 numa_opt_bdmat_helper(bm, bmcfg,
2890 const unsigned first = slice_offset(1, n_thr, sz, (
T*)0);
2891 unsigned long st, en = first;
2893 unsigned long res = 0;
2894 for (
unsigned t = 0; t < n_thr-1; ++t) {
2895 st = en; en = slice_offset(t+2, n_thr, sz, (
T*)0);
2898 st, en, &bm, &bmcfg, (
void*)0);
2901 res = numa_opt_bdmat_helper(bm, bmcfg,
2904 for (
unsigned t = 0; t < n_thr-1; ++t) {
2914 template <
typename T>
#define TBCICOPY(n, o, t, s)
void do_copy(const BdMatrix< T > &)
BdMatrix< T > transpose(BdMatrix< T > &mat)
unsigned int getmaxoff() const
max distance from main diagonal
#define BCHKNR(cond, exc, txt, ind)
unsigned long size() const
BVector< T > & swap(BVector< T > &v)
const Vector< T > const Vector< T > const Vector< T > & p
BdMatrix< T > & expand(unsigned=0)
void job_bdmat_vec_transmult(struct thr_ctrl *tc)
unsigned int noelem() const
Number of elements that are actually stored.
The class BdMatrix is an implementation to store and do operations on sparse Matrices with a band str...
STD__ istream & operator>>(STD__ istream &istr, BdMatrix< T > &mat)
BdMatrix< T > & operator+=(const BdMatrix< T > &)
void free_diags(const unsigned)
Implementation alternative: All memory is allocated in one big chunk (default)
void do_bdmat_vec_mult_lnw_opt(const unsigned start, const unsigned end, TVector< T > *res, const BdMatrix< T > *mat, const Vector< T > *vec)
T & setval(const unsigned long i) const
BdMatrix< T > operator/(const T &) const
F_TMatrix< double > inv(const F_Matrix< double > &A, int overwriteA=0)
#define MAY_PREFETCH_R(adr, loc)
BdMatrix< T > & adddiag(const unsigned)
unsigned long size() const HOT
unsigned int rows() const
number of rows
const BdMatrix< T > & setoutopts(int w=5, int p=4, char f=' ') const
BdMatrix< T > & reconfig(const BVector< unsigned int > &) COLD
TVector< T > get_row(const unsigned int) const
Construct Vector from row.
exception base class for the TBCI NumLib
friend NOINST void gaussj FGDT(const BdMatrix< T > &, const Matrix< T > &)
tbci_traits< T >::const_refval_type get(const unsigned long i) const
NAMESPACE_TBCI char gaussj(Matrix< T > &a, Matrix< T > &b)
Common interface definition (signature) for all Matrices.
tbci_traits< T >::const_refval_type get(const unsigned int, const unsigned int) const HOT
const unsigned TMatrix< T > * res
BdMatrix< T > & div_row(const T &, const unsigned)
BVector< T * > bdiag
pointers to upper and lower diagonals
#define BCHK(cond, exc, txt, ind, rtval)
T * check(const unsigned r, const unsigned c) const HOT
const T & getcref(const unsigned long idx) const
BdMatrix< T > operator*(const BdMatrix< T > &) const
#define TBCIFILL(n, v, t, s)
unsigned int columns() const
number of columns
#define REALLOC(v, os, t, s)
BdMatrix< T > & set_row(const Vector< T > &val, const unsigned, const unsigned=0)
Overwrite row with Vector.
BdMatrix< T > & operator=(const BdMatrix< T > &)
The assignment operators in TBCI are NOT resizing.
TVector< T > dotMult(const Vector< T > &) const
T & setval(const T &val, const unsigned int r, const unsigned int c)
T aligned_value_type TALIGN(MIN_ALIGN2)
tbci_traits< T >::const_refval_type get(const unsigned long idx) const HOT
struct thr_struct * threads
#define PREFETCH_R_MANY(addr, loc)
void do_bdmat_vec_mult_lnw(const unsigned start, const unsigned end, TVector< T > *res, const BdMatrix< T > *mat, const Vector< T > *vec)
int lu_decomp(Matrix< T > &) HOT
LU decomposes the TBCI::Matrix mat.
void do_bdmat_vec_mult(const unsigned start, const unsigned end, TVector< T > *res, const BdMatrix< T > *mat, const Vector< T > *vec)
#define PREFETCH_R(addr, loc)
In case gcc does not yet support __builtin_prefetch(), we have handcoded assembly with gcc for a few ...
T min(const FS_Vector< dims, T > &fv)
Matrix< T > & fill(const T &v=(T) 0)
bool operator!=(const BdMatrix< T > &ma) const
unsigned int columns() const
number of columns
BdMatrix< T > & fill(const T &=0)
void do_bdmat_vec_transmult_diagw_exact(const unsigned start, const unsigned end, TVector< T > *res, const BdMatrix< T > *mat, const Vector< T > *vec)
const T & getcref(const unsigned long i) const
void constructor(const unsigned int, const bool=true)
void destroy()
destroy object explicitly
long int Vector< T > & index
unsigned int size() const
size: incompatibility to Matrix (!)
BdMatrix(const BdMatrix< U > &bm)
BdMatrix< T > & transpose()
transpose() does change the object!
T & operator()(const unsigned int, const unsigned int) HOT
rw member access
BdMatrix< T > operator+(const BdMatrix< T > &) const
BVector< T > & append(const T &)
performs poorly
BdMatrix< T > operator-() const
void do_bdmat_vec_mult_diagw_exact(const unsigned start, const unsigned end, TVector< T > *res, const BdMatrix< T > *mat, const Vector< T > *vec)
BdMatrix< T > & div_rows(const Vector< T > &)
BVector< T > & remove(const unsigned long)
const BVector< unsigned int > & getcfg() const
ro access to config BVector
#define PREFETCH_W_MANY(addr, loc)
int conj(const int arg)
conj for elementary types
unsigned int rows() const
number of rows
BdMatrixErr(const char *t, const long i=0)
for(REGISTER T *p1=c. vec, *p2=b. vec ;p1< c. endvec ;p1++, p2++) *p1
bool contains(const T &, unsigned long *=0) const
T & set(const unsigned long idx) HOT
void do_bdmat_vec_transmult_lnw_opt(const unsigned start, const unsigned end, TVector< T > *res, const BdMatrix< T > *mat, const Vector< T > *vec)
unsigned int maxoff
size, max offset from diag
void do_bdmat_vec_dotmult(const unsigned start, const unsigned end, TVector< T > *res, const BdMatrix< T > *mat, const Vector< T > *vec)
BVector< unsigned > diagconf
configuration
void job_bdmat_vec_mult(struct thr_ctrl *tc)
BdMatrix< T > & autoinsert(const T &, const unsigned, const unsigned)
#define EXPCHK(cond, exc, txt, ind, rtval)
static const char * mat_info()
TVector< T > transMult(const Vector< T > &) const HOT
Transposed Matrix-Vector multiplication.
T & set(const T &val, const unsigned long i) const
BVector< T > & resize(const BVector< T > &)
Actually it's a resize and copy (some people would expect the assignment op to do this) ...
#define MAY_PREFETCH_W(adr, loc)
BdMatrix< T > & removediag(const unsigned)
#define TBCIDELETE(t, v, sz)
void do_bdmat_vec_transmult_lnw(const unsigned start, const unsigned end, TVector< T > *res, const BdMatrix< T > *mat, const Vector< T > *vec)
Temporary Base Class Idiom: Class TVector is used for temporary variables.
void thread_wait(const int thr_no, struct job_output *out)
void(* thr_job_t)(struct thr_ctrl *)
Before the double inclusion guard on purpose!
BdMatrix< T > & operator*=(const T &)
BdMatrix< T > & mult_row(const T &, const unsigned)
tbci_traits< T >::const_refval_type get(const unsigned r, const unsigned c) const
get, set and getcref are used internally and not for public consumption
BVector< T > & copy(const BVector< T > &bv)
copy does a resize, if necessary
BdMatrix< T > & resize(const BdMatrix< T > &bdm)
Resizing assignment.
bool operator==(const BdMatrix< T > &) const
BdMatrix< T > & operator/=(const T &)
#define TBCICOMP(n, o, t, s)
unsigned int do_exactsum2()
BdMatrix< T > & resize(const unsigned int)
TVector< T > get_col(const unsigned int) const
Construct Vector from column.
int numa_optimize(const BdMatrix< T > &bm, bool fault_in)
BdMatrix< T > & mult_rows(const Vector< T > &)
Linewise multiplication.
BdMatrix< T > & setval(const T &v, const unsigned int r, const unsigned int c)
autoinsert, sparse class compatible
T * check_internal(const unsigned r, const unsigned c) const HOT
void thread_start_off(const int thr_no, thr_job_t job, const unsigned long off, const unsigned long sz,...)
void do_bdmat_vec_transmult(const unsigned start, const unsigned end, TVector< T > *res, const BdMatrix< T > *mat, const Vector< T > *vec)
BdMatrixErr(const BdMatrixErr &be)
BdMatrix< T > & operator-=(const BdMatrix< T > &)
#define LIKELY(expr)
branch prediction note that we sometimes on purpose mark the unlikely possibility likely and vice ver...
unsigned long arsz
of saved elems,
BdMatrix< T > & setunit(const T &=1)
BdMatrix< T > inverse() const