9 #ifndef TBCI_BAND_MATRIX_H
10 #define TBCI_BAND_MATRIX_H
14 #include "tbci/basics.h"
15 #include "tbci/vector.h"
16 #include "tbci/matrix.h"
21 # define BD_MINVAL 1e-16
25 #ifndef BDMAT_NEW_ALLOC
26 # define BDMAT_ONE_BLOCK
29 #ifdef BDMAT_MEM_TRACE
30 # define BDMATDBG(x) x
36 #if !defined(NO_GD) && !defined(AUTO_DECL)
37 # include "tbci/band_matrix_gd.h"
42 #ifndef TBCI_DISABLE_EXCEPT
50 :
NumErr(
"Error in var conf Band_Matrix class") {}
60 # pragma interface "band_matrix.h"
102 template <
typename T>
117 void constructor(
const unsigned int,
const bool =
true);
118 T*
check(
const unsigned r,
const unsigned c)
const HOT;
125 mutable int outw, outp, outf;
128 inline bool search2(
unsigned int&,
const unsigned int,
const unsigned int)
HOT;
138 explicit BdMatrix(
const unsigned int = 0);
140 explicit BdMatrix(
const unsigned int,
const unsigned int);
149 BdMatrix(
const T&,
const unsigned int,
unsigned int);
165 {
return "BdMatrix"; }
167 #ifndef HAVE_PROMOTION_BUG
169 # ifndef HAVE_GCC295_TMPLFRNDCLS_BUG
172 : diagconf(bm.diagconf), adiag(bm.dim), bdiag(bm.dim)
175 for (
unsigned k=0; k<
dim; ++k)
176 diag[k] = bm.
diag[k];
178 const unsigned di = diagconf.
get(
i);
179 for (
unsigned j=0; j<dim-di; ++j) {
180 adiag.
get(di)[j] = bm.
adiag.get(di)[j];
181 bdiag.
get(di)[j] = bm.
bdiag.get(di)[j];
185 outw = bm.outw; outp = bm.outp;
186 outf = bm.outf; outset =
true;
199 typename tbci_traits<T>::const_refval_type
200 operator() (
const unsigned int,
const unsigned int)
const HOT;
201 typename tbci_traits<T>::const_refval_type
202 get(
const unsigned int,
const unsigned int)
const HOT;
206 const unsigned int c)
208 T&
setval(
const unsigned int r,
const unsigned int c);
243 inline
unsigned int size()
const {
return dim; }
247 inline unsigned int rows()
const {
return dim; }
277 #ifdef BDMAT_TRANS_FRIENDS
289 #ifdef BDMAT_OP_FRIENDS
322 { outw = w; outp =
p; outf = f; outset =
true;
return *
this; }
355 #ifdef BDMAT_ONE_BLOCK
357 template <
typename T>
365 template <
typename T>
370 for (
unsigned i = 0;
i < diagconf.
size(); ++
i) {
371 const unsigned di = diagconf.
get(
i);
372 adiag.
set(di) = (T*)((
long)diag + (long)bdm.
adiag.
get(di) - (long)bdm.
diag);
373 bdiag.
set(di) = (T*)((
long)diag + (long)bdm.
bdiag.
get(di) - (long)bdm.
diag);
377 template <
typename T>
384 unsigned long sz =
dim;
int t;
385 const unsigned long dcs = diagconf.
size();
386 for (
REGISTER unsigned j = 0; j < dcs; ++j)
387 if ((t = ((
signed)dim - (
signed)diagconf.
get(j))) > 0)
393 for (
unsigned i = 0;
i < dcs; ++
i) {
394 const unsigned int di = diagconf.
get(
i);
395 int s = (signed)dim - (
signed)di;
396 if (
UNLIKELY(s <= 0 || s >= (
signed)dim)) {
397 STD__ cerr <<
"Wrong config. Vector supplied!" <<
STD__ endl;
400 adiag.
set(di) = diag+sz; sz += s;
401 bdiag.
set(di) = diag+sz; sz += s;
408 STD__ cerr <<
"BdMat ctor: malloc failed to get " << sz <<
" bytes!" <<
STD__ endl;
416 arsz = sz; dummy = (
T)0;
425 template <
typename T>
428 for (
int i = u-1;
i >= 0; --
i) {
440 template <
typename T>
444 for (
unsigned i = 0;
i < diagconf.
size(); ++
i) {
451 template <
typename T>
454 unsigned long sz = 0;
unsigned i = 0;
472 while (i < diagconf.
size()) {
473 const unsigned di = diagconf.
get(i);
474 int s = (signed)dim - (
signed)di;
475 if (
UNLIKELY(s <= 0 || s >= (
signed)dim)) {
480 T* ptr =
NEW(T, 2*s);
484 bdiag.
set(di) = ptr+s;
490 arsz = sz; dummy = (
T)0;
498 STD__ cerr <<
"BdMat ctor: mem alloc (" << sz <<
") failed!" <<
STD__ endl;
505 template <
typename T>
507 : diagconf(1, 1), adiag(d), bdiag(d)
514 template <
typename T>
516 : diagconf(1, 1), adiag(d), bdiag(d)
524 template <
typename T>
526 : diagconf(conf), adiag(d), bdiag(d)
532 template <
typename T>
534 : diagconf(mat.diagconf), adiag(mat.dim), bdiag(mat.dim)
539 outw = mat.outw; outp = mat.outp;
540 outf = mat.outf; outset =
true;
544 template <
typename T>
546 : diagconf(1, 1), adiag(d), bdiag(d)
554 template <
typename T>
556 : diagconf(mo), adiag(d), bdiag(d)
561 for (
REGISTER unsigned int t = 0; t < mo; ++t)
562 diagconf.
set(t) = t+1;
569 template <
typename T>
572 : diagconf(conf), adiag(d), bdiag(d)
579 template <
typename T>
581 : diagconf(0, 0), adiag(mat.
rows()), bdiag(mat.
columns())
587 for (
unsigned int l = 1; l < mat.
columns(); ++l) {
589 (signed)j < ((
signed)(mat.
rows() - l)); ++j) {
590 if (mat(j, j+l) !=
T(0) || mat(j+l, j) !=
T(0)) {
592 diagconf.
set(ds - 1) = l;
605 for (
unsigned int i = 0; i < diagconf.
size(); ++
i) {
606 const unsigned int dci = diagconf.
get(i);
607 for (
REGISTER unsigned int j = 0; j < dim - dci; ++j) {
608 adiag.
get(dci)[j] = mat(j, j + dci);
609 bdiag.
get(dci)[j] = mat(dci + j, j);
614 template <
typename T>
622 template <
typename T>
624 const unsigned int j)
626 for (
REGISTER int t = 0; (unsigned)t < diagconf.
size(); ++t) {
635 template <
typename T>
658 template <
typename T>
667 template <
typename T>
675 return (dummy = (T)0);
679 template <
typename T>
680 inline typename tbci_traits<T>::const_refval_type
688 return (dummy = (T)0);
692 template <
typename T>
693 inline typename tbci_traits<T>::const_refval_type
701 return (dummy = (T)0);
706 template <
typename T>
723 #ifdef BDMAT_ONE_BLOCK
724 template <
typename T>
729 TBCIFILL((diag+dim), (T)0, T, (arsz-dim));
734 template <
typename T>
738 for (
unsigned i = 0; i < diagconf.
size(); ++
i) {
748 template <
typename T>
754 T* oldd(diag);
unsigned int od =
dim;
755 #ifdef BDMAT_ONE_BLOCK
756 unsigned int oldar =
arsz;
762 if (od <= 1 && nd > 1)
771 TBCIFILL((diag+min), (T)0, T, (nd-dim));
773 for (
unsigned int t = 0; t < diagconf.
size(); ++t) {
786 #ifndef BDMAT_ONE_BLOCK
787 for (
unsigned i = 0; i < oldconf.
size(); ++
i)
789 2*(od-oldconf.
get(i)));
800 template <
typename T>
807 if (dim <= 1 && nd > 1)
817 template <
typename T>
820 if (nd == dim && cfg == diagconf)
831 template <
typename T>
838 for (
unsigned i = 0; i <
dim; ++
i)
845 template <
typename T>
848 if (dim == 0 || cfg == diagconf)
851 #ifdef BDMAT_ONE_BLOCK
852 unsigned int oldar =
arsz;
854 unsigned int olddim =
dim;
866 for (
REGISTER unsigned t = 0; t < diagconf.
size(); ++t) {
867 const unsigned int dct = diagconf.
get(t);
873 #ifndef BDMAT_ONE_BLOCK
874 for (
unsigned i = 0; i < oldconf.
size(); ++
i)
876 2*(olddim-oldconf.
get(i)));
886 #ifdef BDMAT_ONE_BLOCK
887 template <
typename T>
890 const int newsz = dim - ndg;
897 REALLOC(diag, arsz, T, (arsz + 2 * newsz));
900 STD__ cerr <<
"Allocation error" <<
STD__ endl;
904 << arsz+2*newsz <<
")" <<
STD__ endl);
913 long diff = (long)diag - (
long)olddiag;
918 adiag.
set(0) = (T*)((
long)adiag.
get(0) + diff);
919 bdiag.
set(0) = (T*)((
long)bdiag.
get(0) + diff);
920 const unsigned dcs = diagconf.
size();
921 for (
unsigned i = 0; i < dcs; ++
i) {
922 const unsigned di = diagconf.
get(i);
923 adiag.
set(di) = (T*)((
long)adiag.
get(di) + diff);
924 bdiag.
set(di) = (T*)((
long)bdiag.
get(di) + diff);
932 adiag.
set(ndg) = diag+
arsz; bdiag.
set(ndg) = diag+arsz+newsz;
933 TBCIFILL((diag+arsz), (T)0, T, (
unsigned)(2*newsz));
944 template <
typename T>
947 const int newsz = dim - ndg;
952 T* ptr =
NEW(T, 2*newsz);
955 STD__ cerr <<
"BdMat adddiag allocation failed!" <<
STD__ endl;
958 adiag.
set(ndg) = ptr; bdiag.
set(ndg) = ptr+newsz;
972 template <
typename T>
977 #ifdef BDMAT_ONE_BLOCK
985 adiag.
set(dg) = (T*)0; bdiag.
set(dg) = (T*)0;
986 unsigned ix; diagconf.
contains(dg, &ix);
987 diagconf.
remove(ix); arsz -= 2*(dim-dg);
989 maxoff = diagconf.max();
995 template <
typename T>
1013 template <
typename T>
1016 for (
unsigned i=0; i<val.
size(); ++
i)
1021 template <
typename T>
1026 ret.
set(diag[r], r);
1027 for (
unsigned i = 0; i < diagconf.
size(); ++
i) {
1030 ret.
set(adiag.
get(di)[r] , r+di);
1032 ret.
set(bdiag.
get(di)[r-di], r-di);
1037 template <
typename T>
1042 ret.
set(diag[c], c);
1043 for (
unsigned i = 0; i < diagconf.
size(); i++) {
1046 ret.
set(bdiag.
get(di)[
c] , c+di);
1048 ret.
set(adiag.
get(di)[c-di], c-di);
1053 #ifdef BDMAT_ONE_BLOCK
1054 template <
typename T>
1061 template <
typename T>
1070 template <
typename T>
1074 for (
unsigned i = 0; i < diagconf.
size(); ++
i) {
1082 template <
typename T>
1086 for (
unsigned i = 0; i < diagconf.
size(); ++
i) {
1096 template <
typename T>
1102 if (
LIKELY(maxoff == m && arsz == (dim + 2*dim*m - m*(m+1))))
1104 #ifdef BDMAT_ONE_BLOCK
1106 for (
unsigned i = 0; i < m; ++
i)
1107 newconf.
set(i) = i+1;
1110 for (
unsigned i=1; i<=m; ++
i)
1119 template <
typename T>
1124 int sz = arow.
size();
1139 int r = bestrow(level++);
1140 int rw = 0;
int mul;
1142 for (
REGISTER int i = 0; i < sz; i++) {
1144 mul = (i%2? -1: 1); rw--;
1149 for (
REGISTER int j = 0; j < sz; j++) {
1156 for (
REGISTER int t = 0; t < sz; t++) {
1165 TBCICOPY(&(
ac(j)), &(acol(j+1)),
int, (sz-j-1));
1168 tmp += (j%2? -1: 1) * sdet(level, ar,
ac) * el;
1173 template <
typename T>
1177 #ifndef TBCI_DISABLE_EXCEPT
1178 throw (
BdMatrixErr(
"Unequal # of lines and rows to ignore!"));
1180 STD__ cerr <<
"Unequal # of lines and rows to ignore!" <<
STD__ endl;
1188 bestrow.resize(dim);
1190 unsigned int rw = 0;
unsigned int cl = 0;
1206 matmem<T>* el = offdiag.setfirst();
int elemi = (el? el->row: -1);
1208 for (
int i = 0; i <
dim; i++) {
1209 if (!(actrow.contains(i))) {
1210 rowfill(i) = dim + 1;
continue;
1214 if (actcol.contains(i))
1216 if (actcol.contains(i+1) && i+1 <
dim)
1218 if (actcol.contains(i-1) && i-1 >= 0)
1222 if (actcol.contains(i+diagconf.
get(s)) && i+diagconf.
get(s) <
dim)
1224 if (actcol.contains(i-diagconf.
get(s)) && i-diagconf.
get(s) >= 0)
1228 while (el && elemi == i) {
1229 if (actcol.contains(el->col))
1231 el = offdiag.setnext(); elemi = (el? el->row: -1);
1238 for (
int k = 0; k <
dim; k++) {
1239 int min = dim + 2;
int minr = -1;
1240 for (
int s = 0; s <
dim; s++)
1241 if (rowfill(s) < min) {
1242 min = rowfill(s); minr = s;
1246 rowfill(minr) = dim + 1;
1248 return sdet(0, actrow, actcol);
1253 template <
typename T>
1260 STD__ cout <<
"Warning: Automatic reconfiguring BdMatrix in assignment!" <<
STD__ endl;
1275 template <
typename T>
1283 #define SFORALL_S(op) \
1284 template <typename T> \
1285 BdMatrix<T>& BdMatrix<T>::operator op (const BdMatrix<T>& mat) \
1287 BCHK(mat.dim != dim, BdMatrixErr, Adding wrong size BdMatrix, mat.dim, *this); \
1289 for (i = 0; i < dim; ++i) \
1290 diag[i] op mat.diag[i]; \
1291 for (j = 0; j < diagconf.size(); ++j) { \
1292 const REGISTER unsigned di = diagconf.get(j); \
1293 if (LIKELY(mat.adiag.get(di))) \
1294 for (i = 0; i < (dim-di); ++i) { \
1295 adiag.set(di)[i] op mat.adiag.get(di)[i]; \
1296 bdiag.set(di)[i] op mat.bdiag.get(di)[i]; \
1299 if (diagconf == mat.diagconf) \
1301 for (j = 0; j < mat.diagconf.size(); ++j) { \
1302 const REGISTER unsigned di = mat.diagconf.get(j); \
1303 if (!diagconf.contains(di)) { \
1305 for (i = 0; i < (dim-di); ++i) { \
1306 adiag.set(di)[i] op mat.adiag.get(di)[i]; \
1307 bdiag.set(di)[i] op mat.bdiag.get(di)[i]; \
1318 #define SFORALL_T(op) \
1319 template <typename T> \
1320 BdMatrix<T>& BdMatrix<T>::operator op (const T& mult) \
1322 for (REGISTER unsigned int i = 0; i < dim; ++i) \
1324 for (unsigned j = 0; j < diagconf.size(); ++j) { \
1325 const REGISTER unsigned di = diagconf.get(j); \
1326 for (unsigned k = 0; k < (dim-di); ++k) { \
1327 adiag.set(di)[k] op mult; \
1328 bdiag.set(di)[k] op mult; \
1339 template <typename T>
1346 for (
unsigned j = 0; j < diagconf.
size(); ++j) {
1348 for (
unsigned k = 0; k < (dim-di); ++k) {
1349 adiag.
set(di)[k] *= mult;
1350 bdiag.
set(di)[k] *= mult;
1356 #define INTDIVEQ(T) \
1358 inline BdMatrix < T >& BdMatrix< T >::operator /= (const T & div) \
1360 BCHK((div==( T )0), BdMatrixErr, "Divide BdMatrix by zero", 0, *this); \
1361 for (REGISTER unsigned i = 0; i < dim; ++i) \
1363 for (unsigned j = 0; j < diagconf.size(); ++j) { \
1364 const REGISTER unsigned di = diagconf.get(j); \
1365 for (unsigned k = 0; k < (dim-di); ++k) { \
1366 adiag.set(di)[k] /= div; \
1367 bdiag.set(di)[k] /= div; \
1376 template <typename T>
1379 #if 0 //def BDMAT_ONE_BLOCK
1381 for (
REGISTER unsigned int t = 0; t < diagconf.
size(); ++t) {
1382 const unsigned int s = diagconf.
get(t);
1383 T* tmp = adiag.
get(s); adiag.
set(s) = bdiag.
get(s); bdiag.
set(s) = tmp;
1392 template <typename T>
1398 #define STDDEF_ST(op) \
1399 template <typename T> \
1400 inline BdMatrix<T> BdMatrix<T>::operator op (const T& val) const \
1402 return (BdMatrix<T> (*this)) op##= val; \
1410 #define STDDEF_SS(op) \
1411 template <typename T> \
1412 BdMatrix<T> BdMatrix<T>::operator op (const BdMatrix<T>& m2) const \
1414 return (BdMatrix<T> (*this)) op##= m2; \
1421 template <typename T>
1424 const unsigned int newoff = maxoff + mat2.
maxoff;
1429 for (
int i = 0; i < (signed)dim; ++
i) {
1431 const int jend =
MIN(dim, i+newoff+1);
1432 const int kstart =
MAX(0, i-(
signed)maxoff);
1433 const int kend =
MIN(dim, i+maxoff+1);
1435 for (
int j =
MAX(0,i-(
signed)newoff); j < jend; ++j) {
1437 for (
REGISTER int k = kstart; k < kend; ++k)
1438 el += this->
get(i, k) * mat2.get(k, j);
1448 template <typename T>
1454 template <
typename T>
1459 #ifndef TBCI_DISABLE_EXCEPT
1460 throw (
BdMatrixErr(
"Multiplying Matrix with Matrix: wrong sizes!"));
1462 STD__ cerr <<
"Multiplying Matrix with Matrix: wrong sizes!" <<
STD__ endl;
1468 for (
int c = 0;
c < tm.columns();
c++)
1469 for (
int i = 0; i < mat.
rows(); i++) {
1472 val += this->
operator()(
i, j) * mat(j,
c);
1480 # define MAY_PREFETCH_R(adr,loc) PREFETCH_R(adr,loc)
1481 # define MAY_PREFETCH_W(adr,loc) PREFETCH_W(adr,loc)
1483 # define MAY_PREFETCH_R(adr,loc) do {} while (0)
1484 # define MAY_PREFETCH_W(adr,loc) do {} while (0)
1488 template <
typename T>
1494 neg.
diag[j] = -diag[j];
1496 for (
unsigned int t = 0; t < diagconf.
size(); ++t) {
1497 const unsigned int dc = diagconf.
get(t);
1498 for (
REGISTER unsigned int i = 0; i < (dim - dc); ++
i) {
1507 #if !defined(BDMATVEC_LNWISE) && !defined(BDMATVEC_LNWISE_OPT) && !defined(BDMATVEC_DIAGWISE)
1508 # define BDMATVEC_LNWISE_OPT
1512 #define COST_BDMATVEC_LN(d,o,m) (d*(COST_MULT+2*COST_UNIT_LOAD+COST_UNIT_STORE+COST_LOOP\
1513 +o*(COST_LOOP+2*COST_BRANCH+COST_UNIT_LOAD))+(2*d-m)*o*(3*COST_NU_LOAD+COST_ADD+COST_MULT))
1514 #define COST_BDMATVEC_LNO(d,o,m) (d*(COST_MULT+2*COST_UNIT_LOAD+COST_UNIT_STORE+COST_LOOP\
1515 +o*(COST_LOOP+COST_UNIT_LOAD))+(2*d-m)*o*(3*COST_NU_LOAD+COST_ADD+COST_MULT+COST_BRANCH/2))
1516 #define COST_BDMATVEC_DIAG(d,o,m) (d*(2*COST_UNIT_LOAD+COST_MULT+COST_UNIT_STORE+COST_LOOP)\
1517 +o*(6*COST_NU_LOAD+COST_LOOP+3*COST_UNIT_LOAD+2*COST_NU_STORE\
1518 +(2*d-m)*(3*COST_UNIT_LOAD+COST_LOOP+COST_MULT+COST_ADD+COST_UNIT_STORE)))
1520 template <
typename T>
1527 const unsigned int dim = mat->
dim;
1531 for (
unsigned i = start; i <
end; ++
i) {
1536 for (
unsigned j = 0; j < od; ++j) {
1541 comp += (t-val) - y;
1547 comp += (t-val) - y;
1551 res->
set(val-comp, i);
1554 for (
unsigned i = start; i <
end; ++
i) {
1558 for (
unsigned j = 0; j < od; ++j) {
1570 template <
typename T>
1577 const unsigned int dim = mat->
dim;
1581 const unsigned int maxoff = mat->
maxoff;
1583 unsigned iend =
MIN(maxoff, dim-maxoff);
1584 iend =
MIN(iend, end);
1589 for (
unsigned int _j = 0; _j < od; ++_j) {
1604 for (i = start; i < iend; ++
i) {
1611 for (
unsigned int j = 0; j < od; ++j) {
1615 const T t = val +
y;
1618 comp += (t-val) - y;
1624 const T t = val +
y;
1627 comp += (t-val) - y;
1630 res->
set(val-comp, i);
1633 iend =
MIN(end, dim - maxoff);
1636 for (; i < iend; ++
i) {
1643 for (
unsigned int j = 0; j < od; ++j) {
1649 comp += (t-val) - y;
1655 comp += (t-val) - y;
1658 res->
set(val-comp, i);
1662 iend =
MIN(maxoff, end);
1665 for (; i < iend; ++
i) {
1672 for (
unsigned int j = 0; j < od; ++j) {
1676 const T t = val +
y;
1679 comp += (t-val) - y;
1682 if (s < (
signed)dim -
i) {
1684 const T t = val +
y;
1687 comp += (t-val) - y;
1691 res->
set(val-comp, i);
1698 for (; i <
end; ++
i) {
1705 for (
unsigned int j = 0; j < od; ++j) {
1712 comp += (t-val) - y;
1714 if (s < (
signed)dim -
i) {
1719 comp += (t-val) - y;
1723 res->
set(val-comp, i);
1730 for (i = start; i < iend; ++
i) {
1736 for (
unsigned int j = 0; j < od; ++j) {
1752 iend =
MIN(end, dim - maxoff);
1755 for (; i < iend; ++
i) {
1761 for (
unsigned int j = 0; j < od; ++j) {
1774 iend =
MIN(maxoff, end);
1776 for (; i < iend; ++
i) {
1782 for (
unsigned int j = 0; j < od; ++j) {
1789 if (s < (
signed)dim -
i) {
1802 for (; i <
end; ++
i) {
1808 for (
unsigned int j = 0; j < od; ++j) {
1814 if (s < (
signed)dim -
i) {
1826 template <
typename T>
1833 const unsigned int dim = mat->
dim;
1843 do_vec_vec_mul(end-start, &res->
setval(start),
1846 for (
unsigned j = 0; j < od; ++j) {
1849 const unsigned iend =
MIN(end,dim-s);
1850 const unsigned iend2 =
MAX(0,(
int)(end-s));
1851 const unsigned istart2 =
MAX((
int)(start-s),0);
1854 for (
unsigned off = start; off < iend; ++off) {
1857 corr.
setval(off-start) += (t - res->
get(off)) - y;
1862 for (
unsigned off = istart2; off < iend2; ++off) {
1865 corr.
setval(s+off-start) += (t - res->
get(s+off)) - y;
1870 for (
unsigned off = start; off <
end; ++off)
1871 res->
setval(off) -= corr.
get(off-start);
1874 template <
typename T>
1880 const unsigned int dim = mat->
dim;
1894 do_vec_vec_mul(end-start, &res->
setval(start),
1897 for (
unsigned j = 0; j < od; ++j) {
1900 const unsigned iend =
MIN(end,dim-s);
1901 const unsigned iend2 =
MAX(0,(
int)(end-s));
1902 const unsigned istart2 =
MAX((
int)(start-s),0);
1903 if (
LIKELY(iend > start))
1904 do_add_vec_vec_mul(iend-start, &res->
setval(start),
1906 if (
LIKELY(iend2 > istart2))
1907 do_add_vec_vec_mul(iend2-istart2, &res->
setval(istart2+s),
1912 #if defined(BDMATVEC_LNWISE)
1913 # define COST_BDMATVEC(d,o,m) COST_BDMATVEC_LN(d,o,m)
1914 #elif defined(BDMATVEC_LNWISE_OPT)
1915 # define COST_BDMATVEC(d,o,m) COST_BDMATVEC_LNO(d,o,m)
1916 #elif defined(BDMATVEC_DIAGWISE)
1917 # define COST_BDMATVEC(d,o,m) COST_BDMATVEC_DIAG(d,o,m)
1920 #define COST_BDMATVEC_TRANS(d,o,m) COST_BDMATVEC(d,o,m)
1921 template <
typename T>
1927 const unsigned int dim = mat->
dim;
1929 for (
unsigned i = start; i <
end; ++
i) {
1934 for (
unsigned int j = 0; j < od; ++j) {
1938 const T t = val +
y;
1939 comp += (t-val) - y;
1944 const T t = val +
y;
1945 comp += (t-val) - y;
1949 res->
set(val-comp, i);
1952 for (
unsigned i = start; i <
end; ++
i) {
1956 for (
unsigned int j = 0; j < od; ++j) {
1968 template <
typename T>
1974 const unsigned int dim = mat->
dim;
1978 const unsigned int maxoff = mat->
maxoff;
1980 unsigned iend =
MIN(maxoff, mat->
dim-maxoff);
1981 iend =
MIN(end, iend);
1986 for (
unsigned int _j = 0; _j < od; ++_j) {
2001 for (i = start; i < iend; ++
i) {
2008 for (
unsigned int j = 0; j < od; ++j) {
2012 const T t = val +
y;
2015 comp += (t-val) - y;
2020 const T t = val +
y;
2023 comp += (t-val) - y;
2026 res->
set(val-comp, i);
2029 iend =
MIN(end, dim - maxoff);
2033 for (; i < iend; ++
i) {
2040 for (
unsigned int j = 0; j < od; ++j) {
2046 comp += (t-val) - y;
2052 comp += (t-val) - y;
2055 res->
set(val-comp, i);
2059 iend =
MIN(end, maxoff);
2062 for (; i < iend; ++
i) {
2069 for (
unsigned int j = 0; j < od; ++j) {
2073 const T t = val +
y;
2076 comp += (t-val) - y;
2079 if (s < (
signed)dim -
i) {
2081 const T t = val +
y;
2084 comp += (t-val) - y;
2088 res->
set(val-comp, i);
2094 for (; i <
end; ++
i) {
2101 for (
unsigned int j = 0; j < od; ++j) {
2108 comp += (t-val) - y;
2110 if (s < (
signed)dim -
i) {
2115 comp += (t-val) - y;
2119 res->
set(val-comp, i);
2123 for (i = start; i < iend; ++
i) {
2129 for (
unsigned int j = 0; j < od; ++j) {
2144 iend =
MIN(end, dim - maxoff);
2148 for (; i < iend; ++
i) {
2154 for (
unsigned int j = 0; j < od; ++j) {
2167 iend =
MIN(end, maxoff);
2170 for (; i < iend; ++
i) {
2176 for (
unsigned int j = 0; j < od; ++j) {
2183 if (s < (
signed)dim -
i) {
2195 for (; i <
end; ++
i) {
2201 for (
unsigned int j = 0; j < od; ++j) {
2207 if (s < (
signed)dim -
i) {
2219 template <
typename T>
2224 const unsigned int dim = mat->
dim;
2230 do_vec_vec_mul(end-start, &res->
setval(start),
2233 for (
unsigned j = 0; j < od; ++j) {
2236 const int iend =
MIN(end,dim-s);
2237 const int iend2 = end-s;
2238 const int istart2 =
MAX((
int)(start-s),0);
2241 for (
int off = istart2; off < iend2; ++off) {
2244 corr.
setval(off+s-start) += (t - res->
get(off+s)) - y;
2249 for (
int off = start; off < iend; ++off) {
2252 corr.
setval(off-start) += (t - res->
get(off)) - y;
2257 for (
unsigned off = start; off <
end; ++off)
2258 res->
setval(off) -= corr(off-start);
2261 template <
typename T>
2266 const unsigned int dim = mat->
dim;
2275 do_vec_vec_mul(end-start, &res->
setval(start),
2278 for (
unsigned j = 0; j < od; ++j) {
2281 const int iend =
MIN(end,dim-s);
2282 const int iend2 = end-s;
2283 const int istart2 =
MAX((
int)(start-s),0);
2284 do_add_vec_vec_mul(iend2-istart2, &res->
setval(istart2+s),
2286 do_add_vec_vec_mul(iend-start, &res->
setval(start),
2291 #define COST_BDMATVEC_DOT(d,o,m) COST_BDMATVEC(d,o,m)
2292 template <
typename T>
2298 const unsigned int dim = mat->
dim;
2299 #ifdef BDMATVEC_LNWISE
2302 for (
unsigned i = start; i <
end; ++
i) {
2307 for (
unsigned j = 0; j < od; ++j) {
2311 const T t = val +
y;
2312 comp += (t-val) - y;
2317 const T t = val +
y;
2318 comp += (t-val) - y;
2322 res->
set(val-comp, i);
2325 for (
unsigned i = start; i <
end; ++
i) {
2329 for (
unsigned j = 0; j < od; ++j) {
2339 #elif defined(BDMATVEC_LNWISE_OPT)
2343 const unsigned int maxoff = mat->
maxoff;
2345 unsigned iend =
MIN(maxoff, dim-maxoff);
2346 iend =
MIN(iend, end);
2351 for (
unsigned int _j = 0; _j < od; ++_j) {
2367 for (i = start; i < iend; ++
i) {
2373 for (
unsigned int j = 0; j < od; ++j) {
2389 iend =
MIN(end, dim - maxoff);
2394 for (; i < iend; ++
i) {
2400 for (
unsigned int j = 0; j < od; ++j) {
2413 iend =
MIN(maxoff, end);
2416 for (; i < iend; ++
i) {
2422 for (
unsigned int j = 0; j < od; ++j) {
2429 if (s < (
signed)dim -
i) {
2443 for (; i <
end; ++
i) {
2449 for (
unsigned int j = 0; j < od; ++j) {
2455 if (s < (
signed)dim -
i) {
2464 #elif defined(BDMATVEC_DIAGWISE)
2468 do_vec_vec_cmul(end-start, &res->
setval(start),
2471 for (
unsigned j = 0; j < od; j++) {
2474 const int iend =
MIN(end,dim-s);
2475 const int iend2 = end-s;
2476 const int istart2 =
MAX((
int)(start-s),0);
2477 do_add_vec_vec_cmul(iend-start, &res->
setval(start),
2479 do_add_vec_vec_cmul(iend2-istart2, &res->
setval(istart2+s),
2485 #if defined(SMP) && !defined(NOSMP_BDMATVEC)
2488 template <typename T>
2497 template <typename T>
2507 # if !defined(SMP_BDMATSLICE)
2508 # define SMP_BDMATSLICE 32768
2510 # define SMP_BDMATSLICE2 (SMP_BDMATSLICE/sizeof(T))
2512 template <
typename T>
2517 update_n_thr(n_thr);
2520 do_bdmat_vec_mult<T>(0UL, dim, &tv,
this, &vec);
2525 const unsigned long first = slice_offset(1, n_thr, dim, (T*)0);
2526 unsigned long st, en = first;
2527 for (
unsigned t = 0; t < n_thr-1; ++t) {
2528 st = en; en = slice_offset(t+2, n_thr, dim, (T*)0);
2530 st, en, &tv,
this, &vec, (
void*)0);
2532 do_bdmat_vec_mult<T>(0UL, first, &tv,
this, &vec);
2533 for (
unsigned s = 0; s < n_thr-1; ++s)
2540 template <
typename T>
2545 update_n_thr(n_thr);
2548 do_bdmat_vec_transmult<T> (0UL, dim, &tv,
this, &vec);
2553 const unsigned long first = slice_offset(1, n_thr, dim, (T*)0);
2554 unsigned long st, en = first;
2555 for (
unsigned t = 0; t < n_thr-1; ++t) {
2556 st = en; en = slice_offset(t+2, n_thr, dim, (T*)0);
2558 st, en, &tv,
this, &vec, (
void*)0);
2560 do_bdmat_vec_transmult<T>(0UL, first, &tv,
this, &vec);
2561 for (
unsigned s = 0; s < n_thr-1; ++s)
2569 template <
typename T>
2574 do_bdmat_vec_mult<T>(0,
dim, &tv,
this, &vec);
2579 template <
typename T>
2584 do_bdmat_vec_transmult<T>(0,
dim, &tv,
this, &vec);
2591 template <
typename T>
2596 do_bdmat_vec_dotmult<T>(0,
dim, &tv,
this, &vec);
2608 template <
typename T>
2618 for (i = 0; i < diagconf.
size(); ++
i) {
2619 const unsigned di = diagconf.
get(i);
2624 for (
unsigned j = 0; j < dim-di; ++j)
2634 for (
unsigned j = 0; j < dim-di; ++j)
2642 template <
typename T>
2647 for (
unsigned j = 0; j <
dim; ++j)
2648 diag[j] *= v.
get(j);
2650 for (
unsigned od = 0; od < diagconf.
size(); ++od) {
2651 const unsigned off = diagconf.
get(od);
2652 for (
unsigned i = 0; i < (dim-off); ++
i)
2653 adiag.
get(off)[
i] *= v.
get(i);
2654 for (
unsigned k = 0; k < (dim-off); ++k)
2655 bdiag.
get(off)[k] *= v.
get(k+off);
2660 template <
typename T>
2665 for (
unsigned j = 0; j <
dim; ++j)
2666 diag[j] /= v.
get(j);
2668 for (
unsigned od = 0; od < diagconf.
size(); ++od) {
2669 const unsigned off = diagconf.
get(od);
2670 for (
unsigned i = 0; i < (dim-off); ++
i)
2671 adiag.
get(off)[
i] /= v.
get(i);
2672 for (
unsigned k = 0; k < (dim-off); ++k)
2673 bdiag.
get(off)[k] /= v.
get(k+off);
2678 template <
typename T>
2683 for (
unsigned od = 0; od < diagconf.
size(); ++od) {
2684 const unsigned off = diagconf.
get(od);
2686 adiag.
get(off)[
i] *= f;
2688 bdiag.
get(off)[i-off] *= f;
2694 template <
typename T>
2701 for (
unsigned od = 0; od < diagconf.
size(); ++od) {
2702 const unsigned off = diagconf.
get(od);
2704 adiag.
get(off)[
i] *= f;
2706 bdiag.
get(off)[i-off] *= f;
2711 template <
typename T>
2712 STD__ ostream& operator << (STD__ ostream& ostr, const BdMatrix<T>& mat)
2717 for (
unsigned i = 0; i < mat.
dim; ++
i) {
2718 for (
unsigned j = 0; j < mat.
dim; ++j) {
2720 ostr.width(mat.outw);
2721 ostr.precision(mat.outp);
2722 ostr.
fill(mat.outf);
2724 if (i == j || mat.diagconf.contains(
CSTD__ abs((
int)i-(
int)j)))
2725 ostr << mat.
get(i, j) <<
' ';
2727 ostr << (
T)0 <<
' ';
2738 template <
typename T>
2742 for (
unsigned i = 0; i < mat.
dim; ++
i) {
2743 for (
unsigned j = 0; j < mat.
dim; ++j) {
2749 STD__ cerr <<
"Element (" << i <<
"," << j
2750 <<
") should be zero!" <<
STD__ endl;
2756 template <
typename T>
2762 for (
unsigned i = 0; i <
_DIM; ++
i)
2763 tm.
setval(diag[i], i, i);
2765 for (
unsigned j = 0; j < diagconf.
size(); ++j) {
2766 const unsigned di = diagconf.
get(j);
2767 for (
unsigned k = 0; k < _DIM-di; ++k) {
2776 template <
typename T>
2781 for (i = 0; i <
dim; ++
i)
2782 tm.setval(diag[i], i, i);
2784 for (
unsigned j = 0; j < diagconf.
size(); ++j)
2785 for (
unsigned k = 0; k < dim-(i=diagconf.
get(j)); ++k) {
2786 tm.setval(adiag.
get(i)[k], k, k+
i);
2787 tm.setval(bdiag.
get(i)[k], k+
i, k);
2794 template <
typename T>
2802 #ifndef TBCI_DISABLE_EXCEPT
2803 throw (
BdMatrixErr(
"Trying to invert singular Matrix!"));
2805 STD__ cerr <<
"Trying to invert singular Matrix!" <<
STD__ endl;
2810 for (
unsigned i = 0; i <
dim; i++)
2811 for (
unsigned j = 0; j <
dim; j++) {
2812 ri(0) =
i; ci(0) = j;
2813 inv.setval(subdet(ri, ci) * ((i+j)%2? -d: d), j, i);
2819 #if defined(SMP) && defined(HAVE_LIBNUMA)
2820 template <
typename T>
2822 const int node,
bool fault_in,
2823 const unsigned first,
const unsigned last)
2826 const unsigned int sz = bm.
rows();
2828 const T* diag = &(((
BdMatrix<T>&)bm).setval(0,0));
2829 moved = do_numa_move_pages(node, fault_in,
2830 (
unsigned long)(diag+first),
2831 (
unsigned long)(diag+last));
2833 for (
unsigned i = 0; i < cfg.
size(); ++
i) {
2834 const unsigned off = cfg.
get(i);
2835 const unsigned dln = sz - off;
2836 const T* adiag = &(((
BdMatrix<T>&)bm).setval(0,off));
2837 const T* bdiag = &(((
BdMatrix<T>&)bm).setval(off,0));
2839 moved += do_numa_move_pages(node, fault_in,
2840 (
unsigned long)(adiag+first),
2841 (
unsigned long)(adiag+
MIN(dln,last)));
2843 moved += do_numa_move_pages(node, fault_in,
2844 (
unsigned long)(bdiag+
MAX(0,(
signed)(first-off))),
2845 (
unsigned long)(bdiag+last-off));
2850 template <
typename T>
2851 void numa_opt_bdmat_job(
struct thr_ctrl *tc)
2856 const int node = tc->
t_size;
2857 const bool fault_in = tc->
t_off;
2859 const unsigned st = (
unsigned long)tc->
t_par[0];
2860 const unsigned en = (
unsigned long)tc->
t_par[1];
2861 tc->
t_res_l = numa_opt_bdmat_helper(bm, bmcfg, node, fault_in, st, en);
2864 template <
typename T>
2870 const unsigned sz = bm.
rows();
2885 numa_opt_bdmat_helper(bm, bmcfg,
2890 const unsigned first = slice_offset(1, n_thr, sz, (T*)0);
2891 unsigned long st, en = first;
2893 unsigned long res = 0;
2894 for (
unsigned t = 0; t < n_thr-1; ++t) {
2895 st = en; en = slice_offset(t+2, n_thr, sz, (T*)0);
2898 st, en, &bm, &bmcfg, (
void*)0);
2901 res = numa_opt_bdmat_helper(bm, bmcfg,
2904 for (
unsigned t = 0; t < n_thr-1; ++t) {
2914 template <
typename T>
#define TBCICOPY(n, o, t, s)
TVector< T > get_col(const unsigned int) const
Construct Vector from column.
void do_copy(const BdMatrix< T > &)
BdMatrix< T > transpose(BdMatrix< T > &mat)
T & setval(const T &val, const unsigned int r, const unsigned int c)
#define BCHKNR(cond, exc, txt, ind)
BVector< T > & swap(BVector< T > &v)
const Vector< T > const Vector< T > const Vector< T > & p
BdMatrix< T > & expand(unsigned=0)
unsigned int columns() const
number of columns
void job_bdmat_vec_transmult(struct thr_ctrl *tc)
Matrix< T > & fill(const T &v=(T) 0)
BdMatrix< T > operator/(const T &) const
const T & getcref(const unsigned long idx) const
The class BdMatrix is an implementation to store and do operations on sparse Matrices with a band str...
STD__ istream & operator>>(STD__ istream &istr, BdMatrix< T > &mat)
BdMatrix< T > & operator+=(const BdMatrix< T > &)
void free_diags(const unsigned)
Implementation alternative: All memory is allocated in one big chunk (default)
void do_bdmat_vec_mult_lnw_opt(const unsigned start, const unsigned end, TVector< T > *res, const BdMatrix< T > *mat, const Vector< T > *vec)
const BVector< unsigned int > & getcfg() const
ro access to config BVector
F_TMatrix< double > inv(const F_Matrix< double > &A, int overwriteA=0)
#define MAY_PREFETCH_R(adr, loc)
BdMatrix< T > & adddiag(const unsigned)
unsigned long size() const HOT
BdMatrix< T > & reconfig(const BVector< unsigned int > &) COLD
exception base class for the TBCI NumLib
friend NOINST void gaussj FGDT(const BdMatrix< T > &, const Matrix< T > &)
NAMESPACE_TBCI char gaussj(Matrix< T > &a, Matrix< T > &b)
Common interface definition (signature) for all Matrices.
tbci_traits< T >::const_refval_type get(const unsigned int, const unsigned int) const HOT
bool operator==(const BdMatrix< T > &) const
unsigned int getmaxoff() const
max distance from main diagonal
const unsigned TMatrix< T > * res
unsigned int columns() const
number of columns
BdMatrix< T > & div_row(const T &, const unsigned)
BVector< T * > bdiag
pointers to upper and lower diagonals
#define BCHK(cond, exc, txt, ind, rtval)
T * check(const unsigned r, const unsigned c) const HOT
#define TBCIFILL(n, v, t, s)
#define REALLOC(v, os, t, s)
void(* thr_job_t)(struct thr_ctrl *)
Before the double inclusion guard on purpose!
BdMatrix< T > & set_row(const Vector< T > &val, const unsigned, const unsigned=0)
Overwrite row with Vector.
BdMatrix< T > & operator=(const BdMatrix< T > &)
The assignment operators in TBCI are NOT resizing.
T aligned_value_type TALIGN(MIN_ALIGN2)
tbci_traits< T >::const_refval_type get(const unsigned long idx) const HOT
for(REGISTER T *p1=c.vec,*p2=b.vec;p1< c.endvec;p1++, p2++)*p1
unsigned int size() const
size: incompatibility to Matrix (!)
struct thr_struct * threads
#define PREFETCH_R_MANY(addr, loc)
void do_bdmat_vec_mult_lnw(const unsigned start, const unsigned end, TVector< T > *res, const BdMatrix< T > *mat, const Vector< T > *vec)
int lu_decomp(Matrix< T > &) HOT
LU decomposes the TBCI::Matrix mat.
void do_bdmat_vec_mult(const unsigned start, const unsigned end, TVector< T > *res, const BdMatrix< T > *mat, const Vector< T > *vec)
#define PREFETCH_R(addr, loc)
In case gcc does not yet support __builtin_prefetch(), we have handcoded assembly with gcc for a few ...
T min(const FS_Vector< dims, T > &fv)
bool contains(const T &, unsigned long *=0) const
TVector< T > get_row(const unsigned int) const
Construct Vector from row.
BdMatrix< T > & fill(const T &=0)
void do_bdmat_vec_transmult_diagw_exact(const unsigned start, const unsigned end, TVector< T > *res, const BdMatrix< T > *mat, const Vector< T > *vec)
void constructor(const unsigned int, const bool=true)
BdMatrix< T > operator-() const
void destroy()
destroy object explicitly
unsigned long size() const
long int Vector< T > & index
BdMatrix(const BdMatrix< U > &bm)
BdMatrix< T > & transpose()
transpose() does change the object!
T & operator()(const unsigned int, const unsigned int) HOT
rw member access
Temporary Base Class Idiom: Class TVector is used for temporary variables.
BVector< T > & append(const T &)
performs poorly
BdMatrix< T > operator*(const BdMatrix< T > &) const
tbci_traits< T >::const_refval_type get(const unsigned long i) const
void do_bdmat_vec_mult_diagw_exact(const unsigned start, const unsigned end, TVector< T > *res, const BdMatrix< T > *mat, const Vector< T > *vec)
unsigned int noelem() const
Number of elements that are actually stored.
BdMatrix< T > & div_rows(const Vector< T > &)
BVector< T > & remove(const unsigned long)
T & setval(const unsigned long i) const
#define PREFETCH_W_MANY(addr, loc)
BdMatrix< T > operator+(const BdMatrix< T > &) const
int conj(const int arg)
conj for elementary types
BdMatrixErr(const char *t, const long i=0)
BdMatrix< T > inverse() const
T & set(const unsigned long idx) HOT
unsigned int rows() const
number of rows
void do_bdmat_vec_transmult_lnw_opt(const unsigned start, const unsigned end, TVector< T > *res, const BdMatrix< T > *mat, const Vector< T > *vec)
unsigned int maxoff
size, max offset from diag
void do_bdmat_vec_dotmult(const unsigned start, const unsigned end, TVector< T > *res, const BdMatrix< T > *mat, const Vector< T > *vec)
unsigned int rows() const
number of rows
BVector< unsigned > diagconf
configuration
void job_bdmat_vec_mult(struct thr_ctrl *tc)
BdMatrix< T > & autoinsert(const T &, const unsigned, const unsigned)
#define EXPCHK(cond, exc, txt, ind, rtval)
static const char * mat_info()
TVector< T > transMult(const Vector< T > &) const HOT
Transposed Matrix-Vector multiplication.
BVector< T > & resize(const BVector< T > &)
Actually it's a resize and copy (some people would expect the assignment op to do this) ...
#define MAY_PREFETCH_W(adr, loc)
BdMatrix< T > & removediag(const unsigned)
const T & getcref(const unsigned long i) const
#define TBCIDELETE(t, v, sz)
void do_bdmat_vec_transmult_lnw(const unsigned start, const unsigned end, TVector< T > *res, const BdMatrix< T > *mat, const Vector< T > *vec)
void thread_wait(const int thr_no, struct job_output *out)
BdMatrix< T > & operator*=(const T &)
BdMatrix< T > & mult_row(const T &, const unsigned)
BVector< T > & copy(const BVector< T > &bv)
copy does a resize, if necessary
BdMatrix< T > & resize(const BdMatrix< T > &bdm)
Resizing assignment.
BdMatrix< T > & operator/=(const T &)
#define TBCICOMP(n, o, t, s)
unsigned int do_exactsum2()
BdMatrix< T > & resize(const unsigned int)
int numa_optimize(const BdMatrix< T > &bm, bool fault_in)
TVector< T > dotMult(const Vector< T > &) const
BdMatrix< T > & mult_rows(const Vector< T > &)
Linewise multiplication.
BdMatrix< T > & setval(const T &v, const unsigned int r, const unsigned int c)
autoinsert, sparse class compatible
T * check_internal(const unsigned r, const unsigned c) const HOT
tbci_traits< T >::const_refval_type get(const unsigned r, const unsigned c) const
get, set and getcref are used internally and not for public consumption
T & set(const T &val, const unsigned long i) const
void thread_start_off(const int thr_no, thr_job_t job, const unsigned long off, const unsigned long sz,...)
const BdMatrix< T > & setoutopts(int w=5, int p=4, char f= ' ') const
void do_bdmat_vec_transmult(const unsigned start, const unsigned end, TVector< T > *res, const BdMatrix< T > *mat, const Vector< T > *vec)
BdMatrixErr(const BdMatrixErr &be)
BdMatrix< T > & operator-=(const BdMatrix< T > &)
#define LIKELY(expr)
branch prediction note that we sometimes on purpose mark the unlikely possibility likely and vice ver...
unsigned long arsz
of saved elems,
BdMatrix< T > & setunit(const T &=1)
bool operator!=(const BdMatrix< T > &ma) const