9 #ifndef TBCI_BAND_MATRIX_H
10 #define TBCI_BAND_MATRIX_H
14 #include "tbci/basics.h"
15 #include "tbci/vector.h"
16 #include "tbci/matrix.h"
21 # define BD_MINVAL 1e-16
25 #ifndef BDMAT_NEW_ALLOC
26 # define BDMAT_ONE_BLOCK
29 #ifdef BDMAT_MEM_TRACE
30 # define BDMATDBG(x) x
36 #if !defined(NO_GD) && !defined(AUTO_DECL)
37 # include "tbci/band_matrix_gd.h"
42 #ifndef TBCI_DISABLE_EXCEPT
50 :
NumErr(
"Error in var conf Band_Matrix class") {}
60 # pragma interface "band_matrix.h"
102 template <
typename T>
117 void constructor(
const unsigned int,
const bool =
true);
118 T*
check(
const unsigned r,
const unsigned c)
const HOT;
125 mutable int outw, outp, outf;
128 inline bool search2(
unsigned int&,
const unsigned int,
const unsigned int)
HOT;
138 explicit BdMatrix(
const unsigned int = 0);
140 explicit BdMatrix(
const unsigned int,
const unsigned int);
149 BdMatrix(
const T&,
const unsigned int,
unsigned int);
165 {
return "BdMatrix"; }
167 #ifndef HAVE_PROMOTION_BUG
169 # ifndef HAVE_GCC295_TMPLFRNDCLS_BUG
175 for (
unsigned k=0; k<
dim; ++k)
179 for (
unsigned j=0; j<dim-di; ++j) {
185 outw = bm.outw; outp = bm.outp;
186 outf = bm.outf; outset =
true;
199 typename tbci_traits<T>::const_refval_type
200 operator() (
const unsigned int,
const unsigned int)
const HOT;
201 typename tbci_traits<T>::const_refval_type
202 get(
const unsigned int,
const unsigned int)
const HOT;
206 const unsigned int c)
208 T&
setval(
const unsigned int r,
const unsigned int c);
243 inline
unsigned int size()
const {
return dim; }
247 inline unsigned int rows()
const {
return dim; }
277 #ifdef BDMAT_TRANS_FRIENDS
289 #ifdef BDMAT_OP_FRIENDS
322 { outw = w; outp =
p; outf = f; outset =
true;
return *
this; }
355 #ifdef BDMAT_ONE_BLOCK
357 template <
typename T>
365 template <
typename T>
377 template <
typename T>
384 unsigned long sz =
dim;
int t;
386 for (
REGISTER unsigned j = 0; j < dcs; ++j)
393 for (
unsigned i = 0;
i < dcs; ++
i) {
395 int s = (signed)
dim - (
signed)di;
397 STD__ cerr <<
"Wrong config. Vector supplied!" <<
STD__ endl;
408 STD__ cerr <<
"BdMat ctor: malloc failed to get " << sz <<
" bytes!" <<
STD__ endl;
416 arsz = sz; dummy = (
T)0;
425 template <
typename T>
428 for (
int i = u-1;
i >= 0; --
i) {
440 template <
typename T>
451 template <
typename T>
454 unsigned long sz = 0;
unsigned i = 0;
474 int s = (signed)
dim - (
signed)di;
480 T* ptr =
NEW(T, 2*s);
490 arsz = sz; dummy = (
T)0;
498 STD__ cerr <<
"BdMat ctor: mem alloc (" << sz <<
") failed!" <<
STD__ endl;
505 template <
typename T>
514 template <
typename T>
524 template <
typename T>
532 template <
typename T>
539 outw = mat.outw; outp = mat.outp;
540 outf = mat.outf; outset =
true;
544 template <
typename T>
554 template <
typename T>
561 for (
REGISTER unsigned int t = 0; t < mo; ++t)
569 template <
typename T>
579 template <
typename T>
587 for (
unsigned int l = 1; l < mat.
columns(); ++l) {
589 (signed)j < ((
signed)(mat.
rows() - l)); ++j) {
590 if (mat(j, j+l) !=
T(0) || mat(j+l, j) !=
T(0)) {
607 for (
REGISTER unsigned int j = 0; j < dim - dci; ++j) {
608 adiag.
get(dci)[j] = mat(j, j + dci);
609 bdiag.
get(dci)[j] = mat(dci + j, j);
614 template <
typename T>
622 template <
typename T>
624 const unsigned int j)
635 template <
typename T>
658 template <
typename T>
667 template <
typename T>
675 return (dummy = (T)0);
679 template <
typename T>
680 inline typename tbci_traits<T>::const_refval_type
688 return (dummy = (T)0);
692 template <
typename T>
693 inline typename tbci_traits<T>::const_refval_type
701 return (dummy = (T)0);
706 template <
typename T>
723 #ifdef BDMAT_ONE_BLOCK
724 template <
typename T>
734 template <
typename T>
748 template <
typename T>
754 T* oldd(
diag);
unsigned int od =
dim;
755 #ifdef BDMAT_ONE_BLOCK
756 unsigned int oldar =
arsz;
762 if (od <= 1 && nd > 1)
786 #ifndef BDMAT_ONE_BLOCK
787 for (
unsigned i = 0; i < oldconf.
size(); ++
i)
789 2*(od-oldconf.
get(i)));
800 template <
typename T>
807 if (dim <= 1 && nd > 1)
817 template <
typename T>
831 template <
typename T>
838 for (
unsigned i = 0; i <
dim; ++
i)
845 template <
typename T>
851 #ifdef BDMAT_ONE_BLOCK
852 unsigned int oldar =
arsz;
854 unsigned int olddim =
dim;
873 #ifndef BDMAT_ONE_BLOCK
874 for (
unsigned i = 0; i < oldconf.
size(); ++
i)
876 2*(olddim-oldconf.
get(i)));
886 #ifdef BDMAT_ONE_BLOCK
887 template <
typename T>
890 const int newsz = dim - ndg;
900 STD__ cerr <<
"Allocation error" <<
STD__ endl;
913 long diff = (long)
diag - (
long)olddiag;
921 for (
unsigned i = 0; i < dcs; ++
i) {
944 template <
typename T>
947 const int newsz = dim - ndg;
952 T* ptr =
NEW(T, 2*newsz);
955 STD__ cerr <<
"BdMat adddiag allocation failed!" <<
STD__ endl;
972 template <
typename T>
977 #ifdef BDMAT_ONE_BLOCK
995 template <
typename T>
1013 template <
typename T>
1016 for (
unsigned i=0; i<val.
size(); ++
i)
1021 template <
typename T>
1037 template <
typename T>
1053 #ifdef BDMAT_ONE_BLOCK
1054 template <
typename T>
1061 template <
typename T>
1070 template <
typename T>
1082 template <
typename T>
1096 template <
typename T>
1104 #ifdef BDMAT_ONE_BLOCK
1106 for (
unsigned i = 0; i < m; ++
i)
1107 newconf.
set(i) = i+1;
1110 for (
unsigned i=1; i<=m; ++
i)
1119 template <
typename T>
1124 int sz = arow.
size();
1139 int r = bestrow(level++);
1140 int rw = 0;
int mul;
1142 for (
REGISTER int i = 0; i < sz; i++) {
1144 mul = (i%2? -1: 1); rw--;
1149 for (
REGISTER int j = 0; j < sz; j++) {
1156 for (
REGISTER int t = 0; t < sz; t++) {
1165 TBCICOPY(&(
ac(j)), &(acol(j+1)),
int, (sz-j-1));
1168 tmp += (j%2? -1: 1) * sdet(level, ar,
ac) * el;
1173 template <
typename T>
1177 #ifndef TBCI_DISABLE_EXCEPT
1178 throw (
BdMatrixErr(
"Unequal # of lines and rows to ignore!"));
1180 STD__ cerr <<
"Unequal # of lines and rows to ignore!" <<
STD__ endl;
1188 bestrow.resize(dim);
1190 unsigned int rw = 0;
unsigned int cl = 0;
1206 matmem<T>* el = offdiag.setfirst();
int elemi = (el? el->row: -1);
1208 for (
int i = 0; i <
dim; i++) {
1209 if (!(actrow.contains(i))) {
1210 rowfill(i) = dim + 1;
continue;
1214 if (actcol.contains(i))
1216 if (actcol.contains(i+1) && i+1 <
dim)
1218 if (actcol.contains(i-1) && i-1 >= 0)
1228 while (el && elemi == i) {
1229 if (actcol.contains(el->col))
1231 el = offdiag.setnext(); elemi = (el? el->row: -1);
1238 for (
int k = 0; k <
dim; k++) {
1239 int min = dim + 2;
int minr = -1;
1240 for (
int s = 0; s <
dim; s++)
1241 if (rowfill(s) < min) {
1242 min = rowfill(s); minr = s;
1246 rowfill(minr) = dim + 1;
1248 return sdet(0, actrow, actcol);
1253 template <
typename T>
1260 STD__ cout <<
"Warning: Automatic reconfiguring BdMatrix in assignment!" <<
STD__ endl;
1275 template <
typename T>
1283 #define SFORALL_S(op) \
1284 template <typename T> \
1285 BdMatrix<T>& BdMatrix<T>::operator op (const BdMatrix<T>& mat) \
1287 BCHK(mat.dim != dim, BdMatrixErr, Adding wrong size BdMatrix, mat.dim, *this); \
1289 for (i = 0; i < dim; ++i) \
1290 diag[i] op mat.diag[i]; \
1291 for (j = 0; j < diagconf.size(); ++j) { \
1292 const REGISTER unsigned di = diagconf.get(j); \
1293 if (LIKELY(mat.adiag.get(di))) \
1294 for (i = 0; i < (dim-di); ++i) { \
1295 adiag.set(di)[i] op mat.adiag.get(di)[i]; \
1296 bdiag.set(di)[i] op mat.bdiag.get(di)[i]; \
1299 if (diagconf == mat.diagconf) \
1301 for (j = 0; j < mat.diagconf.size(); ++j) { \
1302 const REGISTER unsigned di = mat.diagconf.get(j); \
1303 if (!diagconf.contains(di)) { \
1305 for (i = 0; i < (dim-di); ++i) { \
1306 adiag.set(di)[i] op mat.adiag.get(di)[i]; \
1307 bdiag.set(di)[i] op mat.bdiag.get(di)[i]; \
1318 #define SFORALL_T(op) \
1319 template <typename T> \
1320 BdMatrix<T>& BdMatrix<T>::operator op (const T& mult) \
1322 for (REGISTER unsigned int i = 0; i < dim; ++i) \
1324 for (unsigned j = 0; j < diagconf.size(); ++j) { \
1325 const REGISTER unsigned di = diagconf.get(j); \
1326 for (unsigned k = 0; k < (dim-di); ++k) { \
1327 adiag.set(di)[k] op mult; \
1328 bdiag.set(di)[k] op mult; \
1339 template <typename T>
1348 for (
unsigned k = 0; k < (dim-di); ++k) {
1356 #define INTDIVEQ(T) \
1358 inline BdMatrix < T >& BdMatrix< T >::operator /= (const T & div) \
1360 BCHK((div==( T )0), BdMatrixErr, "Divide BdMatrix by zero", 0, *this); \
1361 for (REGISTER unsigned i = 0; i < dim; ++i) \
1363 for (unsigned j = 0; j < diagconf.size(); ++j) { \
1364 const REGISTER unsigned di = diagconf.get(j); \
1365 for (unsigned k = 0; k < (dim-di); ++k) { \
1366 adiag.set(di)[k] /= div; \
1367 bdiag.set(di)[k] /= div; \
1376 template <typename T>
1379 #if 0 //def BDMAT_ONE_BLOCK
1392 template <typename T>
1398 #define STDDEF_ST(op) \
1399 template <typename T> \
1400 inline BdMatrix<T> BdMatrix<T>::operator op (const T& val) const \
1402 return (BdMatrix<T> (*this)) op##= val; \
1410 #define STDDEF_SS(op) \
1411 template <typename T> \
1412 BdMatrix<T> BdMatrix<T>::operator op (const BdMatrix<T>& m2) const \
1414 return (BdMatrix<T> (*this)) op##= m2; \
1421 template <typename T>
1424 const unsigned int newoff =
maxoff + mat2.maxoff;
1429 for (
int i = 0; i < (signed)dim; ++
i) {
1431 const int jend =
MIN(dim, i+newoff+1);
1432 const int kstart =
MAX(0, i-(
signed)
maxoff);
1433 const int kend =
MIN(dim, i+maxoff+1);
1435 for (
int j =
MAX(0,i-(
signed)newoff); j < jend; ++j) {
1437 for (
REGISTER int k = kstart; k < kend; ++k)
1438 el += this->
get(i, k) * mat2.get(k, j);
1448 template <typename T>
1454 template <
typename T>
1459 #ifndef TBCI_DISABLE_EXCEPT
1460 throw (
BdMatrixErr(
"Multiplying Matrix with Matrix: wrong sizes!"));
1462 STD__ cerr <<
"Multiplying Matrix with Matrix: wrong sizes!" <<
STD__ endl;
1468 for (
int c = 0;
c < tm.columns();
c++)
1469 for (
int i = 0; i < mat.
rows(); i++) {
1472 val += this->
operator()(
i, j) * mat(j,
c);
1480 # define MAY_PREFETCH_R(adr,loc) PREFETCH_R(adr,loc)
1481 # define MAY_PREFETCH_W(adr,loc) PREFETCH_W(adr,loc)
1483 # define MAY_PREFETCH_R(adr,loc) do {} while (0)
1484 # define MAY_PREFETCH_W(adr,loc) do {} while (0)
1488 template <
typename T>
1498 for (
REGISTER unsigned int i = 0; i < (dim - dc); ++
i) {
1507 #if !defined(BDMATVEC_LNWISE) && !defined(BDMATVEC_LNWISE_OPT) && !defined(BDMATVEC_DIAGWISE)
1508 # define BDMATVEC_LNWISE_OPT
1512 #define COST_BDMATVEC_LN(d,o,m) (d*(COST_MULT+2*COST_UNIT_LOAD+COST_UNIT_STORE+COST_LOOP\
1513 +o*(COST_LOOP+2*COST_BRANCH+COST_UNIT_LOAD))+(2*d-m)*o*(3*COST_NU_LOAD+COST_ADD+COST_MULT))
1514 #define COST_BDMATVEC_LNO(d,o,m) (d*(COST_MULT+2*COST_UNIT_LOAD+COST_UNIT_STORE+COST_LOOP\
1515 +o*(COST_LOOP+COST_UNIT_LOAD))+(2*d-m)*o*(3*COST_NU_LOAD+COST_ADD+COST_MULT+COST_BRANCH/2))
1516 #define COST_BDMATVEC_DIAG(d,o,m) (d*(2*COST_UNIT_LOAD+COST_MULT+COST_UNIT_STORE+COST_LOOP)\
1517 +o*(6*COST_NU_LOAD+COST_LOOP+3*COST_UNIT_LOAD+2*COST_NU_STORE\
1518 +(2*d-m)*(3*COST_UNIT_LOAD+COST_LOOP+COST_MULT+COST_ADD+COST_UNIT_STORE)))
1520 template <
typename T>
1527 const unsigned int dim = mat->
dim;
1531 for (
unsigned i = start; i <
end; ++
i) {
1536 for (
unsigned j = 0; j < od; ++j) {
1541 comp += (t-val) - y;
1547 comp += (t-val) - y;
1551 res->
set(val-comp, i);
1554 for (
unsigned i = start; i <
end; ++
i) {
1558 for (
unsigned j = 0; j < od; ++j) {
1570 template <
typename T>
1577 const unsigned int dim = mat->
dim;
1583 unsigned iend =
MIN(maxoff, dim-maxoff);
1584 iend =
MIN(iend, end);
1589 for (
unsigned int _j = 0; _j < od; ++_j) {
1604 for (i = start; i < iend; ++
i) {
1611 for (
unsigned int j = 0; j < od; ++j) {
1615 const T t = val +
y;
1618 comp += (t-val) - y;
1624 const T t = val +
y;
1627 comp += (t-val) - y;
1630 res->
set(val-comp, i);
1633 iend =
MIN(end, dim - maxoff);
1636 for (; i < iend; ++
i) {
1643 for (
unsigned int j = 0; j < od; ++j) {
1649 comp += (t-val) - y;
1655 comp += (t-val) - y;
1658 res->
set(val-comp, i);
1662 iend =
MIN(maxoff, end);
1665 for (; i < iend; ++
i) {
1672 for (
unsigned int j = 0; j < od; ++j) {
1676 const T t = val +
y;
1679 comp += (t-val) - y;
1682 if (s < (
signed)dim -
i) {
1684 const T t = val +
y;
1687 comp += (t-val) - y;
1691 res->
set(val-comp, i);
1698 for (; i <
end; ++
i) {
1705 for (
unsigned int j = 0; j < od; ++j) {
1712 comp += (t-val) - y;
1714 if (s < (
signed)dim -
i) {
1719 comp += (t-val) - y;
1723 res->
set(val-comp, i);
1730 for (i = start; i < iend; ++
i) {
1736 for (
unsigned int j = 0; j < od; ++j) {
1752 iend =
MIN(end, dim - maxoff);
1755 for (; i < iend; ++
i) {
1761 for (
unsigned int j = 0; j < od; ++j) {
1774 iend =
MIN(maxoff, end);
1776 for (; i < iend; ++
i) {
1782 for (
unsigned int j = 0; j < od; ++j) {
1789 if (s < (
signed)dim -
i) {
1802 for (; i <
end; ++
i) {
1808 for (
unsigned int j = 0; j < od; ++j) {
1814 if (s < (
signed)dim -
i) {
1826 template <
typename T>
1833 const unsigned int dim = mat->
dim;
1843 do_vec_vec_mul(end-start, &res->
setval(start),
1846 for (
unsigned j = 0; j < od; ++j) {
1849 const unsigned iend =
MIN(end,dim-s);
1850 const unsigned iend2 =
MAX(0,(
int)(end-s));
1851 const unsigned istart2 =
MAX((
int)(start-s),0);
1854 for (
unsigned off = start; off < iend; ++off) {
1857 corr.
setval(off-start) += (t - res->
get(off)) - y;
1862 for (
unsigned off = istart2; off < iend2; ++off) {
1865 corr.
setval(s+off-start) += (t - res->
get(s+off)) - y;
1870 for (
unsigned off = start; off <
end; ++off)
1871 res->
setval(off) -= corr.
get(off-start);
1874 template <
typename T>
1880 const unsigned int dim = mat->
dim;
1894 do_vec_vec_mul(end-start, &res->
setval(start),
1897 for (
unsigned j = 0; j < od; ++j) {
1900 const unsigned iend =
MIN(end,dim-s);
1901 const unsigned iend2 =
MAX(0,(
int)(end-s));
1902 const unsigned istart2 =
MAX((
int)(start-s),0);
1903 if (
LIKELY(iend > start))
1904 do_add_vec_vec_mul(iend-start, &res->
setval(start),
1906 if (
LIKELY(iend2 > istart2))
1907 do_add_vec_vec_mul(iend2-istart2, &res->
setval(istart2+s),
1912 #if defined(BDMATVEC_LNWISE)
1913 # define COST_BDMATVEC(d,o,m) COST_BDMATVEC_LN(d,o,m)
1914 #elif defined(BDMATVEC_LNWISE_OPT)
1915 # define COST_BDMATVEC(d,o,m) COST_BDMATVEC_LNO(d,o,m)
1916 #elif defined(BDMATVEC_DIAGWISE)
1917 # define COST_BDMATVEC(d,o,m) COST_BDMATVEC_DIAG(d,o,m)
1920 #define COST_BDMATVEC_TRANS(d,o,m) COST_BDMATVEC(d,o,m)
1921 template <
typename T>
1927 const unsigned int dim = mat->
dim;
1929 for (
unsigned i = start; i <
end; ++
i) {
1934 for (
unsigned int j = 0; j < od; ++j) {
1938 const T t = val +
y;
1939 comp += (t-val) - y;
1944 const T t = val +
y;
1945 comp += (t-val) - y;
1949 res->
set(val-comp, i);
1952 for (
unsigned i = start; i <
end; ++
i) {
1956 for (
unsigned int j = 0; j < od; ++j) {
1968 template <
typename T>
1974 const unsigned int dim = mat->
dim;
1980 unsigned iend =
MIN(maxoff, mat->
dim-maxoff);
1981 iend =
MIN(end, iend);
1986 for (
unsigned int _j = 0; _j < od; ++_j) {
2001 for (i = start; i < iend; ++
i) {
2008 for (
unsigned int j = 0; j < od; ++j) {
2012 const T t = val +
y;
2015 comp += (t-val) - y;
2020 const T t = val +
y;
2023 comp += (t-val) - y;
2026 res->
set(val-comp, i);
2029 iend =
MIN(end, dim - maxoff);
2033 for (; i < iend; ++
i) {
2040 for (
unsigned int j = 0; j < od; ++j) {
2046 comp += (t-val) - y;
2052 comp += (t-val) - y;
2055 res->
set(val-comp, i);
2059 iend =
MIN(end, maxoff);
2062 for (; i < iend; ++
i) {
2069 for (
unsigned int j = 0; j < od; ++j) {
2073 const T t = val +
y;
2076 comp += (t-val) - y;
2079 if (s < (
signed)dim -
i) {
2081 const T t = val +
y;
2084 comp += (t-val) - y;
2088 res->
set(val-comp, i);
2094 for (; i <
end; ++
i) {
2101 for (
unsigned int j = 0; j < od; ++j) {
2108 comp += (t-val) - y;
2110 if (s < (
signed)dim -
i) {
2115 comp += (t-val) - y;
2119 res->
set(val-comp, i);
2123 for (i = start; i < iend; ++
i) {
2129 for (
unsigned int j = 0; j < od; ++j) {
2144 iend =
MIN(end, dim - maxoff);
2148 for (; i < iend; ++
i) {
2154 for (
unsigned int j = 0; j < od; ++j) {
2167 iend =
MIN(end, maxoff);
2170 for (; i < iend; ++
i) {
2176 for (
unsigned int j = 0; j < od; ++j) {
2183 if (s < (
signed)dim -
i) {
2195 for (; i <
end; ++
i) {
2201 for (
unsigned int j = 0; j < od; ++j) {
2207 if (s < (
signed)dim -
i) {
2219 template <
typename T>
2224 const unsigned int dim = mat->
dim;
2230 do_vec_vec_mul(end-start, &res->
setval(start),
2233 for (
unsigned j = 0; j < od; ++j) {
2236 const int iend =
MIN(end,dim-s);
2237 const int iend2 = end-s;
2238 const int istart2 =
MAX((
int)(start-s),0);
2241 for (
int off = istart2; off < iend2; ++off) {
2244 corr.
setval(off+s-start) += (t - res->
get(off+s)) - y;
2249 for (
int off = start; off < iend; ++off) {
2252 corr.
setval(off-start) += (t - res->
get(off)) - y;
2257 for (
unsigned off = start; off <
end; ++off)
2258 res->
setval(off) -= corr(off-start);
2261 template <
typename T>
2266 const unsigned int dim = mat->
dim;
2275 do_vec_vec_mul(end-start, &res->
setval(start),
2278 for (
unsigned j = 0; j < od; ++j) {
2281 const int iend =
MIN(end,dim-s);
2282 const int iend2 = end-s;
2283 const int istart2 =
MAX((
int)(start-s),0);
2284 do_add_vec_vec_mul(iend2-istart2, &res->
setval(istart2+s),
2286 do_add_vec_vec_mul(iend-start, &res->
setval(start),
2291 #define COST_BDMATVEC_DOT(d,o,m) COST_BDMATVEC(d,o,m)
2292 template <
typename T>
2298 const unsigned int dim = mat->
dim;
2299 #ifdef BDMATVEC_LNWISE
2302 for (
unsigned i = start; i <
end; ++
i) {
2307 for (
unsigned j = 0; j < od; ++j) {
2311 const T t = val +
y;
2312 comp += (t-val) - y;
2317 const T t = val +
y;
2318 comp += (t-val) - y;
2322 res->
set(val-comp, i);
2325 for (
unsigned i = start; i <
end; ++
i) {
2329 for (
unsigned j = 0; j < od; ++j) {
2339 #elif defined(BDMATVEC_LNWISE_OPT)
2345 unsigned iend =
MIN(maxoff, dim-maxoff);
2346 iend =
MIN(iend, end);
2351 for (
unsigned int _j = 0; _j < od; ++_j) {
2367 for (i = start; i < iend; ++
i) {
2373 for (
unsigned int j = 0; j < od; ++j) {
2389 iend =
MIN(end, dim - maxoff);
2394 for (; i < iend; ++
i) {
2400 for (
unsigned int j = 0; j < od; ++j) {
2413 iend =
MIN(maxoff, end);
2416 for (; i < iend; ++
i) {
2422 for (
unsigned int j = 0; j < od; ++j) {
2429 if (s < (
signed)dim -
i) {
2443 for (; i <
end; ++
i) {
2449 for (
unsigned int j = 0; j < od; ++j) {
2455 if (s < (
signed)dim -
i) {
2464 #elif defined(BDMATVEC_DIAGWISE)
2468 do_vec_vec_cmul(end-start, &res->
setval(start),
2471 for (
unsigned j = 0; j < od; j++) {
2474 const int iend =
MIN(end,dim-s);
2475 const int iend2 = end-s;
2476 const int istart2 =
MAX((
int)(start-s),0);
2477 do_add_vec_vec_cmul(iend-start, &res->
setval(start),
2479 do_add_vec_vec_cmul(iend2-istart2, &res->
setval(istart2+s),
2485 #if defined(SMP) && !defined(NOSMP_BDMATVEC)
2488 template <typename T>
2497 template <typename T>
2507 # if !defined(SMP_BDMATSLICE)
2508 # define SMP_BDMATSLICE 32768
2510 # define SMP_BDMATSLICE2 (SMP_BDMATSLICE/sizeof(T))
2512 template <
typename T>
2517 update_n_thr(n_thr);
2520 do_bdmat_vec_mult<T>(0UL, dim, &tv,
this, &vec);
2525 const unsigned long first = slice_offset(1, n_thr, dim, (T*)0);
2526 unsigned long st, en = first;
2527 for (
unsigned t = 0; t < n_thr-1; ++t) {
2528 st = en; en = slice_offset(t+2, n_thr, dim, (T*)0);
2530 st, en, &tv,
this, &vec, (
void*)0);
2532 do_bdmat_vec_mult<T>(0UL, first, &tv,
this, &vec);
2533 for (
unsigned s = 0; s < n_thr-1; ++s)
2540 template <
typename T>
2545 update_n_thr(n_thr);
2548 do_bdmat_vec_transmult<T> (0UL, dim, &tv,
this, &vec);
2553 const unsigned long first = slice_offset(1, n_thr, dim, (T*)0);
2554 unsigned long st, en = first;
2555 for (
unsigned t = 0; t < n_thr-1; ++t) {
2556 st = en; en = slice_offset(t+2, n_thr, dim, (T*)0);
2558 st, en, &tv,
this, &vec, (
void*)0);
2560 do_bdmat_vec_transmult<T>(0UL, first, &tv,
this, &vec);
2561 for (
unsigned s = 0; s < n_thr-1; ++s)
2569 template <
typename T>
2574 do_bdmat_vec_mult<T>(0,
dim, &tv,
this, &vec);
2579 template <
typename T>
2584 do_bdmat_vec_transmult<T>(0,
dim, &tv,
this, &vec);
2591 template <
typename T>
2596 do_bdmat_vec_dotmult<T>(0,
dim, &tv,
this, &vec);
2608 template <
typename T>
2624 for (
unsigned j = 0; j < dim-di; ++j)
2634 for (
unsigned j = 0; j < dim-di; ++j)
2642 template <
typename T>
2647 for (
unsigned j = 0; j <
dim; ++j)
2652 for (
unsigned i = 0; i < (dim-off); ++
i)
2654 for (
unsigned k = 0; k < (dim-off); ++k)
2660 template <
typename T>
2665 for (
unsigned j = 0; j <
dim; ++j)
2670 for (
unsigned i = 0; i < (dim-off); ++
i)
2672 for (
unsigned k = 0; k < (dim-off); ++k)
2678 template <
typename T>
2694 template <
typename T>
2711 template <
typename T>
2712 STD__ ostream& operator << (STD__ ostream& ostr, const BdMatrix<T>& mat)
2717 for (
unsigned i = 0; i < mat.
dim; ++
i) {
2718 for (
unsigned j = 0; j < mat.
dim; ++j) {
2720 ostr.width(mat.outw);
2721 ostr.precision(mat.outp);
2722 ostr.
fill(mat.outf);
2724 if (i == j || mat.diagconf.contains(
CSTD__ abs((
int)i-(
int)j)))
2725 ostr << mat.
get(i, j) <<
' ';
2727 ostr << (
T)0 <<
' ';
2738 template <
typename T>
2742 for (
unsigned i = 0; i < mat.
dim; ++
i) {
2743 for (
unsigned j = 0; j < mat.
dim; ++j) {
2749 STD__ cerr <<
"Element (" << i <<
"," << j
2750 <<
") should be zero!" <<
STD__ endl;
2756 template <
typename T>
2762 for (
unsigned i = 0; i <
_DIM; ++
i)
2767 for (
unsigned k = 0; k < _DIM-di; ++k) {
2776 template <
typename T>
2781 for (i = 0; i <
dim; ++
i)
2782 tm.setval(
diag[i], i, i);
2785 for (
unsigned k = 0; k < dim-(i=
diagconf.
get(j)); ++k) {
2794 template <
typename T>
2802 #ifndef TBCI_DISABLE_EXCEPT
2803 throw (
BdMatrixErr(
"Trying to invert singular Matrix!"));
2805 STD__ cerr <<
"Trying to invert singular Matrix!" <<
STD__ endl;
2810 for (
unsigned i = 0; i <
dim; i++)
2811 for (
unsigned j = 0; j <
dim; j++) {
2812 ri(0) =
i; ci(0) = j;
2813 inv.setval(subdet(ri, ci) * ((i+j)%2? -d: d), j, i);
2819 #if defined(SMP) && defined(HAVE_LIBNUMA)
2820 template <
typename T>
2822 const int node,
bool fault_in,
2823 const unsigned first,
const unsigned last)
2826 const unsigned int sz = bm.
rows();
2829 moved = do_numa_move_pages(node, fault_in,
2830 (
unsigned long)(diag+first),
2831 (
unsigned long)(diag+last));
2833 for (
unsigned i = 0; i < cfg.
size(); ++
i) {
2834 const unsigned off = cfg.
get(i);
2835 const unsigned dln = sz - off;
2839 moved += do_numa_move_pages(node, fault_in,
2840 (
unsigned long)(adiag+first),
2841 (
unsigned long)(adiag+
MIN(dln,last)));
2843 moved += do_numa_move_pages(node, fault_in,
2844 (
unsigned long)(bdiag+
MAX(0,(
signed)(first-off))),
2845 (
unsigned long)(bdiag+last-off));
2850 template <
typename T>
2851 void numa_opt_bdmat_job(
struct thr_ctrl *tc)
2856 const int node = tc->
t_size;
2857 const bool fault_in = tc->
t_off;
2859 const unsigned st = (
unsigned long)tc->
t_par[0];
2860 const unsigned en = (
unsigned long)tc->
t_par[1];
2861 tc->
t_res_l = numa_opt_bdmat_helper(bm, bmcfg, node, fault_in, st, en);
2864 template <
typename T>
2870 const unsigned sz = bm.
rows();
2885 numa_opt_bdmat_helper(bm, bmcfg,
2890 const unsigned first = slice_offset(1, n_thr, sz, (T*)0);
2891 unsigned long st, en = first;
2893 unsigned long res = 0;
2894 for (
unsigned t = 0; t < n_thr-1; ++t) {
2895 st = en; en = slice_offset(t+2, n_thr, sz, (T*)0);
2898 st, en, &bm, &bmcfg, (
void*)0);
2901 res = numa_opt_bdmat_helper(bm, bmcfg,
2904 for (
unsigned t = 0; t < n_thr-1; ++t) {
2914 template <
typename T>
#define TBCICOPY(n, o, t, s)
TVector< T > get_col(const unsigned int) const
Construct Vector from column.
void do_copy(const BdMatrix< T > &)
BdMatrix< T > transpose(BdMatrix< T > &mat)
#define BCHKNR(cond, exc, txt, ind)
T & setval(const unsigned long i) const
BVector< T > & swap(BVector< T > &v)
const Vector< T > const Vector< T > const Vector< T > & p
BdMatrix< T > & expand(unsigned=0)
void job_bdmat_vec_transmult(struct thr_ctrl *tc)
BdMatrix< T > operator/(const T &) const
const T & getcref(const unsigned long idx) const
The class BdMatrix is an implementation to store and do operations on sparse Matrices with a band str...
STD__ istream & operator>>(STD__ istream &istr, BdMatrix< T > &mat)
BdMatrix< T > & operator+=(const BdMatrix< T > &)
void free_diags(const unsigned)
Implementation alternative: All memory is allocated in one big chunk (default)
void do_bdmat_vec_mult_lnw_opt(const unsigned start, const unsigned end, TVector< T > *res, const BdMatrix< T > *mat, const Vector< T > *vec)
const BVector< unsigned int > & getcfg() const
ro access to config BVector
F_TMatrix< double > inv(const F_Matrix< double > &A, int overwriteA=0)
#define MAY_PREFETCH_R(adr, loc)
BdMatrix< T > & adddiag(const unsigned)
unsigned long size() const HOT
BdMatrix< T > & reconfig(const BVector< unsigned int > &) COLD
exception base class for the TBCI NumLib
friend NOINST void gaussj FGDT(const BdMatrix< T > &, const Matrix< T > &)
NAMESPACE_TBCI char gaussj(Matrix< T > &a, Matrix< T > &b)
Common interface definition (signature) for all Matrices.
tbci_traits< T >::const_refval_type get(const unsigned int, const unsigned int) const HOT
tbci_traits< T >::const_refval_type get(const unsigned long i) const
bool operator==(const BdMatrix< T > &) const
unsigned int getmaxoff() const
max distance from main diagonal
unsigned int columns() const
number of columns
BdMatrix< T > & div_row(const T &, const unsigned)
BVector< T * > bdiag
pointers to upper and lower diagonals
#define BCHK(cond, exc, txt, ind, rtval)
T * check(const unsigned r, const unsigned c) const HOT
#define TBCIFILL(n, v, t, s)
#define REALLOC(v, os, t, s)
void(* thr_job_t)(struct thr_ctrl *)
Before the double inclusion guard on purpose!
BdMatrix< T > & set_row(const Vector< T > &val, const unsigned, const unsigned=0)
Overwrite row with Vector.
BdMatrix< T > & operator=(const BdMatrix< T > &)
The assignment operators in TBCI are NOT resizing.
T & set(const T &val, const unsigned long i) const
T & setval(const T &val, const unsigned int r, const unsigned int c)
T aligned_value_type TALIGN(MIN_ALIGN2)
tbci_traits< T >::const_refval_type get(const unsigned long idx) const HOT
for(REGISTER T *p1=c.vec,*p2=b.vec;p1< c.endvec;p1++, p2++)*p1
unsigned int size() const
size: incompatibility to Matrix (!)
struct thr_struct * threads
#define PREFETCH_R_MANY(addr, loc)
void do_bdmat_vec_mult_lnw(const unsigned start, const unsigned end, TVector< T > *res, const BdMatrix< T > *mat, const Vector< T > *vec)
int lu_decomp(Matrix< T > &) HOT
LU decomposes the TBCI::Matrix mat.
void do_bdmat_vec_mult(const unsigned start, const unsigned end, TVector< T > *res, const BdMatrix< T > *mat, const Vector< T > *vec)
#define PREFETCH_R(addr, loc)
In case gcc does not yet support __builtin_prefetch(), we have handcoded assembly with gcc for a few ...
Matrix< T > & fill(const T &v=(T) 0)
unsigned int columns() const
number of columns
bool contains(const T &, unsigned long *=0) const
TVector< T > get_row(const unsigned int) const
Construct Vector from row.
BdMatrix< T > & fill(const T &=0)
void do_bdmat_vec_transmult_diagw_exact(const unsigned start, const unsigned end, TVector< T > *res, const BdMatrix< T > *mat, const Vector< T > *vec)
void constructor(const unsigned int, const bool=true)
BdMatrix< T > operator-() const
void destroy()
destroy object explicitly
long int Vector< T > & index
BdMatrix(const BdMatrix< U > &bm)
BdMatrix< T > & transpose()
transpose() does change the object!
T & operator()(const unsigned int, const unsigned int) HOT
rw member access
BVector< T > & append(const T &)
performs poorly
BdMatrix< T > operator*(const BdMatrix< T > &) const
void do_bdmat_vec_mult_diagw_exact(const unsigned start, const unsigned end, TVector< T > *res, const BdMatrix< T > *mat, const Vector< T > *vec)
unsigned int noelem() const
Number of elements that are actually stored.
BdMatrix< T > & div_rows(const Vector< T > &)
BVector< T > & remove(const unsigned long)
#define PREFETCH_W_MANY(addr, loc)
BdMatrix< T > operator+(const BdMatrix< T > &) const
int conj(const int arg)
conj for elementary types
BdMatrixErr(const char *t, const long i=0)
BdMatrix< T > inverse() const
T & set(const unsigned long idx) HOT
unsigned int rows() const
number of rows
void do_bdmat_vec_transmult_lnw_opt(const unsigned start, const unsigned end, TVector< T > *res, const BdMatrix< T > *mat, const Vector< T > *vec)
unsigned int maxoff
size, max offset from diag
const T & getcref(const unsigned long i) const
void do_bdmat_vec_dotmult(const unsigned start, const unsigned end, TVector< T > *res, const BdMatrix< T > *mat, const Vector< T > *vec)
unsigned long size() const
BVector< unsigned > diagconf
configuration
void job_bdmat_vec_mult(struct thr_ctrl *tc)
BdMatrix< T > & autoinsert(const T &, const unsigned, const unsigned)
#define EXPCHK(cond, exc, txt, ind, rtval)
static const char * mat_info()
TVector< T > transMult(const Vector< T > &) const HOT
Transposed Matrix-Vector multiplication.
BVector< T > & resize(const BVector< T > &)
Actually it's a resize and copy (some people would expect the assignment op to do this) ...
#define MAY_PREFETCH_W(adr, loc)
BdMatrix< T > & removediag(const unsigned)
#define TBCIDELETE(t, v, sz)
void do_bdmat_vec_transmult_lnw(const unsigned start, const unsigned end, TVector< T > *res, const BdMatrix< T > *mat, const Vector< T > *vec)
Temporary Base Class Idiom: Class TVector is used for temporary variables.
void thread_wait(const int thr_no, struct job_output *out)
BdMatrix< T > & operator*=(const T &)
BdMatrix< T > & mult_row(const T &, const unsigned)
BVector< T > & copy(const BVector< T > &bv)
copy does a resize, if necessary
BdMatrix< T > & resize(const BdMatrix< T > &bdm)
Resizing assignment.
const Vector< T > Vector< T > Vector< T > Vector< T > & y
BdMatrix< T > & operator/=(const T &)
const Vector< T > Vector< T > Vector< T > Vector< T > Vector< T > Vector< T > Vector< T > Vector< T > long int res
#define TBCICOMP(n, o, t, s)
unsigned int do_exactsum2()
BdMatrix< T > & resize(const unsigned int)
int numa_optimize(const BdMatrix< T > &bm, bool fault_in)
TVector< T > dotMult(const Vector< T > &) const
BdMatrix< T > & mult_rows(const Vector< T > &)
Linewise multiplication.
BdMatrix< T > & setval(const T &v, const unsigned int r, const unsigned int c)
autoinsert, sparse class compatible
T * check_internal(const unsigned r, const unsigned c) const HOT
void thread_start_off(const int thr_no, thr_job_t job, const unsigned long off, const unsigned long sz,...)
const Vector< T > Vector< T > Vector< T > Vector< T > Vector< T > Vector< T > Vector< T > Vector< T > long int int char v
< find minimun of func on grid with resolution res
const BdMatrix< T > & setoutopts(int w=5, int p=4, char f= ' ') const
void do_bdmat_vec_transmult(const unsigned start, const unsigned end, TVector< T > *res, const BdMatrix< T > *mat, const Vector< T > *vec)
BdMatrixErr(const BdMatrixErr &be)
BdMatrix< T > & operator-=(const BdMatrix< T > &)
#define LIKELY(expr)
branch prediction note that we sometimes on purpose mark the unlikely possibility likely and vice ver...
tbci_traits< T >::const_refval_type get(const unsigned r, const unsigned c) const
get, set and getcref are used internally and not for public consumption
unsigned long arsz
of saved elems,
unsigned int rows() const
number of rows
BdMatrix< T > & setunit(const T &=1)
bool operator!=(const BdMatrix< T > &ma) const