9#ifndef TBCI_BAND_MATRIX_H
10#define TBCI_BAND_MATRIX_H
14#include "tbci/basics.h"
15#include "tbci/vector.h"
16#include "tbci/matrix.h"
21# define BD_MINVAL 1e-16
25#ifndef BDMAT_NEW_ALLOC
26# define BDMAT_ONE_BLOCK
36#if !defined(NO_GD) && !defined(AUTO_DECL)
37# include "tbci/band_matrix_gd.h"
42#ifndef TBCI_DISABLE_EXCEPT
50 :
NumErr(
"Error in var conf Band_Matrix class") {}
60# pragma interface "band_matrix.h"
117 void constructor(
const unsigned int,
const bool =
true);
118 T*
check(
const unsigned r,
const unsigned c)
const HOT;
125 mutable int outw, outp, outf;
128 inline bool search2(
unsigned int&,
const unsigned int,
const unsigned int)
HOT;
138 explicit BdMatrix(
const unsigned int = 0);
140 explicit BdMatrix(
const unsigned int,
const unsigned int);
149 BdMatrix(
const T&,
const unsigned int,
unsigned int);
165 {
return "BdMatrix"; }
167#ifndef HAVE_PROMOTION_BUG
169# ifndef HAVE_GCC295_TMPLFRNDCLS_BUG
175 for (
unsigned k=0; k<
dim; ++k)
179 for (
unsigned j=0; j<
dim-di; ++j) {
185 outw = bm.outw; outp = bm.outp;
186 outf = bm.outf; outset =
true;
199 typename tbci_traits<T>::const_refval_type
200 operator() (
const unsigned int,
const unsigned int)
const HOT;
201 typename tbci_traits<T>::const_refval_type
202 get(
const unsigned int,
const unsigned int)
const HOT;
206 const unsigned int c)
208 T&
setval(
const unsigned int r,
const unsigned int c);
243 inline unsigned int size()
const {
return dim; }
247 inline unsigned int rows()
const {
return dim; }
277#ifdef BDMAT_TRANS_FRIENDS
289#ifdef BDMAT_OP_FRIENDS
322 { outw = w; outp =
p; outf = f; outset =
true;
return *
this; }
355#ifdef BDMAT_ONE_BLOCK
384 unsigned long sz =
dim;
int t;
385 const unsigned long dcs =
diagconf.size();
386 for (
REGISTER unsigned j = 0; j < dcs; ++j)
387 if ((t = ((
signed)
dim - (
signed)
diagconf.get(j))) > 0)
393 for (
unsigned i = 0;
i < dcs; ++
i) {
395 int s = (signed)
dim - (
signed)di;
397 STD__ cerr <<
"Wrong config. Vector supplied!" <<
STD__ endl;
408 STD__ cerr <<
"BdMat ctor: malloc failed to get " << sz <<
" bytes!" <<
STD__ endl;
416 arsz = sz; dummy = (
T)0;
428 for (
int i = u-1;
i >= 0; --
i) {
454 unsigned long sz = 0;
unsigned i = 0;
474 int s = (signed)
dim - (
signed)di;
480 T* ptr =
NEW(
T, 2*s);
490 arsz = sz; dummy = (
T)0;
498 STD__ cerr <<
"BdMat ctor: mem alloc (" << sz <<
") failed!" <<
STD__ endl;
539 outw = mat.outw; outp = mat.outp;
540 outf = mat.outf; outset =
true;
561 for (
REGISTER unsigned int t = 0; t < mo; ++t)
587 for (
unsigned int l = 1; l < mat.
columns(); ++l) {
589 (signed)j < ((signed)(mat.
rows() - l)); ++j) {
590 if (mat(j, j+l) !=
T(0) || mat(j+l, j) !=
T(0)) {
605 for (
unsigned int i = 0;
i <
diagconf.size(); ++
i) {
606 const unsigned int dci =
diagconf.get(
i);
607 for (
REGISTER unsigned int j = 0; j <
dim - dci; ++j) {
608 adiag.get(dci)[j] = mat(j, j + dci);
609 bdiag.get(dci)[j] = mat(dci + j, j);
623inline bool BdMatrix<T>::search2(
unsigned int& cfg,
const unsigned int i,
624 const unsigned int j)
675 return (dummy = (
T)0);
680inline typename tbci_traits<T>::const_refval_type
688 return (dummy = (
T)0);
693inline typename tbci_traits<T>::const_refval_type
701 return (dummy = (
T)0);
723#ifdef BDMAT_ONE_BLOCK
754 T* oldd(
diag);
unsigned int od =
dim;
755#ifdef BDMAT_ONE_BLOCK
756 unsigned int oldar =
arsz;
762 if (od <= 1 && nd > 1)
773 for (
unsigned int t = 0; t <
diagconf.size(); ++t) {
786#ifndef BDMAT_ONE_BLOCK
787 for (
unsigned i = 0;
i < oldconf.
size(); ++
i)
789 2*(od-oldconf.
get(
i)));
838 for (
unsigned i = 0;
i <
dim; ++
i)
851#ifdef BDMAT_ONE_BLOCK
852 unsigned int oldar =
arsz;
854 unsigned int olddim =
dim;
867 const unsigned int dct =
diagconf.get(t);
873#ifndef BDMAT_ONE_BLOCK
874 for (
unsigned i = 0;
i < oldconf.
size(); ++
i)
876 2*(olddim-oldconf.
get(
i)));
886#ifdef BDMAT_ONE_BLOCK
890 const int newsz =
dim - ndg;
900 STD__ cerr <<
"Allocation error" <<
STD__ endl;
913 long diff = (long)
diag - (
long)olddiag;
920 const unsigned dcs =
diagconf.size();
921 for (
unsigned i = 0;
i < dcs; ++
i) {
947 const int newsz =
dim - ndg;
952 T* ptr =
NEW(
T, 2*newsz);
955 STD__ cerr <<
"BdMat adddiag allocation failed!" <<
STD__ endl;
977#ifdef BDMAT_ONE_BLOCK
986 unsigned ix;
diagconf.contains(dg, &ix);
1013template <
typename T>
1016 for (
unsigned i=0;
i<val.
size(); ++
i)
1021template <
typename T>
1032 ret.
set(
bdiag.get(di)[r-di], r-di);
1037template <
typename T>
1053#ifdef BDMAT_ONE_BLOCK
1054template <
typename T>
1061template <
typename T>
1070template <
typename T>
1082template <
typename T>
1096template <
typename T>
1104#ifdef BDMAT_ONE_BLOCK
1106 for (
unsigned i = 0;
i < m; ++
i)
1107 newconf.
set(
i) =
i+1;
1110 for (
unsigned i=1;
i<=m; ++
i)
1119template <
typename T>
1124 int sz = arow.
size();
1139 int r = bestrow(level++);
1140 int rw = 0;
int mul;
1144 mul = (
i%2? -1: 1); rw--;
1149 for (
REGISTER int j = 0; j < sz; j++) {
1156 for (
REGISTER int t = 0; t < sz; t++) {
1165 TBCICOPY(&(
ac(j)), &(acol(j+1)),
int, (sz-j-1));
1168 tmp += (j%2? -1: 1) * sdet(level, ar,
ac) * el;
1173template <
typename T>
1177#ifndef TBCI_DISABLE_EXCEPT
1178 throw (
BdMatrixErr(
"Unequal # of lines and rows to ignore!"));
1180 STD__ cerr <<
"Unequal # of lines and rows to ignore!" <<
STD__ endl;
1188 bestrow.resize(
dim);
1190 unsigned int rw = 0;
unsigned int cl = 0;
1206 matmem<T>* el = offdiag.setfirst();
int elemi = (el? el->row: -1);
1208 for (
int i = 0;
i <
dim;
i++) {
1209 if (!(actrow.contains(
i))) {
1210 rowfill(
i) =
dim + 1;
continue;
1214 if (actcol.contains(
i))
1216 if (actcol.contains(
i+1) &&
i+1 <
dim)
1218 if (actcol.contains(
i-1) &&
i-1 >= 0)
1228 while (el && elemi ==
i) {
1229 if (actcol.contains(el->col))
1231 el = offdiag.setnext(); elemi = (el? el->row: -1);
1238 for (
int k = 0; k <
dim; k++) {
1239 int min =
dim + 2;
int minr = -1;
1240 for (
int s = 0; s <
dim; s++)
1241 if (rowfill(s) <
min) {
1242 min = rowfill(s); minr = s;
1246 rowfill(minr) =
dim + 1;
1248 return sdet(0, actrow, actcol);
1253template <
typename T>
1260 STD__ cout <<
"Warning: Automatic reconfiguring BdMatrix in assignment!" <<
STD__ endl;
1275template <
typename T>
1283#define SFORALL_S(op) \
1284template <typename T> \
1285BdMatrix<T>& BdMatrix<T>::operator op (const BdMatrix<T>& mat) \
1287 BCHK(mat.dim != dim, BdMatrixErr, Adding wrong size BdMatrix, mat.dim, *this); \
1289 for (i = 0; i < dim; ++i) \
1290 diag[i] op mat.diag[i]; \
1291 for (j = 0; j < diagconf.size(); ++j) { \
1292 const REGISTER unsigned di = diagconf.get(j); \
1293 if (LIKELY(mat.adiag.get(di))) \
1294 for (i = 0; i < (dim-di); ++i) { \
1295 adiag.set(di)[i] op mat.adiag.get(di)[i]; \
1296 bdiag.set(di)[i] op mat.bdiag.get(di)[i]; \
1299 if (diagconf == mat.diagconf) \
1301 for (j = 0; j < mat.diagconf.size(); ++j) { \
1302 const REGISTER unsigned di = mat.diagconf.get(j); \
1303 if (!diagconf.contains(di)) { \
1305 for (i = 0; i < (dim-di); ++i) { \
1306 adiag.set(di)[i] op mat.adiag.get(di)[i]; \
1307 bdiag.set(di)[i] op mat.bdiag.get(di)[i]; \
1318#define SFORALL_T(op) \
1319template <typename T> \
1320BdMatrix<T>& BdMatrix<T>::operator op (const T& mult) \
1322 for (REGISTER unsigned int i = 0; i < dim; ++i) \
1324 for (unsigned j = 0; j < diagconf.size(); ++j) { \
1325 const REGISTER unsigned di = diagconf.get(j); \
1326 for (unsigned k = 0; k < (dim-di); ++k) { \
1327 adiag.set(di)[k] op mult; \
1328 bdiag.set(di)[k] op mult; \
1339template <typename
T>
1346 for (
unsigned j = 0; j <
diagconf.size(); ++j) {
1348 for (
unsigned k = 0; k < (
dim-di); ++k) {
1349 adiag.set(di)[k] *= mult;
1350 bdiag.set(di)[k] *= mult;
1356#define INTDIVEQ(T) \
1358inline BdMatrix < T >& BdMatrix< T >::operator /= (const T & div) \
1360 BCHK((div==( T )0), BdMatrixErr, "Divide BdMatrix by zero", 0, *this); \
1361 for (REGISTER unsigned i = 0; i < dim; ++i) \
1363 for (unsigned j = 0; j < diagconf.size(); ++j) { \
1364 const REGISTER unsigned di = diagconf.get(j); \
1365 for (unsigned k = 0; k < (dim-di); ++k) { \
1366 adiag.set(di)[k] /= div; \
1367 bdiag.set(di)[k] /= div; \
1376template <typename
T>
1382 const unsigned int s =
diagconf.get(t);
1392template <typename T>
1398#define STDDEF_ST(op) \
1399template <typename T> \
1400inline BdMatrix<T> BdMatrix<T>::operator op (const T& val) const \
1402 return (BdMatrix<T> (*this)) op##= val; \
1410#define STDDEF_SS(op) \
1411template <typename T> \
1412BdMatrix<T> BdMatrix<T>::operator op (const BdMatrix<T>& m2) const \
1414 return (BdMatrix<T> (*this)) op##= m2; \
1421template <typename
T>
1424 const unsigned int newoff =
maxoff + mat2.maxoff;
1429 for (
int i = 0;
i < (signed)
dim; ++
i) {
1431 const int jend =
MIN(
dim,
i+newoff+1);
1435 for (
int j =
MAX(0,
i-(
signed)newoff); j < jend; ++j) {
1437 for (
REGISTER int k = kstart; k < kend; ++k)
1438 el += this->
get(i, k) * mat2.get(k, j);
1448template <typename T>
1454template <
typename T>
1459#ifndef TBCI_DISABLE_EXCEPT
1460 throw (
BdMatrixErr(
"Multiplying Matrix with Matrix: wrong sizes!"));
1462 STD__ cerr <<
"Multiplying Matrix with Matrix: wrong sizes!" <<
STD__ endl;
1468 for (
int c = 0;
c < tm.columns();
c++)
1469 for (
int i = 0;
i < mat.
rows();
i++) {
1472 val += this->
operator()(i, j) * mat(j,
c);
1480# define MAY_PREFETCH_R(adr,loc) PREFETCH_R(adr,loc)
1481# define MAY_PREFETCH_W(adr,loc) PREFETCH_W(adr,loc)
1483# define MAY_PREFETCH_R(adr,loc) do {} while (0)
1484# define MAY_PREFETCH_W(adr,loc) do {} while (0)
1488template <
typename T>
1496 for (
unsigned int t = 0; t <
diagconf.size(); ++t) {
1497 const unsigned int dc =
diagconf.get(t);
1507#if !defined(BDMATVEC_LNWISE) && !defined(BDMATVEC_LNWISE_OPT) && !defined(BDMATVEC_DIAGWISE)
1508# define BDMATVEC_LNWISE_OPT
1512#define COST_BDMATVEC_LN(d,o,m) (d*(COST_MULT+2*COST_UNIT_LOAD+COST_UNIT_STORE+COST_LOOP\
1513 +o*(COST_LOOP+2*COST_BRANCH+COST_UNIT_LOAD))+(2*d-m)*o*(3*COST_NU_LOAD+COST_ADD+COST_MULT))
1514#define COST_BDMATVEC_LNO(d,o,m) (d*(COST_MULT+2*COST_UNIT_LOAD+COST_UNIT_STORE+COST_LOOP\
1515 +o*(COST_LOOP+COST_UNIT_LOAD))+(2*d-m)*o*(3*COST_NU_LOAD+COST_ADD+COST_MULT+COST_BRANCH/2))
1516#define COST_BDMATVEC_DIAG(d,o,m) (d*(2*COST_UNIT_LOAD+COST_MULT+COST_UNIT_STORE+COST_LOOP)\
1517 +o*(6*COST_NU_LOAD+COST_LOOP+3*COST_UNIT_LOAD+2*COST_NU_STORE\
1518 +(2*d-m)*(3*COST_UNIT_LOAD+COST_LOOP+COST_MULT+COST_ADD+COST_UNIT_STORE)))
1520template <
typename T>
1527 const unsigned int dim = mat->
dim;
1531 for (
unsigned i = start;
i <
end; ++
i) {
1536 for (
unsigned j = 0; j < od; ++j) {
1541 comp += (t-val) -
y;
1547 comp += (t-val) -
y;
1551 res->set(val-comp,
i);
1554 for (
unsigned i = start;
i <
end; ++
i) {
1558 for (
unsigned j = 0; j < od; ++j) {
1570template <
typename T>
1577 const unsigned int dim = mat->
dim;
1589 for (
unsigned int _j = 0; _j < od; ++_j) {
1604 for (
i = start;
i < iend; ++
i) {
1611 for (
unsigned int j = 0; j < od; ++j) {
1615 const T t = val +
y;
1618 comp += (t-val) -
y;
1624 const T t = val +
y;
1627 comp += (t-val) -
y;
1630 res->set(val-comp,
i);
1636 for (;
i < iend; ++
i) {
1643 for (
unsigned int j = 0; j < od; ++j) {
1649 comp += (t-val) -
y;
1655 comp += (t-val) -
y;
1658 res->set(val-comp,
i);
1665 for (;
i < iend; ++
i) {
1672 for (
unsigned int j = 0; j < od; ++j) {
1676 const T t = val +
y;
1679 comp += (t-val) -
y;
1682 if (s < (
signed)
dim -
i) {
1684 const T t = val +
y;
1687 comp += (t-val) -
y;
1691 res->set(val-comp,
i);
1698 for (;
i <
end; ++
i) {
1705 for (
unsigned int j = 0; j < od; ++j) {
1712 comp += (t-val) -
y;
1714 if (s < (
signed)
dim -
i) {
1719 comp += (t-val) -
y;
1723 res->set(val-comp,
i);
1730 for (
i = start;
i < iend; ++
i) {
1736 for (
unsigned int j = 0; j < od; ++j) {
1755 for (;
i < iend; ++
i) {
1761 for (
unsigned int j = 0; j < od; ++j) {
1776 for (;
i < iend; ++
i) {
1782 for (
unsigned int j = 0; j < od; ++j) {
1789 if (s < (
signed)
dim -
i) {
1802 for (;
i <
end; ++
i) {
1808 for (
unsigned int j = 0; j < od; ++j) {
1814 if (s < (
signed)
dim -
i) {
1826template <
typename T>
1833 const unsigned int dim = mat->
dim;
1843 do_vec_vec_mul(
end-start, &
res->setval(start),
1846 for (
unsigned j = 0; j < od; ++j) {
1850 const unsigned iend2 =
MAX(0,(
int)(
end-s));
1851 const unsigned istart2 =
MAX((
int)(start-s),0);
1854 for (
unsigned off = start; off < iend; ++off) {
1857 corr.
setval(off-start) += (t -
res->get(off)) -
y;
1858 res->setval(off) = t;
1862 for (
unsigned off = istart2; off < iend2; ++off) {
1865 corr.
setval(s+off-start) += (t -
res->get(s+off)) -
y;
1866 res->setval(s+off) = t;
1870 for (
unsigned off = start; off <
end; ++off)
1871 res->setval(off) -= corr.
get(off-start);
1874template <
typename T>
1880 const unsigned int dim = mat->
dim;
1894 do_vec_vec_mul(
end-start, &
res->setval(start),
1897 for (
unsigned j = 0; j < od; ++j) {
1901 const unsigned iend2 =
MAX(0,(
int)(
end-s));
1902 const unsigned istart2 =
MAX((
int)(start-s),0);
1903 if (
LIKELY(iend > start))
1904 do_add_vec_vec_mul(iend-start, &
res->setval(start),
1906 if (
LIKELY(iend2 > istart2))
1907 do_add_vec_vec_mul(iend2-istart2, &
res->setval(istart2+s),
1912#if defined(BDMATVEC_LNWISE)
1913# define COST_BDMATVEC(d,o,m) COST_BDMATVEC_LN(d,o,m)
1914#elif defined(BDMATVEC_LNWISE_OPT)
1915# define COST_BDMATVEC(d,o,m) COST_BDMATVEC_LNO(d,o,m)
1916#elif defined(BDMATVEC_DIAGWISE)
1917# define COST_BDMATVEC(d,o,m) COST_BDMATVEC_DIAG(d,o,m)
1920#define COST_BDMATVEC_TRANS(d,o,m) COST_BDMATVEC(d,o,m)
1921template <
typename T>
1927 const unsigned int dim = mat->
dim;
1929 for (
unsigned i = start;
i <
end; ++
i) {
1934 for (
unsigned int j = 0; j < od; ++j) {
1938 const T t = val +
y;
1939 comp += (t-val) -
y;
1944 const T t = val +
y;
1945 comp += (t-val) -
y;
1949 res->set(val-comp,
i);
1952 for (
unsigned i = start;
i <
end; ++
i) {
1956 for (
unsigned int j = 0; j < od; ++j) {
1968template <
typename T>
1974 const unsigned int dim = mat->
dim;
1986 for (
unsigned int _j = 0; _j < od; ++_j) {
2001 for (
i = start;
i < iend; ++
i) {
2008 for (
unsigned int j = 0; j < od; ++j) {
2012 const T t = val +
y;
2015 comp += (t-val) -
y;
2020 const T t = val +
y;
2023 comp += (t-val) -
y;
2026 res->set(val-comp,
i);
2033 for (;
i < iend; ++
i) {
2040 for (
unsigned int j = 0; j < od; ++j) {
2046 comp += (t-val) -
y;
2052 comp += (t-val) -
y;
2055 res->set(val-comp,
i);
2062 for (;
i < iend; ++
i) {
2069 for (
unsigned int j = 0; j < od; ++j) {
2073 const T t = val +
y;
2076 comp += (t-val) -
y;
2079 if (s < (
signed)
dim -
i) {
2081 const T t = val +
y;
2084 comp += (t-val) -
y;
2088 res->set(val-comp,
i);
2094 for (;
i <
end; ++
i) {
2101 for (
unsigned int j = 0; j < od; ++j) {
2108 comp += (t-val) -
y;
2110 if (s < (
signed)
dim -
i) {
2115 comp += (t-val) -
y;
2119 res->set(val-comp,
i);
2123 for (
i = start;
i < iend; ++
i) {
2129 for (
unsigned int j = 0; j < od; ++j) {
2148 for (;
i < iend; ++
i) {
2154 for (
unsigned int j = 0; j < od; ++j) {
2170 for (;
i < iend; ++
i) {
2176 for (
unsigned int j = 0; j < od; ++j) {
2183 if (s < (
signed)
dim -
i) {
2195 for (;
i <
end; ++
i) {
2201 for (
unsigned int j = 0; j < od; ++j) {
2207 if (s < (
signed)
dim -
i) {
2219template <
typename T>
2224 const unsigned int dim = mat->
dim;
2230 do_vec_vec_mul(
end-start, &
res->setval(start),
2233 for (
unsigned j = 0; j < od; ++j) {
2237 const int iend2 =
end-s;
2238 const int istart2 =
MAX((
int)(start-s),0);
2241 for (
int off = istart2; off < iend2; ++off) {
2244 corr.
setval(off+s-start) += (t -
res->get(off+s)) -
y;
2245 res->setval(off+s) = t;
2249 for (
int off = start; off < iend; ++off) {
2252 corr.
setval(off-start) += (t -
res->get(off)) -
y;
2253 res->setval(off) = t;
2257 for (
unsigned off = start; off <
end; ++off)
2258 res->setval(off) -= corr(off-start);
2261template <
typename T>
2266 const unsigned int dim = mat->
dim;
2275 do_vec_vec_mul(
end-start, &
res->setval(start),
2278 for (
unsigned j = 0; j < od; ++j) {
2282 const int iend2 =
end-s;
2283 const int istart2 =
MAX((
int)(start-s),0);
2284 do_add_vec_vec_mul(iend2-istart2, &
res->setval(istart2+s),
2286 do_add_vec_vec_mul(iend-start, &
res->setval(start),
2291#define COST_BDMATVEC_DOT(d,o,m) COST_BDMATVEC(d,o,m)
2292template <
typename T>
2298 const unsigned int dim = mat->
dim;
2299#ifdef BDMATVEC_LNWISE
2302 for (
unsigned i = start;
i <
end; ++
i) {
2307 for (
unsigned j = 0; j < od; ++j) {
2311 const T t = val +
y;
2312 comp += (t-val) -
y;
2317 const T t = val +
y;
2318 comp += (t-val) -
y;
2322 res->set(val-comp,
i);
2325 for (
unsigned i = start;
i <
end; ++
i) {
2329 for (
unsigned j = 0; j < od; ++j) {
2339#elif defined(BDMATVEC_LNWISE_OPT)
2351 for (
unsigned int _j = 0; _j < od; ++_j) {
2367 for (
i = start;
i < iend; ++
i) {
2373 for (
unsigned int j = 0; j < od; ++j) {
2394 for (;
i < iend; ++
i) {
2400 for (
unsigned int j = 0; j < od; ++j) {
2416 for (;
i < iend; ++
i) {
2422 for (
unsigned int j = 0; j < od; ++j) {
2429 if (s < (
signed)
dim -
i) {
2443 for (;
i <
end; ++
i) {
2449 for (
unsigned int j = 0; j < od; ++j) {
2455 if (s < (
signed)
dim -
i) {
2464#elif defined(BDMATVEC_DIAGWISE)
2468 do_vec_vec_cmul(
end-start, &
res->setval(start),
2471 for (
unsigned j = 0; j < od; j++) {
2475 const int iend2 =
end-s;
2476 const int istart2 =
MAX((
int)(start-s),0);
2477 do_add_vec_vec_cmul(iend-start, &
res->setval(start),
2479 do_add_vec_vec_cmul(iend2-istart2, &
res->setval(istart2+s),
2485#if defined(SMP) && !defined(NOSMP_BDMATVEC)
2488template <typename T>
2497template <typename T>
2507# if !defined(SMP_BDMATSLICE)
2508# define SMP_BDMATSLICE 32768
2510# define SMP_BDMATSLICE2 (SMP_BDMATSLICE/sizeof(T))
2512template <
typename T>
2517 update_n_thr(n_thr);
2525 const unsigned long first = slice_offset(1, n_thr,
dim, (
T*)0);
2526 unsigned long st, en = first;
2527 for (
unsigned t = 0; t < n_thr-1; ++t) {
2528 st = en; en = slice_offset(t+2, n_thr,
dim, (
T*)0);
2530 st, en, &tv,
this, &vec, (
void*)0);
2533 for (
unsigned s = 0; s < n_thr-1; ++s)
2540template <
typename T>
2545 update_n_thr(n_thr);
2553 const unsigned long first = slice_offset(1, n_thr,
dim, (
T*)0);
2554 unsigned long st, en = first;
2555 for (
unsigned t = 0; t < n_thr-1; ++t) {
2556 st = en; en = slice_offset(t+2, n_thr,
dim, (
T*)0);
2558 st, en, &tv,
this, &vec, (
void*)0);
2561 for (
unsigned s = 0; s < n_thr-1; ++s)
2569template <
typename T>
2579template <
typename T>
2591template <
typename T>
2608template <
typename T>
2624 for (
unsigned j = 0; j <
dim-di; ++j)
2634 for (
unsigned j = 0; j <
dim-di; ++j)
2642template <
typename T>
2647 for (
unsigned j = 0; j <
dim; ++j)
2650 for (
unsigned od = 0; od <
diagconf.size(); ++od) {
2651 const unsigned off =
diagconf.get(od);
2652 for (
unsigned i = 0;
i < (
dim-off); ++
i)
2654 for (
unsigned k = 0; k < (
dim-off); ++k)
2660template <
typename T>
2665 for (
unsigned j = 0; j <
dim; ++j)
2668 for (
unsigned od = 0; od <
diagconf.size(); ++od) {
2669 const unsigned off =
diagconf.get(od);
2670 for (
unsigned i = 0;
i < (
dim-off); ++
i)
2672 for (
unsigned k = 0; k < (
dim-off); ++k)
2678template <
typename T>
2683 for (
unsigned od = 0; od <
diagconf.size(); ++od) {
2684 const unsigned off =
diagconf.get(od);
2688 bdiag.get(off)[
i-off] *= f;
2694template <
typename T>
2701 for (
unsigned od = 0; od <
diagconf.size(); ++od) {
2702 const unsigned off =
diagconf.get(od);
2706 bdiag.get(off)[
i-off] *= f;
2711template <
typename T>
2717 for (
unsigned i = 0;
i < mat.
dim; ++
i) {
2718 for (
unsigned j = 0; j < mat.
dim; ++j) {
2720 ostr.width(mat.outw);
2721 ostr.precision(mat.outp);
2722 ostr.fill(mat.outf);
2725 ostr << mat.
get(
i, j) <<
' ';
2727 ostr << (
T)0 <<
' ';
2738template <
typename T>
2742 for (
unsigned i = 0;
i < mat.
dim; ++
i) {
2743 for (
unsigned j = 0; j < mat.
dim; ++j) {
2749 STD__ cerr <<
"Element (" <<
i <<
"," << j
2750 <<
") should be zero!" <<
STD__ endl;
2756template <
typename T>
2762 for (
unsigned i = 0;
i <
_DIM; ++
i)
2765 for (
unsigned j = 0; j <
diagconf.size(); ++j) {
2766 const unsigned di =
diagconf.get(j);
2767 for (
unsigned k = 0; k <
_DIM-di; ++k) {
2776template <
typename T>
2781 for (
i = 0;
i <
dim; ++
i)
2794template <
typename T>
2802#ifndef TBCI_DISABLE_EXCEPT
2803 throw (
BdMatrixErr(
"Trying to invert singular Matrix!"));
2805 STD__ cerr <<
"Trying to invert singular Matrix!" <<
STD__ endl;
2810 for (
unsigned i = 0;
i <
dim;
i++)
2811 for (
unsigned j = 0; j <
dim; j++) {
2812 ri(0) =
i; ci(0) = j;
2813 inv.setval(subdet(ri, ci) * ((
i+j)%2? -d: d), j,
i);
2819#if defined(SMP) && defined(HAVE_LIBNUMA)
2820template <
typename T>
2822 const int node,
bool fault_in,
2823 const unsigned first,
const unsigned last)
2826 const unsigned int sz = bm.
rows();
2829 moved = do_numa_move_pages(node, fault_in,
2830 (
unsigned long)(
diag+first),
2831 (
unsigned long)(
diag+last));
2833 for (
unsigned i = 0;
i < cfg.
size(); ++
i) {
2834 const unsigned off = cfg.
get(
i);
2835 const unsigned dln = sz - off;
2839 moved += do_numa_move_pages(node, fault_in,
2840 (
unsigned long)(
adiag+first),
2841 (
unsigned long)(
adiag+
MIN(dln,last)));
2843 moved += do_numa_move_pages(node, fault_in,
2844 (
unsigned long)(
bdiag+
MAX(0,(
signed)(first-off))),
2845 (
unsigned long)(
bdiag+last-off));
2850template <
typename T>
2851void numa_opt_bdmat_job(
struct thr_ctrl *tc)
2856 const int node = tc->
t_size;
2857 const bool fault_in = tc->
t_off;
2859 const unsigned st = (
unsigned long)tc->
t_par[0];
2860 const unsigned en = (
unsigned long)tc->
t_par[1];
2861 tc->
t_res_l = numa_opt_bdmat_helper(bm, bmcfg, node, fault_in, st, en);
2864template <
typename T>
2870 const unsigned sz = bm.
rows();
2885 numa_opt_bdmat_helper(bm, bmcfg,
2890 const unsigned first = slice_offset(1, n_thr, sz, (
T*)0);
2891 unsigned long st, en = first;
2893 unsigned long res = 0;
2894 for (
unsigned t = 0; t < n_thr-1; ++t) {
2895 st = en; en = slice_offset(t+2, n_thr, sz, (
T*)0);
2898 st, en, &bm, &bmcfg, (
void*)0);
2901 res = numa_opt_bdmat_helper(bm, bmcfg,
2904 for (
unsigned t = 0; t < n_thr-1; ++t) {
2914template <
typename T>
const Vector< T > const Vector< T > const Vector< T > & p
long int Vector< T > & index
void do_bdmat_vec_transmult_lnw_opt(const unsigned start, const unsigned end, TVector< T > *res, const BdMatrix< T > *mat, const Vector< T > *vec)
STD__ istream & operator>>(STD__ istream &istr, BdMatrix< T > &mat)
void do_bdmat_vec_mult_lnw_opt(const unsigned start, const unsigned end, TVector< T > *res, const BdMatrix< T > *mat, const Vector< T > *vec)
int numa_optimize(const BdMatrix< T > &bm, bool fault_in)
void do_bdmat_vec_dotmult(const unsigned start, const unsigned end, TVector< T > *res, const BdMatrix< T > *mat, const Vector< T > *vec)
void do_bdmat_vec_mult(const unsigned start, const unsigned end, TVector< T > *res, const BdMatrix< T > *mat, const Vector< T > *vec)
BdMatrix< T > transpose(BdMatrix< T > &mat)
#define MAY_PREFETCH_R(adr, loc)
void do_bdmat_vec_mult_diagw_exact(const unsigned start, const unsigned end, TVector< T > *res, const BdMatrix< T > *mat, const Vector< T > *vec)
void do_bdmat_vec_transmult(const unsigned start, const unsigned end, TVector< T > *res, const BdMatrix< T > *mat, const Vector< T > *vec)
void do_bdmat_vec_transmult_diagw_exact(const unsigned start, const unsigned end, TVector< T > *res, const BdMatrix< T > *mat, const Vector< T > *vec)
#define MAY_PREFETCH_W(adr, loc)
void do_bdmat_vec_transmult_lnw(const unsigned start, const unsigned end, TVector< T > *res, const BdMatrix< T > *mat, const Vector< T > *vec)
void job_bdmat_vec_transmult(struct thr_ctrl *tc)
void job_bdmat_vec_mult(struct thr_ctrl *tc)
void do_bdmat_vec_mult_lnw(const unsigned start, const unsigned end, TVector< T > *res, const BdMatrix< T > *mat, const Vector< T > *vec)
STD__ ostream & operator<<(STD__ ostream &ostr, const BdMatrix< T > &mat)
#define BCHK(cond, exc, txt, ind, rtval)
#define LIKELY(expr)
branch prediction note that we sometimes on purpose mark the unlikely possibility likely and vice ver...
#define TBCICOMP(n, o, t, s)
#define EXPCHK(cond, exc, txt, ind, rtval)
#define PREFETCH_R(addr, loc)
In case gcc does not yet support __builtin_prefetch(), we have handcoded assembly with gcc for a few ...
#define PREFETCH_W_MANY(addr, loc)
#define BCHKNR(cond, exc, txt, ind)
#define TBCIFILL(n, v, t, s)
#define TBCICOPY(n, o, t, s)
#define PREFETCH_R_MANY(addr, loc)
provides basic Vector functionality but arithmetic operators (+=, - , *, /...).
unsigned long size() const HOT
bool contains(const T &, unsigned long *=0) const
BVector< T > & resize(const BVector< T > &)
Actually it's a resize and copy (some people would expect the assignment op to do this).
BVector< T > & remove(const unsigned long)
const T & getcref(const unsigned long idx) const
tbci_traits< T >::const_refval_type get(const unsigned long idx) const HOT
T & set(const unsigned long idx) HOT
BVector< T > & append(const T &)
performs poorly
BdMatrixErr(const char *t, const long i=0)
BdMatrixErr(const BdMatrixErr &be)
The class BdMatrix is an implementation to store and do operations on sparse Matrices with a band str...
tbci_traits< T >::const_refval_type get(const unsigned int, const unsigned int) const HOT
bool operator==(const BdMatrix< T > &) const
friend void FRIEND_TBCI2__ do_bdmat_vec_mult FGDT(const unsigned, const unsigned, TVector< T > *, const BdMatrix< T > *, const Vector< T > *)
TVector< T > get_col(const unsigned int) const
Construct Vector from column.
BdMatrix< T > & setunit(const T &=1)
BdMatrix< T > & expand(unsigned=0)
const BVector< unsigned int > & getcfg() const
ro access to config BVector
TVector< T > transMult(const Vector< T > &) const HOT
Transposed Matrix-Vector multiplication.
BdMatrix< T > & removediag(const unsigned)
friend void FRIEND_TBCI2__ do_bdmat_vec_transmult FGDT(const unsigned, const unsigned, TVector< T > *, const BdMatrix< T > *, const Vector< T > *)
BdMatrix< T > & reconfig(const BVector< unsigned int > &) COLD
BdMatrix< T > & operator-=(const BdMatrix< T > &)
BdMatrix< T > inverse() const
BdMatrix< T > operator/(const T &) const
BVector< T * > bdiag
pointers to upper and lower diagonals
unsigned int size() const
size: incompatibility to Matrix (!)
BdMatrix< T > & div_rows(const Vector< T > &)
friend void FRIEND_TBCI2__ do_bdmat_vec_transmult_diagw_exact FGDT(const unsigned, const unsigned, TVector< T > *, const BdMatrix< T > *, const Vector< T > *)
friend void FRIEND_TBCI2__ do_bdmat_vec_mult_diagw_exact FGDT(const unsigned, const unsigned, TVector< T > *, const BdMatrix< T > *, const Vector< T > *)
void do_copy(const BdMatrix< T > &)
T * check(const unsigned r, const unsigned c) const HOT
T & operator()(const unsigned int, const unsigned int) HOT
rw member access
BdMatrix< T > & operator+=(const BdMatrix< T > &)
friend void FRIEND_TBCI2__ do_bdmat_vec_mult_lnw_opt FGDT(const unsigned, const unsigned, TVector< T > *, const BdMatrix< T > *, const Vector< T > *)
T * check_internal(const unsigned r, const unsigned c) const HOT
bool operator!=(const BdMatrix< T > &ma) const
BdMatrix< T > & transpose()
transpose() does change the object!
unsigned int getmaxoff() const
max distance from main diagonal
BdMatrix< T > operator-() const
unsigned int maxoff
size, max offset from diag
friend void FRIEND_TBCI2__ do_bdmat_vec_transmult_lnw_opt FGDT(const unsigned, const unsigned, TVector< T > *, const BdMatrix< T > *, const Vector< T > *)
BdMatrix(const BdMatrix< U > &bm)
BdMatrix< T > & operator*=(const T &)
BdMatrix< T > & setval(const T &v, const unsigned int r, const unsigned int c)
autoinsert, sparse class compatible
void free_diags(const unsigned)
Implementation alternative: All memory is allocated in one big chunk (default).
TVector< T > dotMult(const Vector< T > &) const
TVector< T > get_row(const unsigned int) const
Construct Vector from row.
BdMatrix< T > & set_row(const Vector< T > &val, const unsigned, const unsigned=0)
Overwrite row with Vector.
BdMatrix< T > & adddiag(const unsigned)
friend void FRIEND_TBCI2__ do_bdmat_vec_mult_lnw FGDT(const unsigned, const unsigned, TVector< T > *, const BdMatrix< T > *, const Vector< T > *)
BdMatrix< T > & operator=(const BdMatrix< T > &)
The assignment operators in TBCI are NOT resizing.
unsigned int rows() const
number of rows
BdMatrix< T > & operator/=(const T &)
BdMatrix< T > & autoinsert(const T &, const unsigned, const unsigned)
BdMatrix< T > operator+(const BdMatrix< T > &) const
BdMatrix< T > operator*(const BdMatrix< T > &) const
void destroy()
destroy object explicitly
friend void FRIEND_TBCI2__ do_bdmat_vec_dotmult FGDT(const unsigned, const unsigned, TVector< T > *, const BdMatrix< T > *, const Vector< T > *)
BdMatrix< T > & resize(const unsigned int)
unsigned int noelem() const
Number of elements that are actually stored.
friend NOINST int lu_decomp FGDT(BdMatrix< T > &) HOT
void constructor(const unsigned int, const bool=true)
BdMatrix< T > & div_row(const T &, const unsigned)
static const char * mat_info()
BdMatrix< T > & mult_rows(const Vector< T > &)
Linewise multiplication.
BdMatrix< T > & mult_row(const T &, const unsigned)
BdMatrix< T > & fill(const T &=0)
BdMatrix< T > & resize(const BdMatrix< T > &bdm)
Resizing assignment.
friend void FRIEND_TBCI2__ do_bdmat_vec_transmult_lnw FGDT(const unsigned, const unsigned, TVector< T > *, const BdMatrix< T > *, const Vector< T > *)
const BdMatrix< T > & setoutopts(int w=5, int p=4, char f=' ') const
friend NOINST void gaussj FGDT(const BdMatrix< T > &, const Matrix< T > &)
BVector< unsigned > diagconf
configuration
unsigned int columns() const
number of columns
T aligned_value_type TALIGN(MIN_ALIGN2)
unsigned int rows() const
number of rows
unsigned int columns() const
number of columns
T & setval(const T &val, const unsigned int r, const unsigned int c)
Temporary Base Class Idiom: Class TVector is used for temporary variables.
tbci_traits< T >::const_refval_type get(const unsigned long i) const
const T & getcref(const unsigned long i) const
unsigned long size() const
T & set(const T &val, const unsigned long i) const
T & setval(const unsigned long i) const
double fabs(const TBCI__ cplx< T > &c)
NAMESPACE_END NAMESPACE_CPLX TBCI__ cplx< T > conj(const TBCI__ cplx< T > &c)
NAMESPACE_TBCI char gaussj(Matrix< T > &a, Matrix< T > &b)
F_TMatrix< double > inv(const F_Matrix< double > &A, int overwriteA=0)
#define TBCIDELETE(t, v, sz)
#define REALLOC(v, os, t, s)
int lu_decomp(Matrix< T > &) HOT
LU decomposes the TBCI::Matrix mat.
const unsigned TMatrix< T > * res
void thread_start_off(const int thr_no, thr_job_t job, const unsigned long off, const unsigned long sz,...)
void thread_wait(const int thr_no, struct job_output *out)
struct thr_struct * threads
void(* thr_job_t)(struct thr_ctrl *)
Before the double inclusion guard on purpose!
unsigned int do_exactsum2()