16 #include "tbci/bvector.h"
20 #include "tbci/vec_kern_unr_pref.h"
27 # if defined(__AVX__) && !defined(NO_AVX)
28 # include "tbci/vec_kern_special2.h"
30 # include "tbci/vec_kern_special.h"
35 #if !defined(NO_GD) && !defined(AUTO_DECL)
36 # include "tbci/vector_gd.h"
43 template <
typename T>
class Vector;
44 template <
typename T>
class TVector;
45 template <
typename T>
class TSVector;
46 template <
typename T>
class Matrix;
47 template <
typename T>
class TMatrix;
48 template <
typename T>
class TSMatrix;
49 template <
typename T>
class F_Matrix;
53 template <
typename T>
class CRMatrix;
55 template <
typename T>
class BdMatrix;
59 # pragma interface "vector.h"
90 { this->
dim = tv.dim; this->
vec = tv.vec; }
104 unsigned long size ()
const {
return this->
dim; }
120 {
Vector<T> tv(*
this);
return (tv == v); }
189 typename tbci_traits<T>::const_refval_type
190 get (
const unsigned long i)
const {
return this->
vec[
i]; }
193 T&
setval (
const T& val,
const unsigned long i)
const {
return this->
vec[
i] = val; }
194 T&
set (
const T& val,
const unsigned long i)
const {
return this->
vec[
i] = val; }
219 static const char*
vec_info() {
return "TVector"; }
221 friend STD__ ostream& operator << FGD (STD__ ostream&, const TVector<T>&);
223 #ifndef HAVE_PROMOTION_BUG
241 template <
typename T>
252 template <
typename T>
253 inline STD__ ostream& operator << (STD__ ostream& os, const TVector<T>& tv)
254 {
Vector<T> v(tv);
return os << (BVector<T>)v; }
256 template <
typename T>
264 tsv.
eval (this->vec);
276 #if !defined(SMP) || defined(NOSMP_VECVEC)
282 template <typename T>
290 template <typename T>
298 template <typename T>
302 (
const T*)(tc->
t_par[1]));
306 template <typename T>
310 (
const T*)(tc->
t_par[1]));
314 template <typename T>
318 (
const T*)(tc->
t_par[1]));
322 template <typename T>
330 template <typename T>
338 template <typename T>
346 template <typename T>
354 template <typename T>
362 template <typename T>
370 template <typename T>
378 template <typename T>
382 *(
const T*)(tc->
t_par[1]));
386 template <typename T>
390 *(
const T*)(tc->
t_par[1]));
394 template <typename T>
398 *(
const T*)(tc->
t_par[1]));
402 template <typename T>
406 *(
const T*)(tc->
t_par[1]));
410 template <typename T>
414 *(
const T*)(tc->
t_par[1]));
418 template <typename T>
422 *(
const T*)(tc->
t_par[1]));
426 template <typename T>
430 *(
const T*)(tc->
t_par[1]));
436 template <typename T>
441 *(
const T*)(tc->
t_par[3]));
445 template <typename T>
450 *(
const T*)(tc->
t_par[3]));
454 template <typename T>
463 template <typename T>
468 *(
const T*)(tc->
t_par[3]));
472 template <typename T>
477 *(
const T*)(tc->
t_par[3]));
481 template <typename T>
490 template <typename T>
495 *(
const T*)(tc->
t_par[2]));
499 template <typename T>
504 *(
const T*)(tc->
t_par[2]));
508 template <typename T>
513 *(
const T*)(tc->
t_par[2]));
518 template <typename T>
527 template <typename T>
536 template <typename T>
545 template <typename T>
554 template <typename T>
575 #if !defined(SMP) || defined(NOSMP_VECVEC)
576 #define STD_SMP_TEMPLATE2V(oper, dm, a1, a2) \
577 do_##oper <T> (dm, a1, a2)
578 #define STD_SMP_TEMPLATE2C(oper, dm, a1, a2) \
579 do_##oper <T> (dm, a1, a2)
580 #define STD_SMP_TEMPLATE3VV(oper, dm, a1, a2, a3) \
581 do_##oper <T> (dm, a1, a2, a3)
582 #define STD_SMP_TEMPLATE3VC(oper, dm, a1, a2, a3) \
583 do_##oper <T> (dm, a1, a2, a3)
584 #define STD_SMP_TEMPLATE3CC(oper, dm, a1, a2, a3) \
585 do_##oper <T> (dm, a1, a2, a3)
586 #define STD_SMP_TEMPLATE4V(oper, dm, a1, a2, a3, a4) \
587 do_##oper <T> (dm, a1, a2, a3, a4)
588 #define STD_SMP_TEMPLATE4C(oper, dm, a1, a2, a3, a4) \
589 do_##oper <T> (dm, a1, a2, a3, a4)
590 #define STD_SMP_TEMPLATE5(oper, dm, a1, a2, a3, a4, a5) \
591 do_##oper <T> (dm, a1, a2, a3, a4, a5)
595 #define _SMP_TMPL2V(oper,st,en,a1,a2,a3,a4,a5) do_##oper <T> (en-st, a1+st, a2+st)
596 #define _JOB_TMPL2V(oper,st,en,a1,a2,a3,a4,a5) job_##oper <T>, en-st, a1+st, a2+st
597 #define _SMP_TMPL2C(oper,st,en,a1,a2,a3,a4,a5) do_##oper <T> (en-st, a1+st, a2)
598 #define _JOB_TMPL2C(oper,st,en,a1,a2,a3,a4,a5) job_##oper <T>, en-st, a1+st, &a2
599 #define _SMP_TMPL3VV(oper,st,en,a1,a2,a3,a4,a5) do_##oper <T> (en-st, a1+st, a2+st, a3+st)
600 #define _JOB_TMPL3VV(oper,st,en,a1,a2,a3,a4,a5) job_##oper <T>, en-st, a1+st, a2+st, a3+st
601 #define _SMP_TMPL3VC(oper,st,en,a1,a2,a3,a4,a5) do_##oper <T> (en-st, a1+st, a2+st, a3)
602 #define _JOB_TMPL3VC(oper,st,en,a1,a2,a3,a4,a5) job_##oper <T>, en-st, a1+st, a2+st, &a3
603 #define _SMP_TMPL3CC(oper,st,en,a1,a2,a3,a4,a5) do_##oper <T> (en-st, a1+st, a2, a3)
604 #define _JOB_TMPL3CC(oper,st,en,a1,a2,a3,a4,a5) job_##oper <T>, en-st, a1+st, &a2, &a3
605 #define _SMP_TMPL4V(oper,st,en,a1,a2,a3,a4,a5) do_##oper <T> (en-st, a1+st, a2+st, a3+st, a4)
606 #define _JOB_TMPL4V(oper,st,en,a1,a2,a3,a4,a5) job_##oper <T>, en-st, a1+st, a2+st, a3+st, &a4
607 #define _SMP_TMPL4C(oper,st,en,a1,a2,a3,a4,a5) do_##oper <T> (en-st, a1+st, a2+st, a3, a4)
608 #define _JOB_TMPL4C(oper,st,en,a1,a2,a3,a4,a5) job_##oper <T>, en-st, a1+st, a2+st, &a3, &a4
609 #define _SMP_TMPL5(oper,st,en,a1,a2,a3,a4,a5) do_##oper <T> (en-st, a1+st, a2+st, a3+st, a4, a5)
610 #define _JOB_TMPL5(oper,st,en,a1,a2,a3,a4,a5) job_##oper <T>, en-st, a1+st, a2+st, a3+st, &a4, &a5
617 #define STD_SMP_TEMPLATE(oper, dm, a1, a2, a3, a4, a5, SMP_TMPL, JOB_TMPL, p2, p3) \
619 const unsigned n_thr = threads_avail (dm / SMP_VECSLICE2); \
620 update_n_thr(n_thr); \
621 if (LIKELY(n_thr < 2)) { \
622 SMP_TMPL(oper, 0, dm, a1, a2, a3, a4, a5); \
624 PREFETCH_W_MANY(a1, 3); \
627 const unsigned long first = slice_offset(2, n_thr, dm, a1); \
628 unsigned long st, en = first; \
632 for (_t = 0; _t < n_thr-1; ++_t) { \
633 st = en; en = slice_offset(_t+2, n_thr, dm, a1); \
634 thread_start (_t, JOB_TMPL(oper, st, en, a1, a2, a3, a4, a5), (void*)0); \
637 SMP_TMPL(oper, 0UL, first, a1, a2, a3, a4, a5); \
640 for (_t = 0; _t < n_thr-1; ++_t) \
644 #define STD_SMP_TEMPLATE2V(oper, dm, a1, a2) \
645 STD_SMP_TEMPLATE(oper, dm, a1, a2, NULL, NULL, NULL, _SMP_TMPL2V, _JOB_TMPL2V, PREFETCH_R_MANY(a2,2);,)
646 #define STD_SMP_TEMPLATE2C(oper, dm, a1, a2) \
647 STD_SMP_TEMPLATE(oper, dm, a1, a2, NULL, NULL, NULL, _SMP_TMPL2C, _JOB_TMPL2C,,)
648 #define STD_SMP_TEMPLATE3VV(oper, dm, a1, a2, a3) \
649 STD_SMP_TEMPLATE(oper, dm, a1, a2, a3, NULL, NULL, _SMP_TMPL3VV, _JOB_TMPL3VV, PREFETCH_R_MANY(a2,2);, PREFETCH_R_MANY(a3,2);)
650 #define STD_SMP_TEMPLATE3VC(oper, dm, a1, a2, a3) \
651 STD_SMP_TEMPLATE(oper, dm, a1, a2, a3, NULL, NULL, _SMP_TMPL3VC, _JOB_TMPL3VC, PREFETCH_R_MANY(a2,2);,)
652 #define STD_SMP_TEMPLATE3CC(oper, dm, a1, a2, a3) \
653 STD_SMP_TEMPLATE(oper, dm, a1, a2, a3, NULL, NULL, _SMP_TMPL3CC, _JOB_TMPL3CC,,)
654 #define STD_SMP_TEMPLATE4V(oper, dm, a1, a2, a3, a4) \
655 STD_SMP_TEMPLATE(oper, dm, a1, a2, a3, a4, NULL, _SMP_TMPL4V, _JOB_TMPL4V, PREFETCH_R_MANY(a2,2);,)
656 #define STD_SMP_TEMPLATE4C(oper, dm, a1, a2, a3, a4) \
657 STD_SMP_TEMPLATE(oper, dm, a1, a2, a3, a4, NULL, _SMP_TMPL4C, _JOB_TMPL4C, PREFETCH_R_MANY(a2,2);,)
658 #define STD_SMP_TEMPLATE5(oper, dm, a1, a2, a3, a4, a5) \
659 STD_SMP_TEMPLATE(oper, dm, a1, a2, a3, a4, a5, _SMP_TMPL5, _JOB_TMPL5, PREFETCH_R_MANY(a2,2);, PREFETCH_R_MANY(a3,2);)
665 template <
typename T>
672 template <
typename T>
679 template <
typename T>
687 template <
typename T>
696 template <
typename T>
704 template <
typename T>
713 template <
typename T>
723 template <
typename T>
736 template <
typename T>
742 template <
typename T>
752 template <
typename T>
763 template <
typename T>
776 template <typename T>
780 const unsigned long dim = b.
size();
788 template <typename T>
792 const unsigned long dim = b.
size();
800 template <typename T>
811 template <typename
T>
814 T dv = a / b.fabssqr();
820 template <
typename T>
823 template <
typename T>
828 template <
typename T>
835 template <
typename T>
842 template <
typename T>
846 this->vec[
i] = -this->vec[
i];
850 template <
typename T>
861 template <
typename T>
870 template <
typename T>
880 template <
typename T>
890 template <
typename T>
900 template <
typename T>
911 template <
typename T>
926 template <
typename T>
941 template <
typename T>
955 template <
typename T>
958 BCHK (i1 < i0,
VecErr, Slicing needs ordered arguments, i0, *
this);
959 BCHK (i0 >= this->
dim,
VecErr, Slice: arg1 out of range, i0, *
this);
960 BCHK (i1 > this->
dim,
VecErr, Slice: arg2 out of range, i1, *
this);
972 template <
typename T>
975 BCHK (wh >= this->
dim,
VecErr, Tried to change not-exitsting idx, wh, *
this);
989 template <typename T>
996 template <typename T>
1003 template <typename T>
1010 template <typename T>
1025 template <
typename T>
1032 void clone (
const bool =
false,
const T* = 0)
const;
1034 void detach (
const T* = 0)
const;
1060 TSVector (
const TVector<T>& tv,
const T& f = (
T)1)
1062 TSVector (
const Vector<T>& v,
const T& f = (
T)1)
1072 T get (
const unsigned long i)
const HOT {
return vec[
i] *
fac; }
1083 fac = (
T)1;
mut =
true;
return *
this; }
1099 TVector<T>
operator + (
const Vector<T>&) const HOT;
1108 #ifdef TSV_OP_FRIENDS
1118 TVector<T>
incr (
const unsigned long,
const T = (
T)1)
const;
1120 inline unsigned long size ()
const {
return dim; }
1122 T min () { Vector<T> th(*
this);
return th.
min (); }
1123 T max () { Vector<T> th(*
this);
return th.
max (); }
1134 #ifdef TSVEC_NEED_DOT
1147 #ifndef HAVE_GCC295_FRIEND_BUG
1159 bool operator == (
const TVector<T>& tv)
const { Vector<T>
v(tv);
return (*
this == v); }
1160 bool operator != (
const TVector<T>& tv)
const {
return !(*
this == tv); }
1167 friend STD__ ostream& operator << FGD (STD__ ostream&, const TSVector<T>&);
1171 template <
typename T>
1174 if (
LIKELY(!mut || !dim))
return;
1178 template <
typename T>
1181 if (
LIKELY(!dim || (mut && !vv)))
1185 vec =
const_cast<T*
> (vv);
1189 dim = 0; mut =
false;
1195 template <typename T>
1199 template <
typename T>
1202 if (
LIKELY(!dim || (mut && !vv)))
1204 const T* oldv = vec;
bool oldm = mut;
detach (vv);
1206 #if 0 //def __i386__
1207 for (
REGISTER unsigned long t = 0; t < dim; t++)
1208 vec[t] = oldv[t] *
fac;
1220 template <
typename T>
1225 #if 0 //def __i386__
1226 for (
REGISTER unsigned long t = 0; t < dim; t++)
1240 template <
typename T>
1267 template <
typename T>
1297 template <
typename T>
1308 template <
typename T>
1319 template <
typename T>
1324 tv.
vec = vec; tv.
dim = dim;
1334 template <
typename T>
1339 tv.
vec = vec; tv.
dim = dim;
1350 template <
typename T>
1355 tv.
vec = vec; tv.
dim = dim;
1369 template <
typename T>
1374 tv.
vec = vec; tv.
dim = dim;
1389 template <
typename T>
1394 tv.
vec = vec; tv.
dim = dim;
1403 template <
typename T>
1408 tv.
vec = vec; tv.
dim = dim;
1419 template <
typename T>
1424 tv.
vec = vec; tv.
dim = dim;
1434 template <typename T>
1439 template <
typename T>
1444 tv.
vec = vec; tv.
dim = dim;
1454 template <typename T>
1458 template <
typename T>
1459 STD__ ostream& operator << (STD__ ostream& os, const TSVector<T>&
ts)
1461 for (
unsigned long i = 0;
i < ts.
dim; ++
i)
1462 os << ts.
vec[
i]*ts.
fac <<
" ";
1467 template <
typename T>
1470 TVector<T> tv (*
this);
1471 BCHK (wh >= dim,
VecErr, Tried to change not-exitsting idx, wh, tv);
1478 template <typename T>
1487 template <typename T>
1489 {
return ts.fabs(); }
1492 template <typename T>
1494 {
return ts.abs(); }
1525 template <
typename T>
1555 #if defined(__GNUC__) && defined VEC_CPLX_CONSTR
1562 typename tbci_traits<T>::const_refval_type
1565 typename tbci_traits<T>::const_refval_type
1581 TVector<T>
slice (
unsigned long,
unsigned long)
const;
1583 TVector<T>
incr (
const unsigned long,
const T = (
T)1)
const;
1596 #ifdef TBCI_NEW_BRACKET
1608 bool operator == (
const TVector<T>& tv)
const {
return (tv == *
this); }
1609 bool operator != (
const TVector<T>& tv)
const {
return !(*
this == tv); }
1613 bool operator < (const Vector<T>&
v)
const {
return !((*this) >=
v); }
1614 bool operator > (
const Vector<T>& v)
const {
return !((*this) <=
v); }
1616 TVector<T>
operator + (
const Vector<T>&) const HOT;
1634 T operator * (const
Vector<T>&) const;
1636 { Vector<T>
v (tv);
return this->
operator * (v); }
1641 T min (
unsigned long& pos)
const;
1642 T max (
unsigned long& pos)
const;
1660 friend STD__ ostream& operator << (STD__ ostream& os, const Vector<T>&
v);
1668 #ifndef HAVE_PROMOTION_BUG
1679 INST(template <typename T>
friend inline STD__ ostream&
operator << (
STD__ ostream& os,
const Vector<T>& v);)
1680 template <typename T>
1681 inline STD__ ostream&
operator << (
STD__ ostream& os,
const Vector<T>& v)
1682 {
return os << (BVector<T>&)v; }
1686 template <
typename T>
1689 this->
vec = (
T*)0; this->
dim = (unsigned)va; this->keep =
false;
1697 #if !defined(__clang__) || !defined(CPLX)
1698 for (
unsigned long i=0;
i < this->
dim; ++
i)
1699 this->
vec[
i] = va_arg (vl,
T);
1701 throw VecErr(
"vararg not supported for cplx in clang");
1702 #warning no vararg support with cplx numbers and clang
1708 #if defined(__GNUC__) && defined VEC_CPLX_CONSTR
1709 template <
typename T>
1712 for (
unsigned long i = 0;
i < this->
dim; ++
i)
1717 template <
typename T>
1723 if (
LIKELY(this->dim == 0)) {
1745 template <
typename T>
1748 for (
unsigned long i = 0;
i < this->
dim; ++
i)
1753 template <
typename T>
1756 for (
unsigned long i = 0;
i < this->
dim; ++
i)
1762 #if !defined(SMP) || defined(NOSMP_VECSCALAR)
1765 template <
typename T>
1766 inline T dot (
const Vector<T>& a,
const Vector<T>& b)
1771 do_vec_dot_exact <T> (a.
dim, a.
vec, b.
vec, s);
1773 do_vec_dot_quick <T> (a.
dim, a.
vec, b.
vec, s);
1775 # ifdef SMPVEC_DEBUG
1782 template <
typename T>
1788 do_vec_mult_exact <T> (this->
dim, this->
vec, a.
vec, s);
1790 do_vec_mult_quick <T> (this->
dim, this->
vec, a.
vec, s);
1791 # ifdef SMPVEC_DEBUG
1801 template <typename T>
1804 if (
sizeof(
T) <= 16)
1808 do_vec_dot_exact (tc->
t_size, (
const T*)(tc->
t_par[0]),
1811 do_vec_dot_quick (tc->
t_size, (
const T*)(tc->
t_par[0]),
1816 template <
typename T>
1817 inline T dot (
const Vector<T>& a,
const Vector<T>& b)
1820 T res =
T(0);
unsigned long sz = a.
size();
1823 update_n_thr(n_thr);
1826 do_vec_dot_exact<T> (sz, a.
vec, b.
vec,
res);
1828 do_vec_dot_quick<T> (sz, a.
vec, b.
vec,
res);
1834 const unsigned long first = slice_offset(1, n_thr, sz, a.
vec);
1835 unsigned long st, en = first;
1839 results =
new T[n_thr];
1842 for (
unsigned t = 0; t < n_thr-1; ++t) {
1843 st = en; en = slice_offset(t+2, n_thr, sz, a.
vec);
1845 a.
vec+st, b.
vec+st, results+t, (
void*)0);
1849 do_vec_dot_exact<T> (first, a.
vec, b.
vec,
res);
1851 do_vec_dot_quick<T> (first, a.
vec, b.
vec,
res);
1855 for (
unsigned t = 0; t < n_thr-1; ++t) {
1867 comp += (tmp - res) -
y;
1872 results[t]: *((
T*)(
threads[t].t_res_dummy)));
1882 STD__ cerr <<
"dot: " <<
STD__ setprecision (16) << res <<
STD__ endl;
1888 template <typename T>
1893 do_vec_mult_exact (tc->
t_size, (
const T*)(tc->
t_par[0]),
1896 do_vec_mult_quick (tc->
t_size, (
const T*)(tc->
t_par[0]),
1901 template <
typename T>
1905 T res =
T(0);
unsigned long sz = this->
dim;
1908 update_n_thr(n_thr);
1911 do_vec_mult_exact<T> (sz, this->
vec, a.
vec,
res);
1913 do_vec_mult_quick<T> (sz, this->
vec, a.
vec,
res);
1919 const unsigned long first = sz/n_thr - (sz/n_thr)%4;
1920 unsigned long st, en = first;
1922 T* results =
new T[n_thr];
1925 for (
unsigned t = 0; t < n_thr-1; ++t) {
1926 st = en; en = (t+2)*sz / n_thr;
1930 this->
vec+st, a.
vec+st, results+t, (
void*)0);
1934 do_vec_dot_exact<T> (first, this->
vec, a.
vec,
res);
1936 do_vec_dot_quick<T> (first, this->
vec, a.
vec,
res);
1940 for (
unsigned t = 0; t < n_thr-1; ++t) {
1942 T tmp = res + results[t];
1944 comp += (tmp -
res) - results[t];
1949 results[t]: *((
T*)(
threads[t].t_res_dummy)));
1958 STD__ cerr <<
"mlt: " <<
STD__ setprecision (16) << res <<
STD__ endl;
1968 bool par_comp (
const Vector<T>& v1,
const Vector<T>& v2))
1970 const unsigned long dim = v1.
size();
1975 if (
LIKELY(v1(0) != v2(0)))
1981 void par_copy (Vector<T>& v1,
const Vector<T>& v2))
1983 const unsigned long dim = v1.
size();
1990 INST(template <typename T>
class TVector friend bool par_comp (
const Vector<T>&,
const Vector<T>&);)
1991 INST(template <typename T>
class TVector friend bool par_comp (
const Vector<T>&, TVector<T>);)
1992 INST(template <typename T>
class TVector friend bool par_comp (TVector<T>,
const Vector<T>&);)
1995 template <typename T>
1996 inline bool par_comp (
const Vector<T>& v1, TVector<T> v2)
1998 return par_comp (v1, (
const Vector<T>)v2);
2002 template <
typename T>
2003 inline bool par_comp (TVector<T> v1,
const Vector<T>& v2)
2005 return par_comp ((
const Vector<T>)v1, v2);
2008 template <
typename T>
2011 return par_comp ((
const Vector<T>)v1, (
const Vector<T>)v2);
2014 template <
typename T>
2017 const unsigned long dim = vec.
size();
2020 _par_fill_fn(dim, &vec.
setval(0), fn, par);
2023 template <
typename T>
2028 return (
T)((*
this *
a) / div);
2031 template <
typename T>
2034 if (
LIKELY(this->dim == 0))
2037 for (
REGISTER unsigned long t = 1; t < this->
dim; t++)
2043 template <
typename T>
2046 if (
LIKELY(this->dim == 0))
2048 BCHK (pos >= this->dim,
VecErr, pos outside range, pos, 0);
2050 for (
REGISTER unsigned long t = pos+1; t < this->
dim; t++)
2052 min = this->
vec[t]; pos = t;
2057 template <
typename T>
2060 if (
LIKELY(this->dim == 0))
2063 for (
REGISTER unsigned long t = 1; t < this->
dim; t++)
2069 template <
typename T>
2072 if (
LIKELY(this->dim == 0))
2074 BCHK (pos >= this->dim,
VecErr, pos outside range, pos, 0);
2076 for (
REGISTER unsigned long t = pos+1; t < this->
dim; t++)
2078 max = this->
vec[t]; pos = t;
2085 template <typename T>
2095 template <
typename T>
2098 T res = 0.0;
const unsigned long sz = this->
dim;
2101 update_n_thr(n_thr);
2104 do_vec_sum_exact<T> (sz, this->
vec,
res);
2106 do_vec_sum_quick<T> (sz, this->
vec,
res);
2110 const unsigned long first = slice_offset(1, n_thr, sz, this->
vec);
2111 unsigned long st, en = first;
2113 T* results =
new T[n_thr];
2115 for (
unsigned t = 0; t < n_thr-1; ++t) {
2116 st = en; en = slice_offset(t+2, n_thr, sz, this->
vec);
2118 this->
vec+st, results+t, (
void*)0);
2122 do_vec_sum_exact<T> (first, this->
vec,
res);
2124 do_vec_sum_quick<T> (first, this->
vec,
res);
2127 for (
unsigned t = 0; t < n_thr-1; ++t) {
2129 T tmp = res + results[t];
2131 comp += (tmp -
res) - results[t];
2142 template <
typename T>
2147 do_vec_sum_exact<T> (this->
dim, this->
vec,
sum);
2149 do_vec_sum_quick<T> (this->
dim, this->
vec,
sum);
2156 if (!this->dim)
return 0.0;
2158 for (
unsigned long i = 1;
i < this->
dim; ++
i)
2168 template <
typename T>
2171 TVector<T> tv (this->dim);
2178 template <
typename T>
2181 TVector<T> tv (this->dim);
2190 template <
typename T>
2200 template <
typename T>
2212 template <
typename T>
2227 template <
typename T>
2243 template <
typename T>
2246 TVector<T> tv (this->dim);
2252 template <
typename T>
2255 TVector<T> tv (this->dim);
2262 template <
typename T>
2265 return TSVector<T> (*
this,
a);
2268 template <
typename T>
2271 BCHK (a==(
T)0,
VecErr, Division by zero in TSVec /
T, 0, TSVector<T> (*
this, 1));
2272 return TSVector<T> (*
this, (
T)1 / a);
2276 INST(template <typename T>
class Vector friend TVector<T>
operator + (
const T&,
const Vector<T>&);)
2278 template <typename T>
2279 inline TVector<T>
operator + (
const T& a,
const Vector<T>& v)
2281 const unsigned long dim = v.
size();
2282 TVector<T> tv (dim);
2283 const T*
const vvec(&v.
getcref(0));
2284 T*
const tvvec(&tv.
setval(0));
2291 INST(template <typename T>
class Vector friend TVector<T>
operator - (
const T&,
const Vector<T>&);)
2292 template <typename T>
2293 inline TVector<T>
operator - (
const T& a,
const Vector<T>& v)
2295 const unsigned long dim = v.
size();
2296 TVector<T> tv (dim);
2297 const T*
const vvec(&v.
getcref(0));
2298 T*
const tvvec(&tv.
setval(0));
2306 INST(template <typename T>
class Vector friend TSVector<T>
operator * (
const T&,
const Vector<T>&);)
2307 template <typename T>
2308 inline TSVector<T>
operator * (
const T& a,
const Vector<T>& b)
2310 return TSVector<T> (
b,
a);
2315 INST(template <typename T>
class Vector friend TSVector<T>
operator / (
const T&,
const Vector<T>&);)
2317 template <typename
T>
2322 return TSVector<T> (
b, dv);
2325 template <
typename T>
2328 TVector<T> tv (this->dim);
2329 do_vec_neg_vec<T> (this->
dim, tv.
vec, this->
vec);
2334 template <
typename T>
2337 BCHK (i1 < i0,
VecErr, Slicing needs ordered arguments, i0, *
this);
2338 BCHK (i0 >= this->dim,
VecErr, Slice: arg1 out of range, i0, *
this);
2339 BCHK (i1 > this->dim,
VecErr, Slice: arg2 out of range, i1, *
this);
2340 TVector<T> tv (i1 - i0);
2347 template <
typename T>
2350 TVector<T> tv (*
this);
2351 BCHK (wh >= this->dim,
VecErr, Tried to change not-exitsting idx, wh, tv);
2358 #define COST_VECFABSSQR(d) (d*(COST_UNIT_LOAD+COST_MULT+COST_ADD+COST_LOOP))
2359 #define COST_VECSCALAR(d) (d*(2*COST_UNIT_LOAD+COST_MULT+COST_ADD+COST_LOOP))
2363 INST(template <typename T>
class Vector friend double fabssqr (
const Vector<T>&);)
2364 template <typename T>
2369 #if !defined(SMP) || defined(NOSMP_VECFABS)
2371 template <
typename T>
2376 do_vec_fabssqr_exact (this->dim, this->
vec, res);
2378 do_vec_fabssqr_quick (this->dim, this->
vec, res);
2380 STD__ cerr <<
"fabssqr: " <<
STD__ setprecision (16) << res <<
STD__ endl;
2388 template <typename T>
2398 template <
typename T>
2401 double res = 0.0;
unsigned long sz = this->
dim;
2404 update_n_thr(n_thr);
2407 do_vec_fabssqr_exact<T> (sz, this->
vec,
res);
2409 do_vec_fabssqr_quick<T> (sz, this->
vec,
res);
2413 const unsigned long first = slice_offset(1, n_thr, sz, this->
vec);
2414 unsigned long st, en = first;
2417 for (
unsigned t = 0; t < n_thr-1; ++t) {
2418 st = en; en = slice_offset(t+2, n_thr, sz, this->
vec);
2420 this->
vec+st, (
void*)0);
2424 do_vec_fabssqr_exact<T> (first, this->
vec,
res);
2426 do_vec_fabssqr_quick<T> (first, this->
vec,
res);
2429 for (
unsigned t = 0; t < n_thr-1; ++t) {
2431 double tmp = res +
y;
2433 comp += (tmp -
res) - y;
2440 STD__ cerr <<
"fabssqr: " <<
STD__ setprecision (16) << res <<
STD__ endl;
2448 template <typename T>
2464 #ifdef INC_VEC_EXTRA
2474 #ifdef VEC_INT_DIV_SUPPORT
2482 double dv = (double)a / b.
fabssqr ();
2484 for (
unsigned long i = 0; i < b.
size (); ++
i)
2485 b.
set((
int)(b.
get(i) * dv), i);
2501 double dv = (double)a / b.
fabssqr ();
2504 for (
unsigned long i = 0; i < b.
size (); ++
i)
2505 r.set ((
int)(dv * b.
get(i)), i);
2514 for (
unsigned long i = 0; i < this->
dim; ++
i)
2515 r.vec[i] = this->vec[i] / a;
2538 double dv = (double)a / b.
fabssqr ();
2540 for (
unsigned long i = 0; i < b.
size (); ++
i)
2541 b.
set((
unsigned)(b.
get(i) * dv), i);
2549 double dv = (double)a / b.
fabssqr ();
2552 for (
unsigned long i = 0; i < b.
size (); ++
i)
2553 r.
set ((
unsigned)(dv * b.
get(i)), i);
2562 for (
unsigned long i = 0; i < this->
dim; ++
i)
2563 r.vec[i] = this->vec[i] / a;
TSVector< T > operator-()
#define TBCICOPY(n, o, t, s)
TVector(const T &val, const unsigned long d)
bool operator<=(const BVector< T > &bv) const
T sum(const FS_Vector< dims, T > &fv)
T & operator()(const unsigned long) HOT
Vector< T > & fill(const T &v)
TVector< T > incr(const unsigned long, const T=(T) 1) const
T & setval(const T &val, const unsigned long i) const
abstract base class (signature) for Vectors with arithmetics
void job_val_div_vec(struct thr_ctrl *tc)
vec = val/self;
void do_mat_tsv_mult(const unsigned start, const unsigned end, TVector< T > *res, const Matrix< T > *mat, const TSVector< T > *vec)
#define STD_SMP_TEMPLATE3VV(oper, dm, a1, a2, a3)
bool contains(const T &v) const
void do_mat_vec_mult(const unsigned start, const unsigned end, TVector< T > *res, const Matrix< T > *mat, const Vector< T > *vec)
void job_val_vec_mul(struct thr_ctrl *tc)
vec = val * vec;
friend NOINST void FRIEND_TBCI2__ do_mat_tsv_mult FGD(const unsigned start, const unsigned end, TVector< T > *res, const Matrix< T > *mat, const TSVector< T > *rsv)
T & setval(const unsigned long i) const
void job_val_vec_div(struct thr_ctrl *tc)
vec = val / vec;
void job_svc_vec_sub(struct thr_ctrl *tc)
vec = s*vec - vec;
provides basic Vector functionality but arithmetic operators (+=, - , *, /...).
T operator[](const unsigned long i)
void job_vec_add_svc(struct thr_ctrl *tc)
vec += s*vec;
static const char * vec_info()
TSVector(const TSVector< T > &ts)
TVector< T > operator+(const Vector< T > &) const HOT
TV = V + V.
void clone(const bool=false, const T *=0) const
TVector(const BVector< U > &bv)
#define VEC_INT_DIV_SUPPORT
BVector< T > & fill(const T &) HOT
The class BdMatrix is an implementation to store and do operations on sparse Matrices with a band str...
STD__ istream & operator>>(STD__ istream &istr, BdMatrix< T > &mat)
BdMatrix< T > & operator+=(const BdMatrix< T > &)
bool operator==(const BVector< T > &) const HOT
KG, 2001-06-29: Strange: If we don't inline this, we seems to get better performance in our solver be...
Vector(const TSVector< T > &ts)
bool operator!=(const Vector< T > &v) const
TVector< T > & operator+(const Vector< T > &) HOT
friend NOINST double FRIEND_TBCI__ fabssqr FGDT(const TSVector< T > &)
TVector< T > & operator+=(const T &)
TV += a.
void thread_start(const int thr_no, thr_job_t job, const unsigned long sz,...)
void job_val_vec_add(struct thr_ctrl *tc)
vec = val + vec;
void job_vec_div_val(struct thr_ctrl *tc)
vec /= val;
void job_vec_mul_val(struct thr_ctrl *tc)
vec *= val;
T *const & vecptr() const
tbci_traits< T >::const_refval_type get(const unsigned long i) const
TVector< T > operator-() const
Vector(const BVector< T > &bv)
T operator()(const unsigned long i) const
static const char * vec_info()
TVector< T > add_t_tsv(const T &) const
Helper member fn to prevent friendship TV = T + TSV.
void job_svc_val_add(struct thr_ctrl *tc)
vec = s*vec + val;
Vector(const T &val, const unsigned long d)
#define STD_SMP_TEMPLATE5(oper, dm, a1, a2, a3, a4, a5)
#define BCHK(cond, exc, txt, ind, rtval)
void par_fill(Matrix< T > &mat, mat_fill_fn< T > fn, void *par)
const TSVector< T > & operator*(const T &f) const
void job_svc_vec_add(struct thr_ctrl *tc)
vec = s*vec + vec;
Vector(const unsigned long d=0)
#define NAMESPACE_CSTD_END
T max(const FS_Vector< dims, T > &fv)
const TSVector< T > & operator-() const
const TSVector< T > & operator/(const T &f) const
void(* thr_job_t)(struct thr_ctrl *)
Before the double inclusion guard on purpose!
void job_vec_vec_add(struct thr_ctrl *tc)
vec = vec + vec;
TVector< T > slice(unsigned long, unsigned long) const
void job_vec_sub_vec(struct thr_ctrl *tc)
vec -= vec;
void job_val_svc_add(struct thr_ctrl *tc)
vec = val + s*vec;
TVector(const BVector< T > &bv)
void job_svc_svc_sub(struct thr_ctrl *tc)
vec = s*vec - s*vec;
const T & getcref(const unsigned long i) const HOT
BVector< T > & operator=(const T &a)
bool operator>=(const Vector< T > &v) const
T & set(const T &val, const unsigned long i) const
bool operator==(const TVector< T > &tv) const
Temporary object for scaled matrices.
#define STD_SMP_TEMPLATE4C(oper, dm, a1, a2, a3, a4)
bool operator>(const Vector< T > &v) const
T operator()(const unsigned long i)
bool operator>=(const BVector< T > &bv) const
void job_vec_svc_add(struct thr_ctrl *tc)
vec = vec + s*vec;
void job_vec_fabssqr(struct thr_ctrl *tc)
Matrix class with optimized Matrix-Vector multiplication for symmetrical Matrices.
void job_val_add_vec(struct thr_ctrl *tc)
vec += val;
T aligned_value_type TALIGN(MIN_ALIGN2)
unsigned int size() const
size: incompatibility to Matrix (!)
TSVector< T > & operator*=(const T &f)
TSVector< T > & operator=(const TSVector< T > &ts)
struct thr_struct * threads
TSVector< T > operator/(const T &)
T aligned_value_type TALIGN(MIN_ALIGN2)
TVector< T > & operator=(const T &a)
#define PREFETCH_R(addr, loc)
In case gcc does not yet support __builtin_prefetch(), we have handcoded assembly with gcc for a few ...
double fabssqr() const HOT
void job_vec_sub_vec_inv(struct thr_ctrl *tc)
vec -= vec; vec = -vec;
void job_vec_add_val(struct thr_ctrl *tc)
vec += val;
TVector(const unsigned long d=0)
bool contains(const T &, unsigned long *=0) const
void job_vec_svc_sub(struct thr_ctrl *tc)
vec = vec - s*vec;
C++ class for sparse matrices using compressed row storage.
TVector< T > & incr(const unsigned long, const T=(T) 1)
#define THREAD_MAX_RES_LN
bool operator!=(const Vector< T > &v) const
Vector< T > & operator=(const T &a)
TVector< T > slice(const unsigned long, const unsigned long)
void job_svc_val_sub(struct thr_ctrl *tc)
vec = s*vec - val;
void destroy()
destroy object explicitly
void job_vec_sum(struct thr_ctrl *tc)
void job_vec_val_mul(struct thr_ctrl *tc)
vec = vec * val;
void job_vec_dot(struct thr_ctrl *tc)
TSVector< T > operator*(const T &)
void do_mat_vec_transmult(const unsigned start, const unsigned end, TVector< T > *res, const Matrix< T > *mat, const Vector< T > *vec)
TSVector< T > operator/(const T &) const
void job_val_sub_vec(struct thr_ctrl *tc)
vec -= val; vec = -vec;
friend TVector< T > conj FGD(const Vector< T > &)
Vector(const TVector< T > &tv)
int conj(const int arg)
conj for elementary types
bool par_comp(const Vector< T > &v1, TVector< T > v2)
TSVector< T > & operator/=(const T &f)
TVector(const TVector< T > &tv) HOT
void detach(const T *=0) const
Vector(const Vector< T > &v)
const T & getcref(const unsigned long i) const
void job_vec_vec_sub(struct thr_ctrl *tc)
vec = vec - vec;
TSVector(const unsigned long d)
unsigned long size() const
#define STD_SMP_TEMPLATE2C(oper, dm, a1, a2)
TVector< T > sub_t_tsv(const T &) const
TV = T - TSV.
T aligned_value_type TALIGN(MIN_ALIGN2)
friend T dot FGD(const Vector< T > &, const Vector< T > &) HOT
TVector< T > & operator*=(const T &)
TV *= a.
void job_val_svc_div(struct thr_ctrl *tc)
vec = val / s*vec;
T dot(const T &a1, const T &a2)
bool operator!=(const TVector< T > &tv) const
Temporary Base Class (non referable!) (acc.
TVector< T > incr(const unsigned long, const T=(T) 1) const
BVector< T > & resize(const BVector< T > &)
Actually it's a resize and copy (some people would expect the assignment op to do this) ...
exception class: Use MatErr from matrix.h
#define STD_SMP_TEMPLATE2V(oper, dm, a1, a2)
const TSVector< T > & eval(const T *vv=0) const
void job_svc_svc_add(struct thr_ctrl *tc)
vec = s*vec + s*vec;
static const char * vec_info()
#define TBCIDELETE(t, v, sz)
T operator[](const unsigned long i) const
TVector< T > operator+(const Vector< T > &) const HOT
TV = TSV + V.
Temporary Base Class Idiom: Class TVector is used for temporary variables.
unsigned long size() const
void job_vec_sub_val(struct thr_ctrl *tc)
vec -= val;
void job_val_svc_sub(struct thr_ctrl *tc)
vec = val + s*vec;
void thread_wait(const int thr_no, struct job_output *out)
double thread_wait_result(const int thr_no)
#define STD_SMP_TEMPLATE3VC(oper, dm, a1, a2, a3)
unsigned int size() const
bool operator==(const Vector< T > &) const
BdMatrix< T > & operator*=(const T &)
Vector(const BVector< U > &bv)
void job_val_vec_sub(struct thr_ctrl *tc)
vec = val - vec;
const Vector< T > Vector< T > Vector< T > Vector< T > & y
const Vector< T > Vector< T > Vector< T > Vector< T > Vector< T > Vector< T > Vector< T > Vector< T > long int res
TVector(const Vector< T > &v)
double fabssqr(const double a)
#define TBCICOMP(n, o, t, s)
double norm(const TBCI__ cplx< T > &c)
void job_vec_add_vec(struct thr_ctrl *tc)
vec += vec;
void job_vec_val_sub(struct thr_ctrl *tc)
vec = vec - val;
cplx< T > operator/(const T a, const cplx< T > &b)
tbci_traits< T >::const_refval_type operator[](const unsigned long i) const
void job_vec_val_add(struct thr_ctrl *tc)
vec = vec + val;
unsigned int do_exactsum()
const unsigned TMatrix< T > const Matrix< T > * a
void job_vec_sub_svc_inv(struct thr_ctrl *tc)
vec -= s*vec;
void job_vec_sub_svc(struct thr_ctrl *tc)
vec -= s*vec;
TVector< T > & operator/=(const T &)
TV /= a.
const Vector< T > Vector< T > Vector< T > Vector< T > Vector< T > Vector< T > Vector< T > Vector< T > long int int char v
< find minimun of func on grid with resolution res
bool operator==(const Vector< T > &v) const
TVector< T > & operator-=(const T &)
TV -= a.
tbci_traits< T >::const_refval_type operator()(const unsigned long i) const
BdMatrix< T > & operator-=(const BdMatrix< T > &)
#define LIKELY(expr)
branch prediction note that we sometimes on purpose mark the unlikely possibility likely and vice ver...
TSVector< T > operator*(const T &) const
TVector(const TSVector< T > &ts)
void job_vec_mult(struct thr_ctrl *tc)
#define STD_SMP_TEMPLATE4V(oper, dm, a1, a2, a3, a4)