13 #ifndef TBCI_BVECTOR_H
14 #define TBCI_BVECTOR_H
16 #include "tbci/basics.h"
17 #include "tbci/vector_sig.h"
18 #include "tbci/tbci_traits.h"
21 #if !defined(NO_GD) && !defined(AUTO_DECL)
22 # include "tbci/bvector_gd.h"
27 #ifndef TBCI_DISABLE_EXCEPT
35 :
NumErr(
"Error in Vector library") {}
45 # pragma interface "bvector.h"
81 #ifdef HAVE_BCXX_INHER_BREAKS_FRIEND_BUG
88 explicit BVector (
const unsigned long = 0);
89 BVector (
const T&,
const unsigned long);
126 typename tbci_traits<T>::const_refval_type
131 typename tbci_traits<T>::const_refval_type
132 get(
const unsigned long idx)
const HOT
134 T&
set(
const unsigned long idx) HOT
136 T&
set(
const T& val,
unsigned long idx)
137 {
return this->
set(idx) = val; }
141 typename tbci_traits<T>::const_refval_type
144 inline unsigned long size () const HOT {
return dim; }
178 bool operator != (const
BVector<
T>& bv)
const {
return !(*
this == bv); }
179 bool operator <= (const BVector<T>& bv)
const;
181 bool operator < (const BVector<T>& bv)
const {
return !((*this) >= bv); }
185 bool contains (
const T&,
unsigned long * = 0)
const;
188 friend STD__ ostream& operator << FGD (STD__ ostream&, const BVector<T>&);
/// Class-name tag for this vector type; always the literal "BVector".
static const char*
vec_info()
{
  const char* const tag = "BVector";
  return tag;
}
197 #ifndef HAVE_PROMOTION_BUG
198 # ifndef HAVE_GCC295_TMPLFRNDCLS_BUG
215 for (
unsigned long i = 0;
i <
dim;
i++)
vec[
i] = bv(
i);
220 #ifdef HAVE_BCXX_INHER_BREAKS_FRIEND_BUG
234 template <
typename T>
246 template <
typename T>
256 #if !defined(__clang__) || !defined(CPLX)
257 for (
unsigned long i=0;
i <
dim; ++
i)
258 vec[
i] = va_arg (vl,
T);
260 throw VecErr(
"vararg not supported for cplx in clang");
261 #warning no vararg support with cplx numbers and clang
267 template <
typename T>
274 template <
typename T>
281 template <
typename T>
295 template <
typename T>
307 template <
typename T>
317 #ifndef LAPACK_INLINE
318 # define LAPACK_INLINE
320 template <
typename T>
341 template <
typename T>
354 return this->
fill (value);
360 template <
typename T>
377 template <
typename T>
384 template <
typename T>
389 buf[t] =
vec[dim-t-1];
393 template <
typename T>
399 for (
REGISTER unsigned long j = dim-1;
i < j; j--) {
400 if (
vec[j] <
vec[j-1] ) {
411 template <
typename T>
422 template <
typename T>
434 template <
typename T>
439 BCHK (!tmp,
VecErr, Memory allocation failed in
remove, (
dim-1)*
sizeof(
T), *
this);
450 template <
typename T>
460 template <
typename T>
461 inline typename tbci_traits<T>::const_refval_type
468 template <
typename T>
475 template <
typename T>
483 template <
typename T>
494 template <
typename T>
512 for (
unsigned long i=0;
i<
dim;
i++)
523 for (
unsigned long i=0;
i<
dim;
i++)
530 template <
typename T>
531 inline STD__ ostream& operator << (STD__ ostream& os, const BVector<T>&
v)
534 for (
unsigned long i = 0;
i <
v.dim;
i++)
535 os <<
v.vec[
i] <<
"\n";
537 for (
unsigned long i = 0;
i <
v.dim;
i++)
538 os <<
v.vec[
i] <<
" ";
544 template <
typename T>
547 STD__ cerr <<
"BVector (" << __FILE__ <<
":" << __LINE__
548 <<
") Input on pointers not possible!" <<
STD__ endl;
553 template <
typename T>
568 for (
unsigned long i = 1;
i < v.
dim;
i++) {
570 if (s !=
',') in.putback(s);
579 template <
typename T>
592 template <
typename T>
603 template <typename T>
605 {
return (b1.
concat (b2)); }
618 #if defined(SMP) && !defined(SMP_VECSLICE)
619 # define SMP_VECSLICE 262144
621 #if defined(SMP) && defined(__i386__) && !defined(SMP_VECSCALAR)
622 # define NOSMP_VECSCALAR
623 # define NOSMP_VECFABS
626 # define SMP_VECSLICE2 (SMP_VECSLICE/sizeof(T))
630 #if defined(SMP) && !defined(NO_SMP_VECVEC)
632 template <typename T>
642 int _par_comp (
const unsigned long sz,
const T* v1,
const T* v2))
644 volatile long res = 0;
651 do_vv_comp<T> (sz, v1, v2,
res);
654 const unsigned first = slice_offset(1, n_thr, sz, v1);
655 unsigned long st, en = first;
657 for (
unsigned t = 0; t < n_thr-1; ++t) {
658 st = en; en = slice_offset(t+2, n_thr, sz, v1);
660 v1+st, v2+st, (
void*)0);
663 do_vv_comp<T> (first, v1, v2,
res);
666 for (
unsigned t = 0; t < n_thr-1; ++t) {
676 template <typename T>
684 void _par_copy (
const unsigned long sz,
T* v1,
const T* v2))
692 _tbci_copy<T> (sz, v1, v2);
695 const unsigned first = slice_offset(1, n_thr, sz, v1);
696 unsigned long st, en = first;
698 for (
unsigned t = 0; t < n_thr-1; ++t) {
699 st = en; en = slice_offset(t+2, n_thr, sz, v1);
701 v1+st, v2+st, (
void*)0);
707 for (
unsigned t = 0; t < n_thr-1; ++t)
713 template <typename T>
716 _tbci_fill<T> (tc->
t_size,
722 void _par_fill (
const unsigned long sz,
T* v1,
typename tbci_traits<T>::loop_const_refval_type val))
730 _tbci_fill<T> (sz, v1, val);
733 const unsigned first = slice_offset(1, n_thr, sz, v1);
734 unsigned long st, en = first;
736 for (
unsigned t = 0; t < n_thr-1; ++t) {
737 st = en; en = slice_offset(t+2, n_thr, sz, v1);
739 v1+st, &val, (
void*)0);
742 _tbci_fill<T> (first, v1, val);
745 for (
unsigned t = 0; t < n_thr-1; ++t)
751 template <typename T>
754 _tbci_fill_fn<T> (tc->
t_size,
756 *(vec_fill_fn<T>*)(tc->
t_par[1]),
762 void _par_fill_fn (
const unsigned long sz,
T* v1, vec_fill_fn<T> fn,
void *par))
770 _tbci_fill_fn<T> (sz, v1, fn, par);
773 const unsigned first = slice_offset(1, n_thr, sz, v1);
774 unsigned long st, en = first;
776 for (
unsigned t = 0; t < n_thr-1; ++t) {
777 st = en; en = slice_offset(t+2, n_thr, sz, v1);
779 v1+st, (
void*)&fn, par, (
void*)0);
782 _tbci_fill_fn<T> (first, v1, fn, par);
785 for (
unsigned t = 0; t < n_thr-1; ++t)
791 template <
typename T>
792 int _par_comp (
const unsigned long sz,
const T* v1,
const T* v2)
795 do_vv_comp<T> (sz, v1, v2,
res);
799 template <
typename T>
800 void _par_copy(
const unsigned long sz,
T* v1,
const T* v2)
802 _tbci_copy<T> (sz, v1, v2);
805 template <
typename T>
806 void _par_fill (
const unsigned long sz,
T* v1,
typename tbci_traits<T>::loop_const_refval_type val)
808 _tbci_fill<T> (sz, v1, val);
811 template <
typename T>
812 void _par_fill_fn (
const unsigned long sz,
T* v1, vec_fill_fn<T> fn,
void *par)
814 _tbci_fill_fn<T> (sz, v1, fn, par);
819 #if defined(SMP) && defined(HAVE_LIBNUMA)
820 template <
typename T>
826 const unsigned long sz = bv.
size();
835 (
unsigned long)v1, (
unsigned long)(v1+sz));
838 const unsigned first = slice_offset(1, n_thr, sz, v1);
839 unsigned long st, en = first;
841 unsigned long res = 0;
842 for (
unsigned t = 0; t < n_thr-1; ++t) {
843 st = en; en = slice_offset(t+2, n_thr, sz, v1);
846 v1+st, v1+en, (
void*)0);
850 (
unsigned long)v1, (
unsigned long)(v1+first));
853 for (
unsigned t = 0; t < n_thr-1; ++t) {
863 template <
typename T>
#define TBCICOPY(n, o, t, s)
bool operator<=(const BVector< T > &bv) const
T & operator()(const unsigned long) HOT
void do_mat_vec_mult(const unsigned start, const unsigned end, TVector< T > *res, const Matrix< T > *mat, const Vector< T > *vec)
T aligned_value_type TALIGN(MIN_ALIGN2)
provides basic Vector functionality but arithmetic operators (+=, - , *, /...).
BVector< T > & swap(BVector< T > &v)
int _par_comp(const unsigned long sz, const T *v1, const T *v2)
BVector< T > & fill(const T &) HOT
const T & getcref(const unsigned long idx) const
STD__ istream & operator>>(STD__ istream &istr, BdMatrix< T > &mat)
bool operator==(const BVector< T > &) const HOT
KG, 2001-06-29: Strange: If we don't inline this, we seem to get better performance in our solver be...
BVector< T > & setsize(const unsigned long size)
BVector< T > concat(const BVector< T > &b1, const BVector< T > &b2)
abstract base class (signature) for Vectors without arithmetics
void thread_start(const int thr_no, thr_job_t job, const unsigned long sz,...)
unsigned long size() const HOT
T & set(const T &val, unsigned long idx)
exception base class for the TBCI NumLib
void job_vv_copy(struct thr_ctrl *tc)
const_iterator begin() const
T *const & vecptr() const
friend NOINST BVector< T > &FRIEND_TBCI2__ bvfillm FGD(BVector< T > &, const Matrix< T > &m)
BVector< T > concat(const BVector< T > &) const
#define BCHK(cond, exc, txt, ind, rtval)
#define TBCIFILL(n, v, t, s)
BVector< T > & bvfillm(BVector< T > &bv, const Matrix< T > &m)
T & operator[](const unsigned long i)
#define REALLOC(v, os, t, s)
void(* thr_job_t)(struct thr_ctrl *)
Before the double inclusion guard on purpose!
void job_vv_comp(struct thr_ctrl *tc)
T * iterator
STL-like iterator support.
BVector< T > & operator=(const T &a)
void _par_copy(const unsigned long sz, T *v1, const T *v2)
bool operator>=(const BVector< T > &bv) const
void _par_fill(const unsigned long, T *const, typename tbci_traits< T >::loop_const_refval_type)
const_iterator end() const
for(REGISTER T *p1=c.vec,*p2=b.vec;p1< c.endvec;p1++, p2++)*p1
struct thr_struct * threads
#define PREFETCH_R(addr, loc)
In case gcc does not yet support __builtin_prefetch(), we have handcoded assembly with gcc for a few ...
bool contains(const T &, unsigned long *=0) const
BVector< T > & push_back(const T &value)
performs poorly
long int Vector< T > & index
BVector(const BVector< U > &bv)
void do_mat_vec_transmult(const unsigned start, const unsigned end, TVector< T > *res, const Matrix< T > *mat, const Vector< T > *vec)
BVector< T > & setptr(T *pointer)
BVector< T > & append(const T &)
performs poorly
BVector< T > & remove(const unsigned long)
T & set(const unsigned long idx) HOT
void SWAP(T &a, T &b)
SWAP function Note: We could implement a swap function without temporaries: a -= b b += a a -= b a = ...
#define PREFETCH_W(addr, loc)
T *const & get_fortran_vector() const
#define EXPCHK(cond, exc, txt, ind, rtval)
Temporary Base Class (non referable!) (acc.
BVector< T > & alias(const BVector< T > &bv)
BVector< T > & resize(const BVector< T > &)
Actually it's a resize and copy (some people would expect the assignment op to do this) ...
BVector< T > & cheapdownsize(const unsigned long)
#define TBCICLEAR(n, t, s)
#define TBCIDELETE(t, v, sz)
void set_fortran_vector(T *pointer)
Temporary Base Class Idiom: Class TVector is used for temporary variables.
void thread_wait(const int thr_no, struct job_output *out)
bool operator>(const BVector< T > &bv) const
BVector< T > & bubble_sort()
BVector< T > & copy(const BVector< T > &bv)
copy does a resize, if necessary
static const char * vec_info()
const Vector< T > Vector< T > Vector< T > Vector< T > Vector< T > Vector< T > Vector< T > Vector< T > long int res
#define TBCICOMP(n, o, t, s)
VecErr(const char *t, const long i=0)
void job_vec_fill(struct thr_ctrl *tc)
int numa_optimize(const BdMatrix< T > &bm, bool fault_in)
const unsigned TMatrix< T > const Matrix< T > * a
void thread_start_off(const int thr_no, thr_job_t job, const unsigned long off, const unsigned long sz,...)
void do_vv_comp(const unsigned long sz, const T *const v1, const T *const v2, volatile long &_f2)
f2 = number of differences vec, vec
const Vector< T > Vector< T > Vector< T > Vector< T > Vector< T > Vector< T > Vector< T > Vector< T > long int int char v
< find minimun of func on grid with resolution res
#define LIKELY(expr)
branch prediction note that we sometimes on purpose mark the unlikely possibility likely and vice ver...
void job_vec_fill_fn(struct thr_ctrl *tc)
void _tbci_copy(const unsigned long sz, T *const res, const T *const v1)