TBCI Numerical high perf. C++ Library  2.8.0
bvector.h
Go to the documentation of this file.
1 
5 //-------------------------------------------------------------
6 // Attila Michael Bilgic, 1/97
7 // last update 10/04/97 AMB
8 // 12/14/97 KG
9 // $Id: bvector.h,v 1.36.2.72 2022/11/03 17:28:10 garloff Exp $
10 //-------------------------------------------------------------
11 
12 
13 #ifndef TBCI_BVECTOR_H
14 #define TBCI_BVECTOR_H
15 
16 #include "tbci/basics.h"
17 #include "tbci/vector_sig.h"
18 #include "tbci/tbci_traits.h"
19 
20 // Avoid -fguiding-decls
21 #if !defined(NO_GD) && !defined(AUTO_DECL)
22 # include "tbci/bvector_gd.h"
23 #endif
24 
26 
27 #ifndef TBCI_DISABLE_EXCEPT
28 // except.h is included by basics.h
30 
31 class VecErr : public NumErr
32 {
33  public:
34  VecErr()
35  : NumErr("Error in Vector library") {}
36  VecErr(const char* t, const long i = 0)
37  : NumErr(t, i) {}
38  VecErr(const VecErr& ve)
39  : NumErr(ve) {}
40  virtual ~VecErr() throw() {}
41 };
42 #endif
43 
44 #ifdef PRAGMA_I
45 # pragma interface "bvector.h"
46 #endif
47 
48 template <typename T> class TMatrix;
49 template <typename T> class Matrix;
50 template <typename T> class F_TMatrix;
51 template <typename T> class F_Matrix;
52 template <typename T> class TVector;
53 template <typename T> class TSVector;
54 template <typename T> class Vector;
55 
56 //template <typename T> class CTensor;
57 //template <typename T> class Tensor;
58 
59 
66 template <typename T>
67 class BVector : public BVector_Sig<T>
68 {
69  protected:
70  typedef T value_type;
71  typedef T element_type;
72  typedef T aligned_value_type TALIGN(MIN_ALIGN2);
73  T* vec;
74  unsigned long dim;
75  bool keep; // whether to destroy or not
76 
77  public:
78  friend class TMatrix<T>;
79  friend class F_TMatrix<T>;
80  friend class F_Matrix<T>;
81 #ifdef HAVE_BCXX_INHER_BREAKS_FRIEND_BUG
82  friend class TVector<T>;
83  friend class Vector<T>;
84  //friend class CTensor<T>;
85 #endif
86 
87  // constructor, destructor
88  explicit BVector (const unsigned long = 0);
89  BVector (const T&, const unsigned long);
90  BVector (const BVector<T>&) HOT;
91 #ifndef NO_POD
92  // variable argument number
93  BVector(const vararg va, ...);
94 #endif
95  inline void destroy ();
96  ~BVector ();
97 
98  // basics
100  { if (UNLIKELY(dim)) TBCICLEAR (vec, T, dim); return *this; }
101  BVector<T>& fill (const T&) HOT;
102  // 2.97 was clever enough to detect this bug!
103  //BVector<T>& fill (const Matrix<T>& m);
104  friend NOINST /*inline*/ BVector<T>& FRIEND_TBCI2__ bvfillm FGD (BVector<T>&, const Matrix<T>& m);
105 
107  BVector<T>& resize (const BVector<T>&);
108  BVector<T>& resize (const unsigned long);
109  BVector<T>& resize (const T&, const unsigned long);
110  BVector<T>& cheapdownsize (const unsigned long);
111  // BVector<T>& free () { return this->resize (0); }
112  BVector<T>& clear () { return this->fill((T)0); };
114  BVector<T>& append (const T&);
116  BVector<T>& push_back (const T& value) { append(value); return *this; }
117  BVector<T>& append (const BVector<T>&);
118  BVector<T>& remove (const unsigned long);
119  BVector<T>& revert ();
121 
123 
124  // Member access
125  T& operator () (const unsigned long) HOT;
126  typename tbci_traits<T>::const_refval_type
127  operator () (const unsigned long) const HOT;
128  // internal, no checking
129  const T& getcref (const unsigned long idx) const
130  { return vec[idx]; }
131  typename tbci_traits<T>::const_refval_type
132  get(const unsigned long idx) const HOT
133  { return vec[idx]; }
134  T& set(const unsigned long idx) HOT
135  { return vec[idx]; }
136  T& set(const T& val, unsigned long idx)
137  { return this->set(idx) = val; }
138 
139  // valarray compatibility
140  T& operator [] (const unsigned long i) { return this->operator() (i); }
141  typename tbci_traits<T>::const_refval_type
142  operator [] (const unsigned long i) const { return this->operator() (i); }
143 
144  inline unsigned long size () const HOT { return dim; }
145 
146  // Needed for emul et al.
147  T* const & vecptr () const { return vec; }
148 
153  typedef T* iterator;
154  iterator begin () { return vec; }
155  iterator end () { return vec+dim; }
156  typedef const T* const_iterator;
157  const_iterator begin () const { return vec; }
158  const_iterator end () const { return vec+dim; }
159 
160  // You'd better know, what you are doing!
161  BVector<T>& setptr (T* pointer) { vec = pointer; return *this; }
162  BVector<T>& setsize (const unsigned long size) { dim = size; return *this; }
163  // access to data pointer for e.g. fortran libs
164  T* const & get_fortran_vector () const { return vecptr (); }
165  void set_fortran_vector (T* pointer) { setptr (pointer); }
166 
167  // assignment operators
168  BVector<T>& operator = (const T& a) { return fill (a); }
170  BVector<T>& operator = (const BVector<T>&) HOT;
171  BVector<T>& alias (const BVector<T>& bv)
172  { destroy (); vec = bv.vec; dim = bv.dim; keep = true; return *this; }
174  BVector<T>& copy (const BVector<T>& bv);
175 
176  // basic comparison
177  bool operator == (const BVector<T>&) const HOT;
178  bool operator != (const BVector<T>& bv) const { return !(*this == bv); }
179  bool operator <= (const BVector<T>& bv) const;
180  bool operator >= (const BVector<T>& bv) const;
181  bool operator < (const BVector<T>& bv) const { return !((*this) >= bv); }
182  bool operator > (const BVector<T>& bv) const { return !((*this) <= bv); }
183 
184  // val in vector
185  bool contains (const T&, unsigned long * = 0) const;
186 
187  // io-streams
188  friend STD__ ostream& operator << FGD (STD__ ostream&, const BVector<T>&);
189  friend STD__ istream& operator >> FGD (STD__ istream&, BVector<T>&);
190 
191  // for Index class
192  //for friend BVector<T> concat FGD (const BVector<T>&, const BVector<T>&);
193  BVector<T> concat (const BVector<T>&) const;
194 
195  /*virtual*/ static const char* vec_info() { return "BVector"; }
196 
197 #ifndef HAVE_PROMOTION_BUG
198 # ifndef HAVE_GCC295_TMPLFRNDCLS_BUG
199  // Promotion (only explicit)
200  template <typename U> friend class BVector;
201  template <typename U> explicit BVector (const BVector<U>& bv)
202  : dim (bv.dim), keep (bv.keep)
203  {
204  if (LIKELY(dim)) vec = NEW (T, dim); else vec = 0;
205  if (UNLIKELY(!vec)) dim = 0;
206  for (unsigned long i = 0; i < dim; i++) vec[i] = bv.vec[i];
207  }
208 # else
209  template <typename U> explicit BVector (const BVector<U>& bv)
210  : keep (false)
211  {
212  dim = bv.size(); //keep = bv.keep;
213  if (LIKELY(dim)) vec = NEW (T, dim); else vec = 0;
214  if (UNLIKELY(!vec)) dim = 0;
215  for (unsigned long i = 0; i < dim; i++) vec[i] = bv(i);
216  }
217 # endif
218 #endif
219 
220 #ifdef HAVE_BCXX_INHER_BREAKS_FRIEND_BUG
221  /* Speed mat-vec-mul */
222  friend NOINST void FRIEND_TBCI2__ do_mat_vec_mult FGD (const unsigned start, const unsigned end, \
223  TVector<T> * res, const Matrix<T> * mat, const Vector<T> * vec);
224  friend NOINST void FRIEND_TBCI2__ do_mat_vec_transmult FGD (const unsigned start, const unsigned end, \
225  TVector<T> * res, const Matrix<T> * mat, const Vector<T> * vec);
226 #endif
227 
228 };
229 
230 
231 //definitions
232 
233 
234 template <typename T>
235 INLINE BVector<T>::BVector (const unsigned long c)
236  : vec ((T*)0), dim (c), keep (false)
237 {
238  if (UNLIKELY(c)) {
239  vec = NEW (T, c);
240  if (UNLIKELY(!vec))
241  dim = 0;
242  }
243 }
244 
245 #ifndef NO_POD
246 template <typename T>
248  : vec ((T*)0), dim (va), keep (false)
249 {
250  if (LIKELY(dim))
251  vec = NEW (T, dim);
252  if (UNLIKELY(!vec))
253  dim = 0;
254  va_list vl;
255  va_start (vl, va);
256 #if !defined(__clang__) || !defined(CPLX)
257  for (unsigned long i=0; i < dim; ++i)
258  vec[i] = va_arg (vl, T);
259 #else
260  throw VecErr("vararg not supported for cplx in clang");
261 #warning no vararg support with cplx numbers and clang
262 #endif
263  va_end (vl);
264 }
265 #endif
266 
267 template <typename T>
268 inline void BVector<T>::destroy ()
269 {
270  if (LIKELY(dim))
271  TBCIDELETE(T, vec, dim);
272 }
273 
274 template <typename T>
276 {
277  if (UNLIKELY(!keep))
278  destroy ();
279 }
280 
281 template <typename T>
283  : vec ((T*)0), dim(0), keep (false)
284 {
285  if (LIKELY(v.dim)) {
286  vec = NEW (T, v.dim);
287  if (LIKELY(vec)) {
288  dim = v.dim;
289  TBCICOPY(vec, v.vec, T, dim);
290  }
291  }
292 }
293 
294 
295 template <typename T>
296 INLINE BVector<T>::BVector (const T& value, const unsigned long c)
297  : vec ((T*)0), dim (c), keep (false)
298 {
299  if (LIKELY(dim))
300  vec = NEW (T, dim);
301  if (LIKELY(vec))
302  TBCIFILL (vec, value, T, dim);
303  else
304  dim = 0;
305 }
306 
307 template <typename T>
308 BVector<T>& BVector<T>::cheapdownsize (const unsigned long nd)
309 {
310  BCHK (nd > dim, VecErr, cheapdownsize does not upsize, nd, *this);
311  if (UNLIKELY(!nd))
312  return this->resize (nd);
313  dim = nd;
314  return *this;
315 }
316 
317 #ifndef LAPACK_INLINE
318 # define LAPACK_INLINE
319 #endif
320 template <typename T>
322 {
323  if (LIKELY(c == dim))
324  return *this;
325  if (UNLIKELY(c == 0)) {
326  if (LIKELY(dim))
327  TBCIDELETE(T, vec, dim);
328  dim = c;
329  return *this;
330  }
331  T* tmp = vec;
332  if (LIKELY(dim == 0))
333  vec = NEW (T, c);
334  else
335  REALLOC(vec,dim,T,c);
336  BCHK (!vec, VecErr, Realloc failed in resize, 0, (vec=tmp, *this));
337  dim = c;
338  return *this;
339 }
340 
341 template <typename T>
342 BVector<T>& BVector<T>::resize (const T& value, const unsigned long c)
343 {
344  if (UNLIKELY(dim != c)) {
345  if (LIKELY(dim))
346  TBCIDELETE(T, vec, dim);
347  dim = c;
348  if (UNLIKELY(c))
349  vec = NEW (T, c);
350  else
351  vec = (T*)0;
352  }
353  if (UNLIKELY(dim && vec))
354  return this->fill (value);
355  else
356  dim = 0;
357  return *this;
358 }
359 
360 template <typename T>
362 {
363  if (UNLIKELY(dim))
364  TBCIDELETE (T, vec, dim);
365  dim = v.dim;
366  if (UNLIKELY(v.dim)) {
367  vec = NEW (T, v.dim);
368  if (LIKELY(vec))
369  TBCICOPY(vec, v.vec, T, dim);
370  else
371  dim = 0;
372  } else
373  vec = (T*)0;
374  return *this;
375 }
376 
377 template <typename T>
379 {
380  TBCIFILL(vec, val, T, dim);
381  return *this;
382 }
383 
384 template <typename T>
386 {
387  BVector<T> buf(dim);
388  for (REGISTER unsigned long t = 0; t < dim; t++)
389  buf[t] = vec[dim-t-1];
390  return *this = buf;
391 }
392 
393 template <typename T>
395 {
396  T temp;
397  if(dim > 1) {
398  for (REGISTER unsigned long i = 0; i < dim-1; i++)
399  for (REGISTER unsigned long j = dim-1; i < j; j--) {
400  if ( vec[j] < vec[j-1] ) {
401  temp = vec[j];
402  vec[j] = vec[j-1];
403  vec[j-1] = temp;
404  }
405  }
406  }
407  return *this;
408 }
409 
410 //append one element to vector
411 template <typename T>
413 {
414  T* tmp = vec;
415  REALLOC (vec,dim,T,dim+1);
416  BCHK (!vec, VecErr, Realloc failed in append, 0, (vec=tmp, *this));
417  vec[dim++] = val;
418  return *this;
419 }
420 
421 //append one vector to vector
422 template <typename T>
424 {
425  T* tmp = vec;
426  REALLOC (vec, dim, T, dim+v.dim);
427  BCHK (!vec, VecErr, Realloc failed in append, 0, (vec=tmp, *this));
428  TBCICOPY(vec+dim, v.vec, T, v.dim);
429  dim += v.dim;
430  return *this;
431 }
432 
433 //remove one element
434 template <typename T>
435 BVector<T>& BVector<T>::remove (const unsigned long i)
436 {
437  BCHK (i>=dim, VecErr, Remove idx out of range, i, *this);
438  T* tmp = NEW (T, dim-1);
439  BCHK (!tmp, VecErr, Memory allocation failed in remove, (dim-1)*sizeof(T), *this);
440  if (UNLIKELY(i))
441  TBCICOPY (tmp, vec, T, i);
442  dim -= 1;
443  if (UNLIKELY(dim-i))
444  TBCICOPY (&tmp[i], &vec[i+1], T, (dim-i));
445  TBCIDELETE(T, vec, dim+1);
446  vec = tmp;
447  return *this;
448 }
449 
450 template <typename T>
452 {
453  //BCHK(dim != v.dim, VecErr, swap with different size vecs, v.dim, *this);
454  SWAP (v.dim, dim);
455  SWAP (v.vec, vec);
456  SWAP (v.keep, keep);
457  return *this;
458 }
459 
460 template <typename T>
461 inline typename tbci_traits<T>::const_refval_type
462  BVector<T>::operator () (const unsigned long i) const
463 {
464  EXPCHK(i>=dim, VecErr, Illegal index, i, vec[0]);
465  return vec[i];
466 }
467 
468 template <typename T>
469 inline T& BVector<T>::operator () (const unsigned long i)
470 {
471  EXPCHK(i>=dim, VecErr, Illegal index, i, vec[0]);
472  return vec[i];
473 }
474 
475 template <typename T>
477 {
478  BCHK(dim != a.dim, VecErr, Assignment from wrong dim vector, a.dim, *this);
479  TBCICOPY (vec, a.vec, T, dim);
480  return *this;
481 }
482 
483 template <typename T>
485 {
486  //BCHK(dim != a.dim, VecErr, Assignment from wrong dim vector, a.dim, *this);
487  return this->resize (a);
488  //TBCICOPY(vec, a.vec, T, dim);
489  //return *this;
490 }
491 
494 template <typename T>
495 /*inline*/ bool BVector<T>::operator == (const BVector<T>& bv) const
496 {
497  if (LIKELY(dim != bv.dim))
498  return false;
499  if (LIKELY(vec == bv.vec || dim == 0))
500  return true;
501  if (TBCICOMP (vec, bv.vec, T, dim))
502  return false;
503  else
504  return true;
505 }
506 
507 
508 template<typename T>
510 {
511  BCHK(dim != bv.dim, VecErr, comparison between wrong dim vectors, bv.dim, false);
512  for (unsigned long i=0; i<dim; i++)
513  if (UNLIKELY(vec[i] > bv.vec[i]))
514  return false;
515  return true;
516 }
517 
518 
519 template<typename T>
521 {
522  BCHK(dim != bv.dim, VecErr, comparison between wrong dim vectors, bv.dim, false);
523  for (unsigned long i=0; i<dim; i++)
524  if( UNLIKELY(vec[i] < bv.vec[i]) )
525  return false;
526  return true;
527 }
528 
529 
530 template <typename T>
531 inline STD__ ostream& operator << (STD__ ostream& os, const BVector<T>& v)
532 {
533 #ifdef VEC_COL
534  for (unsigned long i = 0; i < v.dim; i++)
535  os << v.vec[i] << "\n";
536 #else
537  for (unsigned long i = 0; i < v.dim; i++)
538  os << v.vec[i] << " ";
539 #endif
540  return os.flush();
541 }
542 
543 #ifdef PTR
544 template <typename T>
545 STD__ istream& operator >> (STD__ istream& in, BVector<T>& v)
546 {
547  STD__ cerr << "BVector (" << __FILE__ << ":" << __LINE__
548  <<") Input on pointers not possible!" << STD__ endl;
549  abort();
550  return in;
551 }
552 #else
553 template <typename T>
554 STD__ istream& operator >> (STD__ istream& in, BVector<T>& v)
555 {
556  T r ALIGN(MIN_ALIGN) = 0;
557  char s = ',';
558 
559 /*
560  if( !( in >> s ) )
561  return in;
562  if (s != '(')
563  in.putback(s);
564 */
565  if( !( in >> r ) )
566  return in;
567  v.vec[0] = r;
568  for (unsigned long i = 1; i < v.dim; i++) {
569  in >> s;
570  if (s != ',') in.putback(s);
571  if( !( in >> r ) )
572  return in;
573  v.vec[i] = r;
574  }
575  return in;
576 }
577 #endif
578 
579 template <typename T>
580 inline bool BVector<T>::contains (const T& val, unsigned long *ind) const
581 {
582  for (REGISTER unsigned long i = 0; i < dim; ++i)
583  if (UNLIKELY(val == vec[i])) {
584  if (ind)
585  *ind = i;
586  return true;
587  }
588  return false;
589 }
590 
591 
592 template <typename T>
594 {
595  BVector<T> c (dim + b2.dim);
596  TBCICOPY(c.vec, vec, T, dim);
597  TBCICOPY(c.vec+dim, b2.vec, T, dim);
598  return c;
599 }
600 
601 
602 INST(template <typename T> class BVector friend BVector<T> concat (const BVector<T>&, const BVector<T>&);)
603 template <typename T>
604 inline BVector<T> concat (const BVector<T>& b1, const BVector<T>& b2)
605 { return (b1.concat (b2)); }
606 
607 
618 #if defined(SMP) && !defined(SMP_VECSLICE)
619 # define SMP_VECSLICE 262144
620 #endif
621 #if defined(SMP) && defined(__i386__) && !defined(SMP_VECSCALAR)
622 # define NOSMP_VECSCALAR
623 # define NOSMP_VECFABS
624 #endif
625 #ifdef SMP
626 # define SMP_VECSLICE2 (SMP_VECSLICE/sizeof(T))
627 #endif
628 
629 
630 #if defined(SMP) && !defined(NO_SMP_VECVEC)
631 INST(template <typename T> class BVector friend void job_vv_comp (struct thr_ctrl*);)
/// Thread job: compare two slices of tc->t_size elements.
/// The slice pointers are taken from t_par[0] and t_par[1]; the result of
/// do_vv_comp is accumulated in tc->t_res_l, which is cleared first.
template <typename T>
void job_vv_comp (struct thr_ctrl *tc)
{
  const T* const lhs = (const T*)(tc->t_par[0]);
  const T* const rhs = (const T*)(tc->t_par[1]);

  tc->t_res_l = 0;
  do_vv_comp (tc->t_size, lhs, rhs, tc->t_res_l);
}
640 
641 HOTDECL(template <typename T>
642 int _par_comp (const unsigned long sz, const T* v1, const T* v2))
643 {
644  volatile long res = 0;
645  /* use some heuristic to decide for the num of threads */
646  const unsigned n_thr = threads_avail (sz / SMP_VECSLICE2);
647  update_n_thr(n_thr);
648  //smp_barrier();
649 
650  if (LIKELY(n_thr < 2)) {
651  do_vv_comp<T> (sz, v1, v2, res);
652  } else {
653  PREFETCH_R(v1, 3); PREFETCH_R(v2, 3);
654  const unsigned first = slice_offset(1, n_thr, sz, v1);
655  unsigned long st, en = first;
656  /* Start threads */
657  for (unsigned t = 0; t < n_thr-1; ++t) {
658  st = en; en = slice_offset(t+2, n_thr, sz, v1);
659  thread_start (t, (thr_job_t)job_vv_comp<T>, en - st,
660  v1+st, v2+st, (void*)0);
661  }
662  /* The first slice is handled by the main thread */
663  do_vv_comp<T> (first, v1, v2, res);
664  //sched_yield ();
665  /* Wait for the end */
666  for (unsigned t = 0; t < n_thr-1; ++t) {
667  job_output out;
668  thread_wait (t, &out);
669  res += (long) out.t_res_l;
670  }
671  }
672  return res;
673 }
674 
675 INST(template <typename T> class BVector friend void job_vv_copy (struct thr_ctrl*);)
/// Thread job: copy tc->t_size elements from the slice at t_par[1] to the
/// slice at t_par[0].
template <typename T>
void job_vv_copy (struct thr_ctrl *tc)
{
  T* const dst = (T*)(tc->t_par[0]);
  const T* const src = (const T*)(tc->t_par[1]);
  _tbci_copy (tc->t_size, dst, src);
}
682 
683 HOTDECL(template <typename T>
684 void _par_copy (const unsigned long sz, T* v1, const T* v2))
685 {
686  /* use some heuristic to decide for the num of threads */
687  const unsigned n_thr = threads_avail (sz / SMP_VECSLICE2);
688  update_n_thr(n_thr);
689  //smp_barrier();
690 
691  if (LIKELY(n_thr < 2)) {
692  _tbci_copy<T> (sz, v1, v2);
693  } else {
694  PREFETCH_W(v1, 3); PREFETCH_R(v2, 3);
695  const unsigned first = slice_offset(1, n_thr, sz, v1);
696  unsigned long st, en = first;
697  /* Start threads */
698  for (unsigned t = 0; t < n_thr-1; ++t) {
699  st = en; en = slice_offset(t+2, n_thr, sz, v1);
700  thread_start (t, (thr_job_t)job_vv_copy<T>, en - st,
701  v1+st, v2+st, (void*)0);
702  }
703  /* The first slice is handled by the main thread */
704  _tbci_copy (first, v1, v2);
705  //sched_yield ();
706  /* Wait for the end */
707  for (unsigned t = 0; t < n_thr-1; ++t)
708  thread_wait (t);
709  }
710 }
711 
712 INST(template <typename T> class BVector friend void job_vec_fill (struct thr_ctrl*);)
713 template <typename T>
714 void job_vec_fill (struct thr_ctrl *tc)
715 {
716  _tbci_fill<T> (tc->t_size,
717  (T*)(tc->t_par[0]), *(const T*)(tc->t_par[1]));
718 }
719 
720 
721 HOTDECL(template <typename T>
722 void _par_fill (const unsigned long sz, T* v1, typename tbci_traits<T>::loop_const_refval_type val))
723 {
724  /* use some heuristic to decide for the num of threads */
725  const unsigned n_thr = threads_avail (sz / SMP_VECSLICE2);
726  update_n_thr(n_thr);
727  //smp_barrier();
728 
729  if (LIKELY(n_thr < 2)) {
730  _tbci_fill<T> (sz, v1, val);
731  } else {
732  PREFETCH_W(v1, 3);
733  const unsigned first = slice_offset(1, n_thr, sz, v1);
734  unsigned long st, en = first;
735  /* Start threads */
736  for (unsigned t = 0; t < n_thr-1; ++t) {
737  st = en; en = slice_offset(t+2, n_thr, sz, v1);
738  thread_start (t, (thr_job_t)job_vec_fill<T>, en - st,
739  v1+st, &val, (void*)0);
740  }
741  /* The first slice is handled by the main thread */
742  _tbci_fill<T> (first, v1, val);
743  //sched_yield ();
744  /* Wait for the end */
745  for (unsigned t = 0; t < n_thr-1; ++t)
746  thread_wait (t);
747  }
748 }
749 
750 INST(template <typename T> class BVector friend void job_vec_fill_fn (struct thr_ctrl*);)
751 template <typename T>
752 void job_vec_fill_fn (struct thr_ctrl *tc)
753 {
754  _tbci_fill_fn<T> (tc->t_size,
755  (T*)(tc->t_par[0]),
756  *(vec_fill_fn<T>*)(tc->t_par[1]),
757  tc->t_par[2]);
758 }
759 
760 
761 HOTDECL(template <typename T>
762 void _par_fill_fn (const unsigned long sz, T* v1, vec_fill_fn<T> fn, void *par))
763 {
764  /* use some heuristic to decide for the num of threads */
765  const unsigned n_thr = threads_avail (sz / SMP_VECSLICE2);
766  update_n_thr(n_thr);
767  //smp_barrier();
768 
769  if (LIKELY(n_thr < 2)) {
770  _tbci_fill_fn<T> (sz, v1, fn, par);
771  } else {
772  PREFETCH_W(v1, 3);
773  const unsigned first = slice_offset(1, n_thr, sz, v1);
774  unsigned long st, en = first;
775  /* Start threads */
776  for (unsigned t = 0; t < n_thr-1; ++t) {
777  st = en; en = slice_offset(t+2, n_thr, sz, v1);
778  thread_start (t, (thr_job_t)job_vec_fill_fn<T>, en - st,
779  v1+st, (void*)&fn, par, (void*)0);
780  }
781  /* The first slice is handled by the main thread */
782  _tbci_fill_fn<T> (first, v1, fn, par);
783  //sched_yield ();
784  /* Wait for the end */
785  for (unsigned t = 0; t < n_thr-1; ++t)
786  thread_wait (t);
787  }
788 }
789 
790 #else
791 template <typename T>
792 int _par_comp (const unsigned long sz, const T* v1, const T* v2)
793 {
794  /* volatile */ long res = 0;
795  do_vv_comp<T> (sz, v1, v2, res);
796  return res;
797 }
798 
799 template <typename T>
800 void _par_copy(const unsigned long sz, T* v1, const T* v2)
801 {
802  _tbci_copy<T> (sz, v1, v2);
803 }
804 
805 template <typename T>
806 void _par_fill (const unsigned long sz, T* v1, typename tbci_traits<T>::loop_const_refval_type val)
807 {
808  _tbci_fill<T> (sz, v1, val);
809 }
810 
811 template <typename T>
812 void _par_fill_fn (const unsigned long sz, T* v1, vec_fill_fn<T> fn, void *par)
813 {
814  _tbci_fill_fn<T> (sz, v1, fn, par);
815 }
816 #endif
817 
818 /* TODO: Explicit instantiation! */
819 #if defined(SMP) && defined(HAVE_LIBNUMA)
820 template <typename T>
821 int numa_optimize(const BVector<T>& bv, bool fault_in)
822 {
823  if (!numa_avail && !fault_in)
824  return 0;
825  /* use some heuristic to decide for the num of threads */
826  const unsigned long sz = bv.size();
827  const unsigned n_thr = threads_avail (sz / SMP_VECSLICE2);
828  //update_n_thr(n_thr);
829  //smp_barrier();
830  const T* v1 = &bv.getcref(0);
831 
832  if (LIKELY(n_thr < 2)) {
833  if (fault_in)
834  do_numa_move_pages(main_numa_node, 1,
835  (unsigned long)v1, (unsigned long)(v1+sz));
836  return 0;
837  } else {
838  const unsigned first = slice_offset(1, n_thr, sz, v1);
839  unsigned long st, en = first;
840  /* Start threads */
841  unsigned long res = 0;
842  for (unsigned t = 0; t < n_thr-1; ++t) {
843  st = en; en = slice_offset(t+2, n_thr, sz, v1);
844  thread_start_off (t, (thr_job_t)numa_move_pages_job,
845  threads[t].numa_node, fault_in,
846  v1+st, v1+en, (void*)0);
847  }
848  /* The first slice is handled by the main thread */
849  res = do_numa_move_pages(main_numa_node, fault_in,
850  (unsigned long)v1, (unsigned long)(v1+first));
851  //sched_yield ();
852  /* Wait for the end */
853  for (unsigned t = 0; t < n_thr-1; ++t) {
854  job_output out;
855  thread_wait (t, &out);
856  res += out.t_res_l;
857  }
858  //fprintf(stderr, "NUMA Optimize BVector %p: %li pages moved\n", v1, res);
859  return res;
860  }
861 }
862 #else
863 template <typename T>
864 int numa_optimize(const BVector<T>& bv, bool fault_in)
865 {
866  /* FIXME: We should emulate fault_in here, no? */
867  return 0;
868 }
869 #endif
870 
872 
873 #endif /* TBCI_BVECTOR_H */
#define TBCICOPY(n, o, t, s)
Definition: basics.h:894
iterator end()
Definition: bvector.h:155
void * t_par[6]
Definition: smp.h:173
bool operator<=(const BVector< T > &bv) const
Definition: bvector.h:509
T & operator()(const unsigned long) HOT
Definition: bvector.h:469
#define false
Definition: bool.h:23
#define MIN_ALIGN
Definition: basics.h:421
#define ALIGN(x)
Definition: basics.h:444
void do_mat_vec_mult(const unsigned start, const unsigned end, TVector< T > *res, const Matrix< T > *mat, const Vector< T > *vec)
T aligned_value_type TALIGN(MIN_ALIGN2)
Definition: bvector.h:72
long t_res_l
Definition: smp.h:179
provides basic Vector functionality but arithmetic operators (+=, - , *, /...).
Definition: bvector.h:67
BVector< T > & swap(BVector< T > &v)
Definition: bvector.h:451
int _par_comp(const unsigned long sz, const T *v1, const T *v2)
void destroy()
Definition: bvector.h:268
VecErr()
Definition: bvector.h:34
BVector< T > & fill(const T &) HOT
Definition: bvector.h:378
const T & getcref(const unsigned long idx) const
Definition: bvector.h:129
int numa_avail
Definition: smp.cc:105
STD__ istream & operator>>(STD__ istream &istr, BdMatrix< T > &mat)
Definition: band_matrix.h:2739
bool operator==(const BVector< T > &) const HOT
KG, 2001-06-29: Strange: If we don't inline this, we seem to get better performance in our solver be...
Definition: bvector.h:495
BVector< T > & setsize(const unsigned long size)
Definition: bvector.h:162
#define REGISTER
Definition: basics.h:108
return c
Definition: f_matrix.h:760
BVector< T > concat(const BVector< T > &b1, const BVector< T > &b2)
Definition: bvector.h:604
#define NAMESPACE_TBCI
Definition: basics.h:317
VecErr(const VecErr &ve)
Definition: bvector.h:38
abstract base class (signature) for Vectors without arithmetics
Definition: vector_sig.h:23
void thread_start(const int thr_no, thr_job_t job, const unsigned long sz,...)
Definition: smp.cc:988
unsigned long size() const HOT
Definition: bvector.h:144
T & set(const T &val, unsigned long idx)
Definition: bvector.h:136
exception base class for the TBCI NumLib
Definition: except.h:58
#define MIN_ALIGN2
Definition: basics.h:424
friend class BVector
Definition: bvector.h:200
void job_vv_copy(struct thr_ctrl *tc)
Definition: bvector.h:677
const_iterator begin() const
Definition: bvector.h:157
unsigned long dim
Definition: bvector.h:74
T *const & vecptr() const
Definition: bvector.h:147
friend NOINST BVector< T > &FRIEND_TBCI2__ bvfillm FGD(BVector< T > &, const Matrix< T > &m)
BVector< T > concat(const BVector< T > &) const
Definition: bvector.h:593
iterator begin()
Definition: bvector.h:154
#define BCHK(cond, exc, txt, ind, rtval)
Definition: basics.h:575
bool keep
Definition: bvector.h:75
long t_res_l
Definition: smp.h:148
BVector< T > & init_0()
Definition: bvector.h:99
#define TBCIFILL(n, v, t, s)
Definition: basics.h:910
~BVector()
Definition: bvector.h:275
BVector< T > & bvfillm(BVector< T > &bv, const Matrix< T > &m)
Definition: matrix.h:2248
#define UNLIKELY(expr)
Definition: basics.h:101
T & operator[](const unsigned long i)
Definition: bvector.h:140
Definition: smp.h:168
#define REALLOC(v, os, t, s)
Definition: malloc_cache.h:636
void(* thr_job_t)(struct thr_ctrl *)
Before the double inclusion guard on purpose!
Definition: smp.h:126
#define FRIEND_TBCI2__
Definition: basics.h:335
void job_vv_comp(struct thr_ctrl *tc)
Definition: bvector.h:633
T * iterator
STL-like iterator support.
Definition: bvector.h:153
BVector< T > & operator=(const T &a)
Definition: bvector.h:168
void _par_copy(const unsigned long sz, T *v1, const T *v2)
bool operator>=(const BVector< T > &bv) const
Definition: bvector.h:520
void _par_fill(const unsigned long, T *const, typename tbci_traits< T >::loop_const_refval_type)
#define NEW(t, s)
Definition: malloc_cache.h:633
const_iterator end() const
Definition: bvector.h:158
for(REGISTER T *p1=c.vec,*p2=b.vec;p1< c.endvec;p1++, p2++)*p1
struct thr_struct * threads
Definition: smp.cc:106
unsigned long t_size
Definition: smp.h:171
#define PREFETCH_R(addr, loc)
In case gcc does not yet support __builtin_prefetch(), we have handcoded assembly with gcc for a few ...
Definition: basics.h:748
BVector< T > & revert()
Definition: bvector.h:385
T element_type
Definition: bvector.h:71
bool contains(const T &, unsigned long *=0) const
Definition: bvector.h:580
BVector< T > & push_back(const T &value)
performs poorly
Definition: bvector.h:116
long int Vector< T > & index
Definition: LM_fit.h:69
BVector(const BVector< U > &bv)
Definition: bvector.h:201
void do_mat_vec_transmult(const unsigned start, const unsigned end, TVector< T > *res, const Matrix< T > *mat, const Vector< T > *vec)
BVector< T > & setptr(T *pointer)
Definition: bvector.h:161
BVector< T > & append(const T &)
performs poorly
Definition: bvector.h:412
BVector< T > & remove(const unsigned long)
Definition: bvector.h:435
T & set(const unsigned long idx) HOT
Definition: bvector.h:134
void SWAP(T &a, T &b)
SWAP function Note: We could implement a swap function without temporaries: a -= b b += a a -= b a = ...
Definition: basics.h:813
Definition: bvector.h:49
#define PREFETCH_W(addr, loc)
Definition: basics.h:749
T *const & get_fortran_vector() const
Definition: bvector.h:164
#define INST(x)
Definition: basics.h:238
#define EXPCHK(cond, exc, txt, ind, rtval)
Definition: basics.h:630
#define LAPACK_INLINE
Definition: bvector.h:318
Temporary Base Class (non referable!) (acc.
Definition: bvector.h:50
int i
Definition: LM_fit.h:71
BVector< T > & alias(const BVector< T > &bv)
Definition: bvector.h:171
BVector< T > & resize(const BVector< T > &)
Actually it's a resize and copy (some people would expect the assignment op to do this) ...
Definition: bvector.h:361
BVector< T > & cheapdownsize(const unsigned long)
Definition: bvector.h:308
#define TBCICLEAR(n, t, s)
Definition: basics.h:911
T value_type
Definition: bvector.h:70
#define STD__
Definition: basics.h:338
#define TBCIDELETE(t, v, sz)
Definition: malloc_cache.h:634
#define threads_avail(x)
Definition: smp.h:322
void set_fortran_vector(T *pointer)
Definition: bvector.h:165
Temporary Base Class Idiom: Class TVector is used for temporary variables.
Definition: bvector.h:52
void thread_wait(const int thr_no, struct job_output *out)
Definition: smp.cc:997
bool operator>(const BVector< T > &bv) const
Definition: bvector.h:182
BVector< T > & bubble_sort()
Definition: bvector.h:394
#define INLINE
Definition: basics.h:688
#define NAMESPACE_END
Definition: basics.h:323
BVector< T > & copy(const BVector< T > &bv)
copy does a resize, if necessary
Definition: bvector.h:484
Definition: bvector.h:54
#define SMP_VECSLICE2
Definition: bvector.h:626
static const char * vec_info()
Definition: bvector.h:195
#define HOTDECL(x)
Definition: basics.h:497
const Vector< T > Vector< T > Vector< T > Vector< T > Vector< T > Vector< T > Vector< T > Vector< T > long int res
Definition: LM_fit.h:199
int numa_node
Definition: smp.h:133
T * vec
Definition: bvector.h:73
#define T
Definition: bdmatlib.cc:20
#define TBCICOMP(n, o, t, s)
Definition: basics.h:979
virtual ~VecErr()
Definition: bvector.h:40
#define HOT
Definition: basics.h:495
int main_numa_node
Definition: smp.cc:110
VecErr(const char *t, const long i=0)
Definition: bvector.h:36
void job_vec_fill(struct thr_ctrl *tc)
Definition: bvector.h:714
int numa_optimize(const BdMatrix< T > &bm, bool fault_in)
Definition: band_matrix.h:2915
const T * const_iterator
Definition: bvector.h:156
BVector< T > & clear()
Definition: bvector.h:112
const unsigned TMatrix< T > const Matrix< T > * a
void thread_start_off(const int thr_no, thr_job_t job, const unsigned long off, const unsigned long sz,...)
Definition: smp.cc:979
void do_vv_comp(const unsigned long sz, const T *const v1, const T *const v2, volatile long &_f2)
f2 = number of differences vec, vec
Definition: basics.h:975
const Vector< T > Vector< T > Vector< T > Vector< T > Vector< T > Vector< T > Vector< T > Vector< T > long int int char v
< find minimum of func on grid with resolution res
Definition: LM_fit.h:205
enum _vararg vararg
Definition: basics.h:1276
#define LIKELY(expr)
branch prediction note that we sometimes on purpose mark the unlikely possibility likely and vice ver...
Definition: basics.h:100
exception class
Definition: bvector.h:31
void job_vec_fill_fn(struct thr_ctrl *tc)
Definition: bvector.h:752
void _tbci_copy(const unsigned long sz, T *const res, const T *const v1)
Definition: basics.h:891