TBCI Numerical high perf. C++ Library 2.8.0
bvector.h
Go to the documentation of this file.
1
5//-------------------------------------------------------------
6// Attila Michael Bilgic, 1/97
7// last update 10/04/97 AMB
8// 12/14/97 KG
9// $Id: bvector.h,v 1.36.2.72 2022/11/03 17:28:10 garloff Exp $
10//-------------------------------------------------------------
11
12
13#ifndef TBCI_BVECTOR_H
14#define TBCI_BVECTOR_H
15
16#include "tbci/basics.h"
17#include "tbci/vector_sig.h"
18#include "tbci/tbci_traits.h"
19
20// Avoid -fguiding-decls
21#if !defined(NO_GD) && !defined(AUTO_DECL)
22# include "tbci/bvector_gd.h"
23#endif
24
26
27#ifndef TBCI_DISABLE_EXCEPT
29// except.h is included by basics.h
30
31class VecErr : public NumErr
32{
33 public:
35 : NumErr("Error in Vector library") {}
36 VecErr(const char* t, const long i = 0)
37 : NumErr(t, i) {}
38 VecErr(const VecErr& ve)
39 : NumErr(ve) {}
40 virtual ~VecErr() throw() {}
41};
42#endif
43
44#ifdef PRAGMA_I
45# pragma interface "bvector.h"
46#endif
47
48template <typename T> class TMatrix;
49template <typename T> class Matrix;
50template <typename T> class F_TMatrix;
51template <typename T> class F_Matrix;
52template <typename T> class TVector;
53template <typename T> class TSVector;
54template <typename T> class Vector;
55
56//template <typename T> class CTensor;
57//template <typename T> class Tensor;
58
59
65
66template <typename T>
67class BVector : public BVector_Sig<T>
68{
69 protected:
70 typedef T value_type;
71 typedef T element_type;
72 typedef T aligned_value_type TALIGN(MIN_ALIGN2);
74 unsigned long dim;
75 bool keep; // whether to destroy or not
76
77 public:
78 friend class TMatrix<T>;
79 friend class F_TMatrix<T>;
80 friend class F_Matrix<T>;
81#ifdef HAVE_BCXX_INHER_BREAKS_FRIEND_BUG
82 friend class TVector<T>;
83 friend class Vector<T>;
84 //friend class CTensor<T>;
85#endif
86
87 // constructor, destructor
88 explicit BVector (const unsigned long = 0);
89 BVector (const T&, const unsigned long);
90 BVector (const BVector<T>&) HOT;
91#ifndef NO_POD
92 // variable argument number
93 BVector(const vararg va, ...);
94#endif
95 inline void destroy ();
97
98 // basics
100 { if (UNLIKELY(dim)) TBCICLEAR (vec, T, dim); return *this; }
101 BVector<T>& fill (const T&) HOT;
102 // 2.97 was clever enough to detect this bug!
103 //BVector<T>& fill (const Matrix<T>& m);
105
108 BVector<T>& resize (const unsigned long);
109 BVector<T>& resize (const T&, const unsigned long);
110 BVector<T>& cheapdownsize (const unsigned long);
111 // BVector<T>& free () { return this->resize (0); }
112 BVector<T>& clear () { return this->fill((T)0); };
114 BVector<T>& append (const T&);
116 BVector<T>& push_back (const T& value) { append(value); return *this; }
117 BVector<T>& append (const BVector<T>&);
118 BVector<T>& remove (const unsigned long);
121
123
124 // Member access
125 T& operator () (const unsigned long) HOT;
126 typename tbci_traits<T>::const_refval_type
127 operator () (const unsigned long) const HOT;
128 // internal, no checking
129 const T& getcref (const unsigned long idx) const
130 { return vec[idx]; }
131 typename tbci_traits<T>::const_refval_type
132 get(const unsigned long idx) const HOT
133 { return vec[idx]; }
134 T& set(const unsigned long idx) HOT
135 { return vec[idx]; }
136 T& set(const T& val, unsigned long idx)
137 { return this->set(idx) = val; }
138
139 // valarray compatibility
140 T& operator [] (const unsigned long i) { return this->operator() (i); }
141 typename tbci_traits<T>::const_refval_type
142 operator [] (const unsigned long i) const { return this->operator() (i); }
143
144 inline unsigned long size () const HOT { return dim; }
145
146 // Needed for emul et al.
147 T* const & vecptr () const { return vec; }
148
153 typedef T* iterator;
154 iterator begin () { return vec; }
155 iterator end () { return vec+dim; }
156 typedef const T* const_iterator;
157 const_iterator begin () const { return vec; }
158 const_iterator end () const { return vec+dim; }
159
160 // You'd better know, what you are doing!
161 BVector<T>& setptr (T* pointer) { vec = pointer; return *this; }
162 BVector<T>& setsize (const unsigned long size) { dim = size; return *this; }
163 // access to data pointer for e.g. fortran libs
164 T* const & get_fortran_vector () const { return vecptr (); }
165 void set_fortran_vector (T* pointer) { setptr (pointer); }
166
167 // assignment operators
168 BVector<T>& operator = (const T& a) { return fill (a); }
172 { destroy (); vec = bv.vec; dim = bv.dim; keep = true; return *this; }
173
175
176 // basic comparison
177 bool operator == (const BVector<T>&) const HOT;
178 bool operator != (const BVector<T>& bv) const { return !(*this == bv); }
179 bool operator <= (const BVector<T>& bv) const;
180 bool operator >= (const BVector<T>& bv) const;
181 bool operator < (const BVector<T>& bv) const { return !((*this) >= bv); }
182 bool operator > (const BVector<T>& bv) const { return !((*this) <= bv); }
183
184 // val in vector
185 bool contains (const T&, unsigned long * = 0) const;
186
187 // io-streams
188 friend STD__ ostream& operator << FGD (STD__ ostream&, const BVector<T>&);
189 friend STD__ istream& operator >> FGD (STD__ istream&, BVector<T>&);
190
191 // for Index class
192 //for friend BVector<T> concat FGD (const BVector<T>&, const BVector<T>&);
194
195 /*virtual*/ static const char* vec_info() { return "BVector"; }
196
197#ifndef HAVE_PROMOTION_BUG
198# ifndef HAVE_GCC295_TMPLFRNDCLS_BUG
199 // Promotion (only explicit)
200 template <typename U> friend class BVector;
201 template <typename U> explicit BVector (const BVector<U>& bv)
202 : dim (bv.dim), keep (bv.keep)
203 {
204 if (LIKELY(dim)) vec = NEW (T, dim); else vec = 0;
205 if (UNLIKELY(!vec)) dim = 0;
206 for (unsigned long i = 0; i < dim; i++) vec[i] = bv.vec[i];
207 }
208# else
209 template <typename U> explicit BVector (const BVector<U>& bv)
210 : keep (false)
211 {
212 dim = bv.size(); //keep = bv.keep;
213 if (LIKELY(dim)) vec = NEW (T, dim); else vec = 0;
214 if (UNLIKELY(!vec)) dim = 0;
215 for (unsigned long i = 0; i < dim; i++) vec[i] = bv(i);
216 }
217# endif
218#endif
219
220#ifdef HAVE_BCXX_INHER_BREAKS_FRIEND_BUG
221 /* Speed mat-vec-mul */
222 friend NOINST void FRIEND_TBCI2__ do_mat_vec_mult FGD (const unsigned start, const unsigned end, \
223 TVector<T> * res, const Matrix<T> * mat, const Vector<T> * vec);
224 friend NOINST void FRIEND_TBCI2__ do_mat_vec_transmult FGD (const unsigned start, const unsigned end, \
225 TVector<T> * res, const Matrix<T> * mat, const Vector<T> * vec);
226#endif
227
228};
229
230
231//definitions
232
233
234template <typename T>
235INLINE BVector<T>::BVector (const unsigned long c)
236 : vec ((T*)0), dim (c), keep (false)
237{
238 if (UNLIKELY(c)) {
239 vec = NEW (T, c);
240 if (UNLIKELY(!vec))
241 dim = 0;
242 }
243}
244
245#ifndef NO_POD
246template <typename T>
247INLINE BVector<T>::BVector (const vararg va, ...)
248 : vec ((T*)0), dim (va), keep (false)
249{
250 if (LIKELY(dim))
251 vec = NEW (T, dim);
252 if (UNLIKELY(!vec))
253 dim = 0;
254 va_list vl;
255 va_start (vl, va);
256#if !defined(__clang__) || !defined(CPLX)
257 for (unsigned long i=0; i < dim; ++i)
258 vec[i] = va_arg (vl, T);
259#else
260 throw VecErr("vararg not supported for cplx in clang");
261#warning no vararg support with cplx numbers and clang
262#endif
263 va_end (vl);
264}
265#endif
266
267template <typename T>
269{
270 if (LIKELY(dim))
271 TBCIDELETE(T, vec, dim);
272}
273
274template <typename T>
276{
277 if (UNLIKELY(!keep))
278 destroy ();
279}
280
281template <typename T>
283 : vec ((T*)0), dim(0), keep (false)
284{
285 if (LIKELY(v.dim)) {
286 vec = NEW (T, v.dim);
287 if (LIKELY(vec)) {
288 dim = v.dim;
289 TBCICOPY(vec, v.vec, T, dim);
290 }
291 }
292}
293
294
295template <typename T>
296INLINE BVector<T>::BVector (const T& value, const unsigned long c)
297 : vec ((T*)0), dim (c), keep (false)
298{
299 if (LIKELY(dim))
300 vec = NEW (T, dim);
301 if (LIKELY(vec))
302 TBCIFILL (vec, value, T, dim);
303 else
304 dim = 0;
305}
306
307template <typename T>
308BVector<T>& BVector<T>::cheapdownsize (const unsigned long nd)
309{
310 BCHK (nd > dim, VecErr, cheapdownsize does not upsize, nd, *this);
311 if (UNLIKELY(!nd))
312 return this->resize (nd);
313 dim = nd;
314 return *this;
315}
316
317#ifndef LAPACK_INLINE
318# define LAPACK_INLINE
319#endif
320template <typename T>
322{
323 if (LIKELY(c == dim))
324 return *this;
325 if (UNLIKELY(c == 0)) {
326 if (LIKELY(dim))
327 TBCIDELETE(T, vec, dim);
328 dim = c;
329 return *this;
330 }
331 T* tmp = vec;
332 if (LIKELY(dim == 0))
333 vec = NEW (T, c);
334 else
335 REALLOC(vec,dim,T,c);
336 BCHK (!vec, VecErr, Realloc failed in resize, 0, (vec=tmp, *this));
337 dim = c;
338 return *this;
339}
340
341template <typename T>
342BVector<T>& BVector<T>::resize (const T& value, const unsigned long c)
343{
344 if (UNLIKELY(dim != c)) {
345 if (LIKELY(dim))
346 TBCIDELETE(T, vec, dim);
347 dim = c;
348 if (UNLIKELY(c))
349 vec = NEW (T, c);
350 else
351 vec = (T*)0;
352 }
353 if (UNLIKELY(dim && vec))
354 return this->fill (value);
355 else
356 dim = 0;
357 return *this;
358}
359
360template <typename T>
362{
363 if (UNLIKELY(dim))
364 TBCIDELETE (T, vec, dim);
365 dim = v.dim;
366 if (UNLIKELY(v.dim)) {
367 vec = NEW (T, v.dim);
368 if (LIKELY(vec))
369 TBCICOPY(vec, v.vec, T, dim);
370 else
371 dim = 0;
372 } else
373 vec = (T*)0;
374 return *this;
375}
376
377template <typename T>
379{
380 TBCIFILL(vec, val, T, dim);
381 return *this;
382}
383
384template <typename T>
386{
387 BVector<T> buf(dim);
388 for (REGISTER unsigned long t = 0; t < dim; t++)
389 buf[t] = vec[dim-t-1];
390 return *this = buf;
391}
392
393template <typename T>
395{
396 T temp;
397 if(dim > 1) {
398 for (REGISTER unsigned long i = 0; i < dim-1; i++)
399 for (REGISTER unsigned long j = dim-1; i < j; j--) {
400 if ( vec[j] < vec[j-1] ) {
401 temp = vec[j];
402 vec[j] = vec[j-1];
403 vec[j-1] = temp;
404 }
405 }
406 }
407 return *this;
408}
409
410//append one element to vector
411template <typename T>
413{
414 T* tmp = vec;
415 REALLOC (vec,dim,T,dim+1);
416 BCHK (!vec, VecErr, Realloc failed in append, 0, (vec=tmp, *this));
417 vec[dim++] = val;
418 return *this;
419}
420
421//append one vector to vector
422template <typename T>
424{
425 T* tmp = vec;
426 REALLOC (vec, dim, T, dim+v.dim);
427 BCHK (!vec, VecErr, Realloc failed in append, 0, (vec=tmp, *this));
428 TBCICOPY(vec+dim, v.vec, T, v.dim);
429 dim += v.dim;
430 return *this;
431}
432
433//remove one element
434template <typename T>
435BVector<T>& BVector<T>::remove (const unsigned long i)
436{
437 BCHK (i>=dim, VecErr, Remove idx out of range, i, *this);
438 T* tmp = NEW (T, dim-1);
439 BCHK (!tmp, VecErr, Memory allocation failed in remove, (dim-1)*sizeof(T), *this);
440 if (UNLIKELY(i))
441 TBCICOPY (tmp, vec, T, i);
442 dim -= 1;
443 if (UNLIKELY(dim-i))
444 TBCICOPY (&tmp[i], &vec[i+1], T, (dim-i));
445 TBCIDELETE(T, vec, dim+1);
446 vec = tmp;
447 return *this;
448}
449
450template <typename T>
452{
453 //BCHK(dim != v.dim, VecErr, swap with different size vecs, v.dim, *this);
454 SWAP (v.dim, dim);
455 SWAP (v.vec, vec);
456 SWAP (v.keep, keep);
457 return *this;
458}
459
460template <typename T>
461inline typename tbci_traits<T>::const_refval_type
462 BVector<T>::operator () (const unsigned long i) const
463{
464 EXPCHK(i>=dim, VecErr, Illegal index, i, vec[0]);
465 return vec[i];
466}
467
468template <typename T>
469inline T& BVector<T>::operator () (const unsigned long i)
470{
471 EXPCHK(i>=dim, VecErr, Illegal index, i, vec[0]);
472 return vec[i];
473}
474
475template <typename T>
477{
478 BCHK(dim != a.dim, VecErr, Assignment from wrong dim vector, a.dim, *this);
479 TBCICOPY (vec, a.vec, T, dim);
480 return *this;
481}
482
483template <typename T>
485{
486 //BCHK(dim != a.dim, VecErr, Assignment from wrong dim vector, a.dim, *this);
487 return this->resize (a);
488 //TBCICOPY(vec, a.vec, T, dim);
489 //return *this;
490}
491
494template <typename T>
495/*inline*/ bool BVector<T>::operator == (const BVector<T>& bv) const
496{
497 if (LIKELY(dim != bv.dim))
498 return false;
499 if (LIKELY(vec == bv.vec || dim == 0))
500 return true;
501 if (TBCICOMP (vec, bv.vec, T, dim))
502 return false;
503 else
504 return true;
505}
506
507
508template<typename T>
510{
511 BCHK(dim != bv.dim, VecErr, comparison between wrong dim vectors, bv.dim, false);
512 for (unsigned long i=0; i<dim; i++)
513 if (UNLIKELY(vec[i] > bv.vec[i]))
514 return false;
515 return true;
516}
517
518
519template<typename T>
521{
522 BCHK(dim != bv.dim, VecErr, comparison between wrong dim vectors, bv.dim, false);
523 for (unsigned long i=0; i<dim; i++)
524 if( UNLIKELY(vec[i] < bv.vec[i]) )
525 return false;
526 return true;
527}
528
529
530template <typename T>
531inline STD__ ostream& operator << (STD__ ostream& os, const BVector<T>& v)
532{
533#ifdef VEC_COL
534 for (unsigned long i = 0; i < v.dim; i++)
535 os << v.vec[i] << "\n";
536#else
537 for (unsigned long i = 0; i < v.dim; i++)
538 os << v.vec[i] << " ";
539#endif
540 return os.flush();
541}
542
543#ifdef PTR
544template <typename T>
545STD__ istream& operator >> (STD__ istream& in, BVector<T>& v)
546{
547 STD__ cerr << "BVector (" << __FILE__ << ":" << __LINE__
548 <<") Input on pointers not possible!" << STD__ endl;
549 abort();
550 return in;
551}
552#else
553template <typename T>
554STD__ istream& operator >> (STD__ istream& in, BVector<T>& v)
555{
556 T r ALIGN(MIN_ALIGN) = 0;
557 char s = ',';
558
559/*
560 if( !( in >> s ) )
561 return in;
562 if (s != '(')
563 in.putback(s);
564*/
565 if( !( in >> r ) )
566 return in;
567 v.vec[0] = r;
568 for (unsigned long i = 1; i < v.dim; i++) {
569 in >> s;
570 if (s != ',') in.putback(s);
571 if( !( in >> r ) )
572 return in;
573 v.vec[i] = r;
574 }
575 return in;
576}
577#endif
578
579template <typename T>
580inline bool BVector<T>::contains (const T& val, unsigned long *ind) const
581{
582 for (REGISTER unsigned long i = 0; i < dim; ++i)
583 if (UNLIKELY(val == vec[i])) {
584 if (ind)
585 *ind = i;
586 return true;
587 }
588 return false;
589}
590
591
592template <typename T>
594{
595 BVector<T> c (dim + b2.dim);
596 TBCICOPY(c.vec, vec, T, dim);
597 TBCICOPY(c.vec+dim, b2.vec, T, dim);
598 return c;
599}
600
601
602INST(template <typename T> class BVector friend BVector<T> concat (const BVector<T>&, const BVector<T>&);)
603template <typename T>
604inline BVector<T> concat (const BVector<T>& b1, const BVector<T>& b2)
605{ return (b1.concat (b2)); }
606
607
618#if defined(SMP) && !defined(SMP_VECSLICE)
619# define SMP_VECSLICE 262144
620#endif
621#if defined(SMP) && defined(__i386__) && !defined(SMP_VECSCALAR)
622# define NOSMP_VECSCALAR
623# define NOSMP_VECFABS
624#endif
625#ifdef SMP
626# define SMP_VECSLICE2 (SMP_VECSLICE/sizeof(T))
627#endif
628
629
630#if defined(SMP) && !defined(NO_SMP_VECVEC)
631INST(template <typename T> class BVector friend void job_vv_comp (struct thr_ctrl*);)
/**
 * Thread job: compare the two slices passed in t_par[0] and t_par[1]
 * (t_size elements) with do_vv_comp; the result goes to tc->t_res_l.
 */
template <typename T>
void job_vv_comp (struct thr_ctrl *tc)
{
  const T* const lhs = (const T*)(tc->t_par[0]);
  const T* const rhs = (const T*)(tc->t_par[1]);
  tc->t_res_l = 0;
  do_vv_comp (tc->t_size, lhs, rhs, tc->t_res_l);
}
640
641HOTDECL(template <typename T>
642int _par_comp (const unsigned long sz, const T* v1, const T* v2))
643{
644 volatile long res = 0;
645 /* use some heuristic to decide for the num of threads */
646 const unsigned n_thr = threads_avail (sz / SMP_VECSLICE2);
647 update_n_thr(n_thr);
648 //smp_barrier();
649
650 if (LIKELY(n_thr < 2)) {
651 do_vv_comp<T> (sz, v1, v2, res);
652 } else {
653 PREFETCH_R(v1, 3); PREFETCH_R(v2, 3);
654 const unsigned first = slice_offset(1, n_thr, sz, v1);
655 unsigned long st, en = first;
656 /* Start threads */
657 for (unsigned t = 0; t < n_thr-1; ++t) {
658 st = en; en = slice_offset(t+2, n_thr, sz, v1);
660 v1+st, v2+st, (void*)0);
661 }
662 /* The first slice is handled by the main thread */
663 do_vv_comp<T> (first, v1, v2, res);
664 //sched_yield ();
665 /* Wait for the end */
666 for (unsigned t = 0; t < n_thr-1; ++t) {
667 job_output out;
668 thread_wait (t, &out);
669 res += (long) out.t_res_l;
670 }
671 }
672 return res;
673}
674
675INST(template <typename T> class BVector friend void job_vv_copy (struct thr_ctrl*);)
/** Thread job: copy t_size elements from t_par[1] to t_par[0] with _tbci_copy. */
template <typename T>
void job_vv_copy (struct thr_ctrl *tc)
{
  T* const dest = (T*)(tc->t_par[0]);
  const T* const src = (const T*)(tc->t_par[1]);
  _tbci_copy (tc->t_size, dest, src);
}
682
683HOTDECL(template <typename T>
684void _par_copy (const unsigned long sz, T* v1, const T* v2))
685{
686 /* use some heuristic to decide for the num of threads */
687 const unsigned n_thr = threads_avail (sz / SMP_VECSLICE2);
688 update_n_thr(n_thr);
689 //smp_barrier();
690
691 if (LIKELY(n_thr < 2)) {
692 _tbci_copy<T> (sz, v1, v2);
693 } else {
694 PREFETCH_W(v1, 3); PREFETCH_R(v2, 3);
695 const unsigned first = slice_offset(1, n_thr, sz, v1);
696 unsigned long st, en = first;
697 /* Start threads */
698 for (unsigned t = 0; t < n_thr-1; ++t) {
699 st = en; en = slice_offset(t+2, n_thr, sz, v1);
701 v1+st, v2+st, (void*)0);
702 }
703 /* The first slice is handled by the main thread */
704 _tbci_copy (first, v1, v2);
705 //sched_yield ();
706 /* Wait for the end */
707 for (unsigned t = 0; t < n_thr-1; ++t)
708 thread_wait (t);
709 }
710}
711
712INST(template <typename T> class BVector friend void job_vec_fill (struct thr_ctrl*);)
713template <typename T>
714void job_vec_fill (struct thr_ctrl *tc)
715{
716 _tbci_fill<T> (tc->t_size,
717 (T*)(tc->t_par[0]), *(const T*)(tc->t_par[1]));
718}
719
720
721HOTDECL(template <typename T>
722void _par_fill (const unsigned long sz, T* v1, typename tbci_traits<T>::loop_const_refval_type val))
723{
724 /* use some heuristic to decide for the num of threads */
725 const unsigned n_thr = threads_avail (sz / SMP_VECSLICE2);
726 update_n_thr(n_thr);
727 //smp_barrier();
728
729 if (LIKELY(n_thr < 2)) {
730 _tbci_fill<T> (sz, v1, val);
731 } else {
732 PREFETCH_W(v1, 3);
733 const unsigned first = slice_offset(1, n_thr, sz, v1);
734 unsigned long st, en = first;
735 /* Start threads */
736 for (unsigned t = 0; t < n_thr-1; ++t) {
737 st = en; en = slice_offset(t+2, n_thr, sz, v1);
739 v1+st, &val, (void*)0);
740 }
741 /* The first slice is handled by the main thread */
742 _tbci_fill<T> (first, v1, val);
743 //sched_yield ();
744 /* Wait for the end */
745 for (unsigned t = 0; t < n_thr-1; ++t)
746 thread_wait (t);
747 }
748}
749
750INST(template <typename T> class BVector friend void job_vec_fill_fn (struct thr_ctrl*);)
751template <typename T>
752void job_vec_fill_fn (struct thr_ctrl *tc)
753{
754 _tbci_fill_fn<T> (tc->t_size,
755 (T*)(tc->t_par[0]),
756 *(vec_fill_fn<T>*)(tc->t_par[1]),
757 tc->t_par[2]);
758}
759
760
761HOTDECL(template <typename T>
762void _par_fill_fn (const unsigned long sz, T* v1, vec_fill_fn<T> fn, void *par))
763{
764 /* use some heuristic to decide for the num of threads */
765 const unsigned n_thr = threads_avail (sz / SMP_VECSLICE2);
766 update_n_thr(n_thr);
767 //smp_barrier();
768
769 if (LIKELY(n_thr < 2)) {
770 _tbci_fill_fn<T> (sz, v1, fn, par);
771 } else {
772 PREFETCH_W(v1, 3);
773 const unsigned first = slice_offset(1, n_thr, sz, v1);
774 unsigned long st, en = first;
775 /* Start threads */
776 for (unsigned t = 0; t < n_thr-1; ++t) {
777 st = en; en = slice_offset(t+2, n_thr, sz, v1);
779 v1+st, (void*)&fn, par, (void*)0);
780 }
781 /* The first slice is handled by the main thread */
782 _tbci_fill_fn<T> (first, v1, fn, par);
783 //sched_yield ();
784 /* Wait for the end */
785 for (unsigned t = 0; t < n_thr-1; ++t)
786 thread_wait (t);
787 }
788}
789
790#else
791template <typename T>
792int _par_comp (const unsigned long sz, const T* v1, const T* v2)
793{
794 /* volatile */ long res = 0;
795 do_vv_comp<T> (sz, v1, v2, res);
796 return res;
797}
798
799template <typename T>
800void _par_copy(const unsigned long sz, T* v1, const T* v2)
801{
802 _tbci_copy<T> (sz, v1, v2);
803}
804
805template <typename T>
806void _par_fill (const unsigned long sz, T* v1, typename tbci_traits<T>::loop_const_refval_type val)
807{
808 _tbci_fill<T> (sz, v1, val);
809}
810
811template <typename T>
812void _par_fill_fn (const unsigned long sz, T* v1, vec_fill_fn<T> fn, void *par)
813{
814 _tbci_fill_fn<T> (sz, v1, fn, par);
815}
816#endif
817
/* TODO: Explicit instantiation! */
819#if defined(SMP) && defined(HAVE_LIBNUMA)
820template <typename T>
821int numa_optimize(const BVector<T>& bv, bool fault_in)
822{
823 if (!numa_avail && !fault_in)
824 return 0;
825 /* use some heuristic to decide for the num of threads */
826 const unsigned long sz = bv.size();
827 const unsigned n_thr = threads_avail (sz / SMP_VECSLICE2);
828 //update_n_thr(n_thr);
829 //smp_barrier();
830 const T* v1 = &bv.getcref(0);
831
832 if (LIKELY(n_thr < 2)) {
833 if (fault_in)
834 do_numa_move_pages(main_numa_node, 1,
835 (unsigned long)v1, (unsigned long)(v1+sz));
836 return 0;
837 } else {
838 const unsigned first = slice_offset(1, n_thr, sz, v1);
839 unsigned long st, en = first;
840 /* Start threads */
841 unsigned long res = 0;
842 for (unsigned t = 0; t < n_thr-1; ++t) {
843 st = en; en = slice_offset(t+2, n_thr, sz, v1);
844 thread_start_off (t, (thr_job_t)numa_move_pages_job,
845 threads[t].numa_node, fault_in,
846 v1+st, v1+en, (void*)0);
847 }
848 /* The first slice is handled by the main thread */
849 res = do_numa_move_pages(main_numa_node, fault_in,
850 (unsigned long)v1, (unsigned long)(v1+first));
851 //sched_yield ();
852 /* Wait for the end */
853 for (unsigned t = 0; t < n_thr-1; ++t) {
854 job_output out;
855 thread_wait (t, &out);
856 res += out.t_res_l;
857 }
858 //fprintf(stderr, "NUMA Optimize BVector %p: %li pages moved\n", v1, res);
859 return res;
860 }
861}
862#else
863template <typename T>
864int numa_optimize(const BVector<T>& bv, bool fault_in)
865{
866 /* FIXME: We should emulate fault_in here, no? */
867 return 0;
868}
869#endif
870
872
873#endif /* TBCI_BVECTOR_H */
long int Vector< T > & index
Definition LM_fit.h:69
int i
Definition LM_fit.h:71
#define STD__
Definition basics.h:338
#define HOT
Definition basics.h:495
#define BCHK(cond, exc, txt, ind, rtval)
Definition basics.h:575
#define LIKELY(expr)
branch prediction note that we sometimes on purpose mark the unlikely possibility likely and vice ver...
Definition basics.h:100
#define INLINE
Definition basics.h:688
#define MIN_ALIGN2
Definition basics.h:424
#define FRIEND_TBCI2__
Definition basics.h:335
#define NAMESPACE_END
Definition basics.h:323
#define TBCICOMP(n, o, t, s)
Definition basics.h:981
#define HOTDECL(x)
Definition basics.h:497
#define TBCICLEAR(n, t, s)
Definition basics.h:913
#define INST(x)
Definition basics.h:238
#define EXPCHK(cond, exc, txt, ind, rtval)
Definition basics.h:630
#define PREFETCH_R(addr, loc)
In case gcc does not yet support __builtin_prefetch(), we have handcoded assembly with gcc for a few ...
Definition basics.h:748
#define NAMESPACE_TBCI
Definition basics.h:317
#define PREFETCH_W(addr, loc)
Definition basics.h:749
#define UNLIKELY(expr)
Definition basics.h:101
#define REGISTER
Definition basics.h:108
#define NOINST
Definition basics.h:244
#define TBCIFILL(n, v, t, s)
Definition basics.h:912
#define ALIGN(x)
Definition basics.h:444
#define TBCICOPY(n, o, t, s)
Definition basics.h:895
#define MIN_ALIGN
Definition basics.h:421
#define FGD
Definition basics.h:144
#define T
Definition bdmatlib.cc:20
#define false
Definition bool.h:23
int numa_optimize(const BVector< T > &bv, bool fault_in)
Definition bvector.h:864
#define SMP_VECSLICE2
Definition bvector.h:626
void job_vv_comp(struct thr_ctrl *tc)
Definition bvector.h:633
void job_vec_fill(struct thr_ctrl *tc)
Definition bvector.h:714
STD__ ostream & operator<<(STD__ ostream &os, const BVector< T > &v)
Definition bvector.h:531
#define LAPACK_INLINE
Definition bvector.h:318
BVector< T > concat(const BVector< T > &b1, const BVector< T > &b2)
Definition bvector.h:604
void job_vv_copy(struct thr_ctrl *tc)
Definition bvector.h:677
void job_vec_fill_fn(struct thr_ctrl *tc)
Definition bvector.h:752
STD__ istream & operator>>(STD__ istream &in, BVector< T > &v)
Definition bvector.h:554
provides basic Vector functionality but arithmetic operators (+=, - , *, /...).
Definition bvector.h:68
bool operator>(const BVector< T > &bv) const
Definition bvector.h:182
T *const & vecptr() const
Definition bvector.h:147
const_iterator begin() const
Definition bvector.h:157
unsigned long size() const HOT
Definition bvector.h:144
unsigned long dim
Definition bvector.h:74
bool keep
Definition bvector.h:75
bool operator>=(const BVector< T > &bv) const
Definition bvector.h:520
T *const & get_fortran_vector() const
Definition bvector.h:164
T value_type
Definition bvector.h:70
iterator end()
Definition bvector.h:155
BVector< T > & setsize(const unsigned long size)
Definition bvector.h:162
bool operator!=(const BVector< T > &bv) const
Definition bvector.h:178
BVector< T > & operator=(const T &a)
Definition bvector.h:168
T element_type
Definition bvector.h:71
static const char * vec_info()
Definition bvector.h:195
bool contains(const T &, unsigned long *=0) const
Definition bvector.h:580
friend class BVector
Definition bvector.h:200
BVector< T > & fill(const T &) HOT
Definition bvector.h:378
void destroy()
Definition bvector.h:268
friend NOINST BVector< T > &FRIEND_TBCI2__ bvfillm FGD(BVector< T > &, const Matrix< T > &m)
T * iterator
STL-like iterator support.
Definition bvector.h:153
BVector< T > & bubble_sort()
Definition bvector.h:394
BVector< T > & resize(const BVector< T > &)
Actually it's a resize and copy (some people would expect the assignment op to do this).
Definition bvector.h:361
T * vec
Definition bvector.h:73
BVector< T > & remove(const unsigned long)
Definition bvector.h:435
BVector< T > & init_0()
Definition bvector.h:99
BVector(const BVector< U > &bv)
Definition bvector.h:201
BVector< T > & setptr(T *pointer)
Definition bvector.h:161
bool operator==(const BVector< T > &) const HOT
KG, 2001-06-29: Strange: If we don't inline this, we seems to get better performance in our solver be...
Definition bvector.h:495
T aligned_value_type TALIGN(MIN_ALIGN2)
Definition bvector.h:72
BVector< T > & cheapdownsize(const unsigned long)
Definition bvector.h:308
bool operator<(const BVector< T > &bv) const
Definition bvector.h:181
const_iterator end() const
Definition bvector.h:158
BVector< T > & revert()
Definition bvector.h:385
const T & getcref(const unsigned long idx) const
Definition bvector.h:129
BVector< T > & swap(BVector< T > &v)
Definition bvector.h:451
tbci_traits< T >::const_refval_type get(const unsigned long idx) const HOT
Definition bvector.h:132
T & operator[](const unsigned long i)
Definition bvector.h:140
T & operator()(const unsigned long) HOT
Definition bvector.h:469
~BVector()
Definition bvector.h:275
BVector< T > concat(const BVector< T > &) const
Definition bvector.h:593
BVector< T > & push_back(const T &value)
performs poorly
Definition bvector.h:116
bool operator<=(const BVector< T > &bv) const
Definition bvector.h:509
T & set(const unsigned long idx) HOT
Definition bvector.h:134
T & set(const T &val, unsigned long idx)
Definition bvector.h:136
BVector< T > & alias(const BVector< T > &bv)
Definition bvector.h:171
iterator begin()
Definition bvector.h:154
BVector< T > & append(const T &)
performs poorly
Definition bvector.h:412
const T * const_iterator
Definition bvector.h:156
void set_fortran_vector(T *pointer)
Definition bvector.h:165
BVector< T > & copy(const BVector< T > &bv)
copy does a resize, if necessary
Definition bvector.h:484
BVector< T > & clear()
Definition bvector.h:112
Temporary Base Class (non referable!) (acc.
Definition f_matrix.h:71
NumErr()
Definition except.h:65
Temporary Base Class Idiom: Class TVector is used for temporary variables.
Definition vector.h:73
exception class
Definition bvector.h:32
VecErr()
Definition bvector.h:34
virtual ~VecErr()
Definition bvector.h:40
VecErr(const char *t, const long i=0)
Definition bvector.h:36
VecErr(const VecErr &ve)
Definition bvector.h:38
return c
Definition f_matrix.h:760
#define TBCIDELETE(t, v, sz)
#define REALLOC(v, os, t, s)
#define NEW(t, s)
void do_mat_vec_mult(const unsigned start, const unsigned end, TVector< T > *res, const Matrix< T > *mat, const Vector< T > *vec)
void do_mat_vec_transmult(const unsigned start, const unsigned end, TVector< T > *res, const Matrix< T > *mat, const Vector< T > *vec)
BVector< T > & bvfillm(BVector< T > &bv, const Matrix< T > &m)
Definition matrix.h:2248
const unsigned TMatrix< T > const Matrix< T > * a
const unsigned TMatrix< T > * res
int _par_comp(const unsigned long sz, const T *v1, const T *v2)
void _par_fill(const unsigned long, T *const, typename tbci_traits< T >::loop_const_refval_type)
void _par_copy(const unsigned long sz, T *v1, const T *v2)
void thread_start_off(const int thr_no, thr_job_t job, const unsigned long off, const unsigned long sz,...)
Definition smp.cc:979
void thread_wait(const int thr_no, struct job_output *out)
Definition smp.cc:997
int main_numa_node
Definition smp.cc:110
void thread_start(const int thr_no, thr_job_t job, const unsigned long sz,...)
Definition smp.cc:988
struct thr_struct * threads
Definition smp.cc:106
int numa_avail
Definition smp.cc:105
void(* thr_job_t)(struct thr_ctrl *)
Before the double inclusion guard on purpose!
Definition smp.h:126
#define threads_avail(x)
Definition smp.h:322
int numa_node
Definition smp.h:5
long t_res_l
Definition smp.h:148
long t_res_l
Definition smp.h:179
unsigned long t_size
Definition smp.h:171
void * t_par[6]
Definition smp.h:173