TBCI Numerical high perf. C++ Library 2.8.0
basics.h
Go to the documentation of this file.
1
31
32#ifndef TBCI_BASICS_H
33#define TBCI_BASICS_H
34
35#include "tbci/tbci_version.h"
36
37#ifdef HAVE_TBCICONFIG_H
38# include "tbci/tbciconfig.h"
39#else
40# include "tbci/config_manual.h"
41#endif
42
// We pass __GNUC_SUBVER__ on the command line just in case the compiler
// does not predefine __GNUC_PATCHLEVEL__ itself; use it as the fallback.
#if !defined(__GNUC_PATCHLEVEL__) && defined(__GNUC_SUBVER__)
# define __GNUC_PATCHLEVEL__ __GNUC_SUBVER__
#endif
47
48/* Win vs. Unix
49 * - HAVE_WIN_32: In case somebody wants to know
50 * - HAVE_UNIX: For the symmetry
51 */
52#undef HAVE_WIN_32
53#undef HAVE_UNIX
54#ifdef unix
55# define HAVE_UNIX 1
56#endif
57#ifdef _MSC_VER
58# define HAVE_WIN_32 1
59// Switch off annoying stuff
60# pragma warning(disable: 4003)
61#endif
62
63// SMP?
64#if defined(USE_SMP) && !defined(SMP)
65# define SMP
66#endif
67
68// Use namespaces?
69#if !defined(NO_NS) && !defined(HAVE_BUGGY_NAMESPACE)
70# define USE_NS
71# undef NO_NS
72#else
73# undef USE_NS
74#endif
75
76// Signatures
77#ifdef HAVE_SIGNATURE
78# define SIGNATURE signature
79#else
80# define SIGNATURE class
81#endif
82
83// restrict
84#if defined(HAVE_RESTRICT) && !defined(NO_RESTRICT)
85# define RESTRICT restrict
86#elif defined (HAVE___RESTRICT__) && !defined(NO_RESTRICT)
87# define RESTRICT __restrict__
88#else
89# define RESTRICT
90#endif
91
96#if defined(HAVE_BUILTIN_EXPECT) && !defined(NO_EXPECT)
97# define LIKELY(expr) __builtin_expect((expr) != 0, 1)
98# define UNLIKELY(expr) __builtin_expect((expr) != 0, 0)
99#else
100# define LIKELY(expr) (expr)
101# define UNLIKELY(expr) (expr)
102#endif
103
104// Avoid using REGISTER keyword for C++-17 and newer
105#if defined(__cplusplus) && __cplusplus >= 201703L
106# define REGISTER
107#else
108# define REGISTER register
109#endif
110
111// http://gcc.gnu.org/wiki/Visibility
112#ifdef _MSC_VER
113# ifdef BUILDING_DLL
114# define TBCI_DLLEXPORT __declspec(dllexport)
115# else
116# define TBCI_DLLEXPORT __declspec(dllimport)
117# endif
118# define TBCI_DLLLOCAL
119#else
120# ifdef HAVE_VISIBILITY_ATTR
121# define TBCI_DLLEXPORT __attribute__ ((visibility("default")))
122# define TBCI_DLLLOCAL __attribute__ ((visibility("hidden")))
123# else
124# define TBCI_DLLEXPORT
125# define TBCI_DLLLOCAL
126# endif
127#endif
128
129// pragma interface/implementation stuff
130#if defined(HAVE_PRAGMA_IFACE_IMPL) && !defined(NO_PRAGMA_I)
131# define PRAGMA_I
132#endif
133
134// guiding decl stuff; explicit template params
135#ifdef HAVE_NEED_FOR_EXPL_TMPLPARM
136# define FGD <>
137# define FGDT <T>
138# define FGDU <U>
139# define FGDTD <T,dims>
140# define FGDDT <dims,T>
141# define FGDCT <cplx<T> >
142# define FGDR <rank>
143#else
144# define FGD
145# define FGDT
146# define FGDU
147# define FGDTD
148# define FGDDT
149# define FGDCT
150# define FGDR
151#endif
152
153// BorlandC++ 0x570 compat with newer glibc headers
154// This is a problem with Borland's cdefs.h; it replaces cdefs.h
155// from glibc; glibc-2.3 has and needs the defs from below
156#if defined(__BORLANDC__) && defined(__linux__)
157# define __BEGIN_NAMESPACE_STD
158# define __END_NAMESPACE_STD
159# define __USING_NAMESPACE_STD(name)
160# define __BEGIN_NAMESPACE_C99
161# define __END_NAMESPACE_C99
162# define __USING_NAMESPACE_C99(name)
163#endif
164
165// Include headers
166# include <iostream>
167# include <fstream>
168# include <iomanip>
169# include <string>
170#ifndef HAVE_NO_NEW_HEADERS_BUG
171# include <cstring>
172# include <cstdarg>
173# include <cstdlib>
174# include <cmath>
175//# include <sstream>
176#else
177//# include <iostream.h>
178//# include <fstream.h>
179//# include <ios.h>
180//# include <iomanip.h>
181# include <string.h>
182# include <stdarg.h>
183# include <stdlib.h>
184# include <math.h>
185// no way to include C++ string within this scheme ...
186#endif
187
188#ifdef HAVE_UNISTD_H
189# include <unistd.h>
190#endif
191
192#if defined(C_MEMALLOC) && defined(HAVE_MEMALIGN)
193# include <malloc.h>
194#endif
195
196// IRIX defines MIN and MAX there
197#ifdef HAVE_MINMAX_IN_SYS_PARAM_H
198# include <sys/param.h>
199#endif
200
202#if !defined(HAVE_SSTREAM) || defined(HAVE_BUGGY_SSTREAM)
203//# include <strstream>
204# define ISTRINGSTREAM istrstream
205# define OSTRINGSTREAM ostrstream
206# define STRINGSTREAM strstream
207#else
208//# include <sstream>
209# define ISTRINGSTREAM istringstream
210# define OSTRINGSTREAM ostringstream
211# define STRINGSTREAM stringstream
212#endif
213
214
215// long long and long double types
216#ifdef HAVE_LONG_DOUBLE
217# define LONG_DOUBLE long double
218#else
219# define LONG_DOUBLE double
220#endif
221#ifdef HAVE_LONG_LONG
222# define LONG_LONG long long
223#else
224# define LONG_LONG long
225#endif
226
227// Auto instantiate using the auto_decl program
228#ifdef AUTO_DECL
229# define INST(x) _instantiate x
230# define INST2(x,y) _instantiate x,y
231# define INST3(x,y,z) _instantiate x,y,z
232# define INST4(x,y,z,a) _instantiate x,y,z,a
233# define INST5(x,y,z,a,b) _instantiate x,y,z,a,b
234# define INST6(x,y,z,a,b,c) _instantiate x,y,z,a,b,c
235# define NOINST _noinstantiate
236# define INSTCTL(x) _instfile x
237#else
238# define INST(x)
239# define INST2(x,y)
240# define INST3(x,y,z)
241# define INST4(x,y,z,a)
242# define INST5(x,y,z,a,b)
243# define INST6(x,y,z,a,b,c)
244# define NOINST
245# define INSTCTL(x)
246#endif
247
248// NULL
249#ifndef NULL
250# define NULL (0)
251#endif
252
253// Remark: The math functions should be put into std:: namespace
254// but neither egcs-1.1.x nor MSVC do this ...
255// egcs-1.1.x at least can access them via std:: though.
256
/*
 * Namespace helper macros.  Three configurations are supported:
 *  - NO_NS:       no namespaces at all; every helper expands to nothing
 *                 (only GLBL__/GLBL2__ expand to the global scope operator).
 *  - NO_NS_TBCI:  namespace std is used, but the TBCI, Grid and
 *                 Finite_Difference helpers expand to nothing.
 *  - default:     all namespaces are used.
 * Every branch must define the same set of names so that client code can
 * use them unconditionally.
 */
#if defined(NO_NS)
# define NAMESPACE_TBCI		/* namespace TBCI { */
# define NAMESPACE_STD		/* namespace std { */
# define NAMESPACE_CSTD		/* namespace std { */
# define NAMESPACE_GRID		/* namespace Grid { */
# define NAMESPACE_FD		/* namespace Finite_Difference { */
# define NAMESPACE_CPLX		/* namespace std { */
# define NAMESPACE_END		/* } namespace */
# define NAMESPACE_STD_END	/* } namespace std */
# define NAMESPACE_CSTD_END	/* } namespace std */
# define NAMESPACE_CPLX_END	/* } namespace std */
# define USING_TBCI		/* using namespace TBCI */
# define USING_GRID		/* using namespace Grid */
# define USING_FD		/* using namespace FD */
# define USING_END		/* using namespace std */
# define USING_STD		/* using namespace std */
# define USIND_STD		/* historical misspelling of USING_STD, kept for compatibility */
# define FRIEND_TBCI__		/* */
# define FRIEND_TBCI2__		/* */
# define TBCI__			/*TBCI::*/
# define __TBCI__		/* ::TBCI:: */
# define GRID__			/*Grid::*/
# define FD__			/*FD::*/
# define STD__			/*std::*/
# define CSTD__			/*std::*/
# define MATH__			/*std::*/
# define CPLX__			/*std::*/
# define GLBL__ ::		/* Look outside scope of class; */
# define GLBL2__ ::		/* Look outside scope of class; */
#elif defined(NO_NS_TBCI)
# define NAMESPACE_TBCI		/* namespace TBCI { */
# define NAMESPACE_STD		namespace std {
# define NAMESPACE_CSTD		namespace std {
# define NAMESPACE_CPLX		namespace std {
# define NAMESPACE_GRID		/* namespace Grid { */
# define NAMESPACE_FD		/* namespace Finite_Difference { */
# define NAMESPACE_END		/* } */; /* namespace */
# define NAMESPACE_STD_END	} /* namespace std */
# define NAMESPACE_CSTD_END	} /* namespace std */
# define NAMESPACE_CPLX_END	} /* namespace std */
# define USING_TBCI		/* using namespace TBCI; */
# define USING_GRID		/* using namespace Grid; */
# define USING_FD		/* using namespace Finite_Difference; */
# define USING_END		using namespace std; /* Don't use! */
# define USING_STD		using namespace std;
# define TBCI__			/* TBCI::*/
# define __TBCI__		/*::TBCI::*/
# define FRIEND_TBCI__		/* */
# define FRIEND_TBCI2__		/* */
# define FRIEND_TCBI2__		/* historical misspelling of FRIEND_TBCI2__, kept for compatibility */
# define GRID__			/*::Grid::*/
# define FD__			/*::FD::*/
# define STD__			std::
# define MATH__			std::
# define CSTD__			std::
# define CPLX__			std::
# define GLBL__			/* not needed if ::std:: is used */
# define GLBL2__ ::		/* needed as TBCI__ is disabled */
#else
# define NAMESPACE_TBCI		namespace TBCI {
# define NAMESPACE_STD		namespace std {
# define NAMESPACE_CSTD		namespace std {
# define NAMESPACE_GRID		namespace Grid {
# define NAMESPACE_FD		namespace Finite_Difference {
# define NAMESPACE_CPLX		namespace std {
# define NAMESPACE_END		} /* namespace */
# define NAMESPACE_STD_END	} /* namespace std */
# define NAMESPACE_CSTD_END	} /* namespace std */
# define NAMESPACE_CPLX_END	} /* namespace std */
# define USING_TBCI		using namespace TBCI;
# define USING_GRID		using namespace Grid;
# define USING_FD		using namespace Finite_Difference;
# define USING_END		using namespace std; /* Don't use! */
# define USING_STD		using namespace std;
# define TBCI__			TBCI::
# define __TBCI__		::TBCI::
# define FRIEND_TBCI__		/* */
# define FRIEND_TBCI2__		/* */
# define GRID__			Grid::
/* NOTE(review): NAMESPACE_FD opens Finite_Difference but FD__ qualifies
 * with FD:: -- presumably a namespace alias exists elsewhere; confirm. */
# define FD__			FD::
# define STD__			std::
# define MATH__			std::
# define CSTD__			std::
# define CPLX__			std::
# define GLBL__			/* not needed if ::std:: is used */
# define GLBL2__		/* not needed if ::TBCI:: is used */
#endif
350
351#ifndef NO_NS
352
353# ifdef HAVE_LIBC_GLOBAL_NS_BUG
354# undef MATH__
355# define MATH__ ::
356# undef CSTD__
357# define CSTD__ ::
358# undef NAMESPACE_CSTD
359# define NAMESPACE_CSTD /* namespace std { */
360# undef NAMESPACE_CSTD_END
361# define NAMESPACE_CSTD_END /* } namespace std */
362# endif
363
/* The C++ library lives in the global namespace on this compiler:
 * qualify with "::" and make the std namespace helpers expand to nothing.
 * USING_STD must be empty here; "using namespace;" is not valid C++. */
# ifdef HAVE_CPP_GLOBAL_NS_BUG
#  undef STD__
#  define STD__ ::
#  undef USING_STD
#  define USING_STD /* using namespace std; */
#  undef NAMESPACE_STD
#  define NAMESPACE_STD /* namespace std { */
#  undef NAMESPACE_STD_END
#  define NAMESPACE_STD_END /* } namespace std */
# endif
374
375# ifdef HAVE_CPLX_GLOBAL_NS_BUG
376# undef CPLX__
377# define CPLX__ ::
378# undef NAMESPACE_CPLX
379# define NAMESPACE_CPLX
380# undef NAMESPACE_CPLX_END
381# define NAMESPACE_CPLX_END
382# endif
383
384/* We might want to know that LIBC/MATH functions are in a namespace
385 * different from std C++ stuff. Most notably affects sqrt(std::complex)
386 * and friends. */
387# if defined(HAVE_LIBC_GLOBAL_NS_BUG) && !defined(HAVE_CPP_GLOBAL_NS_BUG)
388# define HAVE_LIBC_NEQ_CPP_BUG
389# endif
390# if defined(HAVE_LIBC_GLOBAL_NS_BUG) && !defined(HAVE_CPLX_GLOBAL_NS_BUG)
391# define HAVE_LIBC_NEQ_CPLX_BUG
392# endif
393
394/* SGI MIPSpro thinks friend decls refer to global namespace,
395 * others shadow friends with member fns of same name if not scoped */
396# if (HAVE_FRIEND_GLOBAL_NS_BUG || HAVE_NEED_FOR_FRIEND_SCOPE) && !defined(AUTO_DECL) && !defined(NO_NS_TBCI)
397# undef FRIEND_TBCI__
398# define FRIEND_TBCI__ TBCI::
399# ifdef HAVE_FRIEND_GLOBAL_NS_BUG
400# undef FRIEND_TBCI2__
401# define FRIEND_TBCI2__ TBCI::
402# endif
403# endif
404
405#endif /* NO_NS */
406
407/* Note that abortion is illegal in a lot of countries ;-) */
408#ifdef ABORT_ON_ERR
409# define ABORT_RET(x) abort ()
410# define ABORT_RET_NR abort ()
411#else
412# define ABORT_RET(x) return x
413# define ABORT_RET_NR do {} while (0)
414#endif
415
420#ifndef MIN_ALIGN
421# define MIN_ALIGN 8
422#endif
423#ifndef MIN_ALIGN2
424# define MIN_ALIGN2 16
425#endif
426
/*
 * Alignment helpers.
 *  ALIGN(x)        bare alignment specifier requesting x bytes
 *  ALIGN2(v,x)     declarator v aligned to x bytes
 *  ALIGN3(v,i,x)   declarator v with constructor-style initializer (i),
 *                  aligned to x bytes
 * GNU-style attributes are preferred, then __declspec(align), otherwise
 * the macros degrade to plain declarations without alignment.
 */
#undef ALIGN
#if defined(HAVE_ALIGN_ATTR) || defined(HAVE_NEW_ALIGN_ATTR)
# ifdef HAVE_NEW_ALIGN_ATTR
#  define ALIGN3(v,i,x) v __attribute__ ((aligned(x))) (i)
# else
#  define ALIGN3(v,i,x) v(i) __attribute__ ((aligned(x)))
# endif
# define ALIGN2(v,x) v __attribute__ ((aligned(x)))
# define ALIGN(x) __attribute__ ((aligned(x)))
#else
# ifdef HAVE_DECLSPEC_ALIGN
#  define ALIGN3(v,i,x) __declspec(align(x)) v(i)
#  define ALIGN2(v,x) __declspec(align(x)) v
/* This branch is only reached without the GNU attribute syntax, so the
 * bare specifier has to use __declspec like ALIGN2/ALIGN3 do. */
#  define ALIGN(x) __declspec(align(x))
# else
#  define ALIGN3(v,i,x) v(i)
#  define ALIGN2(v,x) v
#  define ALIGN(x)
# endif
#endif
447#ifdef HAVE_TEMPL_ALIGN_ATTR
448# define TALIGN(x) ALIGN(x)
449#else
450# define TALIGN(x)
451#endif
452
453#ifdef HAVE_CONST_ATTR
454# define CONSTA __attribute__ ((const))
455# define TBCI_CONST(x) x __attribute__ ((const)); x
456#else
457# define CONSTA
458# define TBCI_CONST(x) x
459#endif
460#ifdef HAVE_REGPARM_ATTR
461# define REGPARMA(n) __attribute__ ((regparm(n)))
462# define REGPARM(n,x) x __attribute__ ((regparm(n))); x
463#else
464# define REGPARMA(n)
465# define REGPARM(n,x) x
466#endif
467
468#ifdef HAVE_UNUSED_ATTR
469# define UNUSED __attribute__((unused))
470#else
471# define UNUSED
472#endif
473
474
475#ifdef HAVE_WEAK_ATTR
476# define WEAKA __attribute__ ((weak))
477# define WEAK(x) x __attribute__ ((weak)); x
478#ifdef __INTEL_COMPILER // Does not like that attr in templ specializations
479# define TWEAK(x) x
480#else
481# define TWEAK(x) x __attribute__ ((weak)); x
482#endif
483#else
484# define WEAKA
485# define WEAK(x) x
486# define TWEAK(x) x
487#endif
488
489#if defined(HAVE_HOT_ATTR) && defined(USE_HOT)
490# define HOT __attribute__ ((hot))
491# define COLD __attribute__ ((cold))
492# define HOTDECL(x) x __attribute__ ((hot)); x
493# define COLDDECL(x) x __attribute__ ((cold)); x
494#else
495# define HOT
496# define COLD
497# define HOTDECL(x) x
498# define COLDDECL(x) x
499#endif
500
501
502/* Exception stuff */
503#if !defined(NO_EXCEPT) && !defined(HAVE_BUGGY_EXCEPTIONS)
504# include "tbci/except.h"
505#else
506# define TBCI_DISABLE_EXCEPT
507#endif
508
509// Error checking
510
511#ifndef HAVE_PRETTY_FUNCTION
512# if defined(HAVE_FUNC) || (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L)
513# define __PRETTY_FUNCTION__ __func__
514# else
515# define __PRETTY_FUNCTION__ "<unknown>"
516# endif
517#endif
518
519
520#define TBCIERRH1 __PRETTY_FUNCTION__
521#define TBCIERRM(cond,exc,txt,ind) \
522 STD__ string(#exc) + " in " + TBCIERRH1 + ":\n " + #txt + " at "\
523 + __FILE__ + ":" + TBCI__ ltoa(__LINE__) + ":\n (" + #cond \
524 + ") == TRUE! (" + #ind + " = " + TBCI__ ltoa(ind) + ")"
525#define TBCIERRS(cond,exc,txt,ind) \
526 #exc << " in " << TBCIERRH1 << ":\n " << #txt << " at " \
527 << __FILE__ << ":" << __LINE__ << ":\n (" << #cond \
528 << ") == TRUE! (" << #ind << " = " << ind << ")"
529
530
531#ifndef TBCI_NO_ERRCHECK
/*
 * BCHK(cond,exc,txt,ind,rtval): runtime error check for value-returning
 * functions.  do_errcheck() is tested first, so cond is only evaluated
 * while checking is switched on.  When the check fires:
 *  - if do_except() is true, exc is thrown, constructed from the formatted
 *    message and ind;
 *  - otherwise the message is written to cerr;
 *  - then abort() is called if do_abort() is true;
 *  - otherwise the enclosing function returns rtval.
 * BCHKNR is the same without the return (for void functions).
 * Both are wrapped in do { } while (0) so that they behave as a single
 * statement (safe inside brace-less if/else) and require the trailing
 * semicolon, exactly like the variants used when TBCI_NO_ERRCHECK is set.
 */
#define BCHK(cond,exc,txt,ind,rtval)					\
  do {									\
    if (UNLIKELY(__TBCI__ do_errcheck() && (cond))) {			\
      if (__TBCI__ do_except())						\
        throw exc ((TBCIERRM(cond,exc,txt,ind)).c_str(), ind);		\
      else								\
        STD__ cerr << TBCIERRS(cond,exc,txt,ind) << STD__ endl;	\
      if (__TBCI__ do_abort())						\
        abort();							\
      return rtval;							\
    }									\
  } while (0)
#define BCHKNR(cond,exc,txt,ind)					\
  do {									\
    if (UNLIKELY(__TBCI__ do_errcheck() && (cond))) {			\
      if (__TBCI__ do_except())						\
        throw exc ((TBCIERRM(cond,exc,txt,ind)).c_str(), ind);		\
      else								\
        STD__ cerr << TBCIERRS(cond,exc,txt,ind) << STD__ endl;	\
      if (__TBCI__ do_abort())						\
        abort();							\
    }									\
  } while (0)
595#else
596#ifdef __GNUC__
597# warning Error checking disabled
598#endif
599# define BCHK(cond,exc,txt,ind,rtval) do {} while(0)
600# define BCHKNR(cond,exc,txt,ind) do {} while(0)
601#endif
602
603/* Expensive checks */
604#ifdef TBCI_EXPCHECK
/*
 * EXPCHK / EXPCHKNR: like BCHK / BCHKNR, but gated by do_expcheck()
 * (expensive checks).  do_expcheck() is tested first, so cond is only
 * evaluated while expensive checking is switched on.  When the check
 * fires, exc is thrown if do_except() is true, otherwise the message goes
 * to cerr; abort() is called if do_abort() is true; EXPCHK finally
 * returns rtval from the enclosing function, EXPCHKNR does not return.
 * Wrapped in do { } while (0) so that each macro is a single statement
 * and needs the trailing semicolon, like the disabled variants.
 */
#define EXPCHK(cond,exc,txt,ind,rtval)					\
  do {									\
    if (UNLIKELY(__TBCI__ do_expcheck() && (cond) )) {			\
      if (__TBCI__ do_except())						\
        throw exc ((TBCIERRM(cond,exc,txt,ind)).c_str(), ind);		\
      else								\
        STD__ cerr << TBCIERRS(cond,exc,txt,ind) << STD__ endl;	\
      if (__TBCI__ do_abort())						\
        abort();							\
      return rtval;							\
    }									\
  } while (0)
#define EXPCHKNR(cond,exc,txt,ind)					\
  do {									\
    if (UNLIKELY(__TBCI__ do_expcheck() && (cond) )) {			\
      if (__TBCI__ do_except())						\
        throw exc ((TBCIERRM(cond,exc,txt,ind)).c_str(), ind);		\
      else								\
        STD__ cerr << TBCIERRS(cond,exc,txt,ind) << STD__ endl;	\
      if (__TBCI__ do_abort())						\
        abort();							\
    }									\
  } while (0)
625#ifdef __GNUC__
626# warning Expensive error checking enabled
627#endif
628#else
629/* OK, no expensive error checking */
630# define EXPCHK(cond,exc,txt,ind,rtval) do {} while(0)
631# define EXPCHKNR(cond,exc,txt,ind) do {} while(0)
632#endif
633
634
635#if defined(HAVE_ALGORITHM) && defined(HAVE_STD_MINMAX) &&!defined(HAVE_GCC_MINMAX)
636# include <algorithm>
637#endif
638
643#ifndef MIN
644# ifdef HAVE_GCC_MINMAX
645# define MIN(a,b) ((a) <? (b))
646# define MAX(a,b) ((a) >? (b))
647# elif defined(HAVE_STD_MINMAX)
648# define MIN(a,b) std::min(a,b)
649# define MAX(a,b) std::max(a,b)
650# else
651//TBCI_CONST(template <typename T> inline T MIN (const T a, const T b))
652//{ return (a < b ? a : b); }
653//TBCI_CONST(template <typename T> inline T MAX (const T a, const T b))
654//{ return (a > b ? a : b); }
655# define MIN(a,b) ((a) < (b)? (a) : (b))
656# define MAX(a,b) ((a) > (b)? (a) : (b))
657# endif
658#endif
659
660/* DEBUGINFO */
/* INFO(x): stream x to standard output when DEBUGINFO is defined,
 * otherwise expand to nothing.  x is deliberately left unparenthesized so
 * that a chain such as INFO("a" << b) works.  cout is qualified through
 * STD__ so the macro does not depend on a using-declaration being active. */
#ifdef DEBUGINFO
# define INFO(x) STD__ cout << x
#else
# define INFO(x)
#endif
666
667
668/* Macros for return value optimization for GNU g++ */
669//return value optimization
670#if defined(RETVAL_OPT) && defined(HAVE_NAMED_RETVAL)
671# define RET(x) return(x);
672# define CONSTR(x)
673#else
674# define RET(x)
675# define CONSTR(x) x
676#endif /* RETVAL_OPT */
677
678// Work around MSVC failure to delete[] something else than void*
679#if defined(_MSC_VER)
680# define _VOID (void*)
681#else
682# define _VOID
683#endif
684
685// Work around Borland C++ 0x570 bug not handling inline in
686// templated member functions
687#ifndef HAVE_BCXX_TEMPL_INLINE_MFUNC_BUG
688# define INLINE inline
689#else
690# define INLINE
691#endif
692
694
695#if defined(_OPENMP) && !defined(TBCI_NO_OMP)
696# define TBCI_OMP
697# include <omp.h>
698# define OMP_FOR _Pragma("omp parallel for")
699# define OMP_FOR_REDUCE_F2 _Pragma("omp parallel for reduction(+:f2)")
700# define OMP_FOR_REDUCE_F1F2 _Pragma("omp parallel for reduction(+:f1,f2)")
702# define NOSMP_VECVEC
704# define NOSMP_MATVEC
705//# define NOSMP_BDMATVEC
706//# define TBCI_NO_SIMD
707#else
708# define OMP_FOR
709# define OMP_FOR_REDUCE_F2
710# define OMP_FOR_REDUCE_F1F2
711#endif
712
713#include "perf_opt.h"
714#include "tbci_traits.h"
715#include "tbci_param.h"
716
717// prefetch
718// loc argument gives hint on whether the accessed variable
719// should be left in cache. 3 = Yes (all caches), 0 = No (cache),
720// 1, 2 are intermediate (only some of the caches)
721#if defined(HAVE_BUILTIN_PREFETCH) && !defined(NO_PREFETCH)
722# define PREFETCH_R(addr,loc) __builtin_prefetch(addr, 0, loc )
723# define PREFETCH_W(addr,loc) __builtin_prefetch(addr, 1, loc )
724# define USE_PREFETCH 1
725#else
733# if defined(__alpha__) && defined(AXP_PREFETCH) && !defined(NO_PREFETCH) && defined(__GNUC__)
734# define PREFETCH_R(addr,loc) asm (" lda $31,%0 \n" : : "m" (*(addr)) )
735# define PREFETCH_W(addr,loc) asm (" lda $31,%0 \n" : : "m" (*(addr)) )
736# define USE_PREFETCH 1
737# else
738# if defined(__i386__) && defined(SSE_PREFETCH) && !defined(NO_PREFETCH) && (defined(__GNUC__) || defined(__INTEL_COMPILER))
739# define PREFETCH_R(addr,loc) asm (" prefetcht0 %0 \n" : : "m" (*(addr)) )
740# define PREFETCH_W(addr,loc) asm (" prefetcht0 %0 \n" : : "m" (*(addr)) )
741# define USE_PREFETCH 1
742# else
743# if defined(__i386__) && defined(AMD_PREFETCH) && !defined(NO_PREFETCH) && (defined(__GNUC__) || defined(__INTEL_COMPILER))
744# define PREFETCH_R(addr,loc) asm (" prefetch %0 \n" : : "m" (*(addr)) )
745# define PREFETCH_W(addr,loc) asm (" prefetchw %0 \n" : : "m" (*(addr)) )
746# define USE_PREFETCH 1
747# else
748# define PREFETCH_R(addr,loc) do {} while (0)
749# define PREFETCH_W(addr,loc) do {} while (0)
750# undef USE_PREFETCH
751# endif
752# endif
753# endif
754#endif
755
/*
 * PREFETCH_R_MANY / PREFETCH_W_MANY: issue one PREFETCH_R / PREFETCH_W per
 * CACHELINE_SZ bytes over the CACHELINE_SZ*PREFETCH_AHEAD bytes starting
 * at addr.  addr is parenthesized before the cast so that an expression
 * argument such as p+1 is offset in elements, not in bytes.  Note that
 * addr is evaluated more than once.
 * Without USE_PREFETCH both macros are no-ops.
 */
#ifdef USE_PREFETCH
# define PREFETCH_R_MANY(addr,loc) \
	for (char* _prefaddr = (char*)(addr); \
	     _prefaddr < (char*)(addr)+CACHELINE_SZ*PREFETCH_AHEAD; \
	     _prefaddr += CACHELINE_SZ) \
		PREFETCH_R(_prefaddr,loc)
# define PREFETCH_W_MANY(addr,loc) \
	for (char* _prefaddr = (char*)(addr); \
	     _prefaddr < (char*)(addr)+CACHELINE_SZ*PREFETCH_AHEAD; \
	     _prefaddr += CACHELINE_SZ) \
		PREFETCH_W(_prefaddr,loc)
#else
# define PREFETCH_R_MANY(addr,loc) do {} while(0)
# define PREFETCH_W_MANY(addr,loc) do {} while(0)
#endif
767
768
769#ifdef HAVE_TLS
770# define THREAD__ __thread
771#elif defined(HAVE_DTLS)
772# define THREAD__ __declspec(thread)
773#else
774# define THREAD__
775#endif
776
777//#if !defined(_WIN_32) && !defined(CYGWIN)
778#ifdef HAVE_UNISTD_H
779# include "smp.h"
780#else
781# define MAIN_PID (getpid())
782# define num_threads (0)
783# define thrno (0)
784# define ismainthread (1)
785#endif
786
787
788#ifdef __SSE2__
789# define TBCI_SIMD_ALIGN 16
790#endif
791
792#ifdef USE_PLAIN_VEC_KERNELS
793# include "plain_def.h"
794#elif defined(USE_UNR_VEC_KERNELS2)
795# include "unroll_prefetch_def2.h"
796#else
797# include "unroll_prefetch_def.h"
798#endif
799
800
802
812template <typename T>
813inline void SWAP (T& a, T& b)
814{
815 REGISTER T ALIGN(MIN_ALIGN) tmp = a; a = b; b = tmp;
816}
817
819#ifdef C_MEMALLOC
828# if defined(__GNUC__) && !defined(C_MEMALLOC_WE_KNOW_WHAT_WE_DO)
829# warning "Don't define C_MEMALLOC if you use containers with elements which need intitalization"
830# warning " or take care yourself ! (Read basics.h:690)"
831# endif
/* NEW(t,s): allocate raw storage for s objects of type t (no constructors
 * are run).  s is parenthesized so that expression arguments such as n+1
 * size the allocation correctly.
 * NOTE(review): the memalign alignment sizeof(t)*MIN(4,s/2) is not
 * guaranteed to be a non-zero power of two -- confirm the target libc
 * tolerates that. */
# ifdef HAVE_MEMALIGN
#  define NEW(t,s) (t*) /*CSTD__*/ memalign (sizeof(t)*MIN(4,(s)>>1), sizeof(t)*(s))
# else
#  define NEW(t,s) (t*) CSTD__ malloc (sizeof(t)*(s))
# endif
837# define TBCIDELETE(t,v,sz) do { BCHKNR(!v,NumErr,free null ptr,0); CSTD__ free (v); v = 0; } while (0)
838# define TBCIDELETE_RO(t,v,sz) do { BCHKNR(!v,NumErr,free null ptr,0); CSTD__ free (v); } while (0)
839# define REALLOC(v,os,t,s) v = (t*) CSTD__ realloc ((v), sizeof(t)*(s))
840#elif !defined(NO_MALLOC_CACHE) //&& !defined(SMP)
842# if defined(PRAGMA_I) && defined(PRAGMA_IMPL_MALLOC_CACHE)
843# pragma implementation "malloc_cache.h"
844# endif
845# include "malloc_cache.h"
847#else /* ! C_MEMALLOC && NO_MALLOC_CACHE */
848# define NEW(t,s) new t[s]
849# define TBCIDELETE(t,v,sz) do { BCHKNR(!v,NumErr,delete[] null ptr,0); delete[] _VOID v; v = 0; } while (0)
850# define TBCIDELETE_RO(t,v,sz) do { BCHKNR(!v,NumErr,delete[] null ptr,0); delete[] _VOID v; } while (0)
851# ifdef ALLOW_MEMCPY
852# define REALLOC(v,os,t,s) do { \
853 t* _tmp = (v); \
854 if (LIKELY(s)) (v) = new t[(s)]; else (v) = 0; \
855 if (LIKELY(_tmp != (v) && _tmp && (v) && (os) && (s))) \
856 CSTD__ memcpy ((v), _tmp, sizeof(t)*MIN((os),(s))); \
857 if (LIKELY(_tmp)) delete[] (_tmp); \
858} while (0)
859# else
860# define REALLOC(v,os,t,s) do { \
861 t* _tmp = (v); \
862 if (LIKELY(s)) (v) = new t[(s)]; else (v) = 0; \
863 if (LIKELY(_tmp != (v) && _tmp && (v) && (os) && (s))) { \
864 for (REGISTER unsigned long _i = 0; _i < MIN((s),(os)); _i++) \
865 (v)[_i] = _tmp[_i]; } \
866 if (LIKELY(_tmp)) delete[] (_tmp); \
867 } while (0)
868# endif
869#endif
870
881#if defined(C_MEMALLOC) || defined(ALLOW_MEMCPY)
882# if defined(__GNUC__) && !defined(C_MEMALLOC_WE_KNOW_WHAT_WE_DO)
883# warning "Don't define C_MEMALLOC or ALLOW_MEMCPY if you use containers with elements which"
884# warning " use dynamic memory allocation or handle with pointers somehow. Copying otherwise"
885# warning " will break things. (Read basics.h:740)"
886# endif
887# define TBCICOPY(n,o,t,s) CSTD__ memcpy ((n),(o),(s)*sizeof(t))
888#else
889# define VEC_INLINE inline
890# define COPY2(res,v1,f1,f2) res = v1
892# ifdef SMP
893template <typename T>
894void _par_copy (const unsigned long sz, T* v1, const T* v2);
895# define TBCICOPY(n,o,t,s) _par_copy < t > (s,n,o)
896# else
897# define TBCICOPY(n,o,t,s) _tbci_copy < t > (s,n,o)
898#endif
899# undef VEC_INLINE
900#endif
901
902
903#if defined(C_MEMALLOC) || defined(ALLOW_MEMSET)
904# define TBCICLEAR(n,t,s) CSTD__ memset ((n), 0, (s)*sizeof(t))
905#else
906# define VEC_INLINE /*inline*/
907# define FILL1(res,f1,f2) res = f2
909# ifdef SMP
910template <typename T>
911void _par_fill (const unsigned long, T * const, typename tbci_traits<T>::loop_const_refval_type);
912# define TBCIFILL(n,v,t,s) _par_fill < t > (s,n,v)
913# define TBCICLEAR(n,t,s) _par_fill < t > (s,n,t(0))
914# else
915# define TBCIFILL(n,v,t,s) _tbci_fill < t > (s,n,v)
916# define TBCICLEAR(n,t,s) _tbci_fill < t > (s,n,t(0))
917# endif
918# undef VEC_INLINE
919#endif
920
921template <typename T>
923 public:
924 T (*fn)(const unsigned long idx, void* par);
925 vec_fill_fn(T (*f)(const unsigned long, void*))
926 :fn(f) {};
927};
928
929template <typename T>
930void _tbci_fill_fn(const unsigned long sz, T* vec, vec_fill_fn<T> fn, void* par)
931{
932 for (REGISTER unsigned long i = 0; i < sz; ++i)
933 vec[i] = fn.fn(i, par);
934}
935
936
944#if defined(C_MEMALLOC) || defined(ALLOW_MEMCMP)
945# define TBCICOMP(n,o,t,s) CSTD__ memcmp ((n),(o),(s)*sizeof(t))
946// Filling
947# define TBCIFILL(n,v,t,s) \
948 do { for (REGISTER unsigned long _i = 0; _i < (s); _i++) \
949 (n)[_i] = (t)(v); } while (0)
950#else
951# if defined(__GNUG__) && !defined(PEDANTIC)
952# define _TBCICOMP(n,o,t,s) ({ \
953 REGISTER int _r; REGISTER unsigned long _i; \
954 for (_r=0, _i=0; _i<(s) && !_r; _i++) { \
955 _r = (int)((n)[_i] != (o)[_i]); \
956 if (_r) break; \
957 } \
958 _r; \
959})
960# else
961
962HOTDECL(template <typename T>
963inline int _tbci_comp (const unsigned long s, T const *n, T const *o))
964{
965 // if (n == o) return 0;
966 for (REGISTER unsigned long _i = 0; _i < (s); _i++)
967 if (UNLIKELY((n)[_i] != (o)[_i])) return 1;
968 return 0;
969}
970# define TBCICOMP(n,o,t,s) _tbci_comp ((s),(n),(o))
971# endif
972#endif
973
974#define VEC_INLINE /*inline*/
975#define COMP2(r,v1,f1,f2) if (r != v1) { ++f2; i = sz; goto _fin; /* break; */ }
978#ifdef SMP
979template <typename T>
980int _par_comp (const unsigned long sz, const T* v1, const T* v2);
981# define TBCICOMP(n,o,t,s) _par_comp< t > (s, n, o)
982#else
983# define TBCICOMP(n,o,t,s) ({ long res; do_vv_comp<t>(s,n,o,res); res; })
984#endif
985#undef VEC_INLINE
986
987// Helper functions
988
/**
 * Bisection helper for bin_search().
 * Searches the half-open index range [start, end) of vec for el, using
 * operator== and operator< of T.  vec[start] itself is never examined:
 * only midpoints strictly greater than start are compared, so the caller
 * is expected to have tested vec[start] already (bin_search() does).
 * @return the index of an element equal to el, or (unsigned long)-1 if
 *         none was found.
 */
template <typename T>
unsigned long _bin_search(const T* vec, T el, unsigned long start, unsigned long end)
{
	/* Iterate instead of recursing; stop once the range has shrunk to
	 * at most one (already tested) element. */
	while (end != start && end-1 != start) {
		/* start + (end-start)/2 cannot overflow, unlike (end+start)/2 */
		const unsigned long half = start + (end-start)/2;
		const T& hel = vec[half];
		if (hel == el)
			return half;
		if (hel < el)
			start = half;
		else
			end = half;
	}
	return (unsigned long)-1;
}
1007
1012template <typename T>
1013unsigned long bin_search(const T* vec, T el, unsigned long start, unsigned long end)
1014{
1015 if (end == start)
1016 return (unsigned long)-1;
1017 if (vec[start] == el)
1018 return start;
1019 else if (vec[end-1] == el)
1020 return end-1;
1021 if (vec[start] > el)
1022 return (unsigned long)-1;
1023 if (vec[end-1] < el)
1024 return (unsigned long)-1;
1025 BCHK(vec[end-1] < vec[start], NumErr, unsorted vector in bin_search, end-1, (unsigned long)-1);
1026 return _bin_search(vec, el, start, end);
1027}
1028
1030
1055#define _REF_
1057TBCI_CONST(inline int conj (const int _REF_ arg)) {return arg;}
1058TBCI_CONST(inline unsigned conj (const unsigned _REF_ arg)) {return arg;}
1059TBCI_CONST(inline long conj (const long _REF_ arg)) {return arg;}
1060TBCI_CONST(inline short conj (const short _REF_ arg)) {return arg;}
1061TBCI_CONST(inline char conj (const char _REF_ arg)) {return arg;}
1062TBCI_CONST(inline float conj (const float _REF_ arg)) {return arg;}
1063TBCI_CONST(inline double conj (const double _REF_ arg)) {return arg;}
1064
1065TBCI_CONST(inline int real (const int _REF_ d)) { return d; }
1066TBCI_CONST(inline unsigned real (const unsigned _REF_ d)) { return d; }
1067TBCI_CONST(inline float real (const float _REF_ d)) { return d; }
1068TBCI_CONST(inline double real (const double _REF_ d)) { return d; }
1069
1070TBCI_CONST(inline int imag (const int _REF_ d)) { return 0; }
1071TBCI_CONST(inline unsigned imag (const unsigned _REF_ d)) { return 0; }
1072TBCI_CONST(inline float imag (const float _REF_ d)) { return 0; }
1073TBCI_CONST(inline double imag (const double _REF_ d)) { return 0; }
1074
1075#ifdef HAVE_LONG_DOUBLE
1076TBCI_CONST(inline long double conj (const long double _REF_ arg)) {return arg;}
1077TBCI_CONST(inline long double real (const long double _REF_ d)) { return d; }
1078TBCI_CONST(inline long double imag (const long double _REF_ d)) { return 0; }
1079#endif
1080#ifdef HAVE_LONG_LONG
1081TBCI_CONST(inline long long conj (const long long _REF_ arg)) {return arg;}
1082TBCI_CONST(inline long long real (const long long _REF_ d)) { return d; }
1083TBCI_CONST(inline long long imag (const long long _REF_ d)) { return 0; }
1084#endif
1085#undef _REF_
1087
1088#ifdef _INCLUDE_CPLX_H
1089# include "cplx.h"
1090#endif
1091
1092#ifdef _INCLUDE_STDCPLX_H
1093# include "std_cplx.h"
1094#endif
1095
1096#ifdef _INCLUDE_BUILTINCPLX_H
1097# include "builtin_cplx.h"
1098#endif
1099
1100
1103// intel C++ 6.00 has a preprocessor (!) bug
1104#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER == 600)
1105template <typename T> inline int sign (const T& x)
1106#else
1107TBCI_CONST(template <typename T> inline int sign (const T& x))
1108#endif
1109{
1110 if (LIKELY(CPLX__ real(x) > 0)) return 1;
1111 else if(LIKELY(CPLX__ real(x) < 0)) return -1;
1112 else return 0;
1113}
1114
1149
1150
1151#ifdef TEMPLATED_FABSSQR
1152TBCI_CONST(template <typename T> inline double fabssqr (const T& a))
1153{ return CPLX__ real(a * CPLX__ conj(a)); }
1154#else
1155# define FABSSQR(T) \
1156TBCI_CONST(inline double fabssqr (const T a)) \
1157{ return CPLX__ real(a * CPLX__ conj(a)); }
1158
1159FABSSQR(double)
1162FABSSQR(unsigned)
1163# ifdef HAVE_LONG_DOUBLE
1164FABSSQR(long double)
1165# endif
1166# ifdef HAVE_LONG_LONG
1167FABSSQR(long long)
1168# endif
1169# undef FABSSQR
1170#endif /* TEMPLATED_FABSSQR */
1171
1172// Square
1173#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER == 600)
1174template <typename T> inline T sqr (const T& a)
1175#else
1176TBCI_CONST(template <typename T> inline T sqr (const T& a))
1177#endif
1178{ return a*a; }
1179
1180// Dot product
1181#if !defined(HAVE_WIN_32) && !defined (NO_NS) // Problems with other dot
1182# if defined(__INTEL_COMPILER) && (__INTEL_COMPILER == 600)
1183template <typename T> inline T dot (const T& a1, const T& a2)
1184# else
1185TBCI_CONST(template <typename T> inline T dot (const T& a1, const T& a2))
1186# endif
1187{ return a1 * CPLX__ conj (a2); }
1188#endif
1189
1191
1193
1194#if 0
1195/* For the std namespace, we don't want to use templated functions */
1196TBCI_CONST(template <typename T> inline double fabs (const T& a))
1197{ return a<(T)0?(double)-a:(double)a; }
1198
1199#else
1200
1201/* hopefully the std::fabs(double) is not extern "C" ... */
1202# define FABS(T) \
1203TBCI_CONST(inline double fabs (const T a)) \
1204{ return a<(T)0?(double)-a:(double)a; }
1205/* Also provide sqrt(int) functions ... */
1206# define SQRT(T) \
1207TBCI_CONST(inline double sqrt (const T a)) \
1208{ return MATH__ sqrt ((double)a); }
1209
1210# ifdef HAVE_LONG_DOUBLE
1211# if defined(__GNUC__) && __GNUC__ == 2 && __GNUC_MINOR__ < 97
1212FABS(long double)
1213SQRT(long double)
1214# endif
1215# endif
1216//FABS(float);
1219//SQRT(unsigned int); // MSVC does not like it ...
1220# ifdef HAVE_LONG_LONG
1221FABS(long long)
1222SQRT(long long)
1223# endif
1224//FABS(unsigned int); // MSVC should not like it either ...
1225TBCI_CONST(inline double fabs (const unsigned int a))
1226{ return (double)a; }
1227
1228# undef FABS
1229#endif /* 0 */
1230
1232
1234#if 0
1235/* For the std namespace, we don't want to use templated functions */
1236TBCI_CONST(template <typename T> inline T abs (const T& a))
1237{ return a<(T)0?-a:a; }
1238
1239#else
1240
1241# define ABS(T) \
1242TBCI_CONST(inline T abs (const T a)) \
1243{ return a<(T)0?-a:a; }
1244
1245# ifdef HAVE_MISS_CSTD_ABS_BUG
1246# ifdef HAVE_LONG_DOUBLE
1247ABS(long double)
1248# endif
1249ABS(double)
1250ABS(float)
1251# ifdef HAVE_LONG_LONG
1252//ABS(long long)
1253# endif
1254//ABS(unsigned)
1255# endif
1256
1257# if 1 //defined(__INTEL_COMPILER) && (__INTEL_COMPILER == 600)
1258ABS(unsigned)
1259# endif
1260# undef ABS
1261#endif /* 0 */
1262
1264
1265#ifdef INLINE_VEC_KERNELS
1266# define VEC_INLINE inline
1267#else
1268# define VEC_INLINE
1269#endif
1270
1272
1277
1278typedef enum _vararg vararg;
1279
1281
1282#ifndef _NO_INCLUDE_COST_H
1283# include "cost.h"
1284#endif
1285
1286#if !defined(NO_NS) && defined (USING_STD_NS)
1287using std::cin;
1288using std::cout;
1289using std::cerr;
1290using std::endl;
1291using std::flush;
1292using std::dec;
1293using std::hex;
1294using std::setw;
1295using std::setprecision;
1296using std::istream;
1297using std::ostream;
1298using std::ifstream;
1299using std::ofstream;
1300using std::string;
1301# if defined(HAVE_SSTREAM) && !defined(HAVE_BUGGY_SSTREAM)
1302using std::istringstream;
1303using std::ostringstream;
1304# else
1305using std::istrstream;
1306using std::ostrstream;
1307# endif
1308#endif
1309
1310#endif /* TBCI_BASICS_H */
const Vector< T > const Vector< T > & x
Definition LM_fit.h:97
int i
Definition LM_fit.h:71
#define _REF_
Definition basics.h:1055
#define BCHK(cond, exc, txt, ind, rtval)
Definition basics.h:575
#define LIKELY(expr)
branch prediction note that we sometimes on purpose mark the unlikely possibility likely and vice ver...
Definition basics.h:100
#define FABSSQR(T)
Definition basics.h:1155
#define NAMESPACE_CSTD
Definition basics.h:319
#define NAMESPACE_CPLX_END
Definition basics.h:326
#define NAMESPACE_END
Definition basics.h:323
#define HOTDECL(x)
Definition basics.h:497
#define NAMESPACE_CSTD_END
Definition basics.h:325
#define COPY2(res, v1, f1, f2)
Definition basics.h:890
#define ABS(T)
Definition basics.h:1241
#define NAMESPACE_TBCI
Definition basics.h:317
#define TBCI_CONST(x)
Definition basics.h:458
#define UNLIKELY(expr)
Definition basics.h:101
#define REGISTER
Definition basics.h:108
#define FABS(T)
Definition basics.h:1202
#define SQRT(T)
Definition basics.h:1206
#define FILL1(res, f1, f2)
Definition basics.h:907
#define ALIGN(x)
Definition basics.h:444
#define NAMESPACE_CPLX
Definition basics.h:322
#define MIN_ALIGN
Definition basics.h:421
#define COMP2(r, v1, f1, f2)
Definition basics.h:975
#define T
Definition bdmatlib.cc:20
Wrapper for GCC's builtin complex type plus compatibility features (helper class TBCI::hcplx).
exception base class for the TBCI NumLib
Definition except.h:59
vec_fill_fn(T(*f)(const unsigned long, void *))
Definition basics.h:925
T(* fn)(const unsigned long idx, void *par)
Definition basics.h:924
Preprocessor macros for estimating the "cost" of operations.
Rich implementation of complex numbers TBCI::cplx.
T imag(const TBCI__ cplx< T > &z)
Definition cplx.h:674
double fabs(const TBCI__ cplx< T > &c)
Definition cplx.h:746
T arg(const TBCI__ cplx< T > &c)
Definition cplx.h:690
cplx< T > dot(const cplx< T > &a, const cplx< T > &b)
Definition cplx.h:300
double fabssqr(const cplx< T > &c)
Definition cplx.h:390
cplx< T > sqr(const cplx< T > &c)
Definition cplx.h:449
NAMESPACE_END NAMESPACE_CPLX TBCI__ cplx< T > conj(const TBCI__ cplx< T > &c)
Definition cplx.h:663
#define abs(x)
Definition f2c.h:178
F_TMatrix< T > b
Definition f_matrix.h:736
Caches memory blocks to avoid calls to __builtin_free()/new() It performs very well for strictly recu...
const unsigned TMatrix< T > const Matrix< T > * a
const unsigned end
void _tbci_fill_fn(const unsigned long sz, T *vec, vec_fill_fn< T > fn, void *par)
Definition basics.h:930
T dot(const T &a1, const T &a2)
Definition basics.h:1185
int _par_comp(const unsigned long sz, const T *v1, const T *v2)
T sqr(const T &a)
Definition basics.h:1176
enum _vararg vararg
Definition basics.h:1278
_vararg
This is a helper type to identify and count varargs.
Definition basics.h:1274
@ vag7
Definition basics.h:1274
@ vag0
Definition basics.h:1274
@ vag14
Definition basics.h:1275
@ vag_max
Definition basics.h:1276
@ vag12
Definition basics.h:1275
@ vag1
Definition basics.h:1274
@ vag13
Definition basics.h:1275
@ vag11
Definition basics.h:1275
@ vag2
Definition basics.h:1274
@ vag16
Definition basics.h:1275
@ vag10
Definition basics.h:1275
@ vag3
Definition basics.h:1274
@ vag15
Definition basics.h:1275
@ vag9
Definition basics.h:1275
@ vag8
Definition basics.h:1274
@ vag5
Definition basics.h:1274
@ vag4
Definition basics.h:1274
@ vag6
Definition basics.h:1274
unsigned long _bin_search(const T *vec, T el, unsigned long start, unsigned long end)
Search for an element el in a sorted vector between start and end-1, returns (unsigned long)-1 if ele...
Definition basics.h:994
int sign(const T &x)
Signum.
Definition basics.h:1107
void do_vv_comp(const unsigned long sz, const T *const v1, const T *const v2, volatile long &_f2)
f2 = number of differences vec, vec
Definition basics.h:977
void _par_fill(const unsigned long, T *const, typename tbci_traits< T >::loop_const_refval_type)
unsigned long bin_search(const T *vec, T el, unsigned long start, unsigned long end)
Search for an element el in a sorted vector between start and end-1, returns (unsigned long)-1 if ele...
Definition basics.h:1013
void _tbci_copy(const unsigned long sz, T *const res, const T *const v1)
Definition basics.h:891
void _par_copy(const unsigned long sz, T *v1, const T *v2)
void _tbci_fill(const unsigned long sz, T *const res, register typename tbci_traits< T >::loop_const_refval_type f2)
Definition basics.h:908
void SWAP(T &a, T &b)
SWAP function Note: We could implement a swap function without temporaries: a -= b b += a a -= b a = ...
Definition basics.h:813
int imag(const int d)
Definition basics.h:1070
int conj(const int arg)
conj for elementary types
Definition basics.h:1057
double fabs(const int a)
Definition basics.h:1217
int real(const int d)
Definition basics.h:1065
#define real
macros for composing plain loops over arrays.
#define VKERN_TEMPL_1V_C(FNAME, OP1)
Operations of type VEC OP= VAL.
Definition plain_def.h:172
#define VKERN_TEMPL_2V(FNAME, OP2)
Operations of type vec OP= vec.
Definition plain_def.h:72
#define VKERN_TEMPL_2V_T(FNAME, OP2, TYPE)
Operations of type TYPE = VEC OP VEC.
Definition plain_def.h:119
Wrapper for C++ std library complex type plus compatibility features.
This provides some parameters that control the behavior of various functions in the TBCI library.
macros for composing unrolled prefetching loops over arrays.
macros for composing unrolled prefetching loops over arrays.