TBCI Numerical high perf. C++ Library
2.8.0
Main Page
Related Pages
Namespaces
Classes
Files
File List
File Members
home
abuild
rpmbuild
BUILD
numerix-2.0
lina
include
perf_opt.h
Go to the documentation of this file.
1
8
#ifndef TBCI_PERF_OPT_H
9
#define TBCI_PERF_OPT_H
10
11
/* gcc-4:
 * The user should use -funroll-loops -ftree-vectorize
 * and -fprefetch-loop-arrays as needed.
 *
 * For gcc >= 4 the plain vector kernels are therefore selected, unless
 * one of the USE_*_VEC_KERNELS flavours has already been chosen.
 */
#if defined(__GNUC__) && __GNUC__ >= 4 && !defined(USE_PLAIN_VEC_KERNELS) \
 && !defined(USE_UNR_VEC_KERNELS) && !defined(USE_UNR_VEC_KERNELS2)
# define USE_PLAIN_VEC_KERNELS
#endif
19
20
/* DEC Alpha architecture
 *
 * Cache line size is 32 bytes. Unless a kernel flavour has already been
 * chosen, the unrolled kernels are used with every compiler. Only the
 * unroll depth and prefetch distance depend on the compiler:
 *   gcc (same values for gcc <= 2 and gcc >= 3): depth 8, ahead 8
 *   DEC/Compaq/HP compiler:                      depth 1, ahead 4
 */
#ifdef __alpha__
# define DEF_CACHELINE_SZ 32
# if !defined(USE_PLAIN_VEC_KERNELS) && !defined(USE_UNR_VEC_KERNELS) && !defined(USE_UNR_VEC_KERNELS2)
#  define USE_UNR_VEC_KERNELS
# endif
# ifdef __GNUC__
#  define DEF_UNROLL_DEPTH 8
#  define DEF_PREFETCH_AHEAD 8
# else /* DEC/Compaq/HP compiled */
#  define DEF_UNROLL_DEPTH 1
#  define DEF_PREFETCH_AHEAD 4
# endif
#endif /* __alpha__ */
46
47
/* iA32 arch
 *
 * Tuning for __i386__ / __x86_64__. Processor variants are selected by
 * the OPT_* macros; each branch picks a kernel flavour (only if none is
 * chosen yet), an unroll depth, a prefetch distance and, where
 * applicable, a prefetch policy (NO_PREFETCH / SSE_PREFETCH /
 * AMD_PREFETCH). FORCE_PREFETCH or a predefined NO_PREFETCH suppresses
 * the automatic NO_PREFETCH definitions.
 */
#if defined(__i386__) || defined(__x86_64__)
# if defined(OPT_PENTIUM4) || defined(OPT_CORE2) || defined(__x86_64__)
#  define DEF_CACHELINE_SZ 64
# else
#  define DEF_CACHELINE_SZ 32
# endif
# ifdef __GNUC__
#  ifdef OPT_PENTIUM4
#   if __GNUC__ <= 2
#    if !defined(USE_PLAIN_VEC_KERNELS) && !defined(USE_UNR_VEC_KERNELS) && !defined(USE_UNR_VEC_KERNELS2)
#     define USE_UNR_VEC_KERNELS
#    endif
#    define DEF_UNROLL_DEPTH 1	/* Hopefully -funroll-loops is enabled */
#    define DEF_PREFETCH_AHEAD 4
#   else /* gcc >= 3 */
#    if !defined(USE_PLAIN_VEC_KERNELS) && !defined(USE_UNR_VEC_KERNELS) && !defined(USE_UNR_VEC_KERNELS2)
#     define USE_PLAIN_VEC_KERNELS	/* P4 does HW prefetch */
#    endif
#    define DEF_UNROLL_DEPTH 4
#    define DEF_PREFETCH_AHEAD 16
#    if !defined(FORCE_PREFETCH) && !defined(NO_PREFETCH)
#     define NO_PREFETCH	/* P4 does HW prefetch */
#    endif
#   endif
#  elif defined(OPT_PENTIUM3) || defined(OPT_CORE2) || defined(SSE_PREFETCH) /* Pentium M / 3 */
#   if !defined(FORCE_PREFETCH) && !defined(NO_PREFETCH)
#    define NO_PREFETCH	/* Pentium-M does HW prefetch, Pentium-3 does not ... */
#   endif
    /* After the guard above NO_PREFETCH is undefined only if FORCE_PREFETCH
     * was given, so SSE_PREFETCH is auto-enabled only in that case.
     * NOTE(review): this tests OPT_ARCH_PENTIUM3 while the enclosing #elif
     * tests OPT_PENTIUM3 - confirm that both macros exist and are meant. */
#   if !defined(SSE_PREFETCH) && defined(OPT_ARCH_PENTIUM3) && !defined(NO_PREFETCH)
#    define SSE_PREFETCH
#   endif
#   if !defined(USE_PLAIN_VEC_KERNELS) && !defined(USE_UNR_VEC_KERNELS) && !defined(USE_UNR_VEC_KERNELS2)
#    define USE_UNR_VEC_KERNELS
#   endif
#   define DEF_UNROLL_DEPTH 4
#   define DEF_PREFETCH_AHEAD 8
#  elif defined(OPT_ATHLON) || defined(AMD_PREFETCH) /* Athlon / Opteron */
#   if !defined(FORCE_PREFETCH) && !defined(NO_PREFETCH) && defined(__x86_64__)
#    define NO_PREFETCH	/* AMD64 does HW prefetch */
#   endif
    /* NOTE(review): this tests OPT_ARCH_ATHLON while the enclosing #elif
     * tests OPT_ATHLON - confirm that both macros exist and are meant. */
#   if !defined(AMD_PREFETCH) && defined(OPT_ARCH_ATHLON) && !defined(NO_PREFETCH)
#    define AMD_PREFETCH
#   endif
#   if !defined(USE_PLAIN_VEC_KERNELS) && !defined(USE_UNR_VEC_KERNELS) && !defined(USE_UNR_VEC_KERNELS2)
#    define USE_UNR_VEC_KERNELS
#   endif
#   define DEF_UNROLL_DEPTH 1	/* Hopefully -funroll-loops is enabled */
#   define DEF_PREFETCH_AHEAD 8
#  else /* deflt proc */
#   if !defined(USE_PLAIN_VEC_KERNELS) && !defined(USE_UNR_VEC_KERNELS) && !defined(USE_UNR_VEC_KERNELS2)
#    define USE_UNR_VEC_KERNELS
#   endif
#   define DEF_UNROLL_DEPTH 8
#   define DEF_PREFETCH_AHEAD 4
#  endif
# else /* ! __GNUC__ */
#  if !defined(USE_PLAIN_VEC_KERNELS) && !defined(USE_UNR_VEC_KERNELS) && !defined(USE_UNR_VEC_KERNELS2)
#   define USE_UNR_VEC_KERNELS
#  endif
#  define DEF_UNROLL_DEPTH 8
#  define DEF_PREFETCH_AHEAD 8
# endif
#endif /* __i386__ || __x86_64__*/
111
112
/* defaults */
113
#if !defined(USE_PLAIN_VEC_KERNELS) && !defined(USE_UNR_VEC_KERNELS) && !defined(USE_UNR_VEC_KERNELS2)
114
# define USE_PLAIN_VEC_KERNELS
115
#endif
116
#ifndef DEF_UNROLL_DEPTH
117
# define DEF_UNROLL_DEPTH 4
118
#endif
119
#ifndef DEF_PREFETCH_AHEAD
120
# define DEF_PREFETCH_AHEAD 4
121
#endif
122
#ifndef DEF_CACHELINE_SZ
123
# ifdef __WORDSIZE // Heuristics: Most 64bit arches have 64byte cache lines
124
# define DEF_CACHELINE_SZ __WORDSIZE
125
# else
126
# define DEF_CACHELINE_SZ 32
127
# endif
128
#endif
129
132
/* Default values behind CACHE_LOC_READ and CACHE_LOC_WRITE.
 * NOTE(review): presumably the cache locality hints handed to the
 * prefetch primitive for read and write accesses - confirm at the
 * place where CACHE_LOC_READ / CACHE_LOC_WRITE are used. */
#define DEF_CACHE_LOC_READ 2
#define DEF_CACHE_LOC_WRITE 3
134
135
/* Apply defaults
 *
 * Each tunable below can be preset by the user (e.g. on the compiler
 * command line); only if it is still undefined here does it take the
 * corresponding DEF_* value.
 */

/* Prefetch distance; multiplied by EL_PER_CL(T) in PREF_OFFS(T). */
#ifndef PREFETCH_AHEAD
# define PREFETCH_AHEAD DEF_PREFETCH_AHEAD
#endif
/* Unroll depth for the unrolled vector kernels. */
#ifndef UNROLL_DEPTH
# define UNROLL_DEPTH DEF_UNROLL_DEPTH
#endif
/* Cache line size in bytes; divided by sizeof(T) in EL_PER_CL(T). */
#ifndef CACHELINE_SZ
# define CACHELINE_SZ DEF_CACHELINE_SZ
#endif

/* Cache locality settings for read and write accesses. */
#ifndef CACHE_LOC_READ
# define CACHE_LOC_READ DEF_CACHE_LOC_READ
#endif
#ifndef CACHE_LOC_WRITE
# define CACHE_LOC_WRITE DEF_CACHE_LOC_WRITE
#endif
170
171
// Those should be evaluated at compile time

/* EL_PER_CL(T): number of elements of type T per cache line, as a signed
 * int and never less than 1 (types larger than a cache line yield 1).
 * CACHELINE_SZ is parenthesized because it may be overridden with an
 * expression (e.g. -DCACHELINE_SZ=32+32); the whole expansion is
 * parenthesized so it can be used safely inside larger expressions. */
#define EL_PER_CL(T) \
	((signed)((((CACHELINE_SZ) / sizeof(T)) != 0) ? ((CACHELINE_SZ) / sizeof(T)) : 1))

/* PREF_OFFS(T): prefetch offset in elements of type T, i.e. PREFETCH_AHEAD
 * cache lines worth of elements. PREFETCH_AHEAD is parenthesized for the
 * same reason as CACHELINE_SZ above. */
#define PREF_OFFS(T) (EL_PER_CL(T) * (PREFETCH_AHEAD))
174
175
#endif
/* TBCI_PERF_OPT_H */
Generated by
1.8.5