9 #ifndef TBCI_VEC_KERN_UNR_PREF_H
10 #define TBCI_VEC_KERN_UNR_PREF_H
11 #include "tbci/basics.h"
/*
 * Three-operand element kernels: r = v1 OP v2.
 *
 *   r       destination lvalue
 *   v1, v2  operands
 *   f1, f2  accepted but not used by these macros
 *
 * CMUL3 and CDIV3 apply `CPLX__ conj` to the first operand. Neither CPLX__
 * nor conj is defined in this section; both must be visible wherever the
 * macro is expanded.
 *
 * Every argument and the whole expansion are parenthesized so that an
 * expression argument such as `a + b` keeps its grouping.
 */
#define ADD3(r,v1,v2,f1,f2) ((r) = (v1) + (v2))
#define SUB3(r,v1,v2,f1,f2) ((r) = (v1) - (v2))
#define MUL3(r,v1,v2,f1,f2) ((r) = (v1) * (v2))
#define CMUL3(r,v1,v2,f1,f2) ((r) = CPLX__ conj(v1) * (v2))
#define DIV3(r,v1,v2,f1,f2) ((r) = (v1) / (v2))
#define CDIV3(r,v1,v2,f1,f2) ((r) = CPLX__ conj(v1) / (v2))
/*
 * Two-operand in-place element kernels: r is both an input and the result.
 *
 *   r       destination lvalue, also read as an operand
 *   v1      second operand
 *   f1, f2  accepted but not used by these macros
 *
 * The "I" suffix swaps the operand order (SUB2I: r = v1 - r, DIV2I: r = v1 / r).
 * The "C" prefix applies `CPLX__ conj`: to r in CMUL2 / CDIV2, and to v1 in
 * CMUL2I / CDIV2I. CPLX__ and conj are not defined in this section.
 *
 * Arguments and the whole expansion are parenthesized so that expression
 * arguments keep their grouping (e.g. DIV2I with v1 = a + b).
 */
#define ADD2(r,v1,f1,f2) ((r) += (v1))
#define SUB2(r,v1,f1,f2) ((r) -= (v1))
#define SUB2I(r,v1,f1,f2) ((r) = (v1) - (r))
#define MUL2(r,v1,f1,f2) ((r) *= (v1))
#define CMUL2(r,v1,f1,f2) ((r) = CPLX__ conj(r) * (v1))
#define CMUL2I(r,v1,f1,f2) ((r) *= CPLX__ conj(v1))
#define DIV2(r,v1,f1,f2) ((r) /= (v1))
#define DIV2I(r,v1,f1,f2) ((r) = (v1) / (r))
#define CDIV2(r,v1,f1,f2) ((r) = CPLX__ conj(r) / (v1))
#define CDIV2I(r,v1,f1,f2) ((r) = CPLX__ conj(v1) / (r))
/*
 * Element kernels combining one operand with the value f2.
 *
 *   r   destination lvalue
 *   v1  operand
 *   f1  accepted but not used by these macros
 *   f2  value combined with v1
 *
 * NV variants put v1 on the left  (r = v1 OP f2).
 * RV variants put f2 on the left  (r = f2 OP v1).
 *
 * Arguments and the whole expansion are parenthesized so that expression
 * arguments keep their grouping.
 */
#define ADD2NV(r,v1,f1,f2) ((r) = (v1) + (f2))
#define SUB2NV(r,v1,f1,f2) ((r) = (v1) - (f2))
#define MUL2NV(r,v1,f1,f2) ((r) = (v1) * (f2))
#define ADD2RV(r,v1,f1,f2) ((r) = (f2) + (v1))
#define SUB2RV(r,v1,f1,f2) ((r) = (f2) - (v1))
#define MUL2RV(r,v1,f1,f2) ((r) = (f2) * (v1))
#define DIV2RV(r,v1,f1,f2) ((r) = (f2) / (v1))
/*
 * One-operand in-place element kernels combining r with the value f2.
 *
 *   r   destination lvalue, also read as an operand
 *   f1  accepted but not used by these macros
 *   f2  value combined with r
 *
 * NV variants keep r on the left  (r OP= f2).
 * RV variants put f2 on the left  (r = f2 OP r).
 *
 * Arguments and the whole expansion are parenthesized so that expression
 * arguments keep their grouping (e.g. DIV1RV with f2 = a + b).
 */
#define ADD1NV(r,f1,f2) ((r) += (f2))
#define SUB1NV(r,f1,f2) ((r) -= (f2))
#define SUB1RV(r,f1,f2) ((r) = (f2) - (r))
#define MUL1NV(r,f1,f2) ((r) *= (f2))
#define DIV1NV(r,f1,f2) ((r) /= (f2))
#define DIV1RV(r,f1,f2) ((r) = (f2) / (r))
#define ADD1RV(r,f1,f2) ((r) = (f2) + (r))
/*
 * One-operand in-place kernel: r = f2 * r (f1 is accepted but unused).
 *
 * The replacement list deliberately has no trailing semicolon; the caller
 * supplies it. A semicolon baked into the macro produces an extra empty
 * statement and breaks `if (c) MUL1RV(...); else ...`.
 * Arguments and the whole expansion are parenthesized so that expression
 * arguments keep their grouping.
 */
#define MUL1RV(r,f1,f2) ((r) = (f2) * (r))
/*
 * Element kernels with scaled operands.
 *
 *   r       destination lvalue (also an operand in the two-operand forms)
 *   v1, v2  operands
 *   f1, f2  scale factors; f1 is used only by the SS forms
 *
 * Two-operand forms:
 *   ADD2NS  r += f2*v1        SUB2NS  r -= f2*v1        SUB2RS  r = f2*v1 - r
 * Three-operand forms:
 *   ADD3NS  r = v1 + f2*v2    SUB3NS  r = v1 - f2*v2
 *   ADD3SN  r = f2*v1 + v2    SUB3SN  r = f2*v1 - v2
 *   ADD3SS  r = f1*v1 + f2*v2 SUB3SS  r = f1*v1 - f2*v2
 *
 * Arguments and the whole expansion are parenthesized so that expression
 * arguments used as factors keep their grouping.
 */
#define ADD2NS(r,v1,f1,f2) ((r) += (f2) * (v1))
#define SUB2NS(r,v1,f1,f2) ((r) -= (f2) * (v1))
#define SUB2RS(r,v1,f1,f2) ((r) = (f2) * (v1) - (r))
#define ADD3NS(r,v1,v2,f1,f2) ((r) = (v1) + (f2) * (v2))
#define SUB3NS(r,v1,v2,f1,f2) ((r) = (v1) - (f2) * (v2))
#define ADD3SN(r,v1,v2,f1,f2) ((r) = (f2) * (v1) + (v2))
#define SUB3SN(r,v1,v2,f1,f2) ((r) = (f2) * (v1) - (v2))
#define ADD3SS(r,v1,v2,f1,f2) ((r) = (f1) * (v1) + (f2) * (v2))
#define SUB3SS(r,v1,v2,f1,f2) ((r) = (f1) * (v1) - (f2) * (v2))
/*
 * Element kernels that scale r or v1 and combine with a second term.
 *
 *   r       destination lvalue (also an operand where it appears on the right)
 *   v1      operand
 *   f1, f2  scale factors / values, as used per macro below
 *
 *   ADD2SN  r = f2*r + v1       SUB2SN  r = f2*r - v1
 *   ADD2SS  r = f1*r + f2*v1    SUB2SS  r = f1*r - f2*v1
 *   ADD2SV  r = f1*v1 + f2      SUB2SV  r = f1*v1 - f2
 *   ADD1SV  r = f1*r + f2       SUB1SV  r = f1*r - f2
 *   ADD2VS  r = f1 + f2*v1      SUB2VS  r = f1 - f2*v1
 *   DIV2VS  r = f1 / (f2*v1)
 *
 * Arguments and the whole expansion are parenthesized so that expression
 * arguments used as factors keep their grouping.
 */
#define ADD2SN(r,v1,f1,f2) ((r) = (f2) * (r) + (v1))
#define SUB2SN(r,v1,f1,f2) ((r) = (f2) * (r) - (v1))
#define ADD2SS(r,v1,f1,f2) ((r) = (f1) * (r) + (f2) * (v1))
#define SUB2SS(r,v1,f1,f2) ((r) = (f1) * (r) - (f2) * (v1))
#define ADD2SV(r,v1,f1,f2) ((r) = (f1) * (v1) + (f2))
#define SUB2SV(r,v1,f1,f2) ((r) = (f1) * (v1) - (f2))
#define ADD1SV(r,f1,f2) ((r) = (f1) * (r) + (f2))
#define SUB1SV(r,f1,f2) ((r) = (f1) * (r) - (f2))
#define ADD2VS(r,v1,f1,f2) ((r) = (f1) + (f2) * (v1))
#define SUB2VS(r,v1,f1,f2) ((r) = (f1) - (f2) * (v1))
#define DIV2VS(r,v1,f1,f2) ((r) = (f1) / ((f2) * (v1)))
/*
 * Negation kernels (f1 and f2 are accepted but unused).
 *
 *   NEG2  r = -v1   (out of place)
 *   NEG1  r = -r    (in place)
 *
 * The operand is parenthesized so that unary minus applies to the whole
 * argument expression (e.g. NEG2 with v1 = a - b).
 */
#define NEG2(r,v1,f1,f2) ((r) = -(v1))
#define NEG1(r,f1,f2) ((r) = -(r))
/*
 * Reduction kernels: fold one element (or element pair) into accumulators.
 *
 *   r, v1  element operands
 *   f2     running sum (lvalue)
 *   f1     used only by the X variants: running total of rounding errors (lvalue)
 *
 * Plain variants add one term to f2:
 *   DOT2   conj(r) * v1      MULT2  r * v1
 *   FABS1  fabssqr(r)        SQR1   r * r        SUM1  r
 *
 * X variants add the same term y with error tracking:
 *   t = f2 + y;  f1 += (t - f2) - y;  f2 = t;
 * The statement order is significant and is kept as is. How the caller folds
 * f1 back into the result is not visible in this section.
 *
 * The X variants expand to a brace block that declares locals named `y` and
 * `t`, so arguments must not use those names. They rely on a type `T` being
 * in scope at the expansion site; XFABS1 uses `double` instead. CPLX__, conj
 * and fabssqr are not defined in this section.
 *
 * Arguments are parenthesized so that expression arguments keep their
 * grouping. XSUM1 evaluates r twice and SQR1 / XSQR1 evaluate r twice, so
 * avoid arguments with side effects.
 */
#define DOT2(r,v1,f1,f2) ((f2) += CPLX__ conj(r) * (v1))
#define XDOT2(r,v1,f1,f2) { T y = CPLX__ conj(r) * (v1); T t = (f2)+y; (f1) += (t-(f2))-y; (f2) = t; }
#define MULT2(r,v1,f1,f2) ((f2) += (r) * (v1))
#define XMULT2(r,v1,f1,f2) { T y = (r) * (v1); T t = (f2)+y; (f1) += (t-(f2))-y; (f2) = t; }
#define FABS1(r,f1,f2) ((f2) += fabssqr(r))
#define XFABS1(r,f1,f2) { double y = fabssqr(r); double t = (f2)+y; (f1) += (t-(f2))-y; (f2) = t; }
#define SQR1(r,f1,f2) ((f2) += (r)*(r))
#define XSQR1(r,f1,f2) { T y = (r)*(r); T t = (f2)+y; (f1) += (t-(f2))-y; (f2) = t; }
#define SUM1(r,f1,f2) ((f2) += (r))
#define XSUM1(r,f1,f2) { T t = (f2)+(r); (f1) += (t-(f2))-(r); (f2) = t; }
/*
 * Multiply-accumulate kernels (f1 and f2 are accepted but unused).
 *
 *   SUMMULT3   r += v1 * v2
 *   SUMCMULT3  r += conj(v1) * v2
 *
 * CPLX__ and conj are not defined in this section. Arguments and the whole
 * expansion are parenthesized so that expression arguments used as factors
 * keep their grouping.
 */
#define SUMMULT3(r,v1,v2,f1,f2) ((r) += (v1)*(v2))
#define SUMCMULT3(r,v1,v2,f1,f2) ((r) += CPLX__ conj(v1)*(v2))
#define CMUL2I(r, v1, f1, f2)
#define ADD2SN(r, v1, f1, f2)
#define SUB3NS(r, v1, v2, f1, f2)
#define CMUL3(r, v1, v2, f1, f2)
#define ADD2NS(r, v1, f1, f2)
#define XFABS1(r, f1, f2)
#define SUB2NV(r, v1, f1, f2)
#define ADD2SV(r, v1, f1, f2)
#define ADD1RV(r, f1, f2)
#define SUB1RV(r, f1, f2)
#define SUMCMULT3(r, v1, v2, f1, f2)
VKERN_TEMPL_1V(do_vec_neg, NEG1)
vec = -vec
#define SUMMULT3(r, v1, v2, f1, f2)
#define ADD3SN(r, v1, v2, f1, f2)
#define VKERN_TEMPL_3V(FNAME, OP3)
We leave unrolling and prefetching to the compiler.
#define MUL3(r, v1, v2, f1, f2)
#define SUB2(r, v1, f1, f2)
#define SUB2VS(r, v1, f1, f2)
#define ADD2NV(r, v1, f1, f2)
#define DIV1NV(r, f1, f2)
#define SUB2SV(r, v1, f1, f2)
#define DIV2(r, v1, f1, f2)
#define SUB3(r, v1, v2, f1, f2)
#define ADD2RV(r, v1, f1, f2)
#define VKERN_TEMPL_2V_T(FNAME, OP2, TYPE)
Operations of type TYPE = VEC OP VEC.
#define XMULT2(r, v1, f1, f2)
#define DIV2VS(r, v1, f1, f2)
#define VKERN_TEMPL_2V(FNAME, OP2)
Operations of type vec OP= vec.
#define VKERN_TEMPL_1V_T(FNAME, OP1, TYPE)
Operations of type TYPE = OP VEC.
#define DIV1RV(r, f1, f2)
#define SUB2RV(r, v1, f1, f2)
#define ADD1NV(r, f1, f2)
#define DOT2(r, v1, f1, f2)
#define DIV2I(r, v1, f1, f2)
#define SUB1SV(r, f1, f2)
#define NEG2(r, v1, f1, f2)
VKERN_TEMPL_1V_T_LD(do_vec_fabssqr_quick, FABS1, double)
val = SUM fabssqr(vec[i])
#define ADD2(r, v1, f1, f2)
#define MUL1NV(r, f1, f2)
#define MUL2(r, v1, f1, f2)
#define DIV3(r, v1, v2, f1, f2)
#define ADD3(r, v1, v2, f1, f2)
#define VKERN_TEMPL_1V_CC(FNAME, OP1)
Operations of type VEC *= S OP= VAL.
#define ADD3SS(r, v1, v2, f1, f2)
#define ADD2SS(r, v1, f1, f2)
#define ADD1SV(r, f1, f2)
#define ADD2VS(r, v1, f1, f2)
#define CMUL2(r, v1, f1, f2)
#define VKERN_TEMPL_1V_C(FNAME, OP1)
Operations of type VEC OP= VAL.
#define CDIV3(r, v1, v2, f1, f2)
#define SUB2NS(r, v1, f1, f2)
#define VKERN_TEMPL_3V_C(FNAME, OP3)
Operations of type vec = vec OP val * vec.
#define VKERN_TEMPL_2V_T_STRIDE(FNAME, OP2, TYPE)
Operations of type TYPE = VEC OP VEC.
#define DIV2RV(r, v1, f1, f2)
#define XDOT2(r, v1, f1, f2)
#define ADD3NS(r, v1, v2, f1, f2)
#define SUB2SN(r, v1, f1, f2)
#define CDIV2(r, v1, f1, f2)
#define SUB2RS(r, v1, f1, f2)
#define MUL2NV(r, v1, f1, f2)
#define MULT2(r, v1, f1, f2)
#define VKERN_TEMPL_2V_CC(FNAME, OP2)
Operations of type VEC = VEC OP VAL or VAL OP VEC.
#define SUB3SN(r, v1, v2, f1, f2)
#define VKERN_TEMPL_2V_C(FNAME, OP2)
Operations of type VEC = VEC OP VAL or VAL OP VEC.
#define SUB1NV(r, f1, f2)
#define MUL2RV(r, v1, f1, f2)
#define CDIV2I(r, v1, f1, f2)
#define VKERN_TEMPL_3V_CC(FNAME, OP3)
Operations of type vec = val * vec OP val * vec.
#define SUB2SS(r, v1, f1, f2)
#define SUB3SS(r, v1, v2, f1, f2)
#define SUB2I(r, v1, f1, f2)