9#ifndef TBCI_VEC_KERN_UNR_PREF_H
10#define TBCI_VEC_KERN_UNR_PREF_H
11#include "tbci/basics.h"
/*
 * Three-operand element kernels: r = v1 OP v2.
 *
 *   r       destination (assignment target, left bare on purpose)
 *   v1, v2  source operands
 *   f1, f2  accepted so all kernels share one signature; unused here
 *
 * Source operands are parenthesized so that compound arguments such as
 * `a + b` keep their intended grouping after expansion.
 * The C* variants apply CPLX__ conj() to the first operand; CPLX__ is
 * defined outside this block (presumably a namespace qualifier - confirm).
 */
#define ADD3(r,v1,v2,f1,f2) r = (v1) + (v2)
#define SUB3(r,v1,v2,f1,f2) r = (v1) - (v2)
#define MUL3(r,v1,v2,f1,f2) r = (v1) * (v2)
#define CMUL3(r,v1,v2,f1,f2) r = CPLX__ conj(v1) * (v2)
#define DIV3(r,v1,v2,f1,f2) r = (v1) / (v2)
#define CDIV3(r,v1,v2,f1,f2) r = CPLX__ conj(v1) / (v2)
/*
 * Two-operand in-place element kernels: r is both an input and the result.
 *
 *   r       destination, also read as an operand
 *   v1      source operand
 *   f1, f2  accepted for a uniform signature; unused here
 *
 * The *I ("inverted") forms put v1 on the left of the operator:
 *   SUB2I: r = v1 - r      DIV2I: r = v1 / r
 * The C* forms conjugate one operand via CPLX__ conj() (CPLX__ is defined
 * outside this block): CMUL2/CDIV2 conjugate r, CMUL2I/CDIV2I conjugate v1.
 *
 * Every read of an argument is parenthesized so compound arguments expand
 * with the intended precedence; the assignment target itself is left bare.
 */
#define ADD2(r,v1,f1,f2) r += (v1)
#define SUB2(r,v1,f1,f2) r -= (v1)
#define SUB2I(r,v1,f1,f2) r = (v1) - (r)
#define MUL2(r,v1,f1,f2) r *= (v1)
#define CMUL2(r,v1,f1,f2) r = CPLX__ conj(r) * (v1)
#define CMUL2I(r,v1,f1,f2) r *= CPLX__ conj(v1)
#define DIV2(r,v1,f1,f2) r /= (v1)
#define DIV2I(r,v1,f1,f2) r = (v1) / (r)
#define CDIV2(r,v1,f1,f2) r = CPLX__ conj(r) / (v1)
#define CDIV2I(r,v1,f1,f2) r = CPLX__ conj(v1) / (r)
/*
 * Element kernels combining one source operand with the scalar f2.
 *
 *   r   destination (assignment target, left bare)
 *   v1  source operand
 *   f1  unused, kept for a uniform signature
 *   f2  scalar value
 *
 * *NV forms place v1 on the left  (r = v1 OP f2);
 * *RV forms place f2 on the left  (r = f2 OP v1).
 * Operands are parenthesized so compound arguments group correctly.
 */
#define ADD2NV(r,v1,f1,f2) r = (v1) + (f2)
#define SUB2NV(r,v1,f1,f2) r = (v1) - (f2)
#define MUL2NV(r,v1,f1,f2) r = (v1) * (f2)
#define ADD2RV(r,v1,f1,f2) r = (f2) + (v1)
#define SUB2RV(r,v1,f1,f2) r = (f2) - (v1)
#define MUL2RV(r,v1,f1,f2) r = (f2) * (v1)
#define DIV2RV(r,v1,f1,f2) r = (f2) / (v1)
/*
 * In-place element kernels combining r with the scalar f2.
 *
 *   r   destination, also read as an operand
 *   f1  unused, kept for a uniform signature
 *   f2  scalar value
 *
 * *NV forms keep r on the left (compound assignment: r OP= f2);
 * *RV forms place f2 on the left (r = f2 OP r).
 * Reads of r and f2 are parenthesized so compound arguments expand with
 * the intended precedence; the assignment target is left bare.
 */
#define ADD1NV(r,f1,f2) r += (f2)
#define SUB1NV(r,f1,f2) r -= (f2)
#define SUB1RV(r,f1,f2) r = (f2) - (r)
#define MUL1NV(r,f1,f2) r *= (f2)
#define DIV1NV(r,f1,f2) r /= (f2)
#define DIV1RV(r,f1,f2) r = (f2) / (r)
#define ADD1RV(r,f1,f2) r = (f2) + (r)
/*
 * In-place scalar multiply with the scalar on the left: r = f2 * r.
 * f1 is unused and kept for a uniform signature.
 *
 * The expansion deliberately carries no trailing semicolon: the caller
 * supplies it, so the macro is usable as the body of an if/else.
 * Operand reads are parenthesized so compound arguments group correctly.
 */
#define MUL1RV(r,f1,f2) r = (f2) * (r)
/*
 * In-place kernels that combine r with the scaled operand f2*v1.
 *
 *   ADD2NS: r += f2*v1
 *   SUB2NS: r -= f2*v1
 *   SUB2RS: r  = f2*v1 - r
 *
 * f1 is unused and kept for a uniform signature. Factors are
 * parenthesized so a compound v1 or f2 is scaled as a whole.
 */
#define ADD2NS(r,v1,f1,f2) r += (f2)*(v1)
#define SUB2NS(r,v1,f1,f2) r -= (f2)*(v1)
#define SUB2RS(r,v1,f1,f2) r = (f2)*(v1) - (r)
/*
 * Three-operand kernels with scalar scaling of one or both operands.
 *
 *   *3NS: r = v1 OP f2*v2        (second operand scaled by f2)
 *   *3SN: r = f2*v1 OP v2        (first operand scaled by f2)
 *   *3SS: r = f1*v1 OP f2*v2     (both operands scaled)
 *
 * f1 is unused by the NS/SN forms. Every factor is parenthesized so a
 * compound argument is scaled as a whole; the target r is left bare.
 */
#define ADD3NS(r,v1,v2,f1,f2) r = (v1) + (f2)*(v2)
#define SUB3NS(r,v1,v2,f1,f2) r = (v1) - (f2)*(v2)
#define ADD3SN(r,v1,v2,f1,f2) r = (f2)*(v1) + (v2)
#define SUB3SN(r,v1,v2,f1,f2) r = (f2)*(v1) - (v2)
#define ADD3SS(r,v1,v2,f1,f2) r = (f1)*(v1) + (f2)*(v2)
#define SUB3SS(r,v1,v2,f1,f2) r = (f1)*(v1) - (f2)*(v2)
/*
 * In-place kernels that scale r before combining it with v1.
 *
 *   *2SN: r = f2*r OP v1         (f1 unused)
 *   *2SS: r = f1*r OP f2*v1
 *
 * Every factor is parenthesized so a compound scalar or operand is
 * applied as a whole; the assignment target r is left bare.
 */
#define ADD2SN(r,v1,f1,f2) r = (f2)*(r) + (v1)
#define SUB2SN(r,v1,f1,f2) r = (f2)*(r) - (v1)
#define ADD2SS(r,v1,f1,f2) r = (f1)*(r) + (f2)*(v1)
#define SUB2SS(r,v1,f1,f2) r = (f1)*(r) - (f2)*(v1)
/*
 * Scale-then-offset kernels: an operand is multiplied by f1 and then
 * combined with the scalar f2.
 *
 *   *2SV: r = f1*v1 OP f2
 *   *1SV: r = f1*r  OP f2        (in place)
 *
 * Every factor and the offset are parenthesized so compound arguments
 * keep their grouping; the assignment target r is left bare.
 */
#define ADD2SV(r,v1,f1,f2) r = (f1)*(v1) + (f2)
#define SUB2SV(r,v1,f1,f2) r = (f1)*(v1) - (f2)
#define ADD1SV(r,f1,f2) r = (f1)*(r) + (f2)
#define SUB1SV(r,f1,f2) r = (f1)*(r) - (f2)
/*
 * Kernels combining the scalar f1 with the scaled operand f2*v1.
 *
 *   ADD2VS: r = f1 + f2*v1
 *   SUB2VS: r = f1 - f2*v1
 *   DIV2VS: r = f1 / (f2*v1)
 *
 * Each argument is parenthesized so compound expressions are treated as
 * a single factor/term; the assignment target r is left bare.
 */
#define ADD2VS(r,v1,f1,f2) r = (f1) + (f2)*(v1)
#define SUB2VS(r,v1,f1,f2) r = (f1) - (f2)*(v1)
#define DIV2VS(r,v1,f1,f2) r = (f1) / ((f2)*(v1))
/*
 * Negation kernels.
 *
 *   NEG2: r = -v1
 *   NEG1: r = -r     (in place)
 *
 * f1 and f2 are unused and kept for a uniform signature. The negated
 * operand is parenthesized so that e.g. NEG2(r, a - b, ...) negates the
 * whole difference rather than only `a`.
 */
#define NEG2(r,v1,f1,f2) r = -(v1)
#define NEG1(r,f1,f2) r = -(r)
/*
 * Product-accumulation kernels: add one product term to the running
 * sum held in f2.
 *
 *   DOT2 : f2 += conj(r) * v1
 *   MULT2: f2 += r * v1
 *
 * The X* variants additionally track rounding error: with y the new
 * term and t = f2 + y, they add (t - f2) - y to f1 and then store t in
 * f2. How the caller finally combines f1 with f2 is not visible here.
 * They declare block-local temporaries `y` and `t` of type T (T must be
 * in scope at the expansion site), so arguments must not themselves be
 * named y or t.
 *
 * conj is reached through CPLX__, which is defined outside this block.
 * Operand reads are parenthesized so compound arguments multiply as a
 * whole; the accumulator targets f1/f2 are left bare.
 */
#define DOT2(r,v1,f1,f2) f2 += CPLX__ conj(r) * (v1)
#define XDOT2(r,v1,f1,f2) { T y = CPLX__ conj(r) * (v1); T t = (f2)+y; f1 += (t-(f2))-y; f2 = t; }
#define MULT2(r,v1,f1,f2) f2 += (r) * (v1)
#define XMULT2(r,v1,f1,f2) { T y = (r) * (v1); T t = (f2)+y; f1 += (t-(f2))-y; f2 = t; }
/*
 * Single-operand reduction kernels: fold one element r into the running
 * sum held in f2.
 *
 *   FABS1: f2 += fabssqr(r)   (fabssqr is defined outside this block)
 *   SQR1 : f2 += r*r          (r is evaluated twice - avoid side effects)
 *   SUM1 : f2 += r
 *
 * The X* variants additionally track rounding error: with y the new
 * term and t = f2 + y, they add (t - f2) - y to f1 and then store t in
 * f2. How the caller finally combines f1 with f2 is not visible here.
 * XFABS1 uses double temporaries; XSQR1/XSUM1 use type T, which must be
 * in scope at the expansion site. The temporaries are named `y`/`t`, so
 * arguments must not use those names.
 *
 * Reads of r and f2 are parenthesized so compound arguments keep their
 * grouping; the accumulator targets f1/f2 are left bare.
 */
#define FABS1(r,f1,f2) f2 += fabssqr(r)
#define XFABS1(r,f1,f2) { double y = fabssqr(r); double t = (f2)+y; f1 += (t-(f2))-y; f2 = t; }
#define SQR1(r,f1,f2) f2 += (r)*(r)
#define XSQR1(r,f1,f2) { T y = (r)*(r); T t = (f2)+y; f1 += (t-(f2))-y; f2 = t; }
#define SUM1(r,f1,f2) f2 += (r)
#define XSUM1(r,f1,f2) { T t = (f2)+(r); f1 += (t-(f2))-(r); f2 = t; }
/*
 * Multiply-accumulate kernels: add the product of two operands to r.
 *
 *   SUMMULT3 : r += v1 * v2
 *   SUMCMULT3: r += conj(v1) * v2   (conj reached through CPLX__, which
 *                                    is defined outside this block)
 *
 * f1 and f2 are unused and kept for a uniform signature. Factors are
 * parenthesized so compound arguments multiply as a whole; the
 * accumulation target r is left bare.
 */
#define SUMMULT3(r,v1,v2,f1,f2) r += (v1)*(v2)
#define SUMCMULT3(r,v1,v2,f1,f2) r += CPLX__ conj(v1)*(v2)
#define VKERN_TEMPL_1V(FNAME, OP1)
Operations of type VEC = OP self.
#define VKERN_TEMPL_1V_C(FNAME, OP1)
Operations of type VEC OP= VAL.
#define VKERN_TEMPL_3V_CC(FNAME, OP3)
Operations of type vec = val * vec OP val * vec.
#define VKERN_TEMPL_2V_CC(FNAME, OP2)
Operations of type VEC = VEC OP VAL or VAL OP VEC.
#define VKERN_TEMPL_2V_T_STRIDE(FNAME, OP2, TYPE)
Operations of type TYPE = VEC OP VEC.
#define VKERN_TEMPL_2V(FNAME, OP2)
Operations of type vec OP= vec.
#define VKERN_TEMPL_2V_C(FNAME, OP2)
Operations of type VEC = VEC OP VAL or VAL OP VEC.
#define VKERN_TEMPL_1V_T_LD(FNAME, OP1, TYPE)
Operations of type TYPE = OP VEC (using LONG_DOUBLE internally).
#define VKERN_TEMPL_3V(FNAME, OP3)
We leave unrolling and prefetching to the compiler.
#define VKERN_TEMPL_3V_C(FNAME, OP3)
Operations of type vec = vec OP val * vec.
#define VKERN_TEMPL_1V_T(FNAME, OP1, TYPE)
Operations of type TYPE = OP VEC.
#define VKERN_TEMPL_2V_T(FNAME, OP2, TYPE)
Operations of type TYPE = VEC OP VEC.
#define VKERN_TEMPL_1V_CC(FNAME, OP1)
Operations of type VEC *= S OP= VAL.
#define SUB3(r, v1, v2, f1, f2)
#define MUL2RV(r, v1, f1, f2)
#define DIV3(r, v1, v2, f1, f2)
#define XFABS1(r, f1, f2)
#define MUL2(r, v1, f1, f2)
#define ADD3NS(r, v1, v2, f1, f2)
#define SUB2RV(r, v1, f1, f2)
#define ADD1RV(r, f1, f2)
#define ADD1NV(r, f1, f2)
#define CDIV3(r, v1, v2, f1, f2)
#define XMULT2(r, v1, f1, f2)
#define ADD2SS(r, v1, f1, f2)
#define SUB2VS(r, v1, f1, f2)
#define SUB2NV(r, v1, f1, f2)
#define ADD3(r, v1, v2, f1, f2)
#define MUL2NV(r, v1, f1, f2)
#define SUB1RV(r, f1, f2)
#define SUB2SV(r, v1, f1, f2)
#define ADD1SV(r, f1, f2)
#define SUB3SN(r, v1, v2, f1, f2)
#define DIV2VS(r, v1, f1, f2)
#define XDOT2(r, v1, f1, f2)
#define ADD2SV(r, v1, f1, f2)
#define MUL3(r, v1, v2, f1, f2)
#define DIV2I(r, v1, f1, f2)
#define ADD2NV(r, v1, f1, f2)
#define DIV1RV(r, f1, f2)
#define ADD2VS(r, v1, f1, f2)
#define SUB2(r, v1, f1, f2)
#define CMUL3(r, v1, v2, f1, f2)
#define CDIV2I(r, v1, f1, f2)
#define DIV2RV(r, v1, f1, f2)
#define SUB3SS(r, v1, v2, f1, f2)
#define SUB2NS(r, v1, f1, f2)
#define SUB1SV(r, f1, f2)
#define ADD2SN(r, v1, f1, f2)
#define DOT2(r, v1, f1, f2)
#define DIV1NV(r, f1, f2)
#define SUB2SS(r, v1, f1, f2)
#define MULT2(r, v1, f1, f2)
#define SUMCMULT3(r, v1, v2, f1, f2)
#define SUMMULT3(r, v1, v2, f1, f2)
#define CMUL2(r, v1, f1, f2)
#define CDIV2(r, v1, f1, f2)
#define SUB1NV(r, f1, f2)
#define SUB2SN(r, v1, f1, f2)
#define CMUL2I(r, v1, f1, f2)
#define MUL1NV(r, f1, f2)
#define SUB2I(r, v1, f1, f2)
#define ADD2RV(r, v1, f1, f2)
#define NEG2(r, v1, f1, f2)
#define SUB3NS(r, v1, v2, f1, f2)
#define SUB2RS(r, v1, f1, f2)
#define DIV2(r, v1, f1, f2)
#define ADD3SS(r, v1, v2, f1, f2)
#define ADD2NS(r, v1, f1, f2)
#define ADD2(r, v1, f1, f2)
#define ADD3SN(r, v1, v2, f1, f2)