9 #ifndef TBCI_VEC_KERN_UNR_PREF_H 10 #define TBCI_VEC_KERN_UNR_PREF_H 11 #include "tbci/basics.h" 17 #define ADD3(r,v1,v2,f1,f2) r = v1 + v2 21 #define SUB3(r,v1,v2,f1,f2) r = v1 - v2 25 #define MUL3(r,v1,v2,f1,f2) r = v1 * v2 29 #define CMUL3(r,v1,v2,f1,f2) r = CPLX__ conj(v1) * v2 33 #define DIV3(r,v1,v2,f1,f2) r = v1 / v2 37 #define CDIV3(r,v1,v2,f1,f2) r = CPLX__ conj(v1) / v2 42 #define ADD2(r,v1,f1,f2) r += v1 46 #define SUB2(r,v1,f1,f2) r -= v1 50 #define SUB2I(r,v1,f1,f2) r = v1 - r 54 #define MUL2(r,v1,f1,f2) r *= v1 58 #define CMUL2(r,v1,f1,f2) r = CPLX__ conj(r) * v1 62 #define CMUL2I(r,v1,f1,f2) r *= CPLX__ conj(v1) 66 #define DIV2(r,v1,f1,f2) r /= v1 70 #define DIV2I(r,v1,f1,f2) r = v1 / r 74 #define CDIV2(r,v1,f1,f2) r = CPLX__ conj(r) / v1 78 #define CDIV2I(r,v1,f1,f2) r = CPLX__ conj(v1) / r 84 #define ADD2NV(r,v1,f1,f2) r = v1 + f2 88 #define SUB2NV(r,v1,f1,f2) r = v1 - f2 92 #define MUL2NV(r,v1,f1,f2) r = v1 * f2 97 #define ADD2RV(r,v1,f1,f2) r = f2 + v1 101 #define SUB2RV(r,v1,f1,f2) r = f2 - v1 105 #define MUL2RV(r,v1,f1,f2) r = f2 * v1 109 #define DIV2RV(r,v1,f1,f2) r = f2 / v1 114 #define ADD1NV(r,f1,f2) r += f2 118 #define SUB1NV(r,f1,f2) r -= f2 122 #define SUB1RV(r,f1,f2) r = f2 - r 126 #define MUL1NV(r,f1,f2) r *= f2 130 #define DIV1NV(r,f1,f2) r /= f2 134 #define DIV1RV(r,f1,f2) r = f2 / r 138 #define ADD1RV(r,f1,f2) r = f2 + r 143 #define MUL1RV(r,f1,f2) r = f2 * r; 148 #define ADD2NS(r,v1,f1,f2) r += f2*v1 152 #define SUB2NS(r,v1,f1,f2) r -= f2*v1 156 #define SUB2RS(r,v1,f1,f2) r = f2*v1 - r 161 #define ADD3NS(r,v1,v2,f1,f2) r = v1 + f2*v2 165 #define SUB3NS(r,v1,v2,f1,f2) r = v1 - f2*v2 170 #define ADD3SN(r,v1,v2,f1,f2) r = f2*v1 + v2 174 #define SUB3SN(r,v1,v2,f1,f2) r = f2*v1 - v2 179 #define ADD3SS(r,v1,v2,f1,f2) r = f1*v1 + f2*v2 183 #define SUB3SS(r,v1,v2,f1,f2) r = f1*v1 - f2*v2 188 #define ADD2SN(r,v1,f1,f2) r = f2*r + v1 192 #define SUB2SN(r,v1,f1,f2) r = f2*r - v1 197 #define ADD2SS(r,v1,f1,f2) r = f1*r + f2*v1 201 #define 
SUB2SS(r,v1,f1,f2) r = f1*r - f2*v1 206 #define ADD2SV(r,v1,f1,f2) r = f1*v1 + f2 210 #define SUB2SV(r,v1,f1,f2) r = f1*v1 - f2 215 #define ADD1SV(r,f1,f2) r = f1*r + f2 219 #define SUB1SV(r,f1,f2) r = f1*r - f2 224 #define ADD2VS(r,v1,f1,f2) r = f1 + f2*v1 228 #define SUB2VS(r,v1,f1,f2) r = f1 - f2*v1 232 #define DIV2VS(r,v1,f1,f2) r = f1 / (f2*v1) 237 #define NEG2(r,v1,f1,f2) r = -v1 241 #define NEG1(r,f1,f2) r = -r 245 #define DOT2(r,v1,f1,f2) f2 += CPLX__ conj(r) * v1 249 #define XDOT2(r,v1,f1,f2) { T y = CPLX__ conj(r) * v1; T t = f2+y; f1 += (t-f2)-y; f2 = t; } 253 #define MULT2(r,v1,f1,f2) f2 += r * v1 258 #define XMULT2(r,v1,f1,f2) { T y = r * v1; T t = f2+y; f1 += (t-f2)-y; f2 = t; } 263 #define FABS1(r,f1,f2) f2 += fabssqr(r) 267 #define XFABS1(r,f1,f2) { double y = fabssqr(r); double t = f2+y; f1 += (t-f2)-y; f2 = t; } 271 #define SQR1(r,f1,f2) f2 += r*r 275 #define XSQR1(r,f1,f2) { T y = r*r; T t = f2+y; f1 += (t-f2)-y; f2 = t; } 279 #define SUM1(r,f1,f2) f2 += r 283 #define XSUM1(r,f1,f2) { T t = f2+r; f1 += (t-f2)-r; f2 = t; } 289 #define SUMMULT3(r,v1,v2,f1,f2) r += v1*v2 291 #define SUMCMULT3(r,v1,v2,f1,f2) r += CPLX__ conj(v1)*v2 #define CMUL2I(r, v1, f1, f2)
/* vec = vec + s*vec;
 * Instantiates do_vec_svc_add from the VKERN_TEMPL_3V_C template (defined
 * outside this section) with ADD3NS (r = v1 + f2*v2) as its operation
 * argument. */
VKERN_TEMPL_3V_C(do_vec_svc_add, ADD3NS)
/* val = SUM fabssqr vec;
 * Instantiates do_vec_fabssqr_exact from the VKERN_TEMPL_1V_T template
 * (defined outside this section) with XFABS1 as its operation argument and
 * double as its type argument. XFABS1 adds fabssqr(r) to f2 and adds the
 * rounding error of that addition to f1. */
VKERN_TEMPL_1V_T(do_vec_fabssqr_exact, XFABS1, double)
/* vec = -vec
 * Instantiates do_vec_neg from the VKERN_TEMPL_1V template (defined outside
 * this section) with NEG1 (r = -r) as its operation argument. */
VKERN_TEMPL_1V(do_vec_neg, NEG1)
/* Instantiates do_vec_mult_stride_quick from the VKERN_TEMPL_2V_T_STRIDE
 * template (defined outside this section) with MULT2 (f2 += r * v1) as its
 * operation argument and T as its type argument. */
VKERN_TEMPL_2V_T_STRIDE(do_vec_mult_stride_quick, MULT2, T)
/* vec *= s; vec += s*vec;
 * Instantiates do_svc_add_svc from the VKERN_TEMPL_2V_CC template (defined
 * outside this section) with ADD2SS (r = f1*r + f2*v1) as its operation
 * argument. */
VKERN_TEMPL_2V_CC(do_svc_add_svc, ADD2SS)
/* vec += val;
 * Instantiates do_vec_add_val from the VKERN_TEMPL_1V_C template (defined
 * outside this section) with ADD1NV (r += f2) as its operation argument. */
VKERN_TEMPL_1V_C(do_vec_add_val, ADD1NV)
/* val = SUM fabssqr vec;
 * Instantiates do_vec_fabssqr_quick from the VKERN_TEMPL_1V_T_LD template
 * (defined outside this section) with FABS1 (f2 += fabssqr(r)) as its
 * operation argument and double as its type argument. */
VKERN_TEMPL_1V_T_LD(do_vec_fabssqr_quick, FABS1, double)
/* vec = s*vec; vec += val;
 * Instantiates do_svc_add_val from the VKERN_TEMPL_1V_CC template (defined
 * outside this section) with ADD1SV (r = f1*r + f2) as its operation
 * argument. */
VKERN_TEMPL_1V_CC(do_svc_add_val, ADD1SV)
/* vec = s*vec + s*vec;
 * Instantiates do_svc_svc_add from the VKERN_TEMPL_3V_CC template (defined
 * outside this section) with ADD3SS (r = f1*v1 + f2*v2) as its operation
 * argument. */
VKERN_TEMPL_3V_CC(do_svc_svc_add, ADD3SS)
/* vec = vec + val
 * Instantiates do_vec_val_add from the VKERN_TEMPL_2V_C template (defined
 * outside this section) with ADD2NV (r = v1 + f2) as its operation
 * argument. */
VKERN_TEMPL_2V_C(do_vec_val_add, ADD2NV)
/* vec += vec;
 * Instantiates do_vec_add_vec from the VKERN_TEMPL_2V template (defined
 * outside this section) with ADD2 (r += v1) as its operation argument. */
VKERN_TEMPL_2V(do_vec_add_vec, ADD2)
/* val = SUM vec * ~vec;
 * Instantiates do_vec_dot_quick from the VKERN_TEMPL_2V_T template (defined
 * outside this section) with DOT2 (f2 += conj(r) * v1) as its operation
 * argument and T as its type argument. */
VKERN_TEMPL_2V_T(do_vec_dot_quick, DOT2, T)
/* vec = vec + vec;
 * Instantiates do_vec_vec_add from the VKERN_TEMPL_3V template (defined
 * outside this section) with ADD3 (r = v1 + v2) as its operation argument. */
VKERN_TEMPL_3V(do_vec_vec_add, ADD3)