|
TBCI Numerical high perf. C++ Library
2.8.0
|
Macros for composing unrolled prefetching loops over arrays using SIMD intrinsics. More...
Go to the source code of this file.
Macros | |
| #define | UNROLL4_PREF_KERNEL5_SIMD(MDOP, ADV, T, SUF, UNA1, UNA2) |
| TODO: Should be merged with unroll_prefetch_def.h. More... | |
| #define | UNROLL4_KERNEL5_SIMD(MDOP, ADV, SUF, UNA1, UNA2) |
| Four times unrolled kernel for 5 args without prefetching. More... | |
| #define | VKERN_TEMPL_3V_NP_SIMD(MDOP, ADV, STP, SUF, UNA1, UNA2) |
| #define | VKERN_TEMPL_3V_PLAIN_SIMD(MDOP, ADV, STP, SUF, UNA1, UNA2) |
| #define | VKERN_TEMPL_3V_SISD(SDOP, COND, STP, SUF) |
| #define | UNROLL4_PREF_KERNEL4_SIMD(MDOP, ADV, T, SUF, UNA) |
| Four times unrolled kernel for 4 args with prefetching. More... | |
| #define | UNROLL4_KERNEL4_SIMD(MDOP, ADV, SUF, UNA) |
| Four times unrolled kernel for 4 args without prefetching. More... | |
| #define | VKERN_TEMPL_2V_NP_SIMD(MDOP, ADV, STP, SUF, UNA) |
| #define | VKERN_TEMPL_2V_PLAIN_SIMD(MDOP, ADV, STP, SUF, UNA) |
| #define | VKERN_TEMPL_2V_SISD(SDOP, COND, STP, SUF) |
| #define | UNROLL4_PREF_KERNEL3_SIMD(MDOP, ADV, T, SUF) |
| Four times unrolled kernel for 3 args with prefetching. TODO: Prefetching. More... | |
| #define | UNROLL4_KERNEL3_SIMD(MDOP, ADV, SUF) |
| Four times unrolled kernel for 3 args without prefetching. More... | |
| #define | VKERN_TEMPL_1V_NP_SIMD(MDOP, ADV, STP, SUF) |
| #define | VKERN_TEMPL_1V_PLAIN_SIMD(MDOP, ADV, STP, SUF) |
| #define | VKERN_TEMPL_1V_SISD(SDOP, COND, STP, SUF) |
| #define | NO_TBCI_SIMD_UNROLL |
| To unroll or not to unroll: define TBCI_SIMD_UNROLL if it's beneficial for your CPU (it's not for most newer ones) More... | |
| #define | VKERN_TEMPL_3V_K_SIMD(m, a, s, f, u1, u2) VKERN_TEMPL_3V_PLAIN_SIMD(m,a,s,f,u1,u2) |
| #define | VKERN_TEMPL_2V_K_SIMD(m, a, s, f, u) VKERN_TEMPL_2V_PLAIN_SIMD(m,a,s,f,u) |
| #define | VKERN_TEMPL_1V_K_SIMD(m, a, s, f) VKERN_TEMPL_1V_PLAIN_SIMD(m,a,s,f) |
| #define | ALIGN_REQ 0x0f |
| #define | MISALIGNMENT_CHECK(x) ((unsigned long)x & ALIGN_REQ) |
| #define | WARN_UNALIGN(v) do {} while (0) |
| WARN_UNALIGNED macro: If defined, the TBCI library will print a warning to stderr for unaligned SIMD accesses, which will be slower ... More... | |
| #define | VKERN_TEMPL_3V_SIMD(FNAME, OP, SSUF, MSUF, PREP, SFIN, FIN, ADV, TYPE, STP) |
| TODO: Check whether enabling the non-unrolled fixup (loop tail) is beneficial. More... | |
| #define | VKERN_TEMPL_3V_SIMD_UA(FNAME, OP, SSUF, MSUF, PREP, SFIN, FIN, ADV, TYPE, STP) |
| Without the unaligned warning. More... | |
| #define | VKERN_TEMPL_3V_C_SIMD(FNAME, OP, SSUF, MSUF, PREP, SFIN, FIN, ADV, TYPE, STP) |
| #define | VKERN_TEMPL_3V_CC_SIMD(FNAME, OP, SSUF, MSUF, PREP, SFIN, FIN, ADV, TYPE, STP) |
| #define | VKERN_TEMPL_2V_SIMD(FNAME, OP, SSUF, MSUF, PREP, SFIN, FIN, ADV, TYPE, STP) |
| #define | VKERN_TEMPL_2V_C_SIMD(FNAME, OP, SSUF, MSUF, PREP, SFIN, FIN, ADV, TYPE, STP) |
| #define | VKERN_TEMPL_2V_CC_SIMD(FNAME, OP, SSUF, MSUF, PREP, SFIN, FIN, ADV, TYPE, STP) |
| #define | VKERN_TEMPL_2V_T_SIMD(FNAME, OP, SSUF, MSUF, PREP, SFIN, FIN, ADV, TYPE, STP) |
| #define | VKERN_TEMPL_2V_T_SIMD_VL(FNAME, OP, SSUF, MSUF, PREP, SFIN, FIN, ADV, TYPE, STP) |
| #define | VKERN_TEMPL_1V_SIMD(FNAME, OP, SSUF, MSUF, PREP, SFIN, FIN, ADV, TYPE, STP) |
| #define | VKERN_TEMPL_1V_C_SIMD(FNAME, OP, SSUF, MSUF, PREP, SFIN, FIN, ADV, TYPE, STP) |
| #define | VKERN_TEMPL_1V_CC_SIMD(FNAME, OP, SSUF, MSUF, PREP, SFIN, FIN, ADV, TYPE, STP) |
| #define | VKERN_TEMPL_1V_T_SIMD(FNAME, OP, SSUF, MSUF, PREP, SFIN, FIN, ADV, TYPE, STP) |
Macros for composing unrolled prefetching loops over arrays using SIMD intrinsics.
(c) Kurt Garloff, kurt@garloff.de, 4/2005, GNU LGPL v2
Definition in file unroll_prefetch_simd_def.h.
| #define ALIGN_REQ 0x0f |
Definition at line 257 of file unroll_prefetch_simd_def.h.
Definition at line 266 of file unroll_prefetch_simd_def.h.
| #define NO_TBCI_SIMD_UNROLL |
To unroll or not to unroll: define TBCI_SIMD_UNROLL if it's beneficial for your CPU (it's not for most newer ones)
Definition at line 230 of file unroll_prefetch_simd_def.h.
| #define UNROLL4_KERNEL3_SIMD | ( | MDOP, | |
| ADV, | |||
| SUF | |||
| ) |
Four times unrolled kernel for 3 args without prefetching.
Definition at line 186 of file unroll_prefetch_simd_def.h.
| #define UNROLL4_KERNEL4_SIMD | ( | MDOP, | |
| ADV, | |||
| SUF, | |||
| UNA | |||
| ) |
Four times unrolled kernel for 4 args without prefetching.
Definition at line 119 of file unroll_prefetch_simd_def.h.
| #define UNROLL4_KERNEL5_SIMD | ( | MDOP, | |
| ADV, | |||
| SUF, | |||
| UNA1, | |||
| UNA2 | |||
| ) |
Four times unrolled kernel for 5 args without prefetching.
Definition at line 51 of file unroll_prefetch_simd_def.h.
| #define UNROLL4_PREF_KERNEL3_SIMD | ( | MDOP, | |
| ADV, | |||
| T, | |||
| SUF | |||
| ) |
Four times unrolled kernel for 3 args with prefetching. TODO: Prefetching.
(FIXME: Is it needed? SSE2 capable CPUs do hardware prefetching, no???)
Definition at line 160 of file unroll_prefetch_simd_def.h.
| #define UNROLL4_PREF_KERNEL4_SIMD | ( | MDOP, | |
| ADV, | |||
| T, | |||
| SUF, | |||
| UNA | |||
| ) |
Four times unrolled kernel for 4 args with prefetching.
Definition at line 90 of file unroll_prefetch_simd_def.h.
| #define UNROLL4_PREF_KERNEL5_SIMD | ( | MDOP, | |
| ADV, | |||
| T, | |||
| SUF, | |||
| UNA1, | |||
| UNA2 | |||
| ) |
TODO: Should be merged with unroll_prefetch_def.h.
Note that we dropped all PREFETCH insns; HW that does SSE2 in general does prefetching as well, so we rather settle for smaller kernels. Four times unrolled kernel for 5 args with prefetching.
Definition at line 19 of file unroll_prefetch_simd_def.h.
| #define VKERN_TEMPL_1V_C_SIMD | ( | FNAME, | |
| OP, | |||
| SSUF, | |||
| MSUF, | |||
| PREP, | |||
| SFIN, | |||
| FIN, | |||
| ADV, | |||
| TYPE, | |||
| STP | |||
| ) |
Definition at line 571 of file unroll_prefetch_simd_def.h.
| #define VKERN_TEMPL_1V_CC_SIMD | ( | FNAME, | |
| OP, | |||
| SSUF, | |||
| MSUF, | |||
| PREP, | |||
| SFIN, | |||
| FIN, | |||
| ADV, | |||
| TYPE, | |||
| STP | |||
| ) |
Definition at line 588 of file unroll_prefetch_simd_def.h.
| #define VKERN_TEMPL_1V_K_SIMD | ( | m, | |
| a, | |||
| s, | |||
| f | |||
| ) | VKERN_TEMPL_1V_PLAIN_SIMD(m,a,s,f) |
Definition at line 241 of file unroll_prefetch_simd_def.h.
| #define VKERN_TEMPL_1V_NP_SIMD | ( | MDOP, | |
| ADV, | |||
| STP, | |||
| SUF | |||
| ) |
Definition at line 195 of file unroll_prefetch_simd_def.h.
| #define VKERN_TEMPL_1V_PLAIN_SIMD | ( | MDOP, | |
| ADV, | |||
| STP, | |||
| SUF | |||
| ) |
Definition at line 206 of file unroll_prefetch_simd_def.h.
| #define VKERN_TEMPL_1V_SIMD | ( | FNAME, | |
| OP, | |||
| SSUF, | |||
| MSUF, | |||
| PREP, | |||
| SFIN, | |||
| FIN, | |||
| ADV, | |||
| TYPE, | |||
| STP | |||
| ) |
Definition at line 555 of file unroll_prefetch_simd_def.h.
| #define VKERN_TEMPL_1V_SISD | ( | SDOP, | |
| COND, | |||
| STP, | |||
| SUF | |||
| ) |
Definition at line 216 of file unroll_prefetch_simd_def.h.
| #define VKERN_TEMPL_1V_T_SIMD | ( | FNAME, | |
| OP, | |||
| SSUF, | |||
| MSUF, | |||
| PREP, | |||
| SFIN, | |||
| FIN, | |||
| ADV, | |||
| TYPE, | |||
| STP | |||
| ) |
Definition at line 606 of file unroll_prefetch_simd_def.h.
| #define VKERN_TEMPL_2V_C_SIMD | ( | FNAME, | |
| OP, | |||
| SSUF, | |||
| MSUF, | |||
| PREP, | |||
| SFIN, | |||
| FIN, | |||
| ADV, | |||
| TYPE, | |||
| STP | |||
| ) |
Definition at line 454 of file unroll_prefetch_simd_def.h.
| #define VKERN_TEMPL_2V_CC_SIMD | ( | FNAME, | |
| OP, | |||
| SSUF, | |||
| MSUF, | |||
| PREP, | |||
| SFIN, | |||
| FIN, | |||
| ADV, | |||
| TYPE, | |||
| STP | |||
| ) |
Definition at line 478 of file unroll_prefetch_simd_def.h.
| #define VKERN_TEMPL_2V_K_SIMD | ( | m, | |
| a, | |||
| s, | |||
| f, | |||
| u | |||
| ) | VKERN_TEMPL_2V_PLAIN_SIMD(m,a,s,f,u) |
Definition at line 240 of file unroll_prefetch_simd_def.h.
| #define VKERN_TEMPL_2V_NP_SIMD | ( | MDOP, | |
| ADV, | |||
| STP, | |||
| SUF, | |||
| UNA | |||
| ) |
Definition at line 129 of file unroll_prefetch_simd_def.h.
| #define VKERN_TEMPL_2V_PLAIN_SIMD | ( | MDOP, | |
| ADV, | |||
| STP, | |||
| SUF, | |||
| UNA | |||
| ) |
Definition at line 138 of file unroll_prefetch_simd_def.h.
| #define VKERN_TEMPL_2V_SIMD | ( | FNAME, | |
| OP, | |||
| SSUF, | |||
| MSUF, | |||
| PREP, | |||
| SFIN, | |||
| FIN, | |||
| ADV, | |||
| TYPE, | |||
| STP | |||
| ) |
Definition at line 431 of file unroll_prefetch_simd_def.h.
| #define VKERN_TEMPL_2V_SISD | ( | SDOP, | |
| COND, | |||
| STP, | |||
| SUF | |||
| ) |
Definition at line 146 of file unroll_prefetch_simd_def.h.
| #define VKERN_TEMPL_2V_T_SIMD | ( | FNAME, | |
| OP, | |||
| SSUF, | |||
| MSUF, | |||
| PREP, | |||
| SFIN, | |||
| FIN, | |||
| ADV, | |||
| TYPE, | |||
| STP | |||
| ) |
Definition at line 503 of file unroll_prefetch_simd_def.h.
| #define VKERN_TEMPL_2V_T_SIMD_VL | ( | FNAME, | |
| OP, | |||
| SSUF, | |||
| MSUF, | |||
| PREP, | |||
| SFIN, | |||
| FIN, | |||
| ADV, | |||
| TYPE, | |||
| STP | |||
| ) |
Definition at line 528 of file unroll_prefetch_simd_def.h.
| #define VKERN_TEMPL_3V_C_SIMD | ( | FNAME, | |
| OP, | |||
| SSUF, | |||
| MSUF, | |||
| PREP, | |||
| SFIN, | |||
| FIN, | |||
| ADV, | |||
| TYPE, | |||
| STP | |||
| ) |
Definition at line 367 of file unroll_prefetch_simd_def.h.
| #define VKERN_TEMPL_3V_CC_SIMD | ( | FNAME, | |
| OP, | |||
| SSUF, | |||
| MSUF, | |||
| PREP, | |||
| SFIN, | |||
| FIN, | |||
| ADV, | |||
| TYPE, | |||
| STP | |||
| ) |
Definition at line 398 of file unroll_prefetch_simd_def.h.
| #define VKERN_TEMPL_3V_K_SIMD | ( | m, | |
| a, | |||
| s, | |||
| f, | |||
| u1, | |||
| u2 | |||
| ) | VKERN_TEMPL_3V_PLAIN_SIMD(m,a,s,f,u1,u2) |
Definition at line 239 of file unroll_prefetch_simd_def.h.
| #define VKERN_TEMPL_3V_NP_SIMD | ( | MDOP, | |
| ADV, | |||
| STP, | |||
| SUF, | |||
| UNA1, | |||
| UNA2 | |||
| ) |
Definition at line 63 of file unroll_prefetch_simd_def.h.
| #define VKERN_TEMPL_3V_PLAIN_SIMD | ( | MDOP, | |
| ADV, | |||
| STP, | |||
| SUF, | |||
| UNA1, | |||
| UNA2 | |||
| ) |
Definition at line 72 of file unroll_prefetch_simd_def.h.
| #define VKERN_TEMPL_3V_SIMD | ( | FNAME, | |
| OP, | |||
| SSUF, | |||
| MSUF, | |||
| PREP, | |||
| SFIN, | |||
| FIN, | |||
| ADV, | |||
| TYPE, | |||
| STP | |||
| ) |
TODO: Check whether enabling the non-unrolled fixup (loop tail) is beneficial.
Macro abuse ...
- FNAME: Function name
- OP: Operation for each loop (macro), SSE2 intrinsics
- SSUF: Argument passed to OP macro (suffix for single data operation)
- MSUF: Ditto (suffix used for multiple data operation (SIMD))
- PREP: Preparation macro before loop, called with _f1, _f2 as args (as available)
- SFIN: Cleanup macro after we're done with the SIMD part
- FIN: Cleanup macro before leaving, called with _f1, _f2 (as available)
- ADV: How many elements the SIMD instructions handle per insn OP (2/4)
- TYPE: Standard C data type (float/double)
- STP: SIMD data type (__m128/__m128d)
Definition at line 306 of file unroll_prefetch_simd_def.h.
| #define VKERN_TEMPL_3V_SIMD_UA | ( | FNAME, | |
| OP, | |||
| SSUF, | |||
| MSUF, | |||
| PREP, | |||
| SFIN, | |||
| FIN, | |||
| ADV, | |||
| TYPE, | |||
| STP | |||
| ) |
Without the unaligned warning.
Definition at line 337 of file unroll_prefetch_simd_def.h.
| #define VKERN_TEMPL_3V_SISD | ( | SDOP, | |
| COND, | |||
| STP, | |||
| SUF | |||
| ) |
Definition at line 80 of file unroll_prefetch_simd_def.h.
WARN_UNALIGNED macro: If defined, the TBCI library will print a warning to stderr for unaligned SIMD accesses, which will be slower ...
Definition at line 283 of file unroll_prefetch_simd_def.h.
1.8.5