1 #ifndef VARIANT2_INT_SQRT_H
2 #define VARIANT2_INT_SQRT_H
7 #define VARIANT2_INTEGER_MATH_SQRT_STEP_SSE2() \
9 const __m128i exp_double_bias = _mm_set_epi64x(0, 1023ULL << 52); \
10 __m128d x = _mm_castsi128_pd(_mm_add_epi64(_mm_cvtsi64_si128(sqrt_input >> 12), exp_double_bias)); \
11 x = _mm_sqrt_sd(_mm_setzero_pd(), x); \
12 sqrt_result = (uint64_t)(_mm_cvtsi128_si64(_mm_sub_epi64(_mm_castpd_si128(x), exp_double_bias))) >> 19; \
15 #define VARIANT2_INTEGER_MATH_SQRT_STEP_FP64() \
17 sqrt_result = sqrt(sqrt_input + 18446744073709551616.0) * 2.0 - 8589934592.0; \
/*
 * Reference square-root step: assigns the value returned by
 * integer_square_root_v2(sqrt_input) to sqrt_result.
 *
 * The macro takes no arguments; `sqrt_input` and `sqrt_result` are not
 * parameters, so both names must already be in scope wherever the macro is
 * expanded. The expansion is a bare assignment expression without a trailing
 * semicolon, so the call site has to supply the terminating `;`.
 */
20 #define VARIANT2_INTEGER_MATH_SQRT_STEP_REF() \
21 sqrt_result = integer_square_root_v2(sqrt_input)
49 uint64_t r = 1ULL << 63;
51 for (uint64_t bit = 1ULL << 60; bit; bit >>= 2)
53 const bool b = (n < r + bit);
54 const uint64_t n_next = n - (r + bit);
55 const uint64_t r_next = r + bit * 2;
61 return r * 2 + ((n > r) ? 1 : 0);
155 #define VARIANT2_INTEGER_MATH_SQRT_FIXUP(r) \
157 const uint64_t s = r >> 1; \
158 const uint64_t b = r & 1; \
159 const uint64_t r2 = (uint64_t)(s) * (s + b) + (r << 32); \
160 r += ((r2 + b > sqrt_input) ? -1 : 0) + ((r2 + (1ULL << 32) < sqrt_input - s) ? 1 : 0); \
int b
Definition: base.py:1
static uint32_t integer_square_root_v2(uint64_t n)
Definition: variant2_int_sqrt.h:47