52#if !defined(__SSE2__) && (defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP == 2))
57#if defined(_MSC_VER) && defined(__SSE2__)
64#if defined(_MSC_VER) && defined(_M_IX86)
65inline double __cdecl
rx_sqrt(
double x) {
71#define rx_sqrt rx_sqrt
74#define RANDOMX_USE_X87
79#define RANDOMX_USE_X87
88#if !defined(RANDOMX_USE_X87)
89#define rx_set_double_precision(x)
102#define rx_aligned_alloc(a, b) _mm_malloc(a,b)
103#define rx_aligned_free(a) _mm_free(a)
104#define rx_prefetch_nta(x) _mm_prefetch((const char *)(x), _MM_HINT_NTA)
105#define rx_prefetch_t0(x) _mm_prefetch((const char *)(x), _MM_HINT_T0)
107#define rx_load_vec_f128 _mm_load_pd
108#define rx_store_vec_f128 _mm_store_pd
109#define rx_add_vec_f128 _mm_add_pd
110#define rx_sub_vec_f128 _mm_sub_pd
111#define rx_mul_vec_f128 _mm_mul_pd
112#define rx_div_vec_f128 _mm_div_pd
113#define rx_sqrt_vec_f128 _mm_sqrt_pd
116 return _mm_shuffle_pd(
a,
a, 1);
120 return _mm_castsi128_pd(_mm_set_epi64x(x1, x0));
124 return _mm_castsi128_pd(_mm_set1_epi64x(x));
127#define rx_xor_vec_f128 _mm_xor_pd
128#define rx_and_vec_f128 _mm_and_pd
129#define rx_or_vec_f128 _mm_or_pd
133#define rx_aesenc_vec_i128 _mm_aesenc_si128
134#define rx_aesdec_vec_i128 _mm_aesdec_si128
141 return _mm_cvtsi128_si32(
a);
145 return _mm_cvtsi128_si32(_mm_shuffle_epi32(
a, 0x55));
149 return _mm_cvtsi128_si32(_mm_shuffle_epi32(
a, 0xaa));
153 return _mm_cvtsi128_si32(_mm_shuffle_epi32(
a, 0xff));
156#define rx_set_int_vec_i128 _mm_set_epi32
157#define rx_xor_vec_i128 _mm_xor_si128
158#define rx_load_vec_i128 _mm_load_si128
159#define rx_store_vec_i128 _mm_store_si128
162 __m128i ix = _mm_loadl_epi64((
const __m128i*)addr);
163 return _mm_cvtepi32_pd(ix);
166constexpr uint32_t rx_mxcsr_default = 0x9FC0;
169 _mm_setcsr(rx_mxcsr_default);
173 _mm_setcsr(rx_mxcsr_default | (
mode << 13));
177 return (_mm_getcsr() >> 13) & 3;
180#elif defined(__PPC64__) && defined(__ALTIVEC__) && defined(__VSX__)
189typedef __vector
uint8_t __m128i;
191typedef __vector
int __m128li;
193typedef __vector
double __m128d;
206#define rx_aligned_alloc(a, b) malloc(a)
207#define rx_aligned_free(a) free(a)
208#define rx_prefetch_nta(x)
209#define rx_prefetch_t0(x)
213{
return (__m128i) vec_splats (scalar); }
216#if defined(NATIVE_LITTLE_ENDIAN)
227#if defined(NATIVE_LITTLE_ENDIAN)
232 store64(mem_addr + 0, _a.u64[0]);
233 store64(mem_addr + 1, _a.u64[1]);
238 return (
rx_vec_f128)vec_perm((__m128i)
a,(__m128i)
a,(__m128i){8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7});
289#if defined(__CRYPTO__)
292#if defined(NATIVE_LITTLE_ENDIAN)
293 return (__m128ll)vec_perm((__m128i)v,(__m128i){0},(__m128i){15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0});
295 return (__m128ll)vec_perm((__m128i)v,(__m128i){0},(__m128i){3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12});
300 __m128ll _v = vrev(v);
301 __m128ll _rkey = vrev(rkey);
302 __m128ll
result = vrev((__m128i)__builtin_crypto_vcipher(_v,_rkey));
307 __m128ll _v = vrev(v);
308 __m128ll
zero = (__m128ll){0};
309 __m128ll
out = vrev((__m128i)__builtin_crypto_vncipher(_v,
zero));
349#if defined(NATIVE_LITTLE_ENDIAN)
363#if defined(NATIVE_LITTLE_ENDIAN)
383#define RANDOMX_DEFAULT_FENV
385#elif defined(__aarch64__)
396 if (posix_memalign(&p, align, size) == 0)
402#define rx_aligned_free(a) free(a)
405 asm volatile (
"prfm pldl1strm, [%0]\n" : :
"r" (ptr));
409 asm volatile (
"prfm pldl1strm, [%0]\n" : :
"r" (ptr));
413 return vld1q_f64((
const float64_t*)pd);
417 vst1q_f64((float64_t*)mem_addr, val);
422 temp = vcopyq_laneq_f64(temp, 1,
a, 1);
423 a = vcopyq_laneq_f64(
a, 1,
a, 0);
424 return vcopyq_laneq_f64(
a, 0, temp, 1);
428 uint64x2_t temp0 = vdupq_n_u64(x0);
429 uint64x2_t temp1 = vdupq_n_u64(x1);
430 return vreinterpretq_f64_u64(vcopyq_laneq_u64(temp0, 1, temp1, 0));
434 return vreinterpretq_f64_u64(vdupq_n_u64(x));
437#define rx_add_vec_f128 vaddq_f64
438#define rx_sub_vec_f128 vsubq_f64
439#define rx_mul_vec_f128 vmulq_f64
440#define rx_div_vec_f128 vdivq_f64
441#define rx_sqrt_vec_f128 vsqrtq_f64
444 return vreinterpretq_f64_u8(veorq_u8(vreinterpretq_u8_f64(
a), vreinterpretq_u8_f64(
b)));
448 return vreinterpretq_f64_u8(vandq_u8(vreinterpretq_u8_f64(
a), vreinterpretq_u8_f64(
b)));
452 return vreinterpretq_f64_u8(vorrq_u8(vreinterpretq_u8_f64(
a), vreinterpretq_u8_f64(
b)));
455#ifdef __ARM_FEATURE_CRYPTO
459 const uint8x16_t
zero = { 0 };
460 return vaesmcq_u8(vaeseq_u8(
a,
zero)) ^
key;
464 const uint8x16_t
zero = { 0 };
465 return vaesimcq_u8(vaesdq_u8(
a,
zero)) ^
key;
472#define rx_xor_vec_i128 veorq_u8
475 return vgetq_lane_s32(vreinterpretq_s32_u8(
a), 0);
479 return vgetq_lane_s32(vreinterpretq_s32_u8(
a), 1);
483 return vgetq_lane_s32(vreinterpretq_s32_u8(
a), 2);
487 return vgetq_lane_s32(vreinterpretq_s32_u8(
a), 3);
496 return vreinterpretq_u8_s32(vld1q_s32(
data));
499#define rx_xor_vec_i128 veorq_u8
502 return vld1q_u8((
const uint8_t*)mem_addr);
506 vst1q_u8((
uint8_t*)mem_addr, val);
513 x = vsetq_lane_f64(lo, x, 0);
514 x = vsetq_lane_f64(hi, x, 1);
518#define RANDOMX_DEFAULT_FENV
542#define rx_aligned_alloc(a, b) malloc(a)
543#define rx_aligned_free(a) free(a)
544#define rx_prefetch_nta(x)
545#define rx_prefetch_t0(x)
630 x.
i.
u64[0] =
a.i.u64[0] ^
b.i.u64[0];
631 x.
i.
u64[1] =
a.i.u64[1] ^
b.i.u64[1];
637 x.
i.
u64[0] =
a.i.u64[0] &
b.i.u64[0];
638 x.
i.
u64[1] =
a.i.u64[1] &
b.i.u64[1];
644 x.
i.
u64[0] =
a.i.u64[0] |
b.i.u64[0];
645 x.
i.
u64[1] =
a.i.u64[1] |
b.i.u64[1];
676 c.u32[0] =
a.u32[0] ^
b.u32[0];
677 c.u32[1] =
a.u32[1] ^
b.u32[1];
678 c.u32[2] =
a.u32[2] ^
b.u32[2];
679 c.u32[3] =
a.u32[3] ^
b.u32[3];
684#if defined(NATIVE_LITTLE_ENDIAN)
689 c.u32[0] =
load32(ptr + 0);
690 c.u32[1] =
load32(ptr + 1);
691 c.u32[2] =
load32(ptr + 2);
692 c.u32[3] =
load32(ptr + 3);
698#if defined(NATIVE_LITTLE_ENDIAN)
716#define RANDOMX_DEFAULT_FENV
737#ifdef RANDOMX_DEFAULT_FENV
cryptonote::block b
Definition block.cpp:40
static FORCE_INLINE uint64_t load64(const void *src)
Definition endian.h:50
static FORCE_INLINE void store64(void *dst, uint64_t w)
Definition endian.h:86
static FORCE_INLINE uint32_t load32(const void *src)
Definition endian.h:29
#define FORCE_INLINE
Definition endian.h:10
static FORCE_INLINE void store32(void *dst, uint32_t w)
Definition endian.h:67
const char * key
Definition hmac_keccak.cpp:40
int64_t smulh(int64_t, int64_t)
Definition instructions_portable.cpp:125
FORCE_INLINE rx_vec_f128 rx_add_vec_f128(rx_vec_f128 a, rx_vec_f128 b)
Definition intrin_portable.h:566
#define rx_set_double_precision(x)
Definition intrin_portable.h:89
constexpr int RoundToNearest
Definition intrin_portable.h:46
uint64_t rotr(uint64_t, unsigned int)
Definition instructions_portable.cpp:92
constexpr int32_t unsigned32ToSigned2sCompl(uint32_t x)
Definition intrin_portable.h:34
FORCE_INLINE rx_vec_i128 rx_load_vec_i128(rx_vec_i128 const *p)
Definition intrin_portable.h:683
constexpr uint64_t signExtend2sCompl(uint32_t x)
Definition intrin_portable.h:42
void rx_reset_float_state()
Definition instructions_portable.cpp:136
FORCE_INLINE int rx_vec_i128_y(rx_vec_i128 a)
Definition intrin_portable.h:653
FORCE_INLINE rx_vec_f128 rx_set_vec_f128(uint64_t x1, uint64_t x0)
Definition intrin_portable.h:614
uint32_t rx_get_rounding_mode()
Definition instructions_portable.cpp:160
FORCE_INLINE rx_vec_i128 rx_aesenc_vec_i128(rx_vec_i128 v, rx_vec_i128 rkey)
Definition intrin_portable.h:725
FORCE_INLINE rx_vec_i128 rx_set1_long_vec_i128(uint64_t a)
Definition intrin_portable.h:601
#define rx_prefetch_nta(x)
Definition intrin_portable.h:544
FORCE_INLINE int rx_vec_i128_w(rx_vec_i128 a)
Definition intrin_portable.h:661
FORCE_INLINE rx_vec_f128 rx_xor_vec_f128(rx_vec_f128 a, rx_vec_f128 b)
Definition intrin_portable.h:628
FORCE_INLINE rx_vec_f128 rx_or_vec_f128(rx_vec_f128 a, rx_vec_f128 b)
Definition intrin_portable.h:642
uint64_t rotl(uint64_t, unsigned int)
Definition instructions_portable.cpp:99
FORCE_INLINE rx_vec_i128 rx_aesdec_vec_i128(rx_vec_i128 v, rx_vec_i128 rkey)
Definition intrin_portable.h:729
FORCE_INLINE rx_vec_f128 rx_set1_vec_f128(uint64_t x)
Definition intrin_portable.h:621
FORCE_INLINE int rx_vec_i128_x(rx_vec_i128 a)
Definition intrin_portable.h:649
FORCE_INLINE void rx_store_vec_i128(rx_vec_i128 *p, rx_vec_i128 b)
Definition intrin_portable.h:697
FORCE_INLINE rx_vec_f128 rx_vec_i128_vec_f128(rx_vec_i128 a)
Definition intrin_portable.h:608
FORCE_INLINE rx_vec_f128 rx_swap_vec_f128(rx_vec_f128 a)
Definition intrin_portable.h:559
FORCE_INLINE int rx_vec_i128_z(rx_vec_i128 a)
Definition intrin_portable.h:657
constexpr int RoundToZero
Definition intrin_portable.h:49
FORCE_INLINE rx_vec_f128 rx_and_vec_f128(rx_vec_f128 a, rx_vec_f128 b)
Definition intrin_portable.h:635
FORCE_INLINE rx_vec_i128 rx_set_int_vec_i128(int i3, int i2, int i1, int i0)
Definition intrin_portable.h:665
void rx_set_rounding_mode(uint32_t mode)
Definition instructions_portable.cpp:141
static const char * platformError
Definition intrin_portable.h:721
double loadDoublePortable(const void *addr)
Definition instructions_portable.cpp:204
FORCE_INLINE rx_vec_f128 rx_load_vec_f128(const double *pd)
Definition intrin_portable.h:547
#define rx_sqrt
Definition intrin_portable.h:85
FORCE_INLINE rx_vec_f128 rx_sub_vec_f128(rx_vec_f128 a, rx_vec_f128 b)
Definition intrin_portable.h:573
FORCE_INLINE rx_vec_f128 rx_mul_vec_f128(rx_vec_f128 a, rx_vec_f128 b)
Definition intrin_portable.h:580
FORCE_INLINE rx_vec_f128 rx_div_vec_f128(rx_vec_f128 a, rx_vec_f128 b)
Definition intrin_portable.h:587
#define rx_aligned_alloc(a, b)
Definition intrin_portable.h:542
#define rx_prefetch_t0(x)
Definition intrin_portable.h:545
FORCE_INLINE void rx_store_vec_f128(double *mem_addr, rx_vec_f128 a)
Definition intrin_portable.h:554
FORCE_INLINE rx_vec_i128 rx_xor_vec_i128(rx_vec_i128 a, rx_vec_i128 b)
Definition intrin_portable.h:674
constexpr int RoundUp
Definition intrin_portable.h:48
constexpr int RoundDown
Definition intrin_portable.h:47
FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void *addr)
Definition intrin_portable.h:709
uint64_t mulh(uint64_t, uint64_t)
Definition instructions_portable.cpp:108
FORCE_INLINE rx_vec_f128 rx_sqrt_vec_f128(rx_vec_f128 a)
Definition intrin_portable.h:594
constexpr int64_t unsigned64ToSigned2sCompl(uint64_t x)
Definition intrin_portable.h:38
static int mode
Definition mdb_dump.c:26
t
Definition console.py:33
@ out
Definition message_store.h:75
c
Definition pymoduletest.py:79
int i
Definition pymoduletest.py:23
const GenericPointer< typename T::ValueType > T2 T::AllocatorType & a
Definition pointer.h:1124
static const unsigned char zero[32]
Definition fe_isnonzero.c:12
tools::wallet2::message_signature_result_t result
Definition signature.cpp:62
#define INT32_MAX
Definition stdint.h:183
unsigned short uint16_t
Definition stdint.h:125
#define UINT64_MAX
Definition stdint.h:189
signed __int64 int64_t
Definition stdint.h:135
unsigned int uint32_t
Definition stdint.h:126
signed int int32_t
Definition stdint.h:123
#define UINT32_MAX
Definition stdint.h:188
unsigned char uint8_t
Definition stdint.h:124
#define INT64_MAX
Definition stdint.h:185
unsigned __int64 uint64_t
Definition stdint.h:136
std::string data
Definition base58.cpp:37
Definition intrin_portable.h:534
double hi
Definition intrin_portable.h:537
rx_vec_i128 i
Definition intrin_portable.h:539
double lo
Definition intrin_portable.h:536
Definition intrin_portable.h:527
uint64_t u64[2]
Definition intrin_portable.h:528
uint8_t u8[16]
Definition intrin_portable.h:531
uint16_t u16[8]
Definition intrin_portable.h:530
uint32_t u32[4]
Definition intrin_portable.h:529