Monero
intrin_portable.h
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>

All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the name of the copyright holder nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#pragma once

#include <cstdint>
#include "blake2/endian.h"

constexpr int32_t unsigned32ToSigned2sCompl(uint32_t x) {
    return (-1 == ~0) ? (int32_t)x : (x > INT32_MAX ? (-(int32_t)(UINT32_MAX - x) - 1) : (int32_t)x);
}

constexpr int64_t unsigned64ToSigned2sCompl(uint64_t x) {
    return (-1 == ~0) ? (int64_t)x : (x > INT64_MAX ? (-(int64_t)(UINT64_MAX - x) - 1) : (int64_t)x);
}

constexpr uint64_t signExtend2sCompl(uint32_t x) {
    return (-1 == ~0) ? (int64_t)(int32_t)(x) : (x > INT32_MAX ? (x | 0xffffffff00000000ULL) : (uint64_t)x);
}
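
// Editor's illustration (not part of the upstream header): on the
// two's-complement targets RandomX runs on, these helpers reduce to plain
// casts that preserve the bit pattern, e.g.:
//
//   static_assert(unsigned32ToSigned2sCompl(0xFFFFFFFFu) == -1, "");
//   static_assert(signExtend2sCompl(0x80000000u) == 0xFFFFFFFF80000000ull, "");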

constexpr int RoundToNearest = 0;
constexpr int RoundDown = 1;
constexpr int RoundUp = 2;
constexpr int RoundToZero = 3;

//MSVC doesn't define __SSE2__, so we have to define it manually if SSE2 is available
#if !defined(__SSE2__) && (defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP == 2))
#define __SSE2__ 1
#endif
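
// Editor's note (not part of the upstream header): with MSVC, _M_X64 is
// predefined for 64-bit targets (where SSE2 is architectural) and
// _M_IX86_FP == 2 means 32-bit code compiled with /arch:SSE2 or higher.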

//MSVC doesn't define __AES__
#if defined(_MSC_VER) && defined(__SSE2__)
#define __AES__
#endif

//the library "sqrt" function provided by MSVC for x86 targets doesn't give
//the correct results, so we have to use inline assembly to call x87 fsqrt directly
#if !defined(__SSE2__)
#if defined(_MSC_VER) && defined(_M_IX86)
inline double __cdecl rx_sqrt(double x) {
    __asm {
        fld x
        fsqrt
    }
}
#define rx_sqrt rx_sqrt

void rx_set_double_precision();
#define RANDOMX_USE_X87

#elif defined(__i386)

void rx_set_double_precision();
#define RANDOMX_USE_X87

#endif
#endif //__SSE2__

#if !defined(rx_sqrt)
#define rx_sqrt sqrt
#endif

#if !defined(RANDOMX_USE_X87)
#define rx_set_double_precision(x)
#endif
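
// Editor's note (not part of the upstream header): on x87 builds,
// rx_set_double_precision() is expected to narrow the FPU precision-control
// field to 53-bit significands so x87 arithmetic rounds like IEEE binary64;
// elsewhere it expands to nothing.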
91 
#ifdef __SSE2__
#ifdef __GNUC__
#include <x86intrin.h>
#else
#include <intrin.h>
#endif

typedef __m128i rx_vec_i128;
typedef __m128d rx_vec_f128;

#define rx_aligned_alloc(a, b) _mm_malloc(a,b)
#define rx_aligned_free(a) _mm_free(a)
#define rx_prefetch_nta(x) _mm_prefetch((const char *)(x), _MM_HINT_NTA)
#define rx_prefetch_t0(x) _mm_prefetch((const char *)(x), _MM_HINT_T0)

#define rx_load_vec_f128 _mm_load_pd
#define rx_store_vec_f128 _mm_store_pd
#define rx_add_vec_f128 _mm_add_pd
#define rx_sub_vec_f128 _mm_sub_pd
#define rx_mul_vec_f128 _mm_mul_pd
#define rx_div_vec_f128 _mm_div_pd
#define rx_sqrt_vec_f128 _mm_sqrt_pd

FORCE_INLINE rx_vec_f128 rx_swap_vec_f128(rx_vec_f128 a) {
    return _mm_shuffle_pd(a, a, 1);
}

FORCE_INLINE rx_vec_f128 rx_set_vec_f128(uint64_t x1, uint64_t x0) {
    return _mm_castsi128_pd(_mm_set_epi64x(x1, x0));
}

FORCE_INLINE rx_vec_f128 rx_set1_vec_f128(uint64_t x) {
    return _mm_castsi128_pd(_mm_set1_epi64x(x));
}

#define rx_xor_vec_f128 _mm_xor_pd
#define rx_and_vec_f128 _mm_and_pd
#define rx_or_vec_f128 _mm_or_pd

#ifdef __AES__

#define rx_aesenc_vec_i128 _mm_aesenc_si128
#define rx_aesdec_vec_i128 _mm_aesdec_si128

#define HAVE_AES 1

#endif //__AES__

FORCE_INLINE int rx_vec_i128_x(rx_vec_i128 a) {
    return _mm_cvtsi128_si32(a);
}

FORCE_INLINE int rx_vec_i128_y(rx_vec_i128 a) {
    return _mm_cvtsi128_si32(_mm_shuffle_epi32(a, 0x55));
}

FORCE_INLINE int rx_vec_i128_z(rx_vec_i128 a) {
    return _mm_cvtsi128_si32(_mm_shuffle_epi32(a, 0xaa));
}

FORCE_INLINE int rx_vec_i128_w(rx_vec_i128 a) {
    return _mm_cvtsi128_si32(_mm_shuffle_epi32(a, 0xff));
}

#define rx_set_int_vec_i128 _mm_set_epi32
#define rx_xor_vec_i128 _mm_xor_si128
#define rx_load_vec_i128 _mm_load_si128
#define rx_store_vec_i128 _mm_store_si128

FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) {
    __m128i ix = _mm_loadl_epi64((const __m128i*)addr);
    return _mm_cvtepi32_pd(ix);
}

constexpr uint32_t rx_mxcsr_default = 0x9FC0; //Flush to zero, denormals are zero, default rounding mode, all exceptions disabled

FORCE_INLINE void rx_reset_float_state() {
    _mm_setcsr(rx_mxcsr_default);
}

FORCE_INLINE void rx_set_rounding_mode(uint32_t mode) {
    _mm_setcsr(rx_mxcsr_default | (mode << 13));
}

FORCE_INLINE uint32_t rx_get_rounding_mode() {
    return (_mm_getcsr() >> 13) & 3;
}
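
// Editor's sketch (not part of the upstream header): bits 13-14 of MXCSR hold
// the rounding-control field, and the Round* constants above use the same
// encoding, so the shims compose like this:
//
//   rx_set_rounding_mode(RoundDown);      // round toward -infinity
//   uint32_t rc = rx_get_rounding_mode(); // rc == RoundDown
//   rx_reset_float_state();               // round-to-nearest + FTZ/DAZ again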

#elif defined(__PPC64__) && defined(__ALTIVEC__) && defined(__VSX__) //sadly, only POWER7 and newer can use SIMD acceleration; earlier processors can't use doubles or 64-bit integers with SIMD
#include <cstdint>
#include <stdexcept>
#include <cstdlib>
#include <altivec.h>
#undef vector
#undef pixel
#undef bool

typedef __vector uint8_t __m128i;
typedef __vector uint32_t __m128l;
typedef __vector int __m128li;
typedef __vector uint64_t __m128ll;
typedef __vector double __m128d;

typedef __m128i rx_vec_i128;
typedef __m128d rx_vec_f128;
typedef union {
    rx_vec_i128 i;
    rx_vec_f128 d;
    uint64_t u64[2];
    double d64[2];
    uint32_t u32[4];
    int i32[4];
} vec_u;

#define rx_aligned_alloc(a, b) malloc(a)
#define rx_aligned_free(a) free(a)
#define rx_prefetch_nta(x)
#define rx_prefetch_t0(x)

/* Splat 64-bit long long to 2 64-bit long longs */
FORCE_INLINE __m128i vec_splat2sd (int64_t scalar)
{ return (__m128i) vec_splats (scalar); }

FORCE_INLINE rx_vec_f128 rx_load_vec_f128(const double* pd) {
#if defined(NATIVE_LITTLE_ENDIAN)
    return (rx_vec_f128)vec_vsx_ld(0,pd);
#else
    vec_u t;
    t.u64[0] = load64(pd + 0);
    t.u64[1] = load64(pd + 1);
    return (rx_vec_f128)t.d;
#endif
}

FORCE_INLINE void rx_store_vec_f128(double* mem_addr, rx_vec_f128 a) {
#if defined(NATIVE_LITTLE_ENDIAN)
    vec_vsx_st(a,0,(rx_vec_f128*)mem_addr);
#else
    vec_u _a;
    _a.d = a;
    store64(mem_addr + 0, _a.u64[0]);
    store64(mem_addr + 1, _a.u64[1]);
#endif
}

FORCE_INLINE rx_vec_f128 rx_swap_vec_f128(rx_vec_f128 a) {
    return (rx_vec_f128)vec_perm((__m128i)a,(__m128i)a,(__m128i){8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7});
}

FORCE_INLINE rx_vec_f128 rx_add_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
    return (rx_vec_f128)vec_add(a,b);
}

FORCE_INLINE rx_vec_f128 rx_sub_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
    return (rx_vec_f128)vec_sub(a,b);
}

FORCE_INLINE rx_vec_f128 rx_mul_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
    return (rx_vec_f128)vec_mul(a,b);
}

FORCE_INLINE rx_vec_f128 rx_div_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
    return (rx_vec_f128)vec_div(a,b);
}

FORCE_INLINE rx_vec_f128 rx_sqrt_vec_f128(rx_vec_f128 a) {
    return (rx_vec_f128)vec_sqrt(a);
}

FORCE_INLINE rx_vec_i128 rx_set1_long_vec_i128(uint64_t a) {
    return (rx_vec_i128)vec_splat2sd(a);
}

FORCE_INLINE rx_vec_f128 rx_vec_i128_vec_f128(rx_vec_i128 a) {
    return (rx_vec_f128)a;
}

FORCE_INLINE rx_vec_f128 rx_set_vec_f128(uint64_t x1, uint64_t x0) {
    return (rx_vec_f128)(__m128ll){x0,x1};
}

FORCE_INLINE rx_vec_f128 rx_set1_vec_f128(uint64_t x) {
    return (rx_vec_f128)vec_splat2sd(x);
}

FORCE_INLINE rx_vec_f128 rx_xor_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
    return (rx_vec_f128)vec_xor(a,b);
}

FORCE_INLINE rx_vec_f128 rx_and_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
    return (rx_vec_f128)vec_and(a,b);
}

FORCE_INLINE rx_vec_f128 rx_or_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
    return (rx_vec_f128)vec_or(a,b);
}

#if defined(__CRYPTO__)

//x86 AESENC/AESDEC are emulated with the POWER vcipher/vncipher builtins;
//vrev adjusts the byte order expected by those builtins first
FORCE_INLINE __m128ll vrev(__m128i v){
#if defined(NATIVE_LITTLE_ENDIAN)
    return (__m128ll)vec_perm((__m128i)v,(__m128i){0},(__m128i){15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0});
#else
    return (__m128ll)vec_perm((__m128i)v,(__m128i){0},(__m128i){3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12});
#endif
}

FORCE_INLINE rx_vec_i128 rx_aesenc_vec_i128(rx_vec_i128 v, rx_vec_i128 rkey) {
    __m128ll _v = vrev(v);
    __m128ll _rkey = vrev(rkey);
    __m128ll result = vrev((__m128i)__builtin_crypto_vcipher(_v,_rkey));
    return (rx_vec_i128)result;
}

//vncipher applies the key addition at a different point in the round,
//hence the zero key and the explicit XOR afterwards
FORCE_INLINE rx_vec_i128 rx_aesdec_vec_i128(rx_vec_i128 v, rx_vec_i128 rkey) {
    __m128ll _v = vrev(v);
    __m128ll zero = (__m128ll){0};
    __m128ll out = vrev((__m128i)__builtin_crypto_vncipher(_v,zero));
    return (rx_vec_i128)vec_xor((__m128i)out,rkey);
}
#define HAVE_AES 1

#endif //__CRYPTO__

FORCE_INLINE int rx_vec_i128_x(rx_vec_i128 a) {
    vec_u _a;
    _a.i = a;
    return _a.i32[0];
}

FORCE_INLINE int rx_vec_i128_y(rx_vec_i128 a) {
    vec_u _a;
    _a.i = a;
    return _a.i32[1];
}

FORCE_INLINE int rx_vec_i128_z(rx_vec_i128 a) {
    vec_u _a;
    _a.i = a;
    return _a.i32[2];
}

FORCE_INLINE int rx_vec_i128_w(rx_vec_i128 a) {
    vec_u _a;
    _a.i = a;
    return _a.i32[3];
}

FORCE_INLINE rx_vec_i128 rx_set_int_vec_i128(int i3, int i2, int i1, int i0) {
    return (rx_vec_i128)((__m128li){i0,i1,i2,i3});
}

FORCE_INLINE rx_vec_i128 rx_xor_vec_i128(rx_vec_i128 a, rx_vec_i128 b) {
    return (rx_vec_i128)vec_xor(a,b);
}

FORCE_INLINE rx_vec_i128 rx_load_vec_i128(rx_vec_i128 const* p) {
#if defined(NATIVE_LITTLE_ENDIAN)
    return *p;
#else
    const uint32_t* ptr = (const uint32_t*)p;
    vec_u c;
    c.u32[0] = load32(ptr + 0);
    c.u32[1] = load32(ptr + 1);
    c.u32[2] = load32(ptr + 2);
    c.u32[3] = load32(ptr + 3);
    return (rx_vec_i128)c.i;
#endif
}

FORCE_INLINE void rx_store_vec_i128(rx_vec_i128 *p, rx_vec_i128 b) {
#if defined(NATIVE_LITTLE_ENDIAN)
    *p = b;
#else
    uint32_t* ptr = (uint32_t*)p;
    vec_u B;
    B.i = b;
    store32(ptr + 0, B.u32[0]);
    store32(ptr + 1, B.u32[1]);
    store32(ptr + 2, B.u32[2]);
    store32(ptr + 3, B.u32[3]);
#endif
}

FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) {
    vec_u x;
    x.d64[0] = (double)unsigned32ToSigned2sCompl(load32((const uint8_t*)addr + 0));
    x.d64[1] = (double)unsigned32ToSigned2sCompl(load32((const uint8_t*)addr + 4));
    return (rx_vec_f128)x.d;
}

#define RANDOMX_DEFAULT_FENV

#elif defined(__aarch64__)

#include <stdlib.h>
#include <arm_neon.h>
#include <arm_acle.h>

typedef uint8x16_t rx_vec_i128;
typedef float64x2_t rx_vec_f128;

inline void* rx_aligned_alloc(size_t size, size_t align) {
    void* p;
    if (posix_memalign(&p, align, size) == 0)
        return p;

    return 0;
}
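
// Editor's illustration (not part of the upstream header; sizes hypothetical):
// this shim plays the role _mm_malloc fills on x86, e.g.:
//
//   void* scratchpad = rx_aligned_alloc(2097152, 64); // 2 MiB, 64-byte aligned
//   ...
//   rx_aligned_free(scratchpad);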

#define rx_aligned_free(a) free(a)

inline void rx_prefetch_nta(void* ptr) {
    asm volatile ("prfm pldl1strm, [%0]\n" : : "r" (ptr));
}

inline void rx_prefetch_t0(const void* ptr) {
    asm volatile ("prfm pldl1strm, [%0]\n" : : "r" (ptr));
}

FORCE_INLINE rx_vec_f128 rx_load_vec_f128(const double* pd) {
    return vld1q_f64((const float64_t*)pd);
}

FORCE_INLINE void rx_store_vec_f128(double* mem_addr, rx_vec_f128 val) {
    vst1q_f64((float64_t*)mem_addr, val);
}

FORCE_INLINE rx_vec_f128 rx_swap_vec_f128(rx_vec_f128 a) {
    float64x2_t temp;
    temp = vcopyq_laneq_f64(temp, 1, a, 1);
    a = vcopyq_laneq_f64(a, 1, a, 0);
    return vcopyq_laneq_f64(a, 0, temp, 1);
}

FORCE_INLINE rx_vec_f128 rx_set_vec_f128(uint64_t x1, uint64_t x0) {
    uint64x2_t temp0 = vdupq_n_u64(x0);
    uint64x2_t temp1 = vdupq_n_u64(x1);
    return vreinterpretq_f64_u64(vcopyq_laneq_u64(temp0, 1, temp1, 0));
}

FORCE_INLINE rx_vec_f128 rx_set1_vec_f128(uint64_t x) {
    return vreinterpretq_f64_u64(vdupq_n_u64(x));
}

#define rx_add_vec_f128 vaddq_f64
#define rx_sub_vec_f128 vsubq_f64
#define rx_mul_vec_f128 vmulq_f64
#define rx_div_vec_f128 vdivq_f64
#define rx_sqrt_vec_f128 vsqrtq_f64

FORCE_INLINE rx_vec_f128 rx_xor_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
    return vreinterpretq_f64_u8(veorq_u8(vreinterpretq_u8_f64(a), vreinterpretq_u8_f64(b)));
}

FORCE_INLINE rx_vec_f128 rx_and_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
    return vreinterpretq_f64_u8(vandq_u8(vreinterpretq_u8_f64(a), vreinterpretq_u8_f64(b)));
}

FORCE_INLINE rx_vec_f128 rx_or_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
    return vreinterpretq_f64_u8(vorrq_u8(vreinterpretq_u8_f64(a), vreinterpretq_u8_f64(b)));
}

#ifdef __ARM_FEATURE_CRYPTO

FORCE_INLINE rx_vec_i128 rx_aesenc_vec_i128(rx_vec_i128 a, rx_vec_i128 key) {
    const uint8x16_t zero = { 0 };
    return vaesmcq_u8(vaeseq_u8(a, zero)) ^ key;
}

FORCE_INLINE rx_vec_i128 rx_aesdec_vec_i128(rx_vec_i128 a, rx_vec_i128 key) {
    const uint8x16_t zero = { 0 };
    return vaesimcq_u8(vaesdq_u8(a, zero)) ^ key;
}

#define HAVE_AES 1

#endif
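
// Editor's note (not part of the upstream header): NEON's vaeseq_u8 performs
// AddRoundKey + SubBytes + ShiftRows, so encrypting with a zero key and
// XOR-ing the round key after vaesmcq_u8 (MixColumns) reproduces the round
// order of x86 _mm_aesenc_si128; the decryption shim mirrors this with
// vaesdq_u8/vaesimcq_u8.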
471 
#define rx_xor_vec_i128 veorq_u8

FORCE_INLINE int rx_vec_i128_x(rx_vec_i128 a) {
    return vgetq_lane_s32(vreinterpretq_s32_u8(a), 0);
}

FORCE_INLINE int rx_vec_i128_y(rx_vec_i128 a) {
    return vgetq_lane_s32(vreinterpretq_s32_u8(a), 1);
}

FORCE_INLINE int rx_vec_i128_z(rx_vec_i128 a) {
    return vgetq_lane_s32(vreinterpretq_s32_u8(a), 2);
}

FORCE_INLINE int rx_vec_i128_w(rx_vec_i128 a) {
    return vgetq_lane_s32(vreinterpretq_s32_u8(a), 3);
}

FORCE_INLINE rx_vec_i128 rx_set_int_vec_i128(int i3, int i2, int i1, int i0) {
    int32_t data[4];
    data[0] = i0;
    data[1] = i1;
    data[2] = i2;
    data[3] = i3;
    return vreinterpretq_u8_s32(vld1q_s32(data));
}

#define rx_xor_vec_i128 veorq_u8

FORCE_INLINE rx_vec_i128 rx_load_vec_i128(const rx_vec_i128* mem_addr) {
    return vld1q_u8((const uint8_t*)mem_addr);
}

FORCE_INLINE void rx_store_vec_i128(rx_vec_i128* mem_addr, rx_vec_i128 val) {
    vst1q_u8((uint8_t*)mem_addr, val);
}

FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) {
    double lo = unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 0));
    double hi = unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 4));
    rx_vec_f128 x;
    x = vsetq_lane_f64(lo, x, 0);
    x = vsetq_lane_f64(hi, x, 1);
    return x;
}

#define RANDOMX_DEFAULT_FENV

#else //portable fallback

#include <cstdint>
#include <stdexcept>
#include <cstdlib>
#include <cmath>

typedef union {
    uint64_t u64[2];
    uint32_t u32[4];
    uint16_t u16[8];
    uint8_t u8[16];
} rx_vec_i128;

typedef union {
    struct {
        double lo;
        double hi;
    };
    rx_vec_i128 i;
} rx_vec_f128;
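
// Editor's illustration (not part of the upstream header): the anonymous
// struct and the i member alias the same 16 bytes, so a vector built from
// bit patterns can be read back as doubles:
//
//   rx_vec_f128 v = rx_set_vec_f128(0x4000000000000000ull, 0x3FF0000000000000ull);
//   // v.lo == 1.0, v.hi == 2.0 (the IEEE-754 encodings passed as x0 and x1)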

#define rx_aligned_alloc(a, b) malloc(a)
#define rx_aligned_free(a) free(a)
#define rx_prefetch_nta(x)
#define rx_prefetch_t0(x)

FORCE_INLINE rx_vec_f128 rx_load_vec_f128(const double* pd) {
    rx_vec_f128 x;
    x.i.u64[0] = load64(pd + 0);
    x.i.u64[1] = load64(pd + 1);
    return x;
}

FORCE_INLINE void rx_store_vec_f128(double* mem_addr, rx_vec_f128 a) {
    store64(mem_addr + 0, a.i.u64[0]);
    store64(mem_addr + 1, a.i.u64[1]);
}

FORCE_INLINE rx_vec_f128 rx_swap_vec_f128(rx_vec_f128 a) {
    double temp = a.hi;
    a.hi = a.lo;
    a.lo = temp;
    return a;
}

FORCE_INLINE rx_vec_f128 rx_add_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
    rx_vec_f128 x;
    x.lo = a.lo + b.lo;
    x.hi = a.hi + b.hi;
    return x;
}

FORCE_INLINE rx_vec_f128 rx_sub_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
    rx_vec_f128 x;
    x.lo = a.lo - b.lo;
    x.hi = a.hi - b.hi;
    return x;
}

FORCE_INLINE rx_vec_f128 rx_mul_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
    rx_vec_f128 x;
    x.lo = a.lo * b.lo;
    x.hi = a.hi * b.hi;
    return x;
}

FORCE_INLINE rx_vec_f128 rx_div_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
    rx_vec_f128 x;
    x.lo = a.lo / b.lo;
    x.hi = a.hi / b.hi;
    return x;
}

FORCE_INLINE rx_vec_f128 rx_sqrt_vec_f128(rx_vec_f128 a) {
    rx_vec_f128 x;
    x.lo = rx_sqrt(a.lo);
    x.hi = rx_sqrt(a.hi);
    return x;
}

FORCE_INLINE rx_vec_i128 rx_set1_long_vec_i128(uint64_t a) {
    rx_vec_i128 x;
    x.u64[0] = a;
    x.u64[1] = a;
    return x;
}

FORCE_INLINE rx_vec_f128 rx_vec_i128_vec_f128(rx_vec_i128 a) {
    rx_vec_f128 x;
    x.i = a;
    return x;
}

FORCE_INLINE rx_vec_f128 rx_set_vec_f128(uint64_t x1, uint64_t x0) {
    rx_vec_f128 v;
    v.i.u64[0] = x0;
    v.i.u64[1] = x1;
    return v;
}

FORCE_INLINE rx_vec_f128 rx_set1_vec_f128(uint64_t x) {
    rx_vec_f128 v;
    v.i.u64[0] = x;
    v.i.u64[1] = x;
    return v;
}

FORCE_INLINE rx_vec_f128 rx_xor_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
    rx_vec_f128 x;
    x.i.u64[0] = a.i.u64[0] ^ b.i.u64[0];
    x.i.u64[1] = a.i.u64[1] ^ b.i.u64[1];
    return x;
}

FORCE_INLINE rx_vec_f128 rx_and_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
    rx_vec_f128 x;
    x.i.u64[0] = a.i.u64[0] & b.i.u64[0];
    x.i.u64[1] = a.i.u64[1] & b.i.u64[1];
    return x;
}

FORCE_INLINE rx_vec_f128 rx_or_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
    rx_vec_f128 x;
    x.i.u64[0] = a.i.u64[0] | b.i.u64[0];
    x.i.u64[1] = a.i.u64[1] | b.i.u64[1];
    return x;
}

FORCE_INLINE int rx_vec_i128_x(rx_vec_i128 a) {
    return a.u32[0];
}

FORCE_INLINE int rx_vec_i128_y(rx_vec_i128 a) {
    return a.u32[1];
}

FORCE_INLINE int rx_vec_i128_z(rx_vec_i128 a) {
    return a.u32[2];
}

FORCE_INLINE int rx_vec_i128_w(rx_vec_i128 a) {
    return a.u32[3];
}

FORCE_INLINE rx_vec_i128 rx_set_int_vec_i128(int i3, int i2, int i1, int i0) {
    rx_vec_i128 v;
    v.u32[0] = i0;
    v.u32[1] = i1;
    v.u32[2] = i2;
    v.u32[3] = i3;
    return v;
}

FORCE_INLINE rx_vec_i128 rx_xor_vec_i128(rx_vec_i128 a, rx_vec_i128 b) {
    rx_vec_i128 c;
    c.u32[0] = a.u32[0] ^ b.u32[0];
    c.u32[1] = a.u32[1] ^ b.u32[1];
    c.u32[2] = a.u32[2] ^ b.u32[2];
    c.u32[3] = a.u32[3] ^ b.u32[3];
    return c;
}

FORCE_INLINE rx_vec_i128 rx_load_vec_i128(rx_vec_i128 const* p) {
#if defined(NATIVE_LITTLE_ENDIAN)
    return *p;
#else
    const uint32_t* ptr = (const uint32_t*)p;
    rx_vec_i128 c;
    c.u32[0] = load32(ptr + 0);
    c.u32[1] = load32(ptr + 1);
    c.u32[2] = load32(ptr + 2);
    c.u32[3] = load32(ptr + 3);
    return c;
#endif
}

FORCE_INLINE void rx_store_vec_i128(rx_vec_i128 *p, rx_vec_i128 b) {
#if defined(NATIVE_LITTLE_ENDIAN)
    *p = b;
#else
    uint32_t* ptr = (uint32_t*)p;
    store32(ptr + 0, b.u32[0]);
    store32(ptr + 1, b.u32[1]);
    store32(ptr + 2, b.u32[2]);
    store32(ptr + 3, b.u32[3]);
#endif
}

FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) {
    rx_vec_f128 x;
    x.lo = (double)unsigned32ToSigned2sCompl(load32((const uint8_t*)addr + 0));
    x.hi = (double)unsigned32ToSigned2sCompl(load32((const uint8_t*)addr + 4));
    return x;
}
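
// Editor's illustration (not part of the upstream header): the conversion
// reads 8 bytes at addr as two little-endian signed 32-bit integers, so for
// int32_t src[2] = {-1, 100} it yields {lo = -1.0, hi = 100.0}.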

#define RANDOMX_DEFAULT_FENV

#endif

#ifndef HAVE_AES
static const char* platformError = "Platform doesn't support hardware AES";

#include <stdexcept>

FORCE_INLINE rx_vec_i128 rx_aesenc_vec_i128(rx_vec_i128 v, rx_vec_i128 rkey) {
    throw std::runtime_error(platformError);
}

FORCE_INLINE rx_vec_i128 rx_aesdec_vec_i128(rx_vec_i128 v, rx_vec_i128 rkey) {
    throw std::runtime_error(platformError);
}

#define HAVE_AES 0

#endif
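
// Editor's note (not part of the upstream header): with HAVE_AES == 0 the
// stubs above throw, so callers are expected to select a software AES path
// instead, e.g. (softAesEnc is a hypothetical name):
//
//   state = HAVE_AES ? rx_aesenc_vec_i128(state, key)  // hardware round
//                    : softAesEnc(state, key);         // software fallback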

#ifdef RANDOMX_DEFAULT_FENV

void rx_reset_float_state();

void rx_set_rounding_mode(uint32_t mode);

uint32_t rx_get_rounding_mode();

#endif

double loadDoublePortable(const void* addr);
uint64_t mulh(uint64_t, uint64_t);
int64_t smulh(int64_t, int64_t);
uint64_t rotl(uint64_t, unsigned int);
uint64_t rotr(uint64_t, unsigned int);