Monero
blamka-round-ssse3.h
Go to the documentation of this file.
1 /*
2 Copyright (c) 2018-2019, tevador <tevador@gmail.com>
3 
4 All rights reserved.
5 
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8  * Redistributions of source code must retain the above copyright
9  notice, this list of conditions and the following disclaimer.
10  * Redistributions in binary form must reproduce the above copyright
11  notice, this list of conditions and the following disclaimer in the
12  documentation and/or other materials provided with the distribution.
13  * Neither the name of the copyright holder nor the
14  names of its contributors may be used to endorse or promote products
15  derived from this software without specific prior written permission.
16 
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
21 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
24 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
25 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28 
29 /* Original code from Argon2 reference source code package used under CC0 Licence
30  * https://github.com/P-H-C/phc-winner-argon2
31  * Copyright 2015
32  * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
33 */
34 
35 #ifndef BLAKE_ROUND_MKA_OPT_H
36 #define BLAKE_ROUND_MKA_OPT_H
37 
38 #include "blake2-impl.h"
39 
40 #ifdef __GNUC__
41 #include <x86intrin.h>
42 #else
43 #include <intrin.h>
44 #endif
45 
/* clang defines _mm_roti_epi64 itself, using the XOP instruction set;
 * remove that definition so the version below is the one in effect. */
#ifdef _mm_roti_epi64
#undef _mm_roti_epi64
#endif

/*
 * Byte-selection masks for _mm_shuffle_epi8. Within each 8-byte half,
 * result byte i is taken from source byte (i + 2) mod 8 for r16 and
 * (i + 3) mod 8 for r24, which rotates each 64-bit lane right by 16 and
 * 24 bits respectively.
 */
#define r16 \
    (_mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9))
#define r24 \
    (_mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10))

/*
 * Rotates both 64-bit lanes of x right by -(c) bits. The count is passed
 * negated (the callers in this header use -32, -24, -16 and -63).
 *
 *   32      : swap the 32-bit halves of each lane
 *   24, 16  : byte shuffle with the masks above
 *   63      : (x >> 63) ^ (x + x), where x + x stands in for x << 1
 *   other   : (x >> n) ^ (x << (64 - n))
 *
 * The whole expansion is wrapped in parentheses so the conditional chain
 * stays a single operand wherever the macro is used. x and c are expanded
 * several times, so they must not have side effects.
 */
#define _mm_roti_epi64(x, c) \
    ((-(c) == 32) \
         ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1)) \
     : (-(c) == 24) \
         ? _mm_shuffle_epi8((x), r24) \
     : (-(c) == 16) \
         ? _mm_shuffle_epi8((x), r16) \
     : (-(c) == 63) \
         ? _mm_xor_si128(_mm_srli_epi64((x), -(c)), \
                         _mm_add_epi64((x), (x))) \
         : _mm_xor_si128(_mm_srli_epi64((x), -(c)), \
                         _mm_slli_epi64((x), 64 - (-(c)))))
66 
67 static FORCE_INLINE __m128i fBlaMka(__m128i x, __m128i y) {
68  const __m128i z = _mm_mul_epu32(x, y);
69  return _mm_add_epi64(_mm_add_epi64(x, y), _mm_add_epi64(z, z));
70 }
71 
/*
 * G1: first half of the mixing step (G2 is the second half), carried out
 * on two register sets -- suffix 0 and suffix 1 -- in interleaved order.
 * For each set:
 *
 *   A = fBlaMka(A, B);   D = (D ^ A) rotated by 32 bits;
 *   C = fBlaMka(C, D);   B = (B ^ C) rotated right by 24 bits;
 *
 * All eight arguments must be modifiable __m128i lvalues. Each one is
 * expanded more than once, so they must not have side effects.
 */
#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \
    do { \
        (A0) = fBlaMka((A0), (B0)); \
        (A1) = fBlaMka((A1), (B1)); \
        \
        (D0) = _mm_xor_si128((D0), (A0)); \
        (D1) = _mm_xor_si128((D1), (A1)); \
        \
        (D0) = _mm_roti_epi64((D0), -32); \
        (D1) = _mm_roti_epi64((D1), -32); \
        \
        (C0) = fBlaMka((C0), (D0)); \
        (C1) = fBlaMka((C1), (D1)); \
        \
        (B0) = _mm_xor_si128((B0), (C0)); \
        (B1) = _mm_xor_si128((B1), (C1)); \
        \
        (B0) = _mm_roti_epi64((B0), -24); \
        (B1) = _mm_roti_epi64((B1), -24); \
    } while ((void)0, 0)
92 
/*
 * G2: second half of the mixing step (G1 is the first half), carried out
 * on two register sets -- suffix 0 and suffix 1 -- in interleaved order.
 * Same sequence as G1 but with different rotation counts. For each set:
 *
 *   A = fBlaMka(A, B);   D = (D ^ A) rotated right by 16 bits;
 *   C = fBlaMka(C, D);   B = (B ^ C) rotated right by 63 bits;
 *
 * All eight arguments must be modifiable __m128i lvalues. Each one is
 * expanded more than once, so they must not have side effects.
 */
#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \
    do { \
        (A0) = fBlaMka((A0), (B0)); \
        (A1) = fBlaMka((A1), (B1)); \
        \
        (D0) = _mm_xor_si128((D0), (A0)); \
        (D1) = _mm_xor_si128((D1), (A1)); \
        \
        (D0) = _mm_roti_epi64((D0), -16); \
        (D1) = _mm_roti_epi64((D1), -16); \
        \
        (C0) = fBlaMka((C0), (D0)); \
        (C1) = fBlaMka((C1), (D1)); \
        \
        (B0) = _mm_xor_si128((B0), (C0)); \
        (B1) = _mm_xor_si128((B1), (C1)); \
        \
        (B0) = _mm_roti_epi64((B0), -63); \
        (B1) = _mm_roti_epi64((B1), -63); \
    } while ((void)0, 0)
113 
/*
 * DIAGONALIZE: re-orders the 64-bit words held in the B, C and D register
 * pairs. Writing a pair X0:X1 as the word sequence (x0, x1, x2, x3), with
 * x0 the low half of X0:
 *
 *   B: (b0, b1, b2, b3) -> (b1, b2, b3, b0)
 *   C: (c0, c1, c2, c3) -> (c2, c3, c0, c1)   (C0 and C1 swapped)
 *   D: (d0, d1, d2, d3) -> (d3, d0, d1, d2)
 *
 * A0 and A1 are accepted only for a uniform call signature and are left
 * untouched. UNDIAGONALIZE applies the inverse re-ordering.
 *
 * The scratch variables carry a prefix so that a caller passing variables
 * named t0 or t1 as arguments does not have them shadowed inside the
 * macro body. B0..D1 must be modifiable __m128i lvalues without side
 * effects.
 */
#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
    do { \
        __m128i blamka_diag_t0 = _mm_alignr_epi8((B1), (B0), 8); \
        __m128i blamka_diag_t1 = _mm_alignr_epi8((B0), (B1), 8); \
        (B0) = blamka_diag_t0; \
        (B1) = blamka_diag_t1; \
        \
        blamka_diag_t0 = (C0); \
        (C0) = (C1); \
        (C1) = blamka_diag_t0; \
        \
        blamka_diag_t0 = _mm_alignr_epi8((D1), (D0), 8); \
        blamka_diag_t1 = _mm_alignr_epi8((D0), (D1), 8); \
        (D0) = blamka_diag_t1; \
        (D1) = blamka_diag_t0; \
    } while ((void)0, 0)
130 
/*
 * UNDIAGONALIZE: inverse of DIAGONALIZE. Writing a pair X0:X1 as the word
 * sequence (x0, x1, x2, x3), with x0 the low half of X0:
 *
 *   B: (b0, b1, b2, b3) -> (b3, b0, b1, b2)
 *   C: (c0, c1, c2, c3) -> (c2, c3, c0, c1)   (C0 and C1 swapped)
 *   D: (d0, d1, d2, d3) -> (d1, d2, d3, d0)
 *
 * A0 and A1 are accepted only for a uniform call signature and are left
 * untouched.
 *
 * The scratch variables carry a prefix so that a caller passing variables
 * named t0 or t1 as arguments does not have them shadowed inside the
 * macro body. B0..D1 must be modifiable __m128i lvalues without side
 * effects.
 */
#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
    do { \
        __m128i blamka_undiag_t0 = _mm_alignr_epi8((B0), (B1), 8); \
        __m128i blamka_undiag_t1 = _mm_alignr_epi8((B1), (B0), 8); \
        (B0) = blamka_undiag_t0; \
        (B1) = blamka_undiag_t1; \
        \
        blamka_undiag_t0 = (C0); \
        (C0) = (C1); \
        (C1) = blamka_undiag_t0; \
        \
        blamka_undiag_t0 = _mm_alignr_epi8((D0), (D1), 8); \
        blamka_undiag_t1 = _mm_alignr_epi8((D1), (D0), 8); \
        (D0) = blamka_undiag_t1; \
        (D1) = blamka_undiag_t0; \
    } while ((void)0, 0)
147 
/*
 * BLAKE2_ROUND: one full round over eight __m128i registers.
 *
 *   1. G1 then G2 on the registers as given;
 *   2. DIAGONALIZE to re-order the words of the B, C and D pairs;
 *   3. G1 then G2 again on the re-ordered registers;
 *   4. UNDIAGONALIZE to restore the original word order.
 *
 * Note the parameter order: this macro takes the registers grouped by
 * letter (A0, A1, B0, B1, ...), while the helper macros it invokes take
 * them grouped by suffix (A0, B0, C0, D0, A1, ...). All arguments must be
 * modifiable __m128i lvalues without side effects; they are updated in
 * place.
 */
#define BLAKE2_ROUND(A0, A1, B0, B1, C0, C1, D0, D1) \
    do { \
        G1(A0, B0, C0, D0, A1, B1, C1, D1); \
        G2(A0, B0, C0, D0, A1, B1, C1, D1); \
        \
        DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
        \
        G1(A0, B0, C0, D0, A1, B1, C1, D1); \
        G2(A0, B0, C0, D0, A1, B1, C1, D1); \
        \
        UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
    } while ((void)0, 0)
160 
161 
162 #endif /* BLAKE_ROUND_MKA_OPT_H */
#define FORCE_INLINE
Definition: endian.h:10
static FORCE_INLINE __m128i fBlaMka(__m128i x, __m128i y)
Definition: blamka-round-ssse3.h:67