Monero
Loading...
Searching...
No Matches
external
randomx
src
blake2
blamka-round-ssse3.h
Go to the documentation of this file.
1
/*
2
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
3
4
All rights reserved.
5
6
Redistribution and use in source and binary forms, with or without
7
modification, are permitted provided that the following conditions are met:
8
* Redistributions of source code must retain the above copyright
9
notice, this list of conditions and the following disclaimer.
10
* Redistributions in binary form must reproduce the above copyright
11
notice, this list of conditions and the following disclaimer in the
12
documentation and/or other materials provided with the distribution.
13
* Neither the name of the copyright holder nor the
14
names of its contributors may be used to endorse or promote products
15
derived from this software without specific prior written permission.
16
17
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
21
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
24
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
25
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
*/
28
29
/* Original code from Argon2 reference source code package used under CC0 Licence
30
* https://github.com/P-H-C/phc-winner-argon2
31
* Copyright 2015
32
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
33
*/
34
35
#ifndef BLAKE_ROUND_MKA_OPT_H
36
#define BLAKE_ROUND_MKA_OPT_H
37
38
#include "
blake2-impl.h
"
39
40
#ifdef __GNUC__
41
#include <x86intrin.h>
42
#else
43
#include <intrin.h>
44
#endif
45
46
#ifdef _mm_roti_epi64
//clang defines it using the XOP instruction set
47
#undef _mm_roti_epi64
48
#endif
49
50
/*
 * Control masks for _mm_shuffle_epi8. Within each 64-bit lane, destination
 * byte i is taken from source byte (i + 2) % 8 for r16 and from source byte
 * (i + 3) % 8 for r24, which rotates every lane right by 16 and 24 bits
 * respectively.
 */
#define r16 \
    (_mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, \
                   10, 11, 12, 13, 14, 15, 8, 9))
#define r24 \
    (_mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, \
                   11, 12, 13, 14, 15, 8, 9, 10))
54
/*
 * Rotate each 64-bit lane of x right by -(c) bits. The count is passed
 * negated (callers in this file use -32, -24, -16 and -63).
 *
 *   -(c) == 32: swap the two 32-bit halves of every lane.
 *   -(c) == 24: byte shuffle with the r24 mask.
 *   -(c) == 16: byte shuffle with the r16 mask.
 *   -(c) == 63: logical right shift by 63 combined with x + x, where the
 *               addition stands in for the left shift by one bit.
 *   otherwise : generic right shift by -(c) combined with a left shift by
 *               64 - (-(c)).
 *
 * x may be expanded more than once, so it must be free of side effects.
 * NOTE(review): c is presumably always a compile-time constant so that the
 * selection folds to a single instruction sequence - confirm for new callers.
 *
 * The whole expansion is parenthesized so the macro can be used as an operand
 * of a larger expression without the surrounding operator binding to the
 * first condition of the conditional chain.
 */
#define _mm_roti_epi64(x, c) \
    ((-(c) == 32) \
         ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1)) \
         : (-(c) == 24) \
               ? _mm_shuffle_epi8((x), r24) \
               : (-(c) == 16) \
                     ? _mm_shuffle_epi8((x), r16) \
                     : (-(c) == 63) \
                           ? _mm_xor_si128(_mm_srli_epi64((x), -(c)), \
                                           _mm_add_epi64((x), (x))) \
                           : _mm_xor_si128(_mm_srli_epi64((x), -(c)), \
                                           _mm_slli_epi64((x), 64 - (-(c)))))
66
67
/*
 * BlaMka mixing primitive, evaluated independently in both 64-bit lanes:
 *
 *     result = x + y + 2 * (low 32 bits of x) * (low 32 bits of y)
 *
 * with all arithmetic wrapping modulo 2^64. _mm_mul_epu32 supplies the
 * 32x32 -> 64 bit unsigned product of the low halves of each lane.
 */
static FORCE_INLINE __m128i fBlaMka(__m128i x, __m128i y) {
    const __m128i sum = _mm_add_epi64(x, y);
    const __m128i low_product = _mm_mul_epu32(x, y);
    const __m128i doubled_product = _mm_add_epi64(low_product, low_product);

    return _mm_add_epi64(sum, doubled_product);
}
71
72
/*
 * First half of the mixing step, applied to two register sets side by side
 * (suffix 0 and suffix 1):
 *
 *     A = fBlaMka(A, B);   D = (D ^ A) rotated right by 32 bits;
 *     C = fBlaMka(C, D);   B = (B ^ C) rotated right by 24 bits.
 *
 * Every argument is assigned to, so each must be a modifiable __m128i
 * lvalue; arguments are expanded several times and must not have side
 * effects.
 */
#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \
    do { \
        /* A <- fBlaMka(A, B) */ \
        A0 = fBlaMka(A0, B0); \
        A1 = fBlaMka(A1, B1); \
        /* D <- D ^ A */ \
        D0 = _mm_xor_si128(D0, A0); \
        D1 = _mm_xor_si128(D1, A1); \
        /* D <- D rotated right by 32 */ \
        D0 = _mm_roti_epi64(D0, -32); \
        D1 = _mm_roti_epi64(D1, -32); \
        /* C <- fBlaMka(C, D) */ \
        C0 = fBlaMka(C0, D0); \
        C1 = fBlaMka(C1, D1); \
        /* B <- B ^ C */ \
        B0 = _mm_xor_si128(B0, C0); \
        B1 = _mm_xor_si128(B1, C1); \
        /* B <- B rotated right by 24 */ \
        B0 = _mm_roti_epi64(B0, -24); \
        B1 = _mm_roti_epi64(B1, -24); \
    } while ((void)0, 0)
92
93
/*
 * Second half of the mixing step, applied to two register sets side by side
 * (suffix 0 and suffix 1). Same sequence as G1 with different rotations:
 *
 *     A = fBlaMka(A, B);   D = (D ^ A) rotated right by 16 bits;
 *     C = fBlaMka(C, D);   B = (B ^ C) rotated right by 63 bits.
 *
 * Every argument is assigned to, so each must be a modifiable __m128i
 * lvalue; arguments are expanded several times and must not have side
 * effects.
 */
#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \
    do { \
        /* A <- fBlaMka(A, B) */ \
        A0 = fBlaMka(A0, B0); \
        A1 = fBlaMka(A1, B1); \
        /* D <- D ^ A */ \
        D0 = _mm_xor_si128(D0, A0); \
        D1 = _mm_xor_si128(D1, A1); \
        /* D <- D rotated right by 16 */ \
        D0 = _mm_roti_epi64(D0, -16); \
        D1 = _mm_roti_epi64(D1, -16); \
        /* C <- fBlaMka(C, D) */ \
        C0 = fBlaMka(C0, D0); \
        C1 = fBlaMka(C1, D1); \
        /* B <- B ^ C */ \
        B0 = _mm_xor_si128(B0, C0); \
        B1 = _mm_xor_si128(B1, C1); \
        /* B <- B rotated right by 63 */ \
        B0 = _mm_roti_epi64(B0, -63); \
        B1 = _mm_roti_epi64(B1, -63); \
    } while ((void)0, 0)
113
114
/*
 * Re-align the B, C and D register pairs before the second G1/G2 pass.
 *
 * Treat each pair (X0, X1) as four 64-bit words x0..x3, with X0 = [x0, x1]
 * and X1 = [x2, x3] (low lane first). _mm_alignr_epi8(hi, lo, 8) yields
 * [upper word of lo, lower word of hi], so after this macro:
 *
 *     B holds (b1, b2, b3, b0)   - rotated left by one word
 *     C holds (c2, c3, c0, c1)   - the two registers are swapped
 *     D holds (d3, d0, d1, d2)   - rotated left by three words
 *
 * A0 and A1 are accepted for symmetry with the other macros but are not
 * touched. B, C and D arguments must be distinct modifiable __m128i lvalues.
 */
#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
    do { \
        __m128i next0 = _mm_alignr_epi8(B1, B0, 8); \
        __m128i next1 = _mm_alignr_epi8(B0, B1, 8); \
        B0 = next0; \
        B1 = next1; \
        \
        next0 = C1; \
        next1 = C0; \
        C0 = next0; \
        C1 = next1; \
        \
        next0 = _mm_alignr_epi8(D0, D1, 8); \
        next1 = _mm_alignr_epi8(D1, D0, 8); \
        D0 = next0; \
        D1 = next1; \
    } while ((void)0, 0)
130
131
/*
 * Inverse of DIAGONALIZE: restore the original word order of the B, C and D
 * register pairs.
 *
 * With each pair (X0, X1) viewed as four 64-bit words (low lane first) and
 * _mm_alignr_epi8(hi, lo, 8) yielding [upper word of lo, lower word of hi]:
 *
 *     B is rotated right by one word
 *     C has its two registers swapped
 *     D is rotated right by three words
 *
 * A0 and A1 are accepted for symmetry with the other macros but are not
 * touched. B, C and D arguments must be distinct modifiable __m128i lvalues.
 */
#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
    do { \
        __m128i next0 = _mm_alignr_epi8(B0, B1, 8); \
        __m128i next1 = _mm_alignr_epi8(B1, B0, 8); \
        B0 = next0; \
        B1 = next1; \
        \
        next0 = C1; \
        next1 = C0; \
        C0 = next0; \
        C1 = next1; \
        \
        next0 = _mm_alignr_epi8(D1, D0, 8); \
        next1 = _mm_alignr_epi8(D0, D1, 8); \
        D0 = next0; \
        D1 = next1; \
    } while ((void)0, 0)
147
148
/*
 * One full round over eight __m128i registers: G1 and G2 on the registers as
 * given, DIAGONALIZE, G1 and G2 again, then UNDIAGONALIZE to put the words
 * back in their original positions.
 *
 * Note the argument order: this macro takes the registers grouped by row
 * (A0, A1, B0, B1, ...), while the helper macros it expands take them grouped
 * by register set (A0, B0, C0, D0, A1, ...).
 *
 * Every argument is assigned to and expanded many times, so each must be a
 * distinct modifiable __m128i lvalue without side effects.
 */
#define BLAKE2_ROUND(A0, A1, B0, B1, C0, C1, D0, D1) \
    do { \
        /* first pass, registers as given */ \
        G1(A0, B0, C0, D0, A1, B1, C1, D1); \
        G2(A0, B0, C0, D0, A1, B1, C1, D1); \
        \
        DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
        \
        /* second pass, on the re-aligned B, C and D registers */ \
        G1(A0, B0, C0, D0, A1, B1, C1, D1); \
        G2(A0, B0, C0, D0, A1, B1, C1, D1); \
        \
        UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
    } while ((void)0, 0)
160
161
162
#endif
/* BLAKE_ROUND_MKA_OPT_H */
blake2-impl.h
fBlaMka
static FORCE_INLINE __m128i fBlaMka(__m128i x, __m128i y)
Definition
blamka-round-ssse3.h:67
FORCE_INLINE
#define FORCE_INLINE
Definition
endian.h:10
Generated on
for Monero by
1.16.1