Monero
Loading...
Searching...
No Matches
blamka-round-ssse3.h
Go to the documentation of this file.
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>

All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the name of the copyright holder nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
28
/* Original code from Argon2 reference source code package used under CC0 Licence
 * https://github.com/P-H-C/phc-winner-argon2
 * Copyright 2015
 * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
*/
34
35#ifndef BLAKE_ROUND_MKA_OPT_H
36#define BLAKE_ROUND_MKA_OPT_H
37
38#include "blake2-impl.h"
39
40#ifdef __GNUC__
41#include <x86intrin.h>
42#else
43#include <intrin.h>
44#endif
45
/*
 * clang may already define _mm_roti_epi64 (using the XOP instruction set);
 * drop that definition so the SSSE3 version below is the one in effect.
 */
#ifdef _mm_roti_epi64
#undef _mm_roti_epi64
#endif

/*
 * Byte-shuffle masks for _mm_shuffle_epi8. Within each 64-bit lane,
 * destination byte i is taken from source byte (i + 2) % 8 for r16 and
 * (i + 3) % 8 for r24, i.e. every lane is rotated right by 16 or 24 bits.
 */
#define r16 \
    (_mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9))
#define r24 \
    (_mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10))

/*
 * Rotate each 64-bit lane of the __m128i value x right by -(c) bits.
 *
 * c is the NEGATED rotation count (the users in this header pass -32, -24,
 * -16 and -63). Rotations by 32, 24 and 16 are done with a single shuffle;
 * a rotation by 63 uses x + x in place of x << 1; any other count falls back
 * to (x >> n) ^ (x << (64 - n)).
 *
 * x and c appear several times in the expansion, so neither may have side
 * effects. The whole expansion is parenthesised so that the macro can be
 * used as an operand of any surrounding expression.
 */
#define _mm_roti_epi64(x, c)                                                   \
    ((-(c) == 32)                                                              \
         ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1))                     \
     : (-(c) == 24)                                                            \
         ? _mm_shuffle_epi8((x), r24)                                          \
     : (-(c) == 16)                                                            \
         ? _mm_shuffle_epi8((x), r16)                                          \
     : (-(c) == 63)                                                            \
         ? _mm_xor_si128(_mm_srli_epi64((x), -(c)),                            \
                         _mm_add_epi64((x), (x)))                              \
         : _mm_xor_si128(_mm_srli_epi64((x), -(c)),                            \
                         _mm_slli_epi64((x), 64 - (-(c)))))
66
67static FORCE_INLINE __m128i fBlaMka(__m128i x, __m128i y) {
68 const __m128i z = _mm_mul_epu32(x, y);
69 return _mm_add_epi64(_mm_add_epi64(x, y), _mm_add_epi64(z, z));
70}
71
/*
 * G1: first half of the G mixing step, applied to two independent register
 * sets (suffix 0 and suffix 1) in lock-step.
 *
 * All eight arguments must be modifiable __m128i lvalues. They are updated
 * in place and are named several times in the expansion, so they must not
 * have side effects. For each set, per 64-bit lane:
 *   A = fBlaMka(A, B);   D = (D ^ A) rotated right by 32;
 *   C = fBlaMka(C, D);   B = (B ^ C) rotated right by 24.
 *
 * The loop condition is spelled ((void)0, 0) instead of a plain 0,
 * presumably to keep compilers from warning about a constant condition.
 */
#define G1(A0, B0, C0, D0, A1, B1, C1, D1)                                     \
    do {                                                                       \
        A0 = fBlaMka(A0, B0);                                                  \
        A1 = fBlaMka(A1, B1);                                                  \
                                                                               \
        D0 = _mm_xor_si128(D0, A0);                                            \
        D1 = _mm_xor_si128(D1, A1);                                            \
                                                                               \
        D0 = _mm_roti_epi64(D0, -32);                                          \
        D1 = _mm_roti_epi64(D1, -32);                                          \
                                                                               \
        C0 = fBlaMka(C0, D0);                                                  \
        C1 = fBlaMka(C1, D1);                                                  \
                                                                               \
        B0 = _mm_xor_si128(B0, C0);                                            \
        B1 = _mm_xor_si128(B1, C1);                                            \
                                                                               \
        B0 = _mm_roti_epi64(B0, -24);                                          \
        B1 = _mm_roti_epi64(B1, -24);                                          \
    } while ((void)0, 0)
92
/*
 * G2: second half of the G mixing step, applied to two independent register
 * sets (suffix 0 and suffix 1) in lock-step.
 *
 * All eight arguments must be modifiable __m128i lvalues. They are updated
 * in place and are named several times in the expansion, so they must not
 * have side effects. For each set, per 64-bit lane:
 *   A = fBlaMka(A, B);   D = (D ^ A) rotated right by 16;
 *   C = fBlaMka(C, D);   B = (B ^ C) rotated right by 63.
 *
 * The loop condition is spelled ((void)0, 0) instead of a plain 0,
 * presumably to keep compilers from warning about a constant condition.
 */
#define G2(A0, B0, C0, D0, A1, B1, C1, D1)                                     \
    do {                                                                       \
        A0 = fBlaMka(A0, B0);                                                  \
        A1 = fBlaMka(A1, B1);                                                  \
                                                                               \
        D0 = _mm_xor_si128(D0, A0);                                            \
        D1 = _mm_xor_si128(D1, A1);                                            \
                                                                               \
        D0 = _mm_roti_epi64(D0, -16);                                          \
        D1 = _mm_roti_epi64(D1, -16);                                          \
                                                                               \
        C0 = fBlaMka(C0, D0);                                                  \
        C1 = fBlaMka(C1, D1);                                                  \
                                                                               \
        B0 = _mm_xor_si128(B0, C0);                                            \
        B1 = _mm_xor_si128(B1, C1);                                            \
                                                                               \
        B0 = _mm_roti_epi64(B0, -63);                                          \
        B1 = _mm_roti_epi64(B1, -63);                                          \
    } while ((void)0, 0)
113
/*
 * DIAGONALIZE: rotate the B, C and D rows so that a following column-wise G
 * step mixes the diagonals.
 *
 * Each pair (X0, X1) is viewed as one row of four 64-bit words
 * (x0, x1, x2, x3) with X0 = (x0, x1) and X1 = (x2, x3), low lane first.
 * After the macro:
 *   B = (b1, b2, b3, b0)
 *   C = (c2, c3, c0, c1)      (C0 and C1 are simply swapped)
 *   D = (d3, d0, d1, d2)
 * A0 and A1 are accepted for symmetry with the other macros but not used.
 *
 * The B, C and D arguments must be modifiable __m128i lvalues without side
 * effects. The temporaries carry a long, suffixed name so that they cannot
 * capture a caller argument that happens to be called t0 or t1.
 */
#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1)                            \
    do {                                                                       \
        __m128i blamka_diag_tmp0_ = _mm_alignr_epi8(B1, B0, 8);                \
        __m128i blamka_diag_tmp1_ = _mm_alignr_epi8(B0, B1, 8);                \
        B0 = blamka_diag_tmp0_;                                                \
        B1 = blamka_diag_tmp1_;                                                \
                                                                               \
        blamka_diag_tmp0_ = C0;                                                \
        C0 = C1;                                                               \
        C1 = blamka_diag_tmp0_;                                                \
                                                                               \
        blamka_diag_tmp0_ = _mm_alignr_epi8(D1, D0, 8);                        \
        blamka_diag_tmp1_ = _mm_alignr_epi8(D0, D1, 8);                        \
        D0 = blamka_diag_tmp1_;                                                \
        D1 = blamka_diag_tmp0_;                                                \
    } while ((void)0, 0)
130
/*
 * UNDIAGONALIZE: inverse of DIAGONALIZE; rotates the B, C and D rows back to
 * their column layout.
 *
 * Each pair (X0, X1) is viewed as one row of four 64-bit words
 * (x0, x1, x2, x3) with X0 = (x0, x1) and X1 = (x2, x3), low lane first.
 * After the macro:
 *   B = (b3, b0, b1, b2)
 *   C = (c2, c3, c0, c1)      (C0 and C1 are simply swapped)
 *   D = (d1, d2, d3, d0)
 * A0 and A1 are accepted for symmetry with the other macros but not used.
 *
 * The B, C and D arguments must be modifiable __m128i lvalues without side
 * effects. The temporaries carry a long, suffixed name so that they cannot
 * capture a caller argument that happens to be called t0 or t1.
 */
#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1)                          \
    do {                                                                       \
        __m128i blamka_undiag_tmp0_ = _mm_alignr_epi8(B0, B1, 8);              \
        __m128i blamka_undiag_tmp1_ = _mm_alignr_epi8(B1, B0, 8);              \
        B0 = blamka_undiag_tmp0_;                                              \
        B1 = blamka_undiag_tmp1_;                                              \
                                                                               \
        blamka_undiag_tmp0_ = C0;                                              \
        C0 = C1;                                                               \
        C1 = blamka_undiag_tmp0_;                                              \
                                                                               \
        blamka_undiag_tmp0_ = _mm_alignr_epi8(D0, D1, 8);                      \
        blamka_undiag_tmp1_ = _mm_alignr_epi8(D1, D0, 8);                      \
        D0 = blamka_undiag_tmp1_;                                              \
        D1 = blamka_undiag_tmp0_;                                              \
    } while ((void)0, 0)
147
/*
 * BLAKE2_ROUND: one full round over eight __m128i registers.
 *
 * Runs G1 and G2 on the registers as given, diagonalizes the B, C and D
 * rows, runs G1 and G2 again, then undoes the diagonalization.
 *
 * Note the argument order: this macro takes the registers grouped by row
 * (A0, A1, B0, B1, ...), whereas the helpers it calls take them grouped by
 * register set (A0, B0, C0, D0, A1, ...).
 *
 * All arguments must be modifiable __m128i lvalues without side effects;
 * they are updated in place.
 */
#define BLAKE2_ROUND(A0, A1, B0, B1, C0, C1, D0, D1)                           \
    do {                                                                       \
        /* Column step. */                                                     \
        G1(A0, B0, C0, D0, A1, B1, C1, D1);                                    \
        G2(A0, B0, C0, D0, A1, B1, C1, D1);                                    \
                                                                               \
        DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1);                           \
                                                                               \
        /* Diagonal step. */                                                   \
        G1(A0, B0, C0, D0, A1, B1, C1, D1);                                    \
        G2(A0, B0, C0, D0, A1, B1, C1, D1);                                    \
                                                                               \
        UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1);                         \
    } while ((void)0, 0)
160
161
162#endif /* BLAKE_ROUND_MKA_OPT_H */
static FORCE_INLINE __m128i fBlaMka(__m128i x, __m128i y)
Definition blamka-round-ssse3.h:67
#define FORCE_INLINE
Definition endian.h:10