27 constexpr
int FINAL_LIMB_POSITION = 3072 / SIGNED_LIMB_SIZE;
28 constexpr
int FINAL_LIMB_MODULUS_BITS = 3072 % SIGNED_LIMB_SIZE;
29 constexpr limb_t MAX_LIMB = (limb_t)(-1);
30 constexpr limb_t MAX_SIGNED_LIMB = (((limb_t)1) << SIGNED_LIMB_SIZE) - 1;
32 constexpr limb_t MAX_PRIME_DIFF = 1103717;
34 constexpr limb_t MODULUS_INVERSE = limb_t(0x70a1421da087d93);
38 inline void extract3(limb_t& c0, limb_t& c1, limb_t& c2, limb_t& n)
47 inline void mul(limb_t& c0, limb_t& c1,
const limb_t& a,
const limb_t& b)
49 double_limb_t
t = (double_limb_t)a * b;
55 inline void mulnadd3(limb_t& c0, limb_t& c1, limb_t& c2, limb_t& d0, limb_t& d1, limb_t& d2,
const limb_t& n)
57 double_limb_t
t = (double_limb_t)d0 * n + c0;
60 t += (double_limb_t)d1 * n + c1;
67 inline void muln2(limb_t& c0, limb_t& c1,
const limb_t& n)
69 double_limb_t
t = (double_limb_t)c0 * n;
72 t += (double_limb_t)c1 * n;
77 inline void muladd3(limb_t& c0, limb_t& c1, limb_t& c2,
const limb_t& a,
const limb_t& b)
79 double_limb_t
t = (double_limb_t)a * b;
80 limb_t th =
t >> LIMB_SIZE;
84 th += (c0 < tl) ? 1 : 0;
86 c2 += (c1 < th) ? 1 : 0;
93 inline void addnextract2(limb_t& c0, limb_t& c1,
const limb_t& a, limb_t& n)
117 if (this->
limbs[0] <= std::numeric_limits<limb_t>::max() - MAX_PRIME_DIFF)
return false;
118 for (
int i = 1; i <
LIMBS; ++i) {
119 if (this->
limbs[i] != std::numeric_limits<limb_t>::max())
return false;
126 limb_t c0 = MAX_PRIME_DIFF;
128 for (
int i = 0; i <
LIMBS; ++i) {
129 addnextract2(c0, c1, this->
limbs[i], this->
limbs[i]);
139 signed_limb_t limbs[SIGNED_LIMBS];
144 memset(limbs, 0,
sizeof(limbs));
149 void FromNum3072(
const Num3072& in)
152 int b = 0, outpos = 0;
153 for (
int i = 0; i < LIMBS; ++i) {
154 c += double_limb_t{in.
limbs[i]} << b;
156 while (b >= SIGNED_LIMB_SIZE) {
157 limbs[outpos++] = limb_t(c) & MAX_SIGNED_LIMB;
158 c >>= SIGNED_LIMB_SIZE;
159 b -= SIGNED_LIMB_SIZE;
162 Assume(outpos == SIGNED_LIMBS - 1);
163 limbs[SIGNED_LIMBS - 1] = c;
164 c >>= SIGNED_LIMB_SIZE;
172 int b = 0, outpos = 0;
173 for (
int i = 0; i < SIGNED_LIMBS; ++i) {
174 c += double_limb_t(limbs[i]) << b;
175 b += SIGNED_LIMB_SIZE;
176 if (b >= LIMB_SIZE) {
177 out.limbs[outpos++] = c;
191 void Normalize(
bool negate)
194 signed_limb_t cond_add = limbs[SIGNED_LIMBS-1] >> (LIMB_SIZE-1);
195 limbs[0] += signed_limb_t(-MAX_PRIME_DIFF) & cond_add;
196 limbs[FINAL_LIMB_POSITION] += (signed_limb_t(1) << FINAL_LIMB_MODULUS_BITS) & cond_add;
198 signed_limb_t cond_negate = -signed_limb_t(negate);
199 for (
int i = 0; i < SIGNED_LIMBS; ++i) {
200 limbs[i] = (limbs[i] ^ cond_negate) - cond_negate;
203 for (
int i = 0; i < SIGNED_LIMBS - 1; ++i) {
204 limbs[i + 1] += limbs[i] >> SIGNED_LIMB_SIZE;
205 limbs[i] &= MAX_SIGNED_LIMB;
208 cond_add = limbs[SIGNED_LIMBS-1] >> (LIMB_SIZE-1);
209 limbs[0] += signed_limb_t(-MAX_PRIME_DIFF) & cond_add;
210 limbs[FINAL_LIMB_POSITION] += (signed_limb_t(1) << FINAL_LIMB_MODULUS_BITS) & cond_add;
212 for (
int i = 0; i < SIGNED_LIMBS - 1; ++i) {
213 limbs[i + 1] += limbs[i] >> SIGNED_LIMB_SIZE;
214 limbs[i] &= MAX_SIGNED_LIMB;
222 signed_limb_t u, v, q, r;
233 inline limb_t ComputeDivstepMatrix(signed_limb_t eta, limb_t f, limb_t g, SignedMatrix&
out)
236 static const uint8_t NEGINV256[128] = {
237 0xFF, 0x55, 0x33, 0x49, 0xC7, 0x5D, 0x3B, 0x11, 0x0F, 0xE5, 0xC3, 0x59,
238 0xD7, 0xED, 0xCB, 0x21, 0x1F, 0x75, 0x53, 0x69, 0xE7, 0x7D, 0x5B, 0x31,
239 0x2F, 0x05, 0xE3, 0x79, 0xF7, 0x0D, 0xEB, 0x41, 0x3F, 0x95, 0x73, 0x89,
240 0x07, 0x9D, 0x7B, 0x51, 0x4F, 0x25, 0x03, 0x99, 0x17, 0x2D, 0x0B, 0x61,
241 0x5F, 0xB5, 0x93, 0xA9, 0x27, 0xBD, 0x9B, 0x71, 0x6F, 0x45, 0x23, 0xB9,
242 0x37, 0x4D, 0x2B, 0x81, 0x7F, 0xD5, 0xB3, 0xC9, 0x47, 0xDD, 0xBB, 0x91,
243 0x8F, 0x65, 0x43, 0xD9, 0x57, 0x6D, 0x4B, 0xA1, 0x9F, 0xF5, 0xD3, 0xE9,
244 0x67, 0xFD, 0xDB, 0xB1, 0xAF, 0x85, 0x63, 0xF9, 0x77, 0x8D, 0x6B, 0xC1,
245 0xBF, 0x15, 0xF3, 0x09, 0x87, 0x1D, 0xFB, 0xD1, 0xCF, 0xA5, 0x83, 0x19,
246 0x97, 0xAD, 0x8B, 0xE1, 0xDF, 0x35, 0x13, 0x29, 0xA7, 0x3D, 0x1B, 0xF1,
247 0xEF, 0xC5, 0xA3, 0x39, 0xB7, 0xCD, 0xAB, 0x01
250 limb_t u = 1, v = 0, q = 0, r = 1;
252 int i = SIGNED_LIMB_SIZE;
255 int zeros = std::countr_zero(g | (MAX_LIMB << i));
268 tmp = f; f =
g;
g = -tmp;
269 tmp = u; u = q; q = -tmp;
270 tmp = v; v = r; r = -tmp;
275 int limit = ((int)eta + 1) > i ? i : ((int)eta + 1);
277 limb_t
m = (MAX_LIMB >> (LIMB_SIZE - limit)) & 255U;
279 limb_t w = (
g * NEGINV256[(f >> 1) & 127]) &
m;
285 out.u = (signed_limb_t)u;
286 out.v = (signed_limb_t)v;
287 out.q = (signed_limb_t)q;
288 out.r = (signed_limb_t)r;
296 inline void UpdateDE(Num3072Signed& d, Num3072Signed& e,
const SignedMatrix& t)
298 const signed_limb_t u =
t.u, v=
t.v, q=
t.q, r=
t.r;
301 signed_limb_t sd = d.limbs[SIGNED_LIMBS - 1] >> (LIMB_SIZE - 1);
302 signed_limb_t se = e.limbs[SIGNED_LIMBS - 1] >> (LIMB_SIZE - 1);
303 signed_limb_t md = (u & sd) + (v & se);
304 signed_limb_t me = (q & sd) + (r & se);
306 signed_limb_t di = d.limbs[0], ei = e.limbs[0];
307 signed_double_limb_t cd = (signed_double_limb_t)u * di + (signed_double_limb_t)v * ei;
308 signed_double_limb_t ce = (signed_double_limb_t)q * di + (signed_double_limb_t)r * ei;
310 md -= (MODULUS_INVERSE * limb_t(cd) + md) & MAX_SIGNED_LIMB;
311 me -= (MODULUS_INVERSE * limb_t(ce) + me) & MAX_SIGNED_LIMB;
313 cd -= (signed_double_limb_t)1103717 * md;
314 ce -= (signed_double_limb_t)1103717 * me;
316 Assume((cd & MAX_SIGNED_LIMB) == 0);
317 Assume((ce & MAX_SIGNED_LIMB) == 0);
318 cd >>= SIGNED_LIMB_SIZE;
319 ce >>= SIGNED_LIMB_SIZE;
323 for (
int i = 1; i < SIGNED_LIMBS - 1; ++i) {
326 cd += (signed_double_limb_t)u * di + (signed_double_limb_t)v * ei;
327 ce += (signed_double_limb_t)q * di + (signed_double_limb_t)r * ei;
328 d.limbs[i - 1] = (signed_limb_t)cd & MAX_SIGNED_LIMB; cd >>= SIGNED_LIMB_SIZE;
329 e.limbs[i - 1] = (signed_limb_t)ce & MAX_SIGNED_LIMB; ce >>= SIGNED_LIMB_SIZE;
332 di = d.limbs[SIGNED_LIMBS - 1];
333 ei = e.limbs[SIGNED_LIMBS - 1];
334 cd += (signed_double_limb_t)u * di + (signed_double_limb_t)v * ei;
335 ce += (signed_double_limb_t)q * di + (signed_double_limb_t)r * ei;
336 cd += (signed_double_limb_t)md << FINAL_LIMB_MODULUS_BITS;
337 ce += (signed_double_limb_t)me << FINAL_LIMB_MODULUS_BITS;
338 d.limbs[SIGNED_LIMBS - 2] = (signed_limb_t)cd & MAX_SIGNED_LIMB; cd >>= SIGNED_LIMB_SIZE;
339 e.limbs[SIGNED_LIMBS - 2] = (signed_limb_t)ce & MAX_SIGNED_LIMB; ce >>= SIGNED_LIMB_SIZE;
341 d.limbs[SIGNED_LIMBS - 1] = (signed_limb_t)cd;
342 e.limbs[SIGNED_LIMBS - 1] = (signed_limb_t)ce;
350 inline void UpdateFG(Num3072Signed& f, Num3072Signed& g,
const SignedMatrix& t,
int len)
352 const signed_limb_t u =
t.u, v=
t.v, q=
t.q, r=
t.r;
354 signed_limb_t fi, gi;
355 signed_double_limb_t cf, cg;
359 cf = (signed_double_limb_t)u * fi + (signed_double_limb_t)v * gi;
360 cg = (signed_double_limb_t)q * fi + (signed_double_limb_t)r * gi;
362 Assume((cf & MAX_SIGNED_LIMB) == 0);
363 Assume((cg & MAX_SIGNED_LIMB) == 0);
364 cf >>= SIGNED_LIMB_SIZE;
365 cg >>= SIGNED_LIMB_SIZE;
368 for (
int i = 1; i < len; ++i) {
371 cf += (signed_double_limb_t)u * fi + (signed_double_limb_t)v * gi;
372 cg += (signed_double_limb_t)q * fi + (signed_double_limb_t)r * gi;
373 f.limbs[i - 1] = (signed_limb_t)cf & MAX_SIGNED_LIMB; cf >>= SIGNED_LIMB_SIZE;
374 g.limbs[i - 1] = (signed_limb_t)cg & MAX_SIGNED_LIMB; cg >>= SIGNED_LIMB_SIZE;
377 f.limbs[len - 1] = (signed_limb_t)cf;
378 g.limbs[len - 1] = (signed_limb_t)cg;
399 Num3072Signed d, e, f,
g;
403 f.limbs[0] = -MAX_PRIME_DIFF;
404 f.limbs[FINAL_LIMB_POSITION] = ((
limb_t)1) << FINAL_LIMB_MODULUS_BITS;
405 g.FromNum3072(*
this);
413 eta = ComputeDivstepMatrix(eta, f.limbs[0],
g.limbs[0],
t);
415 UpdateFG(f,
g,
t, len);
420 if (
g.limbs[0] == 0) {
422 for (
int j = 1; j < len; ++j) {
426 if (cond == 0)
break;
445 Assume((f.limbs[0] & MAX_SIGNED_LIMB) == 1 || (f.limbs[0] & MAX_SIGNED_LIMB) == MAX_SIGNED_LIMB);
449 d.Normalize(f.limbs[len - 1] >> (
LIMB_SIZE - 1));
457 limb_t c0 = 0, c1 = 0, c2 = 0;
461 for (
int j = 0; j <
LIMBS - 1; ++j) {
462 limb_t d0 = 0, d1 = 0, d2 = 0;
463 mul(d0, d1, this->limbs[1 + j], a.
limbs[
LIMBS + j - (1 + j)]);
464 for (
int i = 2 + j; i <
LIMBS; ++i) muladd3(d0, d1, d2, this->limbs[i], a.
limbs[
LIMBS + j - i]);
465 mulnadd3(c0, c1, c2, d0, d1, d2, MAX_PRIME_DIFF);
466 for (
int i = 0; i < j + 1; ++i) muladd3(c0, c1, c2, this->limbs[i], a.
limbs[j - i]);
467 extract3(c0, c1, c2, tmp.
limbs[j]);
472 for (
int i = 0; i <
LIMBS; ++i) muladd3(c0, c1, c2, this->limbs[i], a.
limbs[
LIMBS - 1 - i]);
476 muln2(c0, c1, MAX_PRIME_DIFF);
477 for (
int j = 0; j <
LIMBS; ++j) {
478 addnextract2(c0, c1, tmp.
limbs[j], this->limbs[j]);
482 assert(c0 == 0 || c0 == 1);
495 for (
int i = 1; i <
LIMBS; ++i) this->limbs[i] = 0;
516 for (
int i = 0; i <
LIMBS; ++i) {
517 if (
sizeof(
limb_t) == 4) {
519 }
else if (
sizeof(
limb_t) == 8) {
526 for (
int i = 0; i <
LIMBS; ++i) {
527 if (
sizeof(
limb_t) == 4) {
529 }
else if (
sizeof(
limb_t) == 8) {
548 m_numerator = ToNum3072(in);
553 m_numerator.Divide(m_denominator);
554 m_denominator.SetToOne();
557 m_numerator.ToBytes(
data);
564 m_numerator.Multiply(mul.m_numerator);
565 m_denominator.Multiply(mul.m_denominator);
571 m_numerator.Multiply(div.m_denominator);
572 m_denominator.Multiply(div.m_numerator);
577 m_numerator.Multiply(ToNum3072(in));
582 m_denominator.Multiply(ToNum3072(in));
Span< std::byte > MakeWritableByteSpan(V &&v) noexcept
static constexpr size_t BYTE_SIZE
static constexpr unsigned BLOCKLEN
Block size (inputs/outputs to Keystream / Crypt should be multiples of this).
bool IsOverflow() const
Indicates whether d is larger than the modulus.
void Divide(const Num3072 &a)
static constexpr int LIMB_SIZE
void WriteLE64(B *ptr, uint64_t x)
static constexpr int LIMBS
uint32_t ReadLE32(const B *ptr)
MuHash3072() noexcept=default
void WriteLE32(B *ptr, uint32_t x)
void Multiply(const Num3072 &a)
A writer stream (for serialization) that computes a 256-bit hash.
uint64_t ReadLE64(const B *ptr)
#define Assume(val)
Assume is the identity function.
int64_t signed_double_limb_t
MuHash3072 & Remove(Span< const unsigned char > in) noexcept
void Finalize(uint256 &out) noexcept
Num3072 GetInverse() const
void ToBytes(unsigned char(&out)[BYTE_SIZE])
static constexpr int SIGNED_LIMB_SIZE
Num3072 ToNum3072(Span< const unsigned char > in)
MuHash3072 & operator*=(const MuHash3072 &mul) noexcept
Span< const std::byte > MakeByteSpan(V &&v) noexcept
A class representing MuHash sets.
MuHash3072 & Insert(Span< const unsigned char > in) noexcept
ChaCha20 cipher that only operates on multiples of 64 bytes.
static constexpr int SIGNED_LIMBS
MuHash3072 & operator/=(const MuHash3072 &div) noexcept