Bitcoin Core  29.1.0
P2P Digital Currency
modinv32_impl.h
Go to the documentation of this file.
1 /***********************************************************************
2  * Copyright (c) 2020 Peter Dettman *
3  * Distributed under the MIT software license, see the accompanying *
4  * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
5  **********************************************************************/
6 
7 #ifndef SECP256K1_MODINV32_IMPL_H
8 #define SECP256K1_MODINV32_IMPL_H
9 
10 #include "modinv32.h"
11 
12 #include "util.h"
13 
14 #include <stdlib.h>
15 
16 /* This file implements modular inversion based on the paper "Fast constant-time gcd computation and
17  * modular inversion" by Daniel J. Bernstein and Bo-Yin Yang.
18  *
19  * For an explanation of the algorithm, see doc/safegcd_implementation.md. This file contains an
20  * implementation for N=30, using 30-bit signed limbs represented as int32_t.
21  */
22 
23 #ifdef VERIFY
24 static const secp256k1_modinv32_signed30 SECP256K1_SIGNED30_ONE = {{1}};
25 
26 /* Compute a*factor and put it in r. All but the top limb in r will be in range [0,2^30). */
27 static void secp256k1_modinv32_mul_30(secp256k1_modinv32_signed30 *r, const secp256k1_modinv32_signed30 *a, int alen, int32_t factor) {
28  const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
29  int64_t c = 0;
30  int i;
31  for (i = 0; i < 8; ++i) {
32  if (i < alen) c += (int64_t)a->v[i] * factor;
33  r->v[i] = (int32_t)c & M30; c >>= 30;
34  }
35  if (8 < alen) c += (int64_t)a->v[8] * factor;
36  VERIFY_CHECK(c == (int32_t)c);
37  r->v[8] = (int32_t)c;
38 }
39 
40 /* Return -1 for a<b*factor, 0 for a==b*factor, 1 for a>b*factor. A consists of alen limbs; b has 9. */
41 static int secp256k1_modinv32_mul_cmp_30(const secp256k1_modinv32_signed30 *a, int alen, const secp256k1_modinv32_signed30 *b, int32_t factor) {
42  int i;
44  secp256k1_modinv32_mul_30(&am, a, alen, 1); /* Normalize all but the top limb of a. */
45  secp256k1_modinv32_mul_30(&bm, b, 9, factor);
46  for (i = 0; i < 8; ++i) {
47  /* Verify that all but the top limb of a and b are normalized. */
48  VERIFY_CHECK(am.v[i] >> 30 == 0);
49  VERIFY_CHECK(bm.v[i] >> 30 == 0);
50  }
51  for (i = 8; i >= 0; --i) {
52  if (am.v[i] < bm.v[i]) return -1;
53  if (am.v[i] > bm.v[i]) return 1;
54  }
55  return 0;
56 }
57 #endif
58 
59 /* Take as input a signed30 number in range (-2*modulus,modulus), and add a multiple of the modulus
60  * to it to bring it to range [0,modulus). If sign < 0, the input will also be negated in the
61  * process. The input must have limbs in range (-2^30,2^30). The output will have limbs in range
62  * [0,2^30). */
64  const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
65  int32_t r0 = r->v[0], r1 = r->v[1], r2 = r->v[2], r3 = r->v[3], r4 = r->v[4],
66  r5 = r->v[5], r6 = r->v[6], r7 = r->v[7], r8 = r->v[8];
67  volatile int32_t cond_add, cond_negate;
68 
69 #ifdef VERIFY
70  /* Verify that all limbs are in range (-2^30,2^30). */
71  int i;
72  for (i = 0; i < 9; ++i) {
73  VERIFY_CHECK(r->v[i] >= -M30);
74  VERIFY_CHECK(r->v[i] <= M30);
75  }
76  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, -2) > 0); /* r > -2*modulus */
77  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, 1) < 0); /* r < modulus */
78 #endif
79 
80  /* In a first step, add the modulus if the input is negative, and then negate if requested.
81  * This brings r from range (-2*modulus,modulus) to range (-modulus,modulus). As all input
82  * limbs are in range (-2^30,2^30), this cannot overflow an int32_t. Note that the right
83  * shifts below are signed sign-extending shifts (see assumptions.h for tests that that is
84  * indeed the behavior of the right shift operator). */
85  cond_add = r8 >> 31;
86  r0 += modinfo->modulus.v[0] & cond_add;
87  r1 += modinfo->modulus.v[1] & cond_add;
88  r2 += modinfo->modulus.v[2] & cond_add;
89  r3 += modinfo->modulus.v[3] & cond_add;
90  r4 += modinfo->modulus.v[4] & cond_add;
91  r5 += modinfo->modulus.v[5] & cond_add;
92  r6 += modinfo->modulus.v[6] & cond_add;
93  r7 += modinfo->modulus.v[7] & cond_add;
94  r8 += modinfo->modulus.v[8] & cond_add;
95  cond_negate = sign >> 31;
96  r0 = (r0 ^ cond_negate) - cond_negate;
97  r1 = (r1 ^ cond_negate) - cond_negate;
98  r2 = (r2 ^ cond_negate) - cond_negate;
99  r3 = (r3 ^ cond_negate) - cond_negate;
100  r4 = (r4 ^ cond_negate) - cond_negate;
101  r5 = (r5 ^ cond_negate) - cond_negate;
102  r6 = (r6 ^ cond_negate) - cond_negate;
103  r7 = (r7 ^ cond_negate) - cond_negate;
104  r8 = (r8 ^ cond_negate) - cond_negate;
105  /* Propagate the top bits, to bring limbs back to range (-2^30,2^30). */
106  r1 += r0 >> 30; r0 &= M30;
107  r2 += r1 >> 30; r1 &= M30;
108  r3 += r2 >> 30; r2 &= M30;
109  r4 += r3 >> 30; r3 &= M30;
110  r5 += r4 >> 30; r4 &= M30;
111  r6 += r5 >> 30; r5 &= M30;
112  r7 += r6 >> 30; r6 &= M30;
113  r8 += r7 >> 30; r7 &= M30;
114 
115  /* In a second step add the modulus again if the result is still negative, bringing r to range
116  * [0,modulus). */
117  cond_add = r8 >> 31;
118  r0 += modinfo->modulus.v[0] & cond_add;
119  r1 += modinfo->modulus.v[1] & cond_add;
120  r2 += modinfo->modulus.v[2] & cond_add;
121  r3 += modinfo->modulus.v[3] & cond_add;
122  r4 += modinfo->modulus.v[4] & cond_add;
123  r5 += modinfo->modulus.v[5] & cond_add;
124  r6 += modinfo->modulus.v[6] & cond_add;
125  r7 += modinfo->modulus.v[7] & cond_add;
126  r8 += modinfo->modulus.v[8] & cond_add;
127  /* And propagate again. */
128  r1 += r0 >> 30; r0 &= M30;
129  r2 += r1 >> 30; r1 &= M30;
130  r3 += r2 >> 30; r2 &= M30;
131  r4 += r3 >> 30; r3 &= M30;
132  r5 += r4 >> 30; r4 &= M30;
133  r6 += r5 >> 30; r5 &= M30;
134  r7 += r6 >> 30; r6 &= M30;
135  r8 += r7 >> 30; r7 &= M30;
136 
137  r->v[0] = r0;
138  r->v[1] = r1;
139  r->v[2] = r2;
140  r->v[3] = r3;
141  r->v[4] = r4;
142  r->v[5] = r5;
143  r->v[6] = r6;
144  r->v[7] = r7;
145  r->v[8] = r8;
146 
147  VERIFY_CHECK(r0 >> 30 == 0);
148  VERIFY_CHECK(r1 >> 30 == 0);
149  VERIFY_CHECK(r2 >> 30 == 0);
150  VERIFY_CHECK(r3 >> 30 == 0);
151  VERIFY_CHECK(r4 >> 30 == 0);
152  VERIFY_CHECK(r5 >> 30 == 0);
153  VERIFY_CHECK(r6 >> 30 == 0);
154  VERIFY_CHECK(r7 >> 30 == 0);
155  VERIFY_CHECK(r8 >> 30 == 0);
156  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, 0) >= 0); /* r >= 0 */
157  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, 1) < 0); /* r < modulus */
158 }
159 
160 /* Data type for transition matrices (see section 3 of explanation).
161  *
162  * t = [ u v ]
163  * [ q r ]
164  */
165 typedef struct {
166  int32_t u, v, q, r;
168 
169 /* Compute the transition matrix and zeta for 30 divsteps.
170  *
171  * Input: zeta: initial zeta
172  * f0: bottom limb of initial f
173  * g0: bottom limb of initial g
174  * Output: t: transition matrix
175  * Return: final zeta
176  *
177  * Implements the divsteps_n_matrix function from the explanation.
178  */
179 static int32_t secp256k1_modinv32_divsteps_30(int32_t zeta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t) {
180  /* u,v,q,r are the elements of the transformation matrix being built up,
181  * starting with the identity matrix. Semantically they are signed integers
182  * in range [-2^30,2^30], but here represented as unsigned mod 2^32. This
183  * permits left shifting (which is UB for negative numbers). The range
184  * being inside [-2^31,2^31) means that casting to signed works correctly.
185  */
186  uint32_t u = 1, v = 0, q = 0, r = 1;
187  volatile uint32_t c1, c2;
188  uint32_t mask1, mask2, f = f0, g = g0, x, y, z;
189  int i;
190 
191  for (i = 0; i < 30; ++i) {
192  VERIFY_CHECK((f & 1) == 1); /* f must always be odd */
193  VERIFY_CHECK((u * f0 + v * g0) == f << i);
194  VERIFY_CHECK((q * f0 + r * g0) == g << i);
195  /* Compute conditional masks for (zeta < 0) and for (g & 1). */
196  c1 = zeta >> 31;
197  mask1 = c1;
198  c2 = g & 1;
199  mask2 = -c2;
200  /* Compute x,y,z, conditionally negated versions of f,u,v. */
201  x = (f ^ mask1) - mask1;
202  y = (u ^ mask1) - mask1;
203  z = (v ^ mask1) - mask1;
204  /* Conditionally add x,y,z to g,q,r. */
205  g += x & mask2;
206  q += y & mask2;
207  r += z & mask2;
208  /* In what follows, mask1 is a condition mask for (zeta < 0) and (g & 1). */
209  mask1 &= mask2;
210  /* Conditionally change zeta into -zeta-2 or zeta-1. */
211  zeta = (zeta ^ mask1) - 1;
212  /* Conditionally add g,q,r to f,u,v. */
213  f += g & mask1;
214  u += q & mask1;
215  v += r & mask1;
216  /* Shifts */
217  g >>= 1;
218  u <<= 1;
219  v <<= 1;
220  /* Bounds on zeta that follow from the bounds on iteration count (max 20*30 divsteps). */
221  VERIFY_CHECK(zeta >= -601 && zeta <= 601);
222  }
223  /* Return data in t and return value. */
224  t->u = (int32_t)u;
225  t->v = (int32_t)v;
226  t->q = (int32_t)q;
227  t->r = (int32_t)r;
228  /* The determinant of t must be a power of two. This guarantees that multiplication with t
229  * does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which
230  * will be divided out again). As each divstep's individual matrix has determinant 2, the
231  * aggregate of 30 of them will have determinant 2^30. */
232  VERIFY_CHECK((int64_t)t->u * t->r - (int64_t)t->v * t->q == ((int64_t)1) << 30);
233  return zeta;
234 }
235 
236 /* secp256k1_modinv32_inv256[i] = -(2*i+1)^-1 (mod 256) */
237 static const uint8_t secp256k1_modinv32_inv256[128] = {
238  0xFF, 0x55, 0x33, 0x49, 0xC7, 0x5D, 0x3B, 0x11, 0x0F, 0xE5, 0xC3, 0x59,
239  0xD7, 0xED, 0xCB, 0x21, 0x1F, 0x75, 0x53, 0x69, 0xE7, 0x7D, 0x5B, 0x31,
240  0x2F, 0x05, 0xE3, 0x79, 0xF7, 0x0D, 0xEB, 0x41, 0x3F, 0x95, 0x73, 0x89,
241  0x07, 0x9D, 0x7B, 0x51, 0x4F, 0x25, 0x03, 0x99, 0x17, 0x2D, 0x0B, 0x61,
242  0x5F, 0xB5, 0x93, 0xA9, 0x27, 0xBD, 0x9B, 0x71, 0x6F, 0x45, 0x23, 0xB9,
243  0x37, 0x4D, 0x2B, 0x81, 0x7F, 0xD5, 0xB3, 0xC9, 0x47, 0xDD, 0xBB, 0x91,
244  0x8F, 0x65, 0x43, 0xD9, 0x57, 0x6D, 0x4B, 0xA1, 0x9F, 0xF5, 0xD3, 0xE9,
245  0x67, 0xFD, 0xDB, 0xB1, 0xAF, 0x85, 0x63, 0xF9, 0x77, 0x8D, 0x6B, 0xC1,
246  0xBF, 0x15, 0xF3, 0x09, 0x87, 0x1D, 0xFB, 0xD1, 0xCF, 0xA5, 0x83, 0x19,
247  0x97, 0xAD, 0x8B, 0xE1, 0xDF, 0x35, 0x13, 0x29, 0xA7, 0x3D, 0x1B, 0xF1,
248  0xEF, 0xC5, 0xA3, 0x39, 0xB7, 0xCD, 0xAB, 0x01
249 };
250 
251 /* Compute the transition matrix and eta for 30 divsteps (variable time).
252  *
253  * Input: eta: initial eta
254  * f0: bottom limb of initial f
255  * g0: bottom limb of initial g
256  * Output: t: transition matrix
257  * Return: final eta
258  *
259  * Implements the divsteps_n_matrix_var function from the explanation.
260  */
261 static int32_t secp256k1_modinv32_divsteps_30_var(int32_t eta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t) {
262  /* Transformation matrix; see comments in secp256k1_modinv32_divsteps_30. */
263  uint32_t u = 1, v = 0, q = 0, r = 1;
264  uint32_t f = f0, g = g0, m;
265  uint16_t w;
266  int i = 30, limit, zeros;
267 
268  for (;;) {
269  /* Use a sentinel bit to count zeros only up to i. */
270  zeros = secp256k1_ctz32_var(g | (UINT32_MAX << i));
271  /* Perform zeros divsteps at once; they all just divide g by two. */
272  g >>= zeros;
273  u <<= zeros;
274  v <<= zeros;
275  eta -= zeros;
276  i -= zeros;
277  /* We're done once we've done 30 divsteps. */
278  if (i == 0) break;
279  VERIFY_CHECK((f & 1) == 1);
280  VERIFY_CHECK((g & 1) == 1);
281  VERIFY_CHECK((u * f0 + v * g0) == f << (30 - i));
282  VERIFY_CHECK((q * f0 + r * g0) == g << (30 - i));
283  /* Bounds on eta that follow from the bounds on iteration count (max 25*30 divsteps). */
284  VERIFY_CHECK(eta >= -751 && eta <= 751);
285  /* If eta is negative, negate it and replace f,g with g,-f. */
286  if (eta < 0) {
287  uint32_t tmp;
288  eta = -eta;
289  tmp = f; f = g; g = -tmp;
290  tmp = u; u = q; q = -tmp;
291  tmp = v; v = r; r = -tmp;
292  }
293  /* eta is now >= 0. In what follows we're going to cancel out the bottom bits of g. No more
294  * than i can be cancelled out (as we'd be done before that point), and no more than eta+1
295  * can be done as its sign will flip once that happens. */
296  limit = ((int)eta + 1) > i ? i : ((int)eta + 1);
297  /* m is a mask for the bottom min(limit, 8) bits (our table only supports 8 bits). */
298  VERIFY_CHECK(limit > 0 && limit <= 30);
299  m = (UINT32_MAX >> (32 - limit)) & 255U;
300  /* Find what multiple of f must be added to g to cancel its bottom min(limit, 8) bits. */
301  w = (g * secp256k1_modinv32_inv256[(f >> 1) & 127]) & m;
302  /* Do so. */
303  g += f * w;
304  q += u * w;
305  r += v * w;
306  VERIFY_CHECK((g & m) == 0);
307  }
308  /* Return data in t and return value. */
309  t->u = (int32_t)u;
310  t->v = (int32_t)v;
311  t->q = (int32_t)q;
312  t->r = (int32_t)r;
313  /* The determinant of t must be a power of two. This guarantees that multiplication with t
314  * does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which
315  * will be divided out again). As each divstep's individual matrix has determinant 2, the
316  * aggregate of 30 of them will have determinant 2^30. */
317  VERIFY_CHECK((int64_t)t->u * t->r - (int64_t)t->v * t->q == ((int64_t)1) << 30);
318  return eta;
319 }
320 
321 /* Compute the transition matrix and eta for 30 posdivsteps (variable time, eta=-delta), and keeps track
322  * of the Jacobi symbol along the way. f0 and g0 must be f and g mod 2^32 rather than 2^30, because
323  * Jacobi tracking requires knowing (f mod 8) rather than just (f mod 2).
324  *
325  * Input: eta: initial eta
326  * f0: bottom limb of initial f
327  * g0: bottom limb of initial g
328  * Output: t: transition matrix
329  * Input/Output: (*jacp & 1) is bitflipped if and only if the Jacobi symbol of (f | g) changes sign
330  * by applying the returned transformation matrix to it. The other bits of *jacp may
331  * change, but are meaningless.
332  * Return: final eta
333  */
334 static int32_t secp256k1_modinv32_posdivsteps_30_var(int32_t eta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t, int *jacp) {
335  /* Transformation matrix. */
336  uint32_t u = 1, v = 0, q = 0, r = 1;
337  uint32_t f = f0, g = g0, m;
338  uint16_t w;
339  int i = 30, limit, zeros;
340  int jac = *jacp;
341 
342  for (;;) {
343  /* Use a sentinel bit to count zeros only up to i. */
344  zeros = secp256k1_ctz32_var(g | (UINT32_MAX << i));
345  /* Perform zeros divsteps at once; they all just divide g by two. */
346  g >>= zeros;
347  u <<= zeros;
348  v <<= zeros;
349  eta -= zeros;
350  i -= zeros;
351  /* Update the bottom bit of jac: when dividing g by an odd power of 2,
352  * if (f mod 8) is 3 or 5, the Jacobi symbol changes sign. */
353  jac ^= (zeros & ((f >> 1) ^ (f >> 2)));
354  /* We're done once we've done 30 posdivsteps. */
355  if (i == 0) break;
356  VERIFY_CHECK((f & 1) == 1);
357  VERIFY_CHECK((g & 1) == 1);
358  VERIFY_CHECK((u * f0 + v * g0) == f << (30 - i));
359  VERIFY_CHECK((q * f0 + r * g0) == g << (30 - i));
360  /* If eta is negative, negate it and replace f,g with g,f. */
361  if (eta < 0) {
362  uint32_t tmp;
363  eta = -eta;
364  /* Update bottom bit of jac: when swapping f and g, the Jacobi symbol changes sign
365  * if both f and g are 3 mod 4. */
366  jac ^= ((f & g) >> 1);
367  tmp = f; f = g; g = tmp;
368  tmp = u; u = q; q = tmp;
369  tmp = v; v = r; r = tmp;
370  }
371  /* eta is now >= 0. In what follows we're going to cancel out the bottom bits of g. No more
372  * than i can be cancelled out (as we'd be done before that point), and no more than eta+1
373  * can be done as its sign will flip once that happens. */
374  limit = ((int)eta + 1) > i ? i : ((int)eta + 1);
375  /* m is a mask for the bottom min(limit, 8) bits (our table only supports 8 bits). */
376  VERIFY_CHECK(limit > 0 && limit <= 30);
377  m = (UINT32_MAX >> (32 - limit)) & 255U;
378  /* Find what multiple of f must be added to g to cancel its bottom min(limit, 8) bits. */
379  w = (g * secp256k1_modinv32_inv256[(f >> 1) & 127]) & m;
380  /* Do so. */
381  g += f * w;
382  q += u * w;
383  r += v * w;
384  VERIFY_CHECK((g & m) == 0);
385  }
386  /* Return data in t and return value. */
387  t->u = (int32_t)u;
388  t->v = (int32_t)v;
389  t->q = (int32_t)q;
390  t->r = (int32_t)r;
391  /* The determinant of t must be a power of two. This guarantees that multiplication with t
392  * does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which
393  * will be divided out again). As each divstep's individual matrix has determinant 2 or -2,
394  * the aggregate of 30 of them will have determinant 2^30 or -2^30. */
395  VERIFY_CHECK((int64_t)t->u * t->r - (int64_t)t->v * t->q == ((int64_t)1) << 30 ||
396  (int64_t)t->u * t->r - (int64_t)t->v * t->q == -(((int64_t)1) << 30));
397  *jacp = jac;
398  return eta;
399 }
400 
401 /* Compute (t/2^30) * [d, e] mod modulus, where t is a transition matrix for 30 divsteps.
402  *
403  * On input and output, d and e are in range (-2*modulus,modulus). All output limbs will be in range
404  * (-2^30,2^30).
405  *
406  * This implements the update_de function from the explanation.
407  */
409  const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
410  const int32_t u = t->u, v = t->v, q = t->q, r = t->r;
411  int32_t di, ei, md, me, sd, se;
412  int64_t cd, ce;
413  int i;
414  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, 9, &modinfo->modulus, -2) > 0); /* d > -2*modulus */
415  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, 9, &modinfo->modulus, 1) < 0); /* d < modulus */
416  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, 9, &modinfo->modulus, -2) > 0); /* e > -2*modulus */
417  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, 9, &modinfo->modulus, 1) < 0); /* e < modulus */
418  VERIFY_CHECK(labs(u) <= (M30 + 1 - labs(v))); /* |u|+|v| <= 2^30 */
419  VERIFY_CHECK(labs(q) <= (M30 + 1 - labs(r))); /* |q|+|r| <= 2^30 */
420 
421  /* [md,me] start as zero; plus [u,q] if d is negative; plus [v,r] if e is negative. */
422  sd = d->v[8] >> 31;
423  se = e->v[8] >> 31;
424  md = (u & sd) + (v & se);
425  me = (q & sd) + (r & se);
426  /* Begin computing t*[d,e]. */
427  di = d->v[0];
428  ei = e->v[0];
429  cd = (int64_t)u * di + (int64_t)v * ei;
430  ce = (int64_t)q * di + (int64_t)r * ei;
431  /* Correct md,me so that t*[d,e]+modulus*[md,me] has 30 zero bottom bits. */
432  md -= (modinfo->modulus_inv30 * (uint32_t)cd + md) & M30;
433  me -= (modinfo->modulus_inv30 * (uint32_t)ce + me) & M30;
434  /* Update the beginning of computation for t*[d,e]+modulus*[md,me] now md,me are known. */
435  cd += (int64_t)modinfo->modulus.v[0] * md;
436  ce += (int64_t)modinfo->modulus.v[0] * me;
437  /* Verify that the low 30 bits of the computation are indeed zero, and then throw them away. */
438  VERIFY_CHECK(((int32_t)cd & M30) == 0); cd >>= 30;
439  VERIFY_CHECK(((int32_t)ce & M30) == 0); ce >>= 30;
440  /* Now iteratively compute limb i=1..8 of t*[d,e]+modulus*[md,me], and store them in output
441  * limb i-1 (shifting down by 30 bits). */
442  for (i = 1; i < 9; ++i) {
443  di = d->v[i];
444  ei = e->v[i];
445  cd += (int64_t)u * di + (int64_t)v * ei;
446  ce += (int64_t)q * di + (int64_t)r * ei;
447  cd += (int64_t)modinfo->modulus.v[i] * md;
448  ce += (int64_t)modinfo->modulus.v[i] * me;
449  d->v[i - 1] = (int32_t)cd & M30; cd >>= 30;
450  e->v[i - 1] = (int32_t)ce & M30; ce >>= 30;
451  }
452  /* What remains is limb 9 of t*[d,e]+modulus*[md,me]; store it as output limb 8. */
453  d->v[8] = (int32_t)cd;
454  e->v[8] = (int32_t)ce;
455 
456  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, 9, &modinfo->modulus, -2) > 0); /* d > -2*modulus */
457  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, 9, &modinfo->modulus, 1) < 0); /* d < modulus */
458  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, 9, &modinfo->modulus, -2) > 0); /* e > -2*modulus */
459  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, 9, &modinfo->modulus, 1) < 0); /* e < modulus */
460 }
461 
462 /* Compute (t/2^30) * [f, g], where t is a transition matrix for 30 divsteps.
463  *
464  * This implements the update_fg function from the explanation.
465  */
467  const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
468  const int32_t u = t->u, v = t->v, q = t->q, r = t->r;
469  int32_t fi, gi;
470  int64_t cf, cg;
471  int i;
472  /* Start computing t*[f,g]. */
473  fi = f->v[0];
474  gi = g->v[0];
475  cf = (int64_t)u * fi + (int64_t)v * gi;
476  cg = (int64_t)q * fi + (int64_t)r * gi;
477  /* Verify that the bottom 30 bits of the result are zero, and then throw them away. */
478  VERIFY_CHECK(((int32_t)cf & M30) == 0); cf >>= 30;
479  VERIFY_CHECK(((int32_t)cg & M30) == 0); cg >>= 30;
480  /* Now iteratively compute limb i=1..8 of t*[f,g], and store them in output limb i-1 (shifting
481  * down by 30 bits). */
482  for (i = 1; i < 9; ++i) {
483  fi = f->v[i];
484  gi = g->v[i];
485  cf += (int64_t)u * fi + (int64_t)v * gi;
486  cg += (int64_t)q * fi + (int64_t)r * gi;
487  f->v[i - 1] = (int32_t)cf & M30; cf >>= 30;
488  g->v[i - 1] = (int32_t)cg & M30; cg >>= 30;
489  }
490  /* What remains is limb 9 of t*[f,g]; store it as output limb 8. */
491  f->v[8] = (int32_t)cf;
492  g->v[8] = (int32_t)cg;
493 }
494 
495 /* Compute (t/2^30) * [f, g], where t is a transition matrix for 30 divsteps.
496  *
497  * Version that operates on a variable number of limbs in f and g.
498  *
499  * This implements the update_fg function from the explanation in modinv64_impl.h.
500  */
502  const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
503  const int32_t u = t->u, v = t->v, q = t->q, r = t->r;
504  int32_t fi, gi;
505  int64_t cf, cg;
506  int i;
507  VERIFY_CHECK(len > 0);
508  /* Start computing t*[f,g]. */
509  fi = f->v[0];
510  gi = g->v[0];
511  cf = (int64_t)u * fi + (int64_t)v * gi;
512  cg = (int64_t)q * fi + (int64_t)r * gi;
513  /* Verify that the bottom 62 bits of the result are zero, and then throw them away. */
514  VERIFY_CHECK(((int32_t)cf & M30) == 0); cf >>= 30;
515  VERIFY_CHECK(((int32_t)cg & M30) == 0); cg >>= 30;
516  /* Now iteratively compute limb i=1..len of t*[f,g], and store them in output limb i-1 (shifting
517  * down by 30 bits). */
518  for (i = 1; i < len; ++i) {
519  fi = f->v[i];
520  gi = g->v[i];
521  cf += (int64_t)u * fi + (int64_t)v * gi;
522  cg += (int64_t)q * fi + (int64_t)r * gi;
523  f->v[i - 1] = (int32_t)cf & M30; cf >>= 30;
524  g->v[i - 1] = (int32_t)cg & M30; cg >>= 30;
525  }
526  /* What remains is limb (len) of t*[f,g]; store it as output limb (len-1). */
527  f->v[len - 1] = (int32_t)cf;
528  g->v[len - 1] = (int32_t)cg;
529 }
530 
531 /* Compute the inverse of x modulo modinfo->modulus, and replace x with it (constant time in x). */
533  /* Start with d=0, e=1, f=modulus, g=x, zeta=-1. */
534  secp256k1_modinv32_signed30 d = {{0}};
535  secp256k1_modinv32_signed30 e = {{1}};
538  int i;
539  int32_t zeta = -1; /* zeta = -(delta+1/2); delta is initially 1/2. */
540 
541  /* Do 20 iterations of 30 divsteps each = 600 divsteps. 590 suffices for 256-bit inputs. */
542  for (i = 0; i < 20; ++i) {
543  /* Compute transition matrix and new zeta after 30 divsteps. */
545  zeta = secp256k1_modinv32_divsteps_30(zeta, f.v[0], g.v[0], &t);
546  /* Update d,e using that transition matrix. */
547  secp256k1_modinv32_update_de_30(&d, &e, &t, modinfo);
548  /* Update f,g using that transition matrix. */
549  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, -1) > 0); /* f > -modulus */
550  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, 1) <= 0); /* f <= modulus */
551  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &modinfo->modulus, -1) > 0); /* g > -modulus */
552  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &modinfo->modulus, 1) < 0); /* g < modulus */
553 
555 
556  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, -1) > 0); /* f > -modulus */
557  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, 1) <= 0); /* f <= modulus */
558  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &modinfo->modulus, -1) > 0); /* g > -modulus */
559  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &modinfo->modulus, 1) < 0); /* g < modulus */
560  }
561 
562  /* At this point sufficient iterations have been performed that g must have reached 0
563  * and (if g was not originally 0) f must now equal +/- GCD of the initial f, g
564  * values i.e. +/- 1, and d now contains +/- the modular inverse. */
565 
566  /* g == 0 */
567  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &SECP256K1_SIGNED30_ONE, 0) == 0);
568  /* |f| == 1, or (x == 0 and d == 0 and f == modulus) */
569  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &SECP256K1_SIGNED30_ONE, -1) == 0 ||
570  secp256k1_modinv32_mul_cmp_30(&f, 9, &SECP256K1_SIGNED30_ONE, 1) == 0 ||
571  (secp256k1_modinv32_mul_cmp_30(x, 9, &SECP256K1_SIGNED30_ONE, 0) == 0 &&
572  secp256k1_modinv32_mul_cmp_30(&d, 9, &SECP256K1_SIGNED30_ONE, 0) == 0 &&
573  secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, 1) == 0));
574 
575  /* Optionally negate d, normalize to [0,modulus), and return it. */
576  secp256k1_modinv32_normalize_30(&d, f.v[8], modinfo);
577  *x = d;
578 }
579 
580 /* Compute the inverse of x modulo modinfo->modulus, and replace x with it (variable time). */
582  /* Start with d=0, e=1, f=modulus, g=x, eta=-1. */
583  secp256k1_modinv32_signed30 d = {{0, 0, 0, 0, 0, 0, 0, 0, 0}};
584  secp256k1_modinv32_signed30 e = {{1, 0, 0, 0, 0, 0, 0, 0, 0}};
587 #ifdef VERIFY
588  int i = 0;
589 #endif
590  int j, len = 9;
591  int32_t eta = -1; /* eta = -delta; delta is initially 1 (faster for the variable-time code) */
592  int32_t cond, fn, gn;
593 
594  /* Do iterations of 30 divsteps each until g=0. */
595  while (1) {
596  /* Compute transition matrix and new eta after 30 divsteps. */
598  eta = secp256k1_modinv32_divsteps_30_var(eta, f.v[0], g.v[0], &t);
599  /* Update d,e using that transition matrix. */
600  secp256k1_modinv32_update_de_30(&d, &e, &t, modinfo);
601  /* Update f,g using that transition matrix. */
602 
603  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, -1) > 0); /* f > -modulus */
604  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */
605  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, -1) > 0); /* g > -modulus */
606  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 1) < 0); /* g < modulus */
607 
609  /* If the bottom limb of g is 0, there is a chance g=0. */
610  if (g.v[0] == 0) {
611  cond = 0;
612  /* Check if all other limbs are also 0. */
613  for (j = 1; j < len; ++j) {
614  cond |= g.v[j];
615  }
616  /* If so, we're done. */
617  if (cond == 0) break;
618  }
619 
620  /* Determine if len>1 and limb (len-1) of both f and g is 0 or -1. */
621  fn = f.v[len - 1];
622  gn = g.v[len - 1];
623  cond = ((int32_t)len - 2) >> 31;
624  cond |= fn ^ (fn >> 31);
625  cond |= gn ^ (gn >> 31);
626  /* If so, reduce length, propagating the sign of f and g's top limb into the one below. */
627  if (cond == 0) {
628  f.v[len - 2] |= (uint32_t)fn << 30;
629  g.v[len - 2] |= (uint32_t)gn << 30;
630  --len;
631  }
632 
633  VERIFY_CHECK(++i < 25); /* We should never need more than 25*30 = 750 divsteps */
634  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, -1) > 0); /* f > -modulus */
635  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */
636  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, -1) > 0); /* g > -modulus */
637  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 1) < 0); /* g < modulus */
638  }
639 
640  /* At this point g is 0 and (if g was not originally 0) f must now equal +/- GCD of
641  * the initial f, g values i.e. +/- 1, and d now contains +/- the modular inverse. */
642 
643  /* g == 0 */
644  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &SECP256K1_SIGNED30_ONE, 0) == 0);
645  /* |f| == 1, or (x == 0 and d == 0 and f == modulus) */
646  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &SECP256K1_SIGNED30_ONE, -1) == 0 ||
647  secp256k1_modinv32_mul_cmp_30(&f, len, &SECP256K1_SIGNED30_ONE, 1) == 0 ||
648  (secp256k1_modinv32_mul_cmp_30(x, 9, &SECP256K1_SIGNED30_ONE, 0) == 0 &&
649  secp256k1_modinv32_mul_cmp_30(&d, 9, &SECP256K1_SIGNED30_ONE, 0) == 0 &&
650  secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) == 0));
651 
652  /* Optionally negate d, normalize to [0,modulus), and return it. */
653  secp256k1_modinv32_normalize_30(&d, f.v[len - 1], modinfo);
654  *x = d;
655 }
656 
657 /* Do up to 50 iterations of 30 posdivsteps (up to 1500 steps; more is extremely rare) each until f=1.
658  * In VERIFY mode use a lower number of iterations (750, close to the median 756), so failure actually occurs. */
659 #ifdef VERIFY
660 #define JACOBI32_ITERATIONS 25
661 #else
662 #define JACOBI32_ITERATIONS 50
663 #endif
664 
665 /* Compute the Jacobi symbol of x modulo modinfo->modulus (variable time). gcd(x,modulus) must be 1. */
667  /* Start with f=modulus, g=x, eta=-1. */
670  int j, len = 9;
671  int32_t eta = -1; /* eta = -delta; delta is initially 1 */
672  int32_t cond, fn, gn;
673  int jac = 0;
674  int count;
675 
676  /* The input limbs must all be non-negative. */
677  VERIFY_CHECK(g.v[0] >= 0 && g.v[1] >= 0 && g.v[2] >= 0 && g.v[3] >= 0 && g.v[4] >= 0 && g.v[5] >= 0 && g.v[6] >= 0 && g.v[7] >= 0 && g.v[8] >= 0);
678 
679  /* If x > 0, then if the loop below converges, it converges to f=g=gcd(x,modulus). Since we
680  * require that gcd(x,modulus)=1 and modulus>=3, x cannot be 0. Thus, we must reach f=1 (or
681  * time out). */
682  VERIFY_CHECK((g.v[0] | g.v[1] | g.v[2] | g.v[3] | g.v[4] | g.v[5] | g.v[6] | g.v[7] | g.v[8]) != 0);
683 
684  for (count = 0; count < JACOBI32_ITERATIONS; ++count) {
685  /* Compute transition matrix and new eta after 30 posdivsteps. */
687  eta = secp256k1_modinv32_posdivsteps_30_var(eta, f.v[0] | ((uint32_t)f.v[1] << 30), g.v[0] | ((uint32_t)g.v[1] << 30), &t, &jac);
688  /* Update f,g using that transition matrix. */
689  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 0) > 0); /* f > 0 */
690  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */
691  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 0) > 0); /* g > 0 */
692  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 1) < 0); /* g < modulus */
693 
695  /* If the bottom limb of f is 1, there is a chance that f=1. */
696  if (f.v[0] == 1) {
697  cond = 0;
698  /* Check if the other limbs are also 0. */
699  for (j = 1; j < len; ++j) {
700  cond |= f.v[j];
701  }
702  /* If so, we're done. If f=1, the Jacobi symbol (g | f)=1. */
703  if (cond == 0) return 1 - 2*(jac & 1);
704  }
705 
706  /* Determine if len>1 and limb (len-1) of both f and g is 0. */
707  fn = f.v[len - 1];
708  gn = g.v[len - 1];
709  cond = ((int32_t)len - 2) >> 31;
710  cond |= fn;
711  cond |= gn;
712  /* If so, reduce length. */
713  if (cond == 0) --len;
714 
715  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 0) > 0); /* f > 0 */
716  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */
717  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 0) > 0); /* g > 0 */
718  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 1) < 0); /* g < modulus */
719  }
720 
721  /* The loop failed to converge to f=g after 1500 iterations. Return 0, indicating unknown result. */
722  return 0;
723 }
724 
725 #endif /* SECP256K1_MODINV32_IMPL_H */
#define VERIFY_CHECK(cond)
Definition: util.h:159
static void secp256k1_modinv32(secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo)
static void secp256k1_modinv32_update_fg_30(secp256k1_modinv32_signed30 *f, secp256k1_modinv32_signed30 *g, const secp256k1_modinv32_trans2x2 *t)
static int secp256k1_jacobi32_maybe_var(const secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo)
static SECP256K1_INLINE int secp256k1_ctz32_var(uint32_t x)
Definition: util.h:364
static void secp256k1_modinv32_normalize_30(secp256k1_modinv32_signed30 *r, int32_t sign, const secp256k1_modinv32_modinfo *modinfo)
Definition: modinv32_impl.h:63
#define JACOBI32_ITERATIONS
secp256k1_modinv32_signed30 modulus
Definition: modinv32.h:21
static const uint8_t secp256k1_modinv32_inv256[128]
static void secp256k1_modinv32_update_de_30(secp256k1_modinv32_signed30 *d, secp256k1_modinv32_signed30 *e, const secp256k1_modinv32_trans2x2 *t, const secp256k1_modinv32_modinfo *modinfo)
static void secp256k1_modinv32_var(secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo)
static int sign(const secp256k1_context *ctx, struct signer_secrets *signer_secrets, struct signer *signer, const secp256k1_musig_keyagg_cache *cache, const unsigned char *msg32, unsigned char *sig64)
Definition: musig.c:105
static int32_t secp256k1_modinv32_divsteps_30(int32_t zeta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t)
static int32_t secp256k1_modinv32_posdivsteps_30_var(int32_t eta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t, int *jacp)
static int32_t secp256k1_modinv32_divsteps_30_var(int32_t eta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t)
static void secp256k1_modinv32_update_fg_30_var(int len, secp256k1_modinv32_signed30 *f, secp256k1_modinv32_signed30 *g, const secp256k1_modinv32_trans2x2 *t)
static int count