Bitcoin Core  26.1.0
P2P Digital Currency
modinv32_impl.h
Go to the documentation of this file.
1 /***********************************************************************
2  * Copyright (c) 2020 Peter Dettman *
3  * Distributed under the MIT software license, see the accompanying *
4  * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
5  **********************************************************************/
6 
7 #ifndef SECP256K1_MODINV32_IMPL_H
8 #define SECP256K1_MODINV32_IMPL_H
9 
10 #include "modinv32.h"
11 
12 #include "util.h"
13 
14 #include <stdlib.h>
15 
16 /* This file implements modular inversion based on the paper "Fast constant-time gcd computation and
17  * modular inversion" by Daniel J. Bernstein and Bo-Yin Yang.
18  *
19  * For an explanation of the algorithm, see doc/safegcd_implementation.md. This file contains an
20  * implementation for N=30, using 30-bit signed limbs represented as int32_t.
21  */
22 
23 #ifdef VERIFY
24 static const secp256k1_modinv32_signed30 SECP256K1_SIGNED30_ONE = {{1}};
25 
26 /* Compute a*factor and put it in r. All but the top limb in r will be in range [0,2^30). */
27 static void secp256k1_modinv32_mul_30(secp256k1_modinv32_signed30 *r, const secp256k1_modinv32_signed30 *a, int alen, int32_t factor) {
28  const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
29  int64_t c = 0;
30  int i;
31  for (i = 0; i < 8; ++i) {
32  if (i < alen) c += (int64_t)a->v[i] * factor;
33  r->v[i] = (int32_t)c & M30; c >>= 30;
34  }
35  if (8 < alen) c += (int64_t)a->v[8] * factor;
36  VERIFY_CHECK(c == (int32_t)c);
37  r->v[8] = (int32_t)c;
38 }
39 
40 /* Return -1 for a<b*factor, 0 for a==b*factor, 1 for a>b*factor. A consists of alen limbs; b has 9. */
41 static int secp256k1_modinv32_mul_cmp_30(const secp256k1_modinv32_signed30 *a, int alen, const secp256k1_modinv32_signed30 *b, int32_t factor) {
42  int i;
44  secp256k1_modinv32_mul_30(&am, a, alen, 1); /* Normalize all but the top limb of a. */
45  secp256k1_modinv32_mul_30(&bm, b, 9, factor);
46  for (i = 0; i < 8; ++i) {
47  /* Verify that all but the top limb of a and b are normalized. */
48  VERIFY_CHECK(am.v[i] >> 30 == 0);
49  VERIFY_CHECK(bm.v[i] >> 30 == 0);
50  }
51  for (i = 8; i >= 0; --i) {
52  if (am.v[i] < bm.v[i]) return -1;
53  if (am.v[i] > bm.v[i]) return 1;
54  }
55  return 0;
56 }
57 #endif
58 
59 /* Take as input a signed30 number in range (-2*modulus,modulus), and add a multiple of the modulus
60  * to it to bring it to range [0,modulus). If sign < 0, the input will also be negated in the
61  * process. The input must have limbs in range (-2^30,2^30). The output will have limbs in range
62  * [0,2^30). */
64  const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
65  int32_t r0 = r->v[0], r1 = r->v[1], r2 = r->v[2], r3 = r->v[3], r4 = r->v[4],
66  r5 = r->v[5], r6 = r->v[6], r7 = r->v[7], r8 = r->v[8];
67  volatile int32_t cond_add, cond_negate;
68 
69 #ifdef VERIFY
70  /* Verify that all limbs are in range (-2^30,2^30). */
71  int i;
72  for (i = 0; i < 9; ++i) {
73  VERIFY_CHECK(r->v[i] >= -M30);
74  VERIFY_CHECK(r->v[i] <= M30);
75  }
76  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, -2) > 0); /* r > -2*modulus */
77  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, 1) < 0); /* r < modulus */
78 #endif
79 
80  /* In a first step, add the modulus if the input is negative, and then negate if requested.
81  * This brings r from range (-2*modulus,modulus) to range (-modulus,modulus). As all input
82  * limbs are in range (-2^30,2^30), this cannot overflow an int32_t. Note that the right
83  * shifts below are signed sign-extending shifts (see assumptions.h for tests that that is
84  * indeed the behavior of the right shift operator). */
85  cond_add = r8 >> 31;
86  r0 += modinfo->modulus.v[0] & cond_add;
87  r1 += modinfo->modulus.v[1] & cond_add;
88  r2 += modinfo->modulus.v[2] & cond_add;
89  r3 += modinfo->modulus.v[3] & cond_add;
90  r4 += modinfo->modulus.v[4] & cond_add;
91  r5 += modinfo->modulus.v[5] & cond_add;
92  r6 += modinfo->modulus.v[6] & cond_add;
93  r7 += modinfo->modulus.v[7] & cond_add;
94  r8 += modinfo->modulus.v[8] & cond_add;
95  cond_negate = sign >> 31;
96  r0 = (r0 ^ cond_negate) - cond_negate;
97  r1 = (r1 ^ cond_negate) - cond_negate;
98  r2 = (r2 ^ cond_negate) - cond_negate;
99  r3 = (r3 ^ cond_negate) - cond_negate;
100  r4 = (r4 ^ cond_negate) - cond_negate;
101  r5 = (r5 ^ cond_negate) - cond_negate;
102  r6 = (r6 ^ cond_negate) - cond_negate;
103  r7 = (r7 ^ cond_negate) - cond_negate;
104  r8 = (r8 ^ cond_negate) - cond_negate;
105  /* Propagate the top bits, to bring limbs back to range (-2^30,2^30). */
106  r1 += r0 >> 30; r0 &= M30;
107  r2 += r1 >> 30; r1 &= M30;
108  r3 += r2 >> 30; r2 &= M30;
109  r4 += r3 >> 30; r3 &= M30;
110  r5 += r4 >> 30; r4 &= M30;
111  r6 += r5 >> 30; r5 &= M30;
112  r7 += r6 >> 30; r6 &= M30;
113  r8 += r7 >> 30; r7 &= M30;
114 
115  /* In a second step add the modulus again if the result is still negative, bringing r to range
116  * [0,modulus). */
117  cond_add = r8 >> 31;
118  r0 += modinfo->modulus.v[0] & cond_add;
119  r1 += modinfo->modulus.v[1] & cond_add;
120  r2 += modinfo->modulus.v[2] & cond_add;
121  r3 += modinfo->modulus.v[3] & cond_add;
122  r4 += modinfo->modulus.v[4] & cond_add;
123  r5 += modinfo->modulus.v[5] & cond_add;
124  r6 += modinfo->modulus.v[6] & cond_add;
125  r7 += modinfo->modulus.v[7] & cond_add;
126  r8 += modinfo->modulus.v[8] & cond_add;
127  /* And propagate again. */
128  r1 += r0 >> 30; r0 &= M30;
129  r2 += r1 >> 30; r1 &= M30;
130  r3 += r2 >> 30; r2 &= M30;
131  r4 += r3 >> 30; r3 &= M30;
132  r5 += r4 >> 30; r4 &= M30;
133  r6 += r5 >> 30; r5 &= M30;
134  r7 += r6 >> 30; r6 &= M30;
135  r8 += r7 >> 30; r7 &= M30;
136 
137  r->v[0] = r0;
138  r->v[1] = r1;
139  r->v[2] = r2;
140  r->v[3] = r3;
141  r->v[4] = r4;
142  r->v[5] = r5;
143  r->v[6] = r6;
144  r->v[7] = r7;
145  r->v[8] = r8;
146 
147 #ifdef VERIFY
148  VERIFY_CHECK(r0 >> 30 == 0);
149  VERIFY_CHECK(r1 >> 30 == 0);
150  VERIFY_CHECK(r2 >> 30 == 0);
151  VERIFY_CHECK(r3 >> 30 == 0);
152  VERIFY_CHECK(r4 >> 30 == 0);
153  VERIFY_CHECK(r5 >> 30 == 0);
154  VERIFY_CHECK(r6 >> 30 == 0);
155  VERIFY_CHECK(r7 >> 30 == 0);
156  VERIFY_CHECK(r8 >> 30 == 0);
157  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, 0) >= 0); /* r >= 0 */
158  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, 1) < 0); /* r < modulus */
159 #endif
160 }
161 
162 /* Data type for transition matrices (see section 3 of explanation).
163  *
164  * t = [ u v ]
165  * [ q r ]
166  */
167 typedef struct {
168  int32_t u, v, q, r;
170 
171 /* Compute the transition matrix and zeta for 30 divsteps.
172  *
173  * Input: zeta: initial zeta
174  * f0: bottom limb of initial f
175  * g0: bottom limb of initial g
176  * Output: t: transition matrix
177  * Return: final zeta
178  *
179  * Implements the divsteps_n_matrix function from the explanation.
180  */
181 static int32_t secp256k1_modinv32_divsteps_30(int32_t zeta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t) {
182  /* u,v,q,r are the elements of the transformation matrix being built up,
183  * starting with the identity matrix. Semantically they are signed integers
184  * in range [-2^30,2^30], but here represented as unsigned mod 2^32. This
185  * permits left shifting (which is UB for negative numbers). The range
186  * being inside [-2^31,2^31) means that casting to signed works correctly.
187  */
188  uint32_t u = 1, v = 0, q = 0, r = 1;
189  volatile uint32_t c1, c2;
190  uint32_t mask1, mask2, f = f0, g = g0, x, y, z;
191  int i;
192 
193  for (i = 0; i < 30; ++i) {
194  VERIFY_CHECK((f & 1) == 1); /* f must always be odd */
195  VERIFY_CHECK((u * f0 + v * g0) == f << i);
196  VERIFY_CHECK((q * f0 + r * g0) == g << i);
197  /* Compute conditional masks for (zeta < 0) and for (g & 1). */
198  c1 = zeta >> 31;
199  mask1 = c1;
200  c2 = g & 1;
201  mask2 = -c2;
202  /* Compute x,y,z, conditionally negated versions of f,u,v. */
203  x = (f ^ mask1) - mask1;
204  y = (u ^ mask1) - mask1;
205  z = (v ^ mask1) - mask1;
206  /* Conditionally add x,y,z to g,q,r. */
207  g += x & mask2;
208  q += y & mask2;
209  r += z & mask2;
210  /* In what follows, mask1 is a condition mask for (zeta < 0) and (g & 1). */
211  mask1 &= mask2;
212  /* Conditionally change zeta into -zeta-2 or zeta-1. */
213  zeta = (zeta ^ mask1) - 1;
214  /* Conditionally add g,q,r to f,u,v. */
215  f += g & mask1;
216  u += q & mask1;
217  v += r & mask1;
218  /* Shifts */
219  g >>= 1;
220  u <<= 1;
221  v <<= 1;
222  /* Bounds on zeta that follow from the bounds on iteration count (max 20*30 divsteps). */
223  VERIFY_CHECK(zeta >= -601 && zeta <= 601);
224  }
225  /* Return data in t and return value. */
226  t->u = (int32_t)u;
227  t->v = (int32_t)v;
228  t->q = (int32_t)q;
229  t->r = (int32_t)r;
230  /* The determinant of t must be a power of two. This guarantees that multiplication with t
231  * does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which
232  * will be divided out again). As each divstep's individual matrix has determinant 2, the
233  * aggregate of 30 of them will have determinant 2^30. */
234  VERIFY_CHECK((int64_t)t->u * t->r - (int64_t)t->v * t->q == ((int64_t)1) << 30);
235  return zeta;
236 }
237 
238 /* secp256k1_modinv32_inv256[i] = -(2*i+1)^-1 (mod 256) */
239 static const uint8_t secp256k1_modinv32_inv256[128] = {
240  0xFF, 0x55, 0x33, 0x49, 0xC7, 0x5D, 0x3B, 0x11, 0x0F, 0xE5, 0xC3, 0x59,
241  0xD7, 0xED, 0xCB, 0x21, 0x1F, 0x75, 0x53, 0x69, 0xE7, 0x7D, 0x5B, 0x31,
242  0x2F, 0x05, 0xE3, 0x79, 0xF7, 0x0D, 0xEB, 0x41, 0x3F, 0x95, 0x73, 0x89,
243  0x07, 0x9D, 0x7B, 0x51, 0x4F, 0x25, 0x03, 0x99, 0x17, 0x2D, 0x0B, 0x61,
244  0x5F, 0xB5, 0x93, 0xA9, 0x27, 0xBD, 0x9B, 0x71, 0x6F, 0x45, 0x23, 0xB9,
245  0x37, 0x4D, 0x2B, 0x81, 0x7F, 0xD5, 0xB3, 0xC9, 0x47, 0xDD, 0xBB, 0x91,
246  0x8F, 0x65, 0x43, 0xD9, 0x57, 0x6D, 0x4B, 0xA1, 0x9F, 0xF5, 0xD3, 0xE9,
247  0x67, 0xFD, 0xDB, 0xB1, 0xAF, 0x85, 0x63, 0xF9, 0x77, 0x8D, 0x6B, 0xC1,
248  0xBF, 0x15, 0xF3, 0x09, 0x87, 0x1D, 0xFB, 0xD1, 0xCF, 0xA5, 0x83, 0x19,
249  0x97, 0xAD, 0x8B, 0xE1, 0xDF, 0x35, 0x13, 0x29, 0xA7, 0x3D, 0x1B, 0xF1,
250  0xEF, 0xC5, 0xA3, 0x39, 0xB7, 0xCD, 0xAB, 0x01
251 };
252 
253 /* Compute the transition matrix and eta for 30 divsteps (variable time).
254  *
255  * Input: eta: initial eta
256  * f0: bottom limb of initial f
257  * g0: bottom limb of initial g
258  * Output: t: transition matrix
259  * Return: final eta
260  *
261  * Implements the divsteps_n_matrix_var function from the explanation.
262  */
263 static int32_t secp256k1_modinv32_divsteps_30_var(int32_t eta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t) {
264  /* Transformation matrix; see comments in secp256k1_modinv32_divsteps_30. */
265  uint32_t u = 1, v = 0, q = 0, r = 1;
266  uint32_t f = f0, g = g0, m;
267  uint16_t w;
268  int i = 30, limit, zeros;
269 
270  for (;;) {
271  /* Use a sentinel bit to count zeros only up to i. */
272  zeros = secp256k1_ctz32_var(g | (UINT32_MAX << i));
273  /* Perform zeros divsteps at once; they all just divide g by two. */
274  g >>= zeros;
275  u <<= zeros;
276  v <<= zeros;
277  eta -= zeros;
278  i -= zeros;
279  /* We're done once we've done 30 divsteps. */
280  if (i == 0) break;
281  VERIFY_CHECK((f & 1) == 1);
282  VERIFY_CHECK((g & 1) == 1);
283  VERIFY_CHECK((u * f0 + v * g0) == f << (30 - i));
284  VERIFY_CHECK((q * f0 + r * g0) == g << (30 - i));
285  /* Bounds on eta that follow from the bounds on iteration count (max 25*30 divsteps). */
286  VERIFY_CHECK(eta >= -751 && eta <= 751);
287  /* If eta is negative, negate it and replace f,g with g,-f. */
288  if (eta < 0) {
289  uint32_t tmp;
290  eta = -eta;
291  tmp = f; f = g; g = -tmp;
292  tmp = u; u = q; q = -tmp;
293  tmp = v; v = r; r = -tmp;
294  }
295  /* eta is now >= 0. In what follows we're going to cancel out the bottom bits of g. No more
296  * than i can be cancelled out (as we'd be done before that point), and no more than eta+1
297  * can be done as its sign will flip once that happens. */
298  limit = ((int)eta + 1) > i ? i : ((int)eta + 1);
299  /* m is a mask for the bottom min(limit, 8) bits (our table only supports 8 bits). */
300  VERIFY_CHECK(limit > 0 && limit <= 30);
301  m = (UINT32_MAX >> (32 - limit)) & 255U;
302  /* Find what multiple of f must be added to g to cancel its bottom min(limit, 8) bits. */
303  w = (g * secp256k1_modinv32_inv256[(f >> 1) & 127]) & m;
304  /* Do so. */
305  g += f * w;
306  q += u * w;
307  r += v * w;
308  VERIFY_CHECK((g & m) == 0);
309  }
310  /* Return data in t and return value. */
311  t->u = (int32_t)u;
312  t->v = (int32_t)v;
313  t->q = (int32_t)q;
314  t->r = (int32_t)r;
315  /* The determinant of t must be a power of two. This guarantees that multiplication with t
316  * does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which
317  * will be divided out again). As each divstep's individual matrix has determinant 2, the
318  * aggregate of 30 of them will have determinant 2^30. */
319  VERIFY_CHECK((int64_t)t->u * t->r - (int64_t)t->v * t->q == ((int64_t)1) << 30);
320  return eta;
321 }
322 
323 /* Compute the transition matrix and eta for 30 posdivsteps (variable time, eta=-delta), and keeps track
324  * of the Jacobi symbol along the way. f0 and g0 must be f and g mod 2^32 rather than 2^30, because
325  * Jacobi tracking requires knowing (f mod 8) rather than just (f mod 2).
326  *
327  * Input: eta: initial eta
328  * f0: bottom limb of initial f
329  * g0: bottom limb of initial g
330  * Output: t: transition matrix
331  * Input/Output: (*jacp & 1) is bitflipped if and only if the Jacobi symbol of (f | g) changes sign
332  * by applying the returned transformation matrix to it. The other bits of *jacp may
333  * change, but are meaningless.
334  * Return: final eta
335  */
336 static int32_t secp256k1_modinv32_posdivsteps_30_var(int32_t eta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t, int *jacp) {
337  /* Transformation matrix. */
338  uint32_t u = 1, v = 0, q = 0, r = 1;
339  uint32_t f = f0, g = g0, m;
340  uint16_t w;
341  int i = 30, limit, zeros;
342  int jac = *jacp;
343 
344  for (;;) {
345  /* Use a sentinel bit to count zeros only up to i. */
346  zeros = secp256k1_ctz32_var(g | (UINT32_MAX << i));
347  /* Perform zeros divsteps at once; they all just divide g by two. */
348  g >>= zeros;
349  u <<= zeros;
350  v <<= zeros;
351  eta -= zeros;
352  i -= zeros;
353  /* Update the bottom bit of jac: when dividing g by an odd power of 2,
354  * if (f mod 8) is 3 or 5, the Jacobi symbol changes sign. */
355  jac ^= (zeros & ((f >> 1) ^ (f >> 2)));
356  /* We're done once we've done 30 posdivsteps. */
357  if (i == 0) break;
358  VERIFY_CHECK((f & 1) == 1);
359  VERIFY_CHECK((g & 1) == 1);
360  VERIFY_CHECK((u * f0 + v * g0) == f << (30 - i));
361  VERIFY_CHECK((q * f0 + r * g0) == g << (30 - i));
362  /* If eta is negative, negate it and replace f,g with g,f. */
363  if (eta < 0) {
364  uint32_t tmp;
365  eta = -eta;
366  /* Update bottom bit of jac: when swapping f and g, the Jacobi symbol changes sign
367  * if both f and g are 3 mod 4. */
368  jac ^= ((f & g) >> 1);
369  tmp = f; f = g; g = tmp;
370  tmp = u; u = q; q = tmp;
371  tmp = v; v = r; r = tmp;
372  }
373  /* eta is now >= 0. In what follows we're going to cancel out the bottom bits of g. No more
374  * than i can be cancelled out (as we'd be done before that point), and no more than eta+1
375  * can be done as its sign will flip once that happens. */
376  limit = ((int)eta + 1) > i ? i : ((int)eta + 1);
377  /* m is a mask for the bottom min(limit, 8) bits (our table only supports 8 bits). */
378  VERIFY_CHECK(limit > 0 && limit <= 30);
379  m = (UINT32_MAX >> (32 - limit)) & 255U;
380  /* Find what multiple of f must be added to g to cancel its bottom min(limit, 8) bits. */
381  w = (g * secp256k1_modinv32_inv256[(f >> 1) & 127]) & m;
382  /* Do so. */
383  g += f * w;
384  q += u * w;
385  r += v * w;
386  VERIFY_CHECK((g & m) == 0);
387  }
388  /* Return data in t and return value. */
389  t->u = (int32_t)u;
390  t->v = (int32_t)v;
391  t->q = (int32_t)q;
392  t->r = (int32_t)r;
393  /* The determinant of t must be a power of two. This guarantees that multiplication with t
394  * does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which
395  * will be divided out again). As each divstep's individual matrix has determinant 2 or -2,
396  * the aggregate of 30 of them will have determinant 2^30 or -2^30. */
397  VERIFY_CHECK((int64_t)t->u * t->r - (int64_t)t->v * t->q == ((int64_t)1) << 30 ||
398  (int64_t)t->u * t->r - (int64_t)t->v * t->q == -(((int64_t)1) << 30));
399  *jacp = jac;
400  return eta;
401 }
402 
403 /* Compute (t/2^30) * [d, e] mod modulus, where t is a transition matrix for 30 divsteps.
404  *
405  * On input and output, d and e are in range (-2*modulus,modulus). All output limbs will be in range
406  * (-2^30,2^30).
407  *
408  * This implements the update_de function from the explanation.
409  */
411  const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
412  const int32_t u = t->u, v = t->v, q = t->q, r = t->r;
413  int32_t di, ei, md, me, sd, se;
414  int64_t cd, ce;
415  int i;
416 #ifdef VERIFY
417  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, 9, &modinfo->modulus, -2) > 0); /* d > -2*modulus */
418  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, 9, &modinfo->modulus, 1) < 0); /* d < modulus */
419  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, 9, &modinfo->modulus, -2) > 0); /* e > -2*modulus */
420  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, 9, &modinfo->modulus, 1) < 0); /* e < modulus */
421  VERIFY_CHECK(labs(u) <= (M30 + 1 - labs(v))); /* |u|+|v| <= 2^30 */
422  VERIFY_CHECK(labs(q) <= (M30 + 1 - labs(r))); /* |q|+|r| <= 2^30 */
423 #endif
424  /* [md,me] start as zero; plus [u,q] if d is negative; plus [v,r] if e is negative. */
425  sd = d->v[8] >> 31;
426  se = e->v[8] >> 31;
427  md = (u & sd) + (v & se);
428  me = (q & sd) + (r & se);
429  /* Begin computing t*[d,e]. */
430  di = d->v[0];
431  ei = e->v[0];
432  cd = (int64_t)u * di + (int64_t)v * ei;
433  ce = (int64_t)q * di + (int64_t)r * ei;
434  /* Correct md,me so that t*[d,e]+modulus*[md,me] has 30 zero bottom bits. */
435  md -= (modinfo->modulus_inv30 * (uint32_t)cd + md) & M30;
436  me -= (modinfo->modulus_inv30 * (uint32_t)ce + me) & M30;
437  /* Update the beginning of computation for t*[d,e]+modulus*[md,me] now md,me are known. */
438  cd += (int64_t)modinfo->modulus.v[0] * md;
439  ce += (int64_t)modinfo->modulus.v[0] * me;
440  /* Verify that the low 30 bits of the computation are indeed zero, and then throw them away. */
441  VERIFY_CHECK(((int32_t)cd & M30) == 0); cd >>= 30;
442  VERIFY_CHECK(((int32_t)ce & M30) == 0); ce >>= 30;
443  /* Now iteratively compute limb i=1..8 of t*[d,e]+modulus*[md,me], and store them in output
444  * limb i-1 (shifting down by 30 bits). */
445  for (i = 1; i < 9; ++i) {
446  di = d->v[i];
447  ei = e->v[i];
448  cd += (int64_t)u * di + (int64_t)v * ei;
449  ce += (int64_t)q * di + (int64_t)r * ei;
450  cd += (int64_t)modinfo->modulus.v[i] * md;
451  ce += (int64_t)modinfo->modulus.v[i] * me;
452  d->v[i - 1] = (int32_t)cd & M30; cd >>= 30;
453  e->v[i - 1] = (int32_t)ce & M30; ce >>= 30;
454  }
455  /* What remains is limb 9 of t*[d,e]+modulus*[md,me]; store it as output limb 8. */
456  d->v[8] = (int32_t)cd;
457  e->v[8] = (int32_t)ce;
458 #ifdef VERIFY
459  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, 9, &modinfo->modulus, -2) > 0); /* d > -2*modulus */
460  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, 9, &modinfo->modulus, 1) < 0); /* d < modulus */
461  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, 9, &modinfo->modulus, -2) > 0); /* e > -2*modulus */
462  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, 9, &modinfo->modulus, 1) < 0); /* e < modulus */
463 #endif
464 }
465 
466 /* Compute (t/2^30) * [f, g], where t is a transition matrix for 30 divsteps.
467  *
468  * This implements the update_fg function from the explanation.
469  */
471  const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
472  const int32_t u = t->u, v = t->v, q = t->q, r = t->r;
473  int32_t fi, gi;
474  int64_t cf, cg;
475  int i;
476  /* Start computing t*[f,g]. */
477  fi = f->v[0];
478  gi = g->v[0];
479  cf = (int64_t)u * fi + (int64_t)v * gi;
480  cg = (int64_t)q * fi + (int64_t)r * gi;
481  /* Verify that the bottom 30 bits of the result are zero, and then throw them away. */
482  VERIFY_CHECK(((int32_t)cf & M30) == 0); cf >>= 30;
483  VERIFY_CHECK(((int32_t)cg & M30) == 0); cg >>= 30;
484  /* Now iteratively compute limb i=1..8 of t*[f,g], and store them in output limb i-1 (shifting
485  * down by 30 bits). */
486  for (i = 1; i < 9; ++i) {
487  fi = f->v[i];
488  gi = g->v[i];
489  cf += (int64_t)u * fi + (int64_t)v * gi;
490  cg += (int64_t)q * fi + (int64_t)r * gi;
491  f->v[i - 1] = (int32_t)cf & M30; cf >>= 30;
492  g->v[i - 1] = (int32_t)cg & M30; cg >>= 30;
493  }
494  /* What remains is limb 9 of t*[f,g]; store it as output limb 8. */
495  f->v[8] = (int32_t)cf;
496  g->v[8] = (int32_t)cg;
497 }
498 
499 /* Compute (t/2^30) * [f, g], where t is a transition matrix for 30 divsteps.
500  *
501  * Version that operates on a variable number of limbs in f and g.
502  *
503  * This implements the update_fg function from the explanation in modinv64_impl.h.
504  */
506  const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
507  const int32_t u = t->u, v = t->v, q = t->q, r = t->r;
508  int32_t fi, gi;
509  int64_t cf, cg;
510  int i;
511  VERIFY_CHECK(len > 0);
512  /* Start computing t*[f,g]. */
513  fi = f->v[0];
514  gi = g->v[0];
515  cf = (int64_t)u * fi + (int64_t)v * gi;
516  cg = (int64_t)q * fi + (int64_t)r * gi;
517  /* Verify that the bottom 62 bits of the result are zero, and then throw them away. */
518  VERIFY_CHECK(((int32_t)cf & M30) == 0); cf >>= 30;
519  VERIFY_CHECK(((int32_t)cg & M30) == 0); cg >>= 30;
520  /* Now iteratively compute limb i=1..len of t*[f,g], and store them in output limb i-1 (shifting
521  * down by 30 bits). */
522  for (i = 1; i < len; ++i) {
523  fi = f->v[i];
524  gi = g->v[i];
525  cf += (int64_t)u * fi + (int64_t)v * gi;
526  cg += (int64_t)q * fi + (int64_t)r * gi;
527  f->v[i - 1] = (int32_t)cf & M30; cf >>= 30;
528  g->v[i - 1] = (int32_t)cg & M30; cg >>= 30;
529  }
530  /* What remains is limb (len) of t*[f,g]; store it as output limb (len-1). */
531  f->v[len - 1] = (int32_t)cf;
532  g->v[len - 1] = (int32_t)cg;
533 }
534 
535 /* Compute the inverse of x modulo modinfo->modulus, and replace x with it (constant time in x). */
537  /* Start with d=0, e=1, f=modulus, g=x, zeta=-1. */
538  secp256k1_modinv32_signed30 d = {{0}};
539  secp256k1_modinv32_signed30 e = {{1}};
542  int i;
543  int32_t zeta = -1; /* zeta = -(delta+1/2); delta is initially 1/2. */
544 
545  /* Do 20 iterations of 30 divsteps each = 600 divsteps. 590 suffices for 256-bit inputs. */
546  for (i = 0; i < 20; ++i) {
547  /* Compute transition matrix and new zeta after 30 divsteps. */
549  zeta = secp256k1_modinv32_divsteps_30(zeta, f.v[0], g.v[0], &t);
550  /* Update d,e using that transition matrix. */
551  secp256k1_modinv32_update_de_30(&d, &e, &t, modinfo);
552  /* Update f,g using that transition matrix. */
553 #ifdef VERIFY
554  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, -1) > 0); /* f > -modulus */
555  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, 1) <= 0); /* f <= modulus */
556  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &modinfo->modulus, -1) > 0); /* g > -modulus */
557  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &modinfo->modulus, 1) < 0); /* g < modulus */
558 #endif
560 #ifdef VERIFY
561  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, -1) > 0); /* f > -modulus */
562  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, 1) <= 0); /* f <= modulus */
563  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &modinfo->modulus, -1) > 0); /* g > -modulus */
564  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &modinfo->modulus, 1) < 0); /* g < modulus */
565 #endif
566  }
567 
568  /* At this point sufficient iterations have been performed that g must have reached 0
569  * and (if g was not originally 0) f must now equal +/- GCD of the initial f, g
570  * values i.e. +/- 1, and d now contains +/- the modular inverse. */
571 #ifdef VERIFY
572  /* g == 0 */
573  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &SECP256K1_SIGNED30_ONE, 0) == 0);
574  /* |f| == 1, or (x == 0 and d == 0 and |f|=modulus) */
575  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &SECP256K1_SIGNED30_ONE, -1) == 0 ||
576  secp256k1_modinv32_mul_cmp_30(&f, 9, &SECP256K1_SIGNED30_ONE, 1) == 0 ||
577  (secp256k1_modinv32_mul_cmp_30(x, 9, &SECP256K1_SIGNED30_ONE, 0) == 0 &&
578  secp256k1_modinv32_mul_cmp_30(&d, 9, &SECP256K1_SIGNED30_ONE, 0) == 0 &&
579  (secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, 1) == 0 ||
580  secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, -1) == 0)));
581 #endif
582 
583  /* Optionally negate d, normalize to [0,modulus), and return it. */
584  secp256k1_modinv32_normalize_30(&d, f.v[8], modinfo);
585  *x = d;
586 }
587 
588 /* Compute the inverse of x modulo modinfo->modulus, and replace x with it (variable time). */
590  /* Start with d=0, e=1, f=modulus, g=x, eta=-1. */
591  secp256k1_modinv32_signed30 d = {{0, 0, 0, 0, 0, 0, 0, 0, 0}};
592  secp256k1_modinv32_signed30 e = {{1, 0, 0, 0, 0, 0, 0, 0, 0}};
595 #ifdef VERIFY
596  int i = 0;
597 #endif
598  int j, len = 9;
599  int32_t eta = -1; /* eta = -delta; delta is initially 1 (faster for the variable-time code) */
600  int32_t cond, fn, gn;
601 
602  /* Do iterations of 30 divsteps each until g=0. */
603  while (1) {
604  /* Compute transition matrix and new eta after 30 divsteps. */
606  eta = secp256k1_modinv32_divsteps_30_var(eta, f.v[0], g.v[0], &t);
607  /* Update d,e using that transition matrix. */
608  secp256k1_modinv32_update_de_30(&d, &e, &t, modinfo);
609  /* Update f,g using that transition matrix. */
610 #ifdef VERIFY
611  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, -1) > 0); /* f > -modulus */
612  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */
613  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, -1) > 0); /* g > -modulus */
614  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 1) < 0); /* g < modulus */
615 #endif
617  /* If the bottom limb of g is 0, there is a chance g=0. */
618  if (g.v[0] == 0) {
619  cond = 0;
620  /* Check if all other limbs are also 0. */
621  for (j = 1; j < len; ++j) {
622  cond |= g.v[j];
623  }
624  /* If so, we're done. */
625  if (cond == 0) break;
626  }
627 
628  /* Determine if len>1 and limb (len-1) of both f and g is 0 or -1. */
629  fn = f.v[len - 1];
630  gn = g.v[len - 1];
631  cond = ((int32_t)len - 2) >> 31;
632  cond |= fn ^ (fn >> 31);
633  cond |= gn ^ (gn >> 31);
634  /* If so, reduce length, propagating the sign of f and g's top limb into the one below. */
635  if (cond == 0) {
636  f.v[len - 2] |= (uint32_t)fn << 30;
637  g.v[len - 2] |= (uint32_t)gn << 30;
638  --len;
639  }
640 #ifdef VERIFY
641  VERIFY_CHECK(++i < 25); /* We should never need more than 25*30 = 750 divsteps */
642  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, -1) > 0); /* f > -modulus */
643  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */
644  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, -1) > 0); /* g > -modulus */
645  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 1) < 0); /* g < modulus */
646 #endif
647  }
648 
649  /* At this point g is 0 and (if g was not originally 0) f must now equal +/- GCD of
650  * the initial f, g values i.e. +/- 1, and d now contains +/- the modular inverse. */
651 #ifdef VERIFY
652  /* g == 0 */
653  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &SECP256K1_SIGNED30_ONE, 0) == 0);
654  /* |f| == 1, or (x == 0 and d == 0 and |f|=modulus) */
655  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &SECP256K1_SIGNED30_ONE, -1) == 0 ||
656  secp256k1_modinv32_mul_cmp_30(&f, len, &SECP256K1_SIGNED30_ONE, 1) == 0 ||
657  (secp256k1_modinv32_mul_cmp_30(x, 9, &SECP256K1_SIGNED30_ONE, 0) == 0 &&
658  secp256k1_modinv32_mul_cmp_30(&d, 9, &SECP256K1_SIGNED30_ONE, 0) == 0 &&
659  (secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) == 0 ||
660  secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, -1) == 0)));
661 #endif
662 
663  /* Optionally negate d, normalize to [0,modulus), and return it. */
664  secp256k1_modinv32_normalize_30(&d, f.v[len - 1], modinfo);
665  *x = d;
666 }
667 
668 /* Do up to 50 iterations of 30 posdivsteps (up to 1500 steps; more is extremely rare) each until f=1.
669  * In VERIFY mode use a lower number of iterations (750, close to the median 756), so failure actually occurs. */
670 #ifdef VERIFY
671 #define JACOBI32_ITERATIONS 25
672 #else
673 #define JACOBI32_ITERATIONS 50
674 #endif
675 
676 /* Compute the Jacobi symbol of x modulo modinfo->modulus (variable time). gcd(x,modulus) must be 1. */
678  /* Start with f=modulus, g=x, eta=-1. */
681  int j, len = 9;
682  int32_t eta = -1; /* eta = -delta; delta is initially 1 */
683  int32_t cond, fn, gn;
684  int jac = 0;
685  int count;
686 
687  /* The input limbs must all be non-negative. */
688  VERIFY_CHECK(g.v[0] >= 0 && g.v[1] >= 0 && g.v[2] >= 0 && g.v[3] >= 0 && g.v[4] >= 0 && g.v[5] >= 0 && g.v[6] >= 0 && g.v[7] >= 0 && g.v[8] >= 0);
689 
690  /* If x > 0, then if the loop below converges, it converges to f=g=gcd(x,modulus). Since we
691  * require that gcd(x,modulus)=1 and modulus>=3, x cannot be 0. Thus, we must reach f=1 (or
692  * time out). */
693  VERIFY_CHECK((g.v[0] | g.v[1] | g.v[2] | g.v[3] | g.v[4] | g.v[5] | g.v[6] | g.v[7] | g.v[8]) != 0);
694 
695  for (count = 0; count < JACOBI32_ITERATIONS; ++count) {
696  /* Compute transition matrix and new eta after 30 posdivsteps. */
698  eta = secp256k1_modinv32_posdivsteps_30_var(eta, f.v[0] | ((uint32_t)f.v[1] << 30), g.v[0] | ((uint32_t)g.v[1] << 30), &t, &jac);
699  /* Update f,g using that transition matrix. */
700 #ifdef VERIFY
701  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 0) > 0); /* f > 0 */
702  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */
703  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 0) > 0); /* g > 0 */
704  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 1) < 0); /* g < modulus */
705 #endif
707  /* If the bottom limb of f is 1, there is a chance that f=1. */
708  if (f.v[0] == 1) {
709  cond = 0;
710  /* Check if the other limbs are also 0. */
711  for (j = 1; j < len; ++j) {
712  cond |= f.v[j];
713  }
714  /* If so, we're done. If f=1, the Jacobi symbol (g | f)=1. */
715  if (cond == 0) return 1 - 2*(jac & 1);
716  }
717 
718  /* Determine if len>1 and limb (len-1) of both f and g is 0. */
719  fn = f.v[len - 1];
720  gn = g.v[len - 1];
721  cond = ((int32_t)len - 2) >> 31;
722  cond |= fn;
723  cond |= gn;
724  /* If so, reduce length. */
725  if (cond == 0) --len;
726 #ifdef VERIFY
727  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 0) > 0); /* f > 0 */
728  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */
729  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 0) > 0); /* g > 0 */
730  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 1) < 0); /* g < modulus */
731 #endif
732  }
733 
734  /* The loop failed to converge to f=g after 1500 iterations. Return 0, indicating unknown result. */
735  return 0;
736 }
737 
738 #endif /* SECP256K1_MODINV32_IMPL_H */
#define VERIFY_CHECK(cond)
Definition: util.h:143
static void secp256k1_modinv32(secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo)
static void secp256k1_modinv32_update_fg_30(secp256k1_modinv32_signed30 *f, secp256k1_modinv32_signed30 *g, const secp256k1_modinv32_trans2x2 *t)
static int secp256k1_jacobi32_maybe_var(const secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo)
static SECP256K1_INLINE int secp256k1_ctz32_var(uint32_t x)
Definition: util.h:310
static void secp256k1_modinv32_normalize_30(secp256k1_modinv32_signed30 *r, int32_t sign, const secp256k1_modinv32_modinfo *modinfo)
Definition: modinv32_impl.h:63
#define JACOBI32_ITERATIONS
secp256k1_modinv32_signed30 modulus
Definition: modinv32.h:21
static const uint8_t secp256k1_modinv32_inv256[128]
static void secp256k1_modinv32_update_de_30(secp256k1_modinv32_signed30 *d, secp256k1_modinv32_signed30 *e, const secp256k1_modinv32_trans2x2 *t, const secp256k1_modinv32_modinfo *modinfo)
static void secp256k1_modinv32_var(secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo)
static int32_t secp256k1_modinv32_divsteps_30(int32_t zeta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t)
static int32_t secp256k1_modinv32_posdivsteps_30_var(int32_t eta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t, int *jacp)
static int32_t secp256k1_modinv32_divsteps_30_var(int32_t eta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t)
static void secp256k1_modinv32_update_fg_30_var(int len, secp256k1_modinv32_signed30 *f, secp256k1_modinv32_signed30 *g, const secp256k1_modinv32_trans2x2 *t)
static int count