Bitcoin Core  26.1.0
P2P Digital Currency
field_10x26_impl.h
Go to the documentation of this file.
1 /***********************************************************************
2  * Copyright (c) 2013, 2014 Pieter Wuille *
3  * Distributed under the MIT software license, see the accompanying *
4  * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
5  ***********************************************************************/
6 
7 #ifndef SECP256K1_FIELD_REPR_IMPL_H
8 #define SECP256K1_FIELD_REPR_IMPL_H
9 
10 #include "checkmem.h"
11 #include "util.h"
12 #include "field.h"
13 #include "modinv32_impl.h"
14 
15 #ifdef VERIFY
16 static void secp256k1_fe_impl_verify(const secp256k1_fe *a) {
17  const uint32_t *d = a->n;
18  int m = a->normalized ? 1 : 2 * a->magnitude;
19  VERIFY_CHECK(d[0] <= 0x3FFFFFFUL * m);
20  VERIFY_CHECK(d[1] <= 0x3FFFFFFUL * m);
21  VERIFY_CHECK(d[2] <= 0x3FFFFFFUL * m);
22  VERIFY_CHECK(d[3] <= 0x3FFFFFFUL * m);
23  VERIFY_CHECK(d[4] <= 0x3FFFFFFUL * m);
24  VERIFY_CHECK(d[5] <= 0x3FFFFFFUL * m);
25  VERIFY_CHECK(d[6] <= 0x3FFFFFFUL * m);
26  VERIFY_CHECK(d[7] <= 0x3FFFFFFUL * m);
27  VERIFY_CHECK(d[8] <= 0x3FFFFFFUL * m);
28  VERIFY_CHECK(d[9] <= 0x03FFFFFUL * m);
29  if (a->normalized) {
30  if (d[9] == 0x03FFFFFUL) {
31  uint32_t mid = d[8] & d[7] & d[6] & d[5] & d[4] & d[3] & d[2];
32  if (mid == 0x3FFFFFFUL) {
33  VERIFY_CHECK((d[1] + 0x40UL + ((d[0] + 0x3D1UL) >> 26)) <= 0x3FFFFFFUL);
34  }
35  }
36  }
37 }
38 #endif
39 
/* NOTE(review): the line holding this function's signature is missing from this
 * extract (the embedded listing jumps from 39 to 41). `r` and `m` are presumably
 * its parameters -- confirm against the upstream file.
 *
 * Stores in every limb of r the value 2 * m times that limb's all-ones pattern:
 * 0x3FFFFFF for limbs 0..8 and 0x03FFFFF for limb 9. */
41  r->n[0] = 0x3FFFFFFUL * 2 * m;
42  r->n[1] = 0x3FFFFFFUL * 2 * m;
43  r->n[2] = 0x3FFFFFFUL * 2 * m;
44  r->n[3] = 0x3FFFFFFUL * 2 * m;
45  r->n[4] = 0x3FFFFFFUL * 2 * m;
46  r->n[5] = 0x3FFFFFFUL * 2 * m;
47  r->n[6] = 0x3FFFFFFUL * 2 * m;
48  r->n[7] = 0x3FFFFFFUL * 2 * m;
49  r->n[8] = 0x3FFFFFFUL * 2 * m;
50  r->n[9] = 0x03FFFFFUL * 2 * m;
51 }
52 
/* NOTE(review): the signature line of this function is missing from this extract
 * (listing line 53). `r` is presumably its only parameter -- confirm upstream.
 *
 * Fully reduces the ten limbs of r in place:
 *  1. the bits of limb 9 above bit 21 (x) are folded into limbs 0 and 1 as
 *     x * 0x3D1 and x << 6, then carries are propagated so limbs 0..8 fit in 26 bits;
 *  2. x is recomputed as 1 when the value still needs one more reduction, and the
 *     same fold-and-carry pass is executed unconditionally with that x;
 *  3. limb 9 is masked back to 22 bits and all limbs are written to r. */
54  uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
55  t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];
56 
57  /* Reduce t9 at the start so there will be at most a single carry from the first pass */
58  uint32_t m;
59  uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;
60 
61  /* The first pass ensures the magnitude is 1, ... */
62  t0 += x * 0x3D1UL; t1 += (x << 6);
63  t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
64  t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
/* m accumulates the AND of limbs 2..8; it is all-ones only if each of them is saturated. */
65  t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; m = t2;
66  t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; m &= t3;
67  t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; m &= t4;
68  t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; m &= t5;
69  t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; m &= t6;
70  t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; m &= t7;
71  t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; m &= t8;
72 
73  /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */
74  VERIFY_CHECK(t9 >> 23 == 0);
75 
76  /* At most a single final reduction is needed; check if the value is >= the field characteristic */
77  x = (t9 >> 22) | ((t9 == 0x03FFFFFUL) & (m == 0x3FFFFFFUL)
78  & ((t1 + 0x40UL + ((t0 + 0x3D1UL) >> 26)) > 0x3FFFFFFUL));
79 
80  /* Apply the final reduction (for constant-time behaviour, we do it always) */
81  t0 += x * 0x3D1UL; t1 += (x << 6);
82  t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
83  t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
84  t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL;
85  t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL;
86  t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL;
87  t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL;
88  t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL;
89  t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL;
90  t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL;
91 
92  /* If t9 didn't carry to bit 22 already, then it should have after any final reduction */
93  VERIFY_CHECK(t9 >> 22 == x);
94 
95  /* Mask off the possible multiple of 2^256 from the final reduction */
96  t9 &= 0x03FFFFFUL;
97 
98  r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4;
99  r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9;
100 }
101 
/* NOTE(review): the signature line of this function is missing from this extract
 * (listing line 102). `r` is presumably its only parameter -- confirm upstream.
 *
 * Performs only the first reduction pass on r, in place: the bits of limb 9 above
 * bit 21 are folded into limbs 0 and 1 and carries are propagated so limbs 0..8
 * fit in 26 bits. No final reduction is applied, so limb 9 may still have
 * bit 22 set when it is stored. */
103  uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
104  t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];
105 
106  /* Reduce t9 at the start so there will be at most a single carry from the first pass */
107  uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;
108 
109  /* The first pass ensures the magnitude is 1, ... */
110  t0 += x * 0x3D1UL; t1 += (x << 6);
111  t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
112  t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
113  t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL;
114  t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL;
115  t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL;
116  t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL;
117  t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL;
118  t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL;
119  t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL;
120 
121  /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */
122  VERIFY_CHECK(t9 >> 23 == 0);
123 
124  r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4;
125  r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9;
126 }
127 
/* NOTE(review): the signature line of this function is missing from this extract
 * (listing line 128). `r` is presumably its only parameter -- confirm upstream.
 *
 * Fully reduces the ten limbs of r in place. The first pass folds the bits of
 * limb 9 above bit 21 into limbs 0 and 1 and propagates carries. The final
 * reduction is executed only inside `if (x)`, so the control flow depends on
 * the value being reduced. */
129  uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
130  t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];
131 
132  /* Reduce t9 at the start so there will be at most a single carry from the first pass */
133  uint32_t m;
134  uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;
135 
136  /* The first pass ensures the magnitude is 1, ... */
137  t0 += x * 0x3D1UL; t1 += (x << 6);
138  t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
139  t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
/* m accumulates the AND of limbs 2..8; it is all-ones only if each of them is saturated. */
140  t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; m = t2;
141  t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; m &= t3;
142  t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; m &= t4;
143  t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; m &= t5;
144  t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; m &= t6;
145  t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; m &= t7;
146  t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; m &= t8;
147 
148  /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */
149  VERIFY_CHECK(t9 >> 23 == 0);
150 
151  /* At most a single final reduction is needed; check if the value is >= the field characteristic */
152  x = (t9 >> 22) | ((t9 == 0x03FFFFFUL) & (m == 0x3FFFFFFUL)
153  & ((t1 + 0x40UL + ((t0 + 0x3D1UL) >> 26)) > 0x3FFFFFFUL));
154 
/* x is either 0 or 1 here, so inside the branch adding 0x3D1 equals adding x * 0x3D1. */
155  if (x) {
156  t0 += 0x3D1UL; t1 += (x << 6);
157  t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
158  t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
159  t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL;
160  t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL;
161  t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL;
162  t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL;
163  t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL;
164  t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL;
165  t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL;
166 
167  /* If t9 didn't carry to bit 22 already, then it should have after any final reduction */
168  VERIFY_CHECK(t9 >> 22 == x);
169 
170  /* Mask off the possible multiple of 2^256 from the final reduction */
171  t9 &= 0x03FFFFFUL;
172  }
173 
174  r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4;
175  r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9;
176 }
177 
/* NOTE(review): the signature line of this function is missing from this extract
 * (listing line 178). `r` is presumably its only parameter -- confirm upstream.
 *
 * Returns 1 when, after the first reduction pass, the limbs read from r are
 * either all zero or exactly the pattern tested by z1; returns 0 otherwise.
 * Only local copies are modified; nothing is written back to r.
 *
 * z0 is the OR of all limbs, so z0 == 0 means every limb is zero.
 * z1 is the AND of all limbs after XOR-ing limb 0 with 0x3D0, limb 1 with 0x40
 * and limb 9 with 0x3C00000. It equals 0x3FFFFFF only when limb 0 is 0x3FFFC2F,
 * limb 1 is 0x3FFFFBF, limbs 2..8 are 0x3FFFFFF and limb 9 is 0x03FFFFF. */
179  uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
180  t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];
181 
182  /* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */
183  uint32_t z0, z1;
184 
185  /* Reduce t9 at the start so there will be at most a single carry from the first pass */
186  uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;
187 
188  /* The first pass ensures the magnitude is 1, ... */
189  t0 += x * 0x3D1UL; t1 += (x << 6);
190  t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL; z0 = t0; z1 = t0 ^ 0x3D0UL;
191  t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; z0 |= t1; z1 &= t1 ^ 0x40UL;
192  t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; z0 |= t2; z1 &= t2;
193  t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; z0 |= t3; z1 &= t3;
194  t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; z0 |= t4; z1 &= t4;
195  t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; z0 |= t5; z1 &= t5;
196  t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; z0 |= t6; z1 &= t6;
197  t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; z0 |= t7; z1 &= t7;
198  t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; z0 |= t8; z1 &= t8;
199  z0 |= t9; z1 &= t9 ^ 0x3C00000UL;
200 
201  /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */
202  VERIFY_CHECK(t9 >> 23 == 0);
203 
204  return (z0 == 0) | (z1 == 0x3FFFFFFUL);
205 }
206 
/* NOTE(review): the signature line of this function is missing from this extract
 * (listing line 207). `r` is presumably its only parameter -- confirm upstream.
 *
 * Same zero-or-P test as the z0/z1 accumulation over all ten limbs, but with an
 * early exit: limb 0 is reduced and inspected first, and 0 is returned at once
 * when it can match neither the all-zero pattern nor the z1 pattern. The
 * remaining limbs are only loaded when that quick test is inconclusive, so the
 * control flow depends on the value. Nothing is written back to r. */
208  uint32_t t0, t1, t2, t3, t4, t5, t6, t7, t8, t9;
209  uint32_t z0, z1;
210  uint32_t x;
211 
212  t0 = r->n[0];
213  t9 = r->n[9];
214 
215  /* Reduce t9 at the start so there will be at most a single carry from the first pass */
216  x = t9 >> 22;
217 
218  /* The first pass ensures the magnitude is 1, ... */
219  t0 += x * 0x3D1UL;
220 
221  /* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */
222  z0 = t0 & 0x3FFFFFFUL;
223  z1 = z0 ^ 0x3D0UL;
224 
225  /* Fast return path should catch the majority of cases */
226  if ((z0 != 0UL) & (z1 != 0x3FFFFFFUL)) {
227  return 0;
228  }
229 
230  t1 = r->n[1];
231  t2 = r->n[2];
232  t3 = r->n[3];
233  t4 = r->n[4];
234  t5 = r->n[5];
235  t6 = r->n[6];
236  t7 = r->n[7];
237  t8 = r->n[8];
238 
239  t9 &= 0x03FFFFFUL;
240  t1 += (x << 6);
241 
/* t0 is left unmasked here; its low 26 bits were already captured in z0/z1 above. */
242  t1 += (t0 >> 26);
243  t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; z0 |= t1; z1 &= t1 ^ 0x40UL;
244  t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; z0 |= t2; z1 &= t2;
245  t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; z0 |= t3; z1 &= t3;
246  t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; z0 |= t4; z1 &= t4;
247  t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; z0 |= t5; z1 &= t5;
248  t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; z0 |= t6; z1 &= t6;
249  t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; z0 |= t7; z1 &= t7;
250  t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; z0 |= t8; z1 &= t8;
251  z0 |= t9; z1 &= t9 ^ 0x3C00000UL;
252 
253  /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */
254  VERIFY_CHECK(t9 >> 23 == 0);
255 
256  return (z0 == 0) | (z1 == 0x3FFFFFFUL);
257 }
258 
/* NOTE(review): the signature line is missing from this extract (listing line 259).
 * `r` and `a` are presumably its parameters -- confirm upstream.
 *
 * Stores a in limb 0 of r and clears limbs 1..9. */
260  r->n[0] = a;
261  r->n[1] = r->n[2] = r->n[3] = r->n[4] = r->n[5] = r->n[6] = r->n[7] = r->n[8] = r->n[9] = 0;
262 }
263 
/* NOTE(review): the signature line is missing from this extract (listing line 264).
 * `a` is presumably its only parameter -- confirm upstream.
 *
 * Returns 1 when all ten limbs of a are zero (their bitwise OR is 0), else 0. */
265  const uint32_t *t = a->n;
266  return (t[0] | t[1] | t[2] | t[3] | t[4] | t[5] | t[6] | t[7] | t[8] | t[9]) == 0;
267 }
268 
/* NOTE(review): the signature line is missing from this extract (listing line 269).
 * `a` is presumably its only parameter -- confirm upstream.
 *
 * Returns the least significant bit of limb 0 of a. */
270  return a->n[0] & 1;
271 }
272 
/* NOTE(review): the signature line is missing from this extract (listing line 273).
 * `a` is presumably its only parameter -- confirm upstream.
 *
 * Sets all ten limbs of a to zero. */
274  int i;
275  for (i=0; i<10; i++) {
276  a->n[i] = 0;
277  }
278 }
279 
280 static int secp256k1_fe_impl_cmp_var(const secp256k1_fe *a, const secp256k1_fe *b) {
281  int i;
282  for (i = 9; i >= 0; i--) {
283  if (a->n[i] > b->n[i]) {
284  return 1;
285  }
286  if (a->n[i] < b->n[i]) {
287  return -1;
288  }
289  }
290  return 0;
291 }
292 
293 static void secp256k1_fe_impl_set_b32_mod(secp256k1_fe *r, const unsigned char *a) {
294  r->n[0] = (uint32_t)a[31] | ((uint32_t)a[30] << 8) | ((uint32_t)a[29] << 16) | ((uint32_t)(a[28] & 0x3) << 24);
295  r->n[1] = (uint32_t)((a[28] >> 2) & 0x3f) | ((uint32_t)a[27] << 6) | ((uint32_t)a[26] << 14) | ((uint32_t)(a[25] & 0xf) << 22);
296  r->n[2] = (uint32_t)((a[25] >> 4) & 0xf) | ((uint32_t)a[24] << 4) | ((uint32_t)a[23] << 12) | ((uint32_t)(a[22] & 0x3f) << 20);
297  r->n[3] = (uint32_t)((a[22] >> 6) & 0x3) | ((uint32_t)a[21] << 2) | ((uint32_t)a[20] << 10) | ((uint32_t)a[19] << 18);
298  r->n[4] = (uint32_t)a[18] | ((uint32_t)a[17] << 8) | ((uint32_t)a[16] << 16) | ((uint32_t)(a[15] & 0x3) << 24);
299  r->n[5] = (uint32_t)((a[15] >> 2) & 0x3f) | ((uint32_t)a[14] << 6) | ((uint32_t)a[13] << 14) | ((uint32_t)(a[12] & 0xf) << 22);
300  r->n[6] = (uint32_t)((a[12] >> 4) & 0xf) | ((uint32_t)a[11] << 4) | ((uint32_t)a[10] << 12) | ((uint32_t)(a[9] & 0x3f) << 20);
301  r->n[7] = (uint32_t)((a[9] >> 6) & 0x3) | ((uint32_t)a[8] << 2) | ((uint32_t)a[7] << 10) | ((uint32_t)a[6] << 18);
302  r->n[8] = (uint32_t)a[5] | ((uint32_t)a[4] << 8) | ((uint32_t)a[3] << 16) | ((uint32_t)(a[2] & 0x3) << 24);
303  r->n[9] = (uint32_t)((a[2] >> 2) & 0x3f) | ((uint32_t)a[1] << 6) | ((uint32_t)a[0] << 14);
304 }
305 
306 static int secp256k1_fe_impl_set_b32_limit(secp256k1_fe *r, const unsigned char *a) {
308  return !((r->n[9] == 0x3FFFFFUL) & ((r->n[8] & r->n[7] & r->n[6] & r->n[5] & r->n[4] & r->n[3] & r->n[2]) == 0x3FFFFFFUL) & ((r->n[1] + 0x40UL + ((r->n[0] + 0x3D1UL) >> 26)) > 0x3FFFFFFUL));
309 }
310 
312 static void secp256k1_fe_impl_get_b32(unsigned char *r, const secp256k1_fe *a) {
313  r[0] = (a->n[9] >> 14) & 0xff;
314  r[1] = (a->n[9] >> 6) & 0xff;
315  r[2] = ((a->n[9] & 0x3F) << 2) | ((a->n[8] >> 24) & 0x3);
316  r[3] = (a->n[8] >> 16) & 0xff;
317  r[4] = (a->n[8] >> 8) & 0xff;
318  r[5] = a->n[8] & 0xff;
319  r[6] = (a->n[7] >> 18) & 0xff;
320  r[7] = (a->n[7] >> 10) & 0xff;
321  r[8] = (a->n[7] >> 2) & 0xff;
322  r[9] = ((a->n[7] & 0x3) << 6) | ((a->n[6] >> 20) & 0x3f);
323  r[10] = (a->n[6] >> 12) & 0xff;
324  r[11] = (a->n[6] >> 4) & 0xff;
325  r[12] = ((a->n[6] & 0xf) << 4) | ((a->n[5] >> 22) & 0xf);
326  r[13] = (a->n[5] >> 14) & 0xff;
327  r[14] = (a->n[5] >> 6) & 0xff;
328  r[15] = ((a->n[5] & 0x3f) << 2) | ((a->n[4] >> 24) & 0x3);
329  r[16] = (a->n[4] >> 16) & 0xff;
330  r[17] = (a->n[4] >> 8) & 0xff;
331  r[18] = a->n[4] & 0xff;
332  r[19] = (a->n[3] >> 18) & 0xff;
333  r[20] = (a->n[3] >> 10) & 0xff;
334  r[21] = (a->n[3] >> 2) & 0xff;
335  r[22] = ((a->n[3] & 0x3) << 6) | ((a->n[2] >> 20) & 0x3f);
336  r[23] = (a->n[2] >> 12) & 0xff;
337  r[24] = (a->n[2] >> 4) & 0xff;
338  r[25] = ((a->n[2] & 0xf) << 4) | ((a->n[1] >> 22) & 0xf);
339  r[26] = (a->n[1] >> 14) & 0xff;
340  r[27] = (a->n[1] >> 6) & 0xff;
341  r[28] = ((a->n[1] & 0x3f) << 2) | ((a->n[0] >> 24) & 0x3);
342  r[29] = (a->n[0] >> 16) & 0xff;
343  r[30] = (a->n[0] >> 8) & 0xff;
344  r[31] = a->n[0] & 0xff;
345 }
346 
/* NOTE(review): the signature line is missing from this extract (listing line 347).
 * `r`, `a` and `m` are presumably its parameters -- confirm upstream.
 *
 * Sets each limb of r to 2 * (m + 1) times a per-limb constant minus the matching
 * limb of a. The constants are 0x3FFFC2F (limb 0), 0x3FFFFBF (limb 1),
 * 0x3FFFFFF (limbs 2..8) and 0x03FFFFF (limb 9) -- presumably the limbs of the
 * field prime, so the result would represent the negation of a; confirm. */
348  /* For all legal values of m (0..31), the following properties hold: */
349  VERIFY_CHECK(0x3FFFC2FUL * 2 * (m + 1) >= 0x3FFFFFFUL * 2 * m);
350  VERIFY_CHECK(0x3FFFFBFUL * 2 * (m + 1) >= 0x3FFFFFFUL * 2 * m);
351  VERIFY_CHECK(0x3FFFFFFUL * 2 * (m + 1) >= 0x3FFFFFFUL * 2 * m);
352  VERIFY_CHECK(0x03FFFFFUL * 2 * (m + 1) >= 0x03FFFFFUL * 2 * m);
353 
354  /* Due to the properties above, the left hand in the subtractions below is never less than
355  * the right hand. */
356  r->n[0] = 0x3FFFC2FUL * 2 * (m + 1) - a->n[0];
357  r->n[1] = 0x3FFFFBFUL * 2 * (m + 1) - a->n[1];
358  r->n[2] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[2];
359  r->n[3] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[3];
360  r->n[4] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[4];
361  r->n[5] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[5];
362  r->n[6] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[6];
363  r->n[7] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[7];
364  r->n[8] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[8];
365  r->n[9] = 0x03FFFFFUL * 2 * (m + 1) - a->n[9];
366 }
367 
/* NOTE(review): the signature line is missing from this extract (listing line 368).
 * `r` and `a` are presumably its parameters -- confirm upstream.
 *
 * Multiplies every limb of r by a in place. No carry propagation or reduction
 * is performed. */
369  r->n[0] *= a;
370  r->n[1] *= a;
371  r->n[2] *= a;
372  r->n[3] *= a;
373  r->n[4] *= a;
374  r->n[5] *= a;
375  r->n[6] *= a;
376  r->n[7] *= a;
377  r->n[8] *= a;
378  r->n[9] *= a;
379 }
380 
/* NOTE(review): the signature line is missing from this extract (listing line 381).
 * `r` and `a` are presumably its parameters -- confirm upstream.
 *
 * Adds each limb of a to the matching limb of r in place. No carry propagation
 * or reduction is performed. */
382  r->n[0] += a->n[0];
383  r->n[1] += a->n[1];
384  r->n[2] += a->n[2];
385  r->n[3] += a->n[3];
386  r->n[4] += a->n[4];
387  r->n[5] += a->n[5];
388  r->n[6] += a->n[6];
389  r->n[7] += a->n[7];
390  r->n[8] += a->n[8];
391  r->n[9] += a->n[9];
392 }
393 
/* NOTE(review): the signature line is missing from this extract (listing line 394).
 * `r` and `a` are presumably its parameters -- confirm upstream.
 *
 * Adds a to limb 0 of r in place; the other limbs are left untouched. */
395  r->n[0] += a;
396 }
397 
398 #if defined(USE_EXTERNAL_ASM)
399 
400 /* External assembler implementation */
401 void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t *a, const uint32_t * SECP256K1_RESTRICT b);
402 void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t *a);
403 
404 #else
405 
/* VERIFY_BITS(x, n): when VERIFY is defined, checks through VERIFY_CHECK that x
 * has no bit set at position n or above (x >> n == 0). Otherwise it expands to
 * an empty statement. */
406 #ifdef VERIFY
407 #define VERIFY_BITS(x, n) VERIFY_CHECK(((x) >> (n)) == 0)
408 #else
409 #define VERIFY_BITS(x, n) do { } while(0)
410 #endif
411 
/* Multiply two field elements held as ten limbs each and write the reduced limbs to r.
 *
 * r: output, ten limbs. On return limbs 0, 1 and 3..8 fit in 26 bits, limb 2 fits
 *    in 27 bits and limb 9 fits in 22 bits (see the VERIFY_BITS check after each
 *    store to r[k]).
 * a, b: inputs, ten limbs each. Limbs 0..8 must fit in 30 bits and limb 9 in
 *    26 bits (checked by the VERIFY_BITS calls at the top).
 *
 * Every store to r[] happens after the last read of a[] and b[]. b carries
 * SECP256K1_RESTRICT, which presumably promises that it does not alias r --
 * confirm against that macro's definition.
 */
412 SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t *a, const uint32_t * SECP256K1_RESTRICT b) {
413  uint64_t c, d;
414  uint64_t u0, u1, u2, u3, u4, u5, u6, u7, u8;
415  uint32_t t9, t1, t0, t2, t3, t4, t5, t6, t7;
/* M masks one 26-bit limb. R0 and R1 are the multipliers that fold a limb sitting
 * one position above limb 9 (ten limb positions up) back into limb 0 and limb 1
 * respectively. */
416  const uint32_t M = 0x3FFFFFFUL, R0 = 0x3D10UL, R1 = 0x400UL;
417 
418  VERIFY_BITS(a[0], 30);
419  VERIFY_BITS(a[1], 30);
420  VERIFY_BITS(a[2], 30);
421  VERIFY_BITS(a[3], 30);
422  VERIFY_BITS(a[4], 30);
423  VERIFY_BITS(a[5], 30);
424  VERIFY_BITS(a[6], 30);
425  VERIFY_BITS(a[7], 30);
426  VERIFY_BITS(a[8], 30);
427  VERIFY_BITS(a[9], 26);
428  VERIFY_BITS(b[0], 30);
429  VERIFY_BITS(b[1], 30);
430  VERIFY_BITS(b[2], 30);
431  VERIFY_BITS(b[3], 30);
432  VERIFY_BITS(b[4], 30);
433  VERIFY_BITS(b[5], 30);
434  VERIFY_BITS(b[6], 30);
435  VERIFY_BITS(b[7], 30);
436  VERIFY_BITS(b[8], 30);
437  VERIFY_BITS(b[9], 26);
438 
/* Notation used in the bracketed comments below (inferred from the sums they
 * annotate): [x_k ... x_1 x_0] lists limb values from most to least significant,
 * adjacent entries being 26 bits apart. pN stands for column N of the schoolbook
 * product, i.e. the sum of all a[i] * b[j] with i + j == N. Each such comment
 * states that the working state on its left represents the same value as the
 * product columns on its right once high positions are folded down with R0/R1.
 *
 * d accumulates the upper columns (p9 and above), c the lower ones (p0..p8).
 * Each step peels one 26-bit limb u_k off d and adds u_k * R0 and u_k * R1 into
 * the matching pair of low limbs held in c. */
445  d = (uint64_t)a[0] * b[9]
446  + (uint64_t)a[1] * b[8]
447  + (uint64_t)a[2] * b[7]
448  + (uint64_t)a[3] * b[6]
449  + (uint64_t)a[4] * b[5]
450  + (uint64_t)a[5] * b[4]
451  + (uint64_t)a[6] * b[3]
452  + (uint64_t)a[7] * b[2]
453  + (uint64_t)a[8] * b[1]
454  + (uint64_t)a[9] * b[0];
455  /* VERIFY_BITS(d, 64); */
456  /* [d 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */
457  t9 = d & M; d >>= 26;
458  VERIFY_BITS(t9, 26);
459  VERIFY_BITS(d, 38);
460  /* [d t9 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */
461 
462  c = (uint64_t)a[0] * b[0];
463  VERIFY_BITS(c, 60);
464  /* [d t9 0 0 0 0 0 0 0 0 c] = [p9 0 0 0 0 0 0 0 0 p0] */
465  d += (uint64_t)a[1] * b[9]
466  + (uint64_t)a[2] * b[8]
467  + (uint64_t)a[3] * b[7]
468  + (uint64_t)a[4] * b[6]
469  + (uint64_t)a[5] * b[5]
470  + (uint64_t)a[6] * b[4]
471  + (uint64_t)a[7] * b[3]
472  + (uint64_t)a[8] * b[2]
473  + (uint64_t)a[9] * b[1];
474  VERIFY_BITS(d, 63);
475  /* [d t9 0 0 0 0 0 0 0 0 c] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
476  u0 = d & M; d >>= 26; c += u0 * R0;
477  VERIFY_BITS(u0, 26);
478  VERIFY_BITS(d, 37);
479  VERIFY_BITS(c, 61);
480  /* [d u0 t9 0 0 0 0 0 0 0 0 c-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
481  t0 = c & M; c >>= 26; c += u0 * R1;
482  VERIFY_BITS(t0, 26);
483  VERIFY_BITS(c, 37);
484  /* [d u0 t9 0 0 0 0 0 0 0 c-u0*R1 t0-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
485  /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
486 
487  c += (uint64_t)a[0] * b[1]
488  + (uint64_t)a[1] * b[0];
489  VERIFY_BITS(c, 62);
490  /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 p1 p0] */
491  d += (uint64_t)a[2] * b[9]
492  + (uint64_t)a[3] * b[8]
493  + (uint64_t)a[4] * b[7]
494  + (uint64_t)a[5] * b[6]
495  + (uint64_t)a[6] * b[5]
496  + (uint64_t)a[7] * b[4]
497  + (uint64_t)a[8] * b[3]
498  + (uint64_t)a[9] * b[2];
499  VERIFY_BITS(d, 63);
500  /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
501  u1 = d & M; d >>= 26; c += u1 * R0;
502  VERIFY_BITS(u1, 26);
503  VERIFY_BITS(d, 37);
504  VERIFY_BITS(c, 63);
505  /* [d u1 0 t9 0 0 0 0 0 0 0 c-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
506  t1 = c & M; c >>= 26; c += u1 * R1;
507  VERIFY_BITS(t1, 26);
508  VERIFY_BITS(c, 38);
509  /* [d u1 0 t9 0 0 0 0 0 0 c-u1*R1 t1-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
510  /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
511 
512  c += (uint64_t)a[0] * b[2]
513  + (uint64_t)a[1] * b[1]
514  + (uint64_t)a[2] * b[0];
515  VERIFY_BITS(c, 62);
516  /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
517  d += (uint64_t)a[3] * b[9]
518  + (uint64_t)a[4] * b[8]
519  + (uint64_t)a[5] * b[7]
520  + (uint64_t)a[6] * b[6]
521  + (uint64_t)a[7] * b[5]
522  + (uint64_t)a[8] * b[4]
523  + (uint64_t)a[9] * b[3];
524  VERIFY_BITS(d, 63);
525  /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
526  u2 = d & M; d >>= 26; c += u2 * R0;
527  VERIFY_BITS(u2, 26);
528  VERIFY_BITS(d, 37);
529  VERIFY_BITS(c, 63);
530  /* [d u2 0 0 t9 0 0 0 0 0 0 c-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
531  t2 = c & M; c >>= 26; c += u2 * R1;
532  VERIFY_BITS(t2, 26);
533  VERIFY_BITS(c, 38);
534  /* [d u2 0 0 t9 0 0 0 0 0 c-u2*R1 t2-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
535  /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
536 
537  c += (uint64_t)a[0] * b[3]
538  + (uint64_t)a[1] * b[2]
539  + (uint64_t)a[2] * b[1]
540  + (uint64_t)a[3] * b[0];
541  VERIFY_BITS(c, 63);
542  /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
543  d += (uint64_t)a[4] * b[9]
544  + (uint64_t)a[5] * b[8]
545  + (uint64_t)a[6] * b[7]
546  + (uint64_t)a[7] * b[6]
547  + (uint64_t)a[8] * b[5]
548  + (uint64_t)a[9] * b[4];
549  VERIFY_BITS(d, 63);
550  /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
551  u3 = d & M; d >>= 26; c += u3 * R0;
552  VERIFY_BITS(u3, 26);
553  VERIFY_BITS(d, 37);
554  /* VERIFY_BITS(c, 64); */
555  /* [d u3 0 0 0 t9 0 0 0 0 0 c-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
556  t3 = c & M; c >>= 26; c += u3 * R1;
557  VERIFY_BITS(t3, 26);
558  VERIFY_BITS(c, 39);
559  /* [d u3 0 0 0 t9 0 0 0 0 c-u3*R1 t3-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
560  /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
561 
562  c += (uint64_t)a[0] * b[4]
563  + (uint64_t)a[1] * b[3]
564  + (uint64_t)a[2] * b[2]
565  + (uint64_t)a[3] * b[1]
566  + (uint64_t)a[4] * b[0];
567  VERIFY_BITS(c, 63);
568  /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
569  d += (uint64_t)a[5] * b[9]
570  + (uint64_t)a[6] * b[8]
571  + (uint64_t)a[7] * b[7]
572  + (uint64_t)a[8] * b[6]
573  + (uint64_t)a[9] * b[5];
574  VERIFY_BITS(d, 62);
575  /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
576  u4 = d & M; d >>= 26; c += u4 * R0;
577  VERIFY_BITS(u4, 26);
578  VERIFY_BITS(d, 36);
579  /* VERIFY_BITS(c, 64); */
580  /* [d u4 0 0 0 0 t9 0 0 0 0 c-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
581  t4 = c & M; c >>= 26; c += u4 * R1;
582  VERIFY_BITS(t4, 26);
583  VERIFY_BITS(c, 39);
584  /* [d u4 0 0 0 0 t9 0 0 0 c-u4*R1 t4-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
585  /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
586 
587  c += (uint64_t)a[0] * b[5]
588  + (uint64_t)a[1] * b[4]
589  + (uint64_t)a[2] * b[3]
590  + (uint64_t)a[3] * b[2]
591  + (uint64_t)a[4] * b[1]
592  + (uint64_t)a[5] * b[0];
593  VERIFY_BITS(c, 63);
594  /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
595  d += (uint64_t)a[6] * b[9]
596  + (uint64_t)a[7] * b[8]
597  + (uint64_t)a[8] * b[7]
598  + (uint64_t)a[9] * b[6];
599  VERIFY_BITS(d, 62);
600  /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
601  u5 = d & M; d >>= 26; c += u5 * R0;
602  VERIFY_BITS(u5, 26);
603  VERIFY_BITS(d, 36);
604  /* VERIFY_BITS(c, 64); */
605  /* [d u5 0 0 0 0 0 t9 0 0 0 c-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
606  t5 = c & M; c >>= 26; c += u5 * R1;
607  VERIFY_BITS(t5, 26);
608  VERIFY_BITS(c, 39);
609  /* [d u5 0 0 0 0 0 t9 0 0 c-u5*R1 t5-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
610  /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
611 
612  c += (uint64_t)a[0] * b[6]
613  + (uint64_t)a[1] * b[5]
614  + (uint64_t)a[2] * b[4]
615  + (uint64_t)a[3] * b[3]
616  + (uint64_t)a[4] * b[2]
617  + (uint64_t)a[5] * b[1]
618  + (uint64_t)a[6] * b[0];
619  VERIFY_BITS(c, 63);
620  /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
621  d += (uint64_t)a[7] * b[9]
622  + (uint64_t)a[8] * b[8]
623  + (uint64_t)a[9] * b[7];
624  VERIFY_BITS(d, 61);
625  /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
626  u6 = d & M; d >>= 26; c += u6 * R0;
627  VERIFY_BITS(u6, 26);
628  VERIFY_BITS(d, 35);
629  /* VERIFY_BITS(c, 64); */
630  /* [d u6 0 0 0 0 0 0 t9 0 0 c-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
631  t6 = c & M; c >>= 26; c += u6 * R1;
632  VERIFY_BITS(t6, 26);
633  VERIFY_BITS(c, 39);
634  /* [d u6 0 0 0 0 0 0 t9 0 c-u6*R1 t6-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
635  /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
636 
637  c += (uint64_t)a[0] * b[7]
638  + (uint64_t)a[1] * b[6]
639  + (uint64_t)a[2] * b[5]
640  + (uint64_t)a[3] * b[4]
641  + (uint64_t)a[4] * b[3]
642  + (uint64_t)a[5] * b[2]
643  + (uint64_t)a[6] * b[1]
644  + (uint64_t)a[7] * b[0];
645  /* VERIFY_BITS(c, 64); */
646  VERIFY_CHECK(c <= 0x8000007C00000007ULL);
647  /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
648  d += (uint64_t)a[8] * b[9]
649  + (uint64_t)a[9] * b[8];
650  VERIFY_BITS(d, 58);
651  /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
652  u7 = d & M; d >>= 26; c += u7 * R0;
653  VERIFY_BITS(u7, 26);
654  VERIFY_BITS(d, 32);
655  /* VERIFY_BITS(c, 64); */
656  VERIFY_CHECK(c <= 0x800001703FFFC2F7ULL);
657  /* [d u7 0 0 0 0 0 0 0 t9 0 c-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
658  t7 = c & M; c >>= 26; c += u7 * R1;
659  VERIFY_BITS(t7, 26);
660  VERIFY_BITS(c, 38);
661  /* [d u7 0 0 0 0 0 0 0 t9 c-u7*R1 t7-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
662  /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
663 
664  c += (uint64_t)a[0] * b[8]
665  + (uint64_t)a[1] * b[7]
666  + (uint64_t)a[2] * b[6]
667  + (uint64_t)a[3] * b[5]
668  + (uint64_t)a[4] * b[4]
669  + (uint64_t)a[5] * b[3]
670  + (uint64_t)a[6] * b[2]
671  + (uint64_t)a[7] * b[1]
672  + (uint64_t)a[8] * b[0];
673  /* VERIFY_BITS(c, 64); */
674  VERIFY_CHECK(c <= 0x9000007B80000008ULL);
675  /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
676  d += (uint64_t)a[9] * b[9];
677  VERIFY_BITS(d, 57);
678  /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
679  u8 = d & M; d >>= 26; c += u8 * R0;
680  VERIFY_BITS(u8, 26);
681  VERIFY_BITS(d, 31);
682  /* VERIFY_BITS(c, 64); */
683  VERIFY_CHECK(c <= 0x9000016FBFFFC2F8ULL);
684  /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
685 
/* All reads of a[] and b[] are done; from here on results are stored to r[]. */
686  r[3] = t3;
687  VERIFY_BITS(r[3], 26);
688  /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
689  r[4] = t4;
690  VERIFY_BITS(r[4], 26);
691  /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
692  r[5] = t5;
693  VERIFY_BITS(r[5], 26);
694  /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
695  r[6] = t6;
696  VERIFY_BITS(r[6], 26);
697  /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
698  r[7] = t7;
699  VERIFY_BITS(r[7], 26);
700  /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
701 
702  r[8] = c & M; c >>= 26; c += u8 * R1;
703  VERIFY_BITS(r[8], 26);
704  VERIFY_BITS(c, 39);
705  /* [d u8 0 0 0 0 0 0 0 0 t9+c-u8*R1 r8-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
706  /* [d 0 0 0 0 0 0 0 0 0 t9+c r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
707  c += d * R0 + t9;
708  VERIFY_BITS(c, 45);
709  /* [d 0 0 0 0 0 0 0 0 0 c-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
/* Limb 9 keeps only 22 bits (M >> 4); the constants are shifted by 4 to match that narrower limb. */
710  r[9] = c & (M >> 4); c >>= 22; c += d * (R1 << 4);
711  VERIFY_BITS(r[9], 22);
712  VERIFY_BITS(c, 46);
713  /* [d 0 0 0 0 0 0 0 0 r9+((c-d*R1<<4)<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
714  /* [d 0 0 0 0 0 0 0 -d*R1 r9+(c<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
715  /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
716 
/* The part of c above limb 9's 22 bits is folded into limbs 0 and 1 using R0 >> 4 and R1 >> 4. */
717  d = c * (R0 >> 4) + t0;
718  VERIFY_BITS(d, 56);
719  /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 d-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
720  r[0] = d & M; d >>= 26;
721  VERIFY_BITS(r[0], 26);
722  VERIFY_BITS(d, 30);
723  /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1+d r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
724  d += c * (R1 >> 4) + t1;
725  VERIFY_BITS(d, 53);
726  VERIFY_CHECK(d <= 0x10000003FFFFBFULL);
727  /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 d-c*R1>>4 r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
728  /* [r9 r8 r7 r6 r5 r4 r3 t2 d r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
729  r[1] = d & M; d >>= 26;
730  VERIFY_BITS(r[1], 26);
731  VERIFY_BITS(d, 27);
732  VERIFY_CHECK(d <= 0x4000000ULL);
733  /* [r9 r8 r7 r6 r5 r4 r3 t2+d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
/* The last carry is absorbed into limb 2 without masking, which is why r[2] may use 27 bits. */
734  d += t2;
735  VERIFY_BITS(d, 27);
736  /* [r9 r8 r7 r6 r5 r4 r3 d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
737  r[2] = d;
738  VERIFY_BITS(r[2], 27);
739  /* [r9 r8 r7 r6 r5 r4 r3 r2 r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
740 }
741 
742 SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t *a) {
743  uint64_t c, d;
744  uint64_t u0, u1, u2, u3, u4, u5, u6, u7, u8;
745  uint32_t t9, t0, t1, t2, t3, t4, t5, t6, t7;
746  const uint32_t M = 0x3FFFFFFUL, R0 = 0x3D10UL, R1 = 0x400UL;
747 
748  VERIFY_BITS(a[0], 30);
749  VERIFY_BITS(a[1], 30);
750  VERIFY_BITS(a[2], 30);
751  VERIFY_BITS(a[3], 30);
752  VERIFY_BITS(a[4], 30);
753  VERIFY_BITS(a[5], 30);
754  VERIFY_BITS(a[6], 30);
755  VERIFY_BITS(a[7], 30);
756  VERIFY_BITS(a[8], 30);
757  VERIFY_BITS(a[9], 26);
758 
764  d = (uint64_t)(a[0]*2) * a[9]
765  + (uint64_t)(a[1]*2) * a[8]
766  + (uint64_t)(a[2]*2) * a[7]
767  + (uint64_t)(a[3]*2) * a[6]
768  + (uint64_t)(a[4]*2) * a[5];
769  /* VERIFY_BITS(d, 64); */
770  /* [d 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */
771  t9 = d & M; d >>= 26;
772  VERIFY_BITS(t9, 26);
773  VERIFY_BITS(d, 38);
774  /* [d t9 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */
775 
776  c = (uint64_t)a[0] * a[0];
777  VERIFY_BITS(c, 60);
778  /* [d t9 0 0 0 0 0 0 0 0 c] = [p9 0 0 0 0 0 0 0 0 p0] */
779  d += (uint64_t)(a[1]*2) * a[9]
780  + (uint64_t)(a[2]*2) * a[8]
781  + (uint64_t)(a[3]*2) * a[7]
782  + (uint64_t)(a[4]*2) * a[6]
783  + (uint64_t)a[5] * a[5];
784  VERIFY_BITS(d, 63);
785  /* [d t9 0 0 0 0 0 0 0 0 c] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
786  u0 = d & M; d >>= 26; c += u0 * R0;
787  VERIFY_BITS(u0, 26);
788  VERIFY_BITS(d, 37);
789  VERIFY_BITS(c, 61);
790  /* [d u0 t9 0 0 0 0 0 0 0 0 c-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
791  t0 = c & M; c >>= 26; c += u0 * R1;
792  VERIFY_BITS(t0, 26);
793  VERIFY_BITS(c, 37);
794  /* [d u0 t9 0 0 0 0 0 0 0 c-u0*R1 t0-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
795  /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
796 
797  c += (uint64_t)(a[0]*2) * a[1];
798  VERIFY_BITS(c, 62);
799  /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 p1 p0] */
800  d += (uint64_t)(a[2]*2) * a[9]
801  + (uint64_t)(a[3]*2) * a[8]
802  + (uint64_t)(a[4]*2) * a[7]
803  + (uint64_t)(a[5]*2) * a[6];
804  VERIFY_BITS(d, 63);
805  /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
806  u1 = d & M; d >>= 26; c += u1 * R0;
807  VERIFY_BITS(u1, 26);
808  VERIFY_BITS(d, 37);
809  VERIFY_BITS(c, 63);
810  /* [d u1 0 t9 0 0 0 0 0 0 0 c-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
811  t1 = c & M; c >>= 26; c += u1 * R1;
812  VERIFY_BITS(t1, 26);
813  VERIFY_BITS(c, 38);
814  /* [d u1 0 t9 0 0 0 0 0 0 c-u1*R1 t1-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
815  /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
816 
817  c += (uint64_t)(a[0]*2) * a[2]
818  + (uint64_t)a[1] * a[1];
819  VERIFY_BITS(c, 62);
820  /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
821  d += (uint64_t)(a[3]*2) * a[9]
822  + (uint64_t)(a[4]*2) * a[8]
823  + (uint64_t)(a[5]*2) * a[7]
824  + (uint64_t)a[6] * a[6];
825  VERIFY_BITS(d, 63);
826  /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
827  u2 = d & M; d >>= 26; c += u2 * R0;
828  VERIFY_BITS(u2, 26);
829  VERIFY_BITS(d, 37);
830  VERIFY_BITS(c, 63);
831  /* [d u2 0 0 t9 0 0 0 0 0 0 c-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
832  t2 = c & M; c >>= 26; c += u2 * R1;
833  VERIFY_BITS(t2, 26);
834  VERIFY_BITS(c, 38);
835  /* [d u2 0 0 t9 0 0 0 0 0 c-u2*R1 t2-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
836  /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
837 
838  c += (uint64_t)(a[0]*2) * a[3]
839  + (uint64_t)(a[1]*2) * a[2];
840  VERIFY_BITS(c, 63);
841  /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
842  d += (uint64_t)(a[4]*2) * a[9]
843  + (uint64_t)(a[5]*2) * a[8]
844  + (uint64_t)(a[6]*2) * a[7];
845  VERIFY_BITS(d, 63);
846  /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
847  u3 = d & M; d >>= 26; c += u3 * R0;
848  VERIFY_BITS(u3, 26);
849  VERIFY_BITS(d, 37);
850  /* VERIFY_BITS(c, 64); */
851  /* [d u3 0 0 0 t9 0 0 0 0 0 c-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
852  t3 = c & M; c >>= 26; c += u3 * R1;
853  VERIFY_BITS(t3, 26);
854  VERIFY_BITS(c, 39);
855  /* [d u3 0 0 0 t9 0 0 0 0 c-u3*R1 t3-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
856  /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
857 
858  c += (uint64_t)(a[0]*2) * a[4]
859  + (uint64_t)(a[1]*2) * a[3]
860  + (uint64_t)a[2] * a[2];
861  VERIFY_BITS(c, 63);
862  /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
863  d += (uint64_t)(a[5]*2) * a[9]
864  + (uint64_t)(a[6]*2) * a[8]
865  + (uint64_t)a[7] * a[7];
866  VERIFY_BITS(d, 62);
867  /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
868  u4 = d & M; d >>= 26; c += u4 * R0;
869  VERIFY_BITS(u4, 26);
870  VERIFY_BITS(d, 36);
871  /* VERIFY_BITS(c, 64); */
872  /* [d u4 0 0 0 0 t9 0 0 0 0 c-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
873  t4 = c & M; c >>= 26; c += u4 * R1;
874  VERIFY_BITS(t4, 26);
875  VERIFY_BITS(c, 39);
876  /* [d u4 0 0 0 0 t9 0 0 0 c-u4*R1 t4-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
877  /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
878 
879  c += (uint64_t)(a[0]*2) * a[5]
880  + (uint64_t)(a[1]*2) * a[4]
881  + (uint64_t)(a[2]*2) * a[3];
882  VERIFY_BITS(c, 63);
883  /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
884  d += (uint64_t)(a[6]*2) * a[9]
885  + (uint64_t)(a[7]*2) * a[8];
886  VERIFY_BITS(d, 62);
887  /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
888  u5 = d & M; d >>= 26; c += u5 * R0;
889  VERIFY_BITS(u5, 26);
890  VERIFY_BITS(d, 36);
891  /* VERIFY_BITS(c, 64); */
892  /* [d u5 0 0 0 0 0 t9 0 0 0 c-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
893  t5 = c & M; c >>= 26; c += u5 * R1;
894  VERIFY_BITS(t5, 26);
895  VERIFY_BITS(c, 39);
896  /* [d u5 0 0 0 0 0 t9 0 0 c-u5*R1 t5-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
897  /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
898 
899  c += (uint64_t)(a[0]*2) * a[6]
900  + (uint64_t)(a[1]*2) * a[5]
901  + (uint64_t)(a[2]*2) * a[4]
902  + (uint64_t)a[3] * a[3];
903  VERIFY_BITS(c, 63);
904  /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
905  d += (uint64_t)(a[7]*2) * a[9]
906  + (uint64_t)a[8] * a[8];
907  VERIFY_BITS(d, 61);
908  /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
909  u6 = d & M; d >>= 26; c += u6 * R0;
910  VERIFY_BITS(u6, 26);
911  VERIFY_BITS(d, 35);
912  /* VERIFY_BITS(c, 64); */
913  /* [d u6 0 0 0 0 0 0 t9 0 0 c-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
914  t6 = c & M; c >>= 26; c += u6 * R1;
915  VERIFY_BITS(t6, 26);
916  VERIFY_BITS(c, 39);
917  /* [d u6 0 0 0 0 0 0 t9 0 c-u6*R1 t6-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
918  /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
919 
920  c += (uint64_t)(a[0]*2) * a[7]
921  + (uint64_t)(a[1]*2) * a[6]
922  + (uint64_t)(a[2]*2) * a[5]
923  + (uint64_t)(a[3]*2) * a[4];
924  /* VERIFY_BITS(c, 64); */
925  VERIFY_CHECK(c <= 0x8000007C00000007ULL);
926  /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
927  d += (uint64_t)(a[8]*2) * a[9];
928  VERIFY_BITS(d, 58);
929  /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
930  u7 = d & M; d >>= 26; c += u7 * R0;
931  VERIFY_BITS(u7, 26);
932  VERIFY_BITS(d, 32);
933  /* VERIFY_BITS(c, 64); */
934  VERIFY_CHECK(c <= 0x800001703FFFC2F7ULL);
935  /* [d u7 0 0 0 0 0 0 0 t9 0 c-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
936  t7 = c & M; c >>= 26; c += u7 * R1;
937  VERIFY_BITS(t7, 26);
938  VERIFY_BITS(c, 38);
939  /* [d u7 0 0 0 0 0 0 0 t9 c-u7*R1 t7-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
940  /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
941 
942  c += (uint64_t)(a[0]*2) * a[8]
943  + (uint64_t)(a[1]*2) * a[7]
944  + (uint64_t)(a[2]*2) * a[6]
945  + (uint64_t)(a[3]*2) * a[5]
946  + (uint64_t)a[4] * a[4];
947  /* VERIFY_BITS(c, 64); */
948  VERIFY_CHECK(c <= 0x9000007B80000008ULL);
949  /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
950  d += (uint64_t)a[9] * a[9];
951  VERIFY_BITS(d, 57);
952  /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
953  u8 = d & M; d >>= 26; c += u8 * R0;
954  VERIFY_BITS(u8, 26);
955  VERIFY_BITS(d, 31);
956  /* VERIFY_BITS(c, 64); */
957  VERIFY_CHECK(c <= 0x9000016FBFFFC2F8ULL);
958  /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
959 
960  r[3] = t3;
961  VERIFY_BITS(r[3], 26);
962  /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
963  r[4] = t4;
964  VERIFY_BITS(r[4], 26);
965  /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
966  r[5] = t5;
967  VERIFY_BITS(r[5], 26);
968  /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
969  r[6] = t6;
970  VERIFY_BITS(r[6], 26);
971  /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
972  r[7] = t7;
973  VERIFY_BITS(r[7], 26);
974  /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
975 
976  r[8] = c & M; c >>= 26; c += u8 * R1;
977  VERIFY_BITS(r[8], 26);
978  VERIFY_BITS(c, 39);
979  /* [d u8 0 0 0 0 0 0 0 0 t9+c-u8*R1 r8-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
980  /* [d 0 0 0 0 0 0 0 0 0 t9+c r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
981  c += d * R0 + t9;
982  VERIFY_BITS(c, 45);
983  /* [d 0 0 0 0 0 0 0 0 0 c-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
984  r[9] = c & (M >> 4); c >>= 22; c += d * (R1 << 4);
985  VERIFY_BITS(r[9], 22);
986  VERIFY_BITS(c, 46);
987  /* [d 0 0 0 0 0 0 0 0 r9+((c-d*R1<<4)<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
988  /* [d 0 0 0 0 0 0 0 -d*R1 r9+(c<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
989  /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
990 
991  d = c * (R0 >> 4) + t0;
992  VERIFY_BITS(d, 56);
993  /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 d-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
994  r[0] = d & M; d >>= 26;
995  VERIFY_BITS(r[0], 26);
996  VERIFY_BITS(d, 30);
997  /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1+d r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
998  d += c * (R1 >> 4) + t1;
999  VERIFY_BITS(d, 53);
1000  VERIFY_CHECK(d <= 0x10000003FFFFBFULL);
1001  /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 d-c*R1>>4 r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
1002  /* [r9 r8 r7 r6 r5 r4 r3 t2 d r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
1003  r[1] = d & M; d >>= 26;
1004  VERIFY_BITS(r[1], 26);
1005  VERIFY_BITS(d, 27);
1006  VERIFY_CHECK(d <= 0x4000000ULL);
1007  /* [r9 r8 r7 r6 r5 r4 r3 t2+d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
1008  d += t2;
1009  VERIFY_BITS(d, 27);
1010  /* [r9 r8 r7 r6 r5 r4 r3 d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
1011  r[2] = d;
1012  VERIFY_BITS(r[2], 27);
1013  /* [r9 r8 r7 r6 r5 r4 r3 r2 r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
1014 }
1015 #endif
1016 
/* Field multiplication wrapper: hands the raw limb arrays of r, a and b to
 * secp256k1_fe_mul_inner, which does all the work.
 * NOTE(review): the signature line is missing from this listing; the cross-reference
 * index at the end suggests this is the body of secp256k1_fe_impl_mul(r, a, b). */
1018  secp256k1_fe_mul_inner(r->n, a->n, b->n);
1019 }
1020 
/* Field squaring wrapper: hands the raw limb arrays of r and a to
 * secp256k1_fe_sqr_inner, which does all the work.
 * NOTE(review): the signature line is missing from this listing; the cross-reference
 * index at the end suggests this is the body of secp256k1_fe_impl_sqr(r, a). */
1022  secp256k1_fe_sqr_inner(r->n, a->n);
1023 }
1024 
/* Conditional move over the ten limbs r->n[0..9]: every limb is rewritten as
 * (r & mask0) | (a & mask1), so the choice between r and a is made with bit masks
 * instead of an if statement.
 * NOTE(review): the signature line is missing from this listing; the cross-reference
 * index at the end suggests this is the body of secp256k1_fe_impl_cmov(r, a, flag). */
1026  uint32_t mask0, mask1;
/* flag is copied into a volatile local before it is turned into a mask.
 * NOTE(review): presumably this keeps the compiler from rewriting the masking as a
 * branch on flag - confirm. */
1027  volatile int vflag = flag;
/* Project macro applied to the whole r->n array; its effect is not visible in this listing. */
1028  SECP256K1_CHECKMEM_CHECK_VERIFY(r->n, sizeof(r->n));
/* flag == 0: mask0 = 0xFFFFFFFF and mask1 = 0, so r is kept.
 * flag == 1: the addition wraps to mask0 = 0 and mask1 = 0xFFFFFFFF, so r takes a.
 * Any other value yields a mixed mask; assumes callers pass only 0 or 1 - TODO confirm. */
1029  mask0 = vflag + ~((uint32_t)0);
1030  mask1 = ~mask0;
1031  r->n[0] = (r->n[0] & mask0) | (a->n[0] & mask1);
1032  r->n[1] = (r->n[1] & mask0) | (a->n[1] & mask1);
1033  r->n[2] = (r->n[2] & mask0) | (a->n[2] & mask1);
1034  r->n[3] = (r->n[3] & mask0) | (a->n[3] & mask1);
1035  r->n[4] = (r->n[4] & mask0) | (a->n[4] & mask1);
1036  r->n[5] = (r->n[5] & mask0) | (a->n[5] & mask1);
1037  r->n[6] = (r->n[6] & mask0) | (a->n[6] & mask1);
1038  r->n[7] = (r->n[7] & mask0) | (a->n[7] & mask1);
1039  r->n[8] = (r->n[8] & mask0) | (a->n[8] & mask1);
1040  r->n[9] = (r->n[9] & mask0) | (a->n[9] & mask1);
1041 }
1042 
/* Halves the value held in r->n in place, without branching on the data:
 *   1. if the lowest limb is odd, a fixed constant is added so the total becomes even;
 *   2. the ten-limb number is shifted right by one bit, carrying bit 0 of each limb
 *      into bit 25 of the limb below it.
 * NOTE(review): the signature line is missing from this listing; the cross-reference
 * index at the end suggests this is the body of secp256k1_fe_impl_half(r). */
1044  uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
1045  t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];
1046  uint32_t one = (uint32_t)1;
/* mask is 0x3FFFFFF (26 one-bits) when t0 is odd and 0 when t0 is even. */
1047  uint32_t mask = -(t0 & one) >> 6;
1048 
1049  /* Bounds analysis (over the rationals).
1050  *
1051  * Let m = r->magnitude
1052  * C = 0x3FFFFFFUL * 2
1053  * D = 0x03FFFFFUL * 2
1054  *
1055  * Initial bounds: t0..t8 <= C * m
1056  * t9 <= D * m
1057  */
1058 
/* When mask is set, the ten addends (0x3FFFC2F, 0x3FFFFBF, seven times 0x3FFFFFF, and
 * 0x03FFFFF for the top limb) are the base-2^26 limbs of 2^256 - 2^32 - 977, i.e. the
 * field modulus; adding it leaves the value unchanged modulo the field and, being odd,
 * flips the parity of t0. When mask is 0 nothing is added. */
1059  t0 += 0x3FFFC2FUL & mask;
1060  t1 += 0x3FFFFBFUL & mask;
1061  t2 += mask;
1062  t3 += mask;
1063  t4 += mask;
1064  t5 += mask;
1065  t6 += mask;
1066  t7 += mask;
1067  t8 += mask;
1068  t9 += mask >> 4;
1069 
/* After the conditional addition the lowest limb must be even, so the shift below is exact. */
1070  VERIFY_CHECK((t0 & one) == 0);
1071 
1072  /* t0..t8: added <= C/2
1073  * t9: added <= D/2
1074  *
1075  * Current bounds: t0..t8 <= C * (m + 1/2)
1076  * t9 <= D * (m + 1/2)
1077  */
1078 
/* One-bit right shift across limbs: bit 0 of limb i+1 has weight 2^26 relative to limb i,
 * so after halving it lands on bit 25 of limb i. */
1079  r->n[0] = (t0 >> 1) + ((t1 & one) << 25);
1080  r->n[1] = (t1 >> 1) + ((t2 & one) << 25);
1081  r->n[2] = (t2 >> 1) + ((t3 & one) << 25);
1082  r->n[3] = (t3 >> 1) + ((t4 & one) << 25);
1083  r->n[4] = (t4 >> 1) + ((t5 & one) << 25);
1084  r->n[5] = (t5 >> 1) + ((t6 & one) << 25);
1085  r->n[6] = (t6 >> 1) + ((t7 & one) << 25);
1086  r->n[7] = (t7 >> 1) + ((t8 & one) << 25);
1087  r->n[8] = (t8 >> 1) + ((t9 & one) << 25);
1088  r->n[9] = (t9 >> 1);
1089 
1090  /* t0..t8: shifted right and added <= C/4 + 1/2
1091  * t9: shifted right
1092  *
1093  * Current bounds: t0..t8 <= C * (m/2 + 1/2)
1094  * t9 <= D * (m/2 + 1/4)
1095  *
1096  * Therefore the output magnitude (M) has to be set such that:
1097  * t0..t8: C * M >= C * (m/2 + 1/2)
1098  * t9: D * M >= D * (m/2 + 1/4)
1099  *
1100  * It suffices for all limbs that, for any input magnitude m:
1101  * M >= m/2 + 1/2
1102  *
1103  * and since we want the smallest such integer value for M:
1104  * M == floor(m/2) + 1
1105  */
1106 }
1107 
/* Conditional move over the eight words r->n[0..7], using the same mask construction
 * as the ten-limb conditional move: each word becomes (r & mask0) | (a & mask1).
 * NOTE(review): the signature line is missing from this listing; the cross-reference
 * index at the end suggests this is the body of secp256k1_fe_storage_cmov(r, a, flag). */
1109  uint32_t mask0, mask1;
/* NOTE(review): the volatile copy presumably keeps the compiler from turning the
 * masking into a branch on flag - confirm. */
1110  volatile int vflag = flag;
/* Project macro applied to the whole r->n array; its effect is not visible in this listing. */
1111  SECP256K1_CHECKMEM_CHECK_VERIFY(r->n, sizeof(r->n));
/* flag == 0: mask0 = 0xFFFFFFFF, mask1 = 0 (r kept).
 * flag == 1: mask0 wraps to 0, mask1 = 0xFFFFFFFF (r takes a).
 * Assumes flag is only ever 0 or 1 - TODO confirm. */
1112  mask0 = vflag + ~((uint32_t)0);
1113  mask1 = ~mask0;
1114  r->n[0] = (r->n[0] & mask0) | (a->n[0] & mask1);
1115  r->n[1] = (r->n[1] & mask0) | (a->n[1] & mask1);
1116  r->n[2] = (r->n[2] & mask0) | (a->n[2] & mask1);
1117  r->n[3] = (r->n[3] & mask0) | (a->n[3] & mask1);
1118  r->n[4] = (r->n[4] & mask0) | (a->n[4] & mask1);
1119  r->n[5] = (r->n[5] & mask0) | (a->n[5] & mask1);
1120  r->n[6] = (r->n[6] & mask0) | (a->n[6] & mask1);
1121  r->n[7] = (r->n[7] & mask0) | (a->n[7] & mask1);
1122 }
1123 
/* Packs the ten base-2^26 limbs a->n[0..9] into eight words r->n[0..7] by
 * concatenating the limbs bit-for-bit, lowest limb first.
 * No masking is applied, so the packing is only lossless when a->n[0..8] < 2^26 and
 * a->n[9] < 2^22; assumes the caller passes such an element - TODO confirm.
 * NOTE(review): the signature line is missing from this listing; the cross-reference
 * index at the end suggests this is the body of secp256k1_fe_impl_to_storage(r, a). */
1125  r->n[0] = a->n[0] | a->n[1] << 26;
1126  r->n[1] = a->n[1] >> 6 | a->n[2] << 20;
1127  r->n[2] = a->n[2] >> 12 | a->n[3] << 14;
1128  r->n[3] = a->n[3] >> 18 | a->n[4] << 8;
/* Word 4 is the only one that takes bits from three limbs: the top 2 bits of limb 4,
 * all 26 bits of limb 5, and the low 4 bits of limb 6. */
1129  r->n[4] = a->n[4] >> 24 | a->n[5] << 2 | a->n[6] << 28;
1130  r->n[5] = a->n[6] >> 4 | a->n[7] << 22;
1131  r->n[6] = a->n[7] >> 10 | a->n[8] << 16;
1132  r->n[7] = a->n[8] >> 16 | a->n[9] << 10;
1133 }
1134 
/* Unpacks eight words a->n[0..7] into ten limbs r->n[0..9]: limbs 0..8 are masked to
 * 26 bits (0x3FFFFFF) and limb 9 receives the remaining top 22 bits. This is the
 * inverse bit layout of the 10-limb to 8-word packing.
 * NOTE(review): the signature line is missing from this listing; the cross-reference
 * index at the end suggests this is the body of secp256k1_fe_impl_from_storage(r, a). */
1136  r->n[0] = a->n[0] & 0x3FFFFFFUL;
1137  r->n[1] = a->n[0] >> 26 | ((a->n[1] << 6) & 0x3FFFFFFUL);
1138  r->n[2] = a->n[1] >> 20 | ((a->n[2] << 12) & 0x3FFFFFFUL);
1139  r->n[3] = a->n[2] >> 14 | ((a->n[3] << 18) & 0x3FFFFFFUL);
1140  r->n[4] = a->n[3] >> 8 | ((a->n[4] << 24) & 0x3FFFFFFUL);
/* Limb 5 lies entirely inside word 4 (bits 2..27), so no second word is needed. */
1141  r->n[5] = (a->n[4] >> 2) & 0x3FFFFFFUL;
1142  r->n[6] = a->n[4] >> 28 | ((a->n[5] << 4) & 0x3FFFFFFUL);
1143  r->n[7] = a->n[5] >> 22 | ((a->n[6] << 10) & 0x3FFFFFFUL);
1144  r->n[8] = a->n[6] >> 16 | ((a->n[7] << 16) & 0x3FFFFFFUL);
1145  r->n[9] = a->n[7] >> 10;
1146 }
1147 
/* Re-slices nine 30-bit limbs a->v[0..8] into ten 26-bit limbs r->n[0..9].
 * NOTE(review): the signature line is missing from this listing; the cross-reference
 * index at the end suggests this is the body of secp256k1_fe_from_signed30(r, a). */
/* M26 = 0x3FFFFFF, the 26-bit limb mask. */
1149  const uint32_t M26 = UINT32_MAX >> 6;
1150  const uint32_t a0 = a->v[0], a1 = a->v[1], a2 = a->v[2], a3 = a->v[3], a4 = a->v[4],
1151  a5 = a->v[5], a6 = a->v[6], a7 = a->v[7], a8 = a->v[8];
1152 
1153  /* The output from secp256k1_modinv32{_var} should be normalized to range [0,modulus), and
1154  * have limbs in [0,2^30). The modulus is < 2^256, so the top limb must be below 2^(256-30*8).
1155  */
1156  VERIFY_CHECK(a0 >> 30 == 0);
1157  VERIFY_CHECK(a1 >> 30 == 0);
1158  VERIFY_CHECK(a2 >> 30 == 0);
1159  VERIFY_CHECK(a3 >> 30 == 0);
1160  VERIFY_CHECK(a4 >> 30 == 0);
1161  VERIFY_CHECK(a5 >> 30 == 0);
1162  VERIFY_CHECK(a6 >> 30 == 0);
1163  VERIFY_CHECK(a7 >> 30 == 0);
1164  VERIFY_CHECK(a8 >> 16 == 0);
1165 
1166  r->n[0] = a0 & M26;
1167  r->n[1] = (a0 >> 26 | a1 << 4) & M26;
1168  r->n[2] = (a1 >> 22 | a2 << 8) & M26;
1169  r->n[3] = (a2 >> 18 | a3 << 12) & M26;
1170  r->n[4] = (a3 >> 14 | a4 << 16) & M26;
1171  r->n[5] = (a4 >> 10 | a5 << 20) & M26;
1172  r->n[6] = (a5 >> 6 | a6 << 24) & M26;
/* Limb 7 lies entirely inside a6 (bits 2..27). */
1173  r->n[7] = (a6 >> 2 ) & M26;
1174  r->n[8] = (a6 >> 28 | a7 << 2) & M26;
/* The top limb is not masked: with a7 < 2^30 and a8 < 2^16 (checked above) it fits in 22 bits. */
1175  r->n[9] = (a7 >> 24 | a8 << 6);
1176 }
1177 
/* Re-slices ten 26-bit limbs a->n[0..9] into nine 30-bit limbs r->v[0..8]; it is the
 * reverse re-slicing of the 30-bit to 26-bit conversion.
 * No range check is made on the input limbs here; assumes a->n[0..8] < 2^26 and
 * a->n[9] < 2^22, otherwise v[8] would receive more than 16 bits - TODO confirm callers.
 * NOTE(review): the signature line is missing from this listing; the cross-reference
 * index at the end suggests this is the body of secp256k1_fe_to_signed30(r, a). */
/* M30 = 0x3FFFFFFF, the 30-bit limb mask. */
1179  const uint32_t M30 = UINT32_MAX >> 2;
/* The input limbs are widened to 64 bits before the shifts below. */
1180  const uint64_t a0 = a->n[0], a1 = a->n[1], a2 = a->n[2], a3 = a->n[3], a4 = a->n[4],
1181  a5 = a->n[5], a6 = a->n[6], a7 = a->n[7], a8 = a->n[8], a9 = a->n[9];
1182 
1183  r->v[0] = (a0 | a1 << 26) & M30;
1184  r->v[1] = (a1 >> 4 | a2 << 22) & M30;
1185  r->v[2] = (a2 >> 8 | a3 << 18) & M30;
1186  r->v[3] = (a3 >> 12 | a4 << 14) & M30;
1187  r->v[4] = (a4 >> 16 | a5 << 10) & M30;
1188  r->v[5] = (a5 >> 20 | a6 << 6) & M30;
/* v[6] is the only output limb fed by three inputs: the top 2 bits of a6, all of a7,
 * and the low 2 bits of a8. */
1189  r->v[6] = (a6 >> 24 | a7 << 2
1190  | a8 << 28) & M30;
1191  r->v[7] = (a8 >> 2 | a9 << 24) & M30;
1192  r->v[8] = a9 >> 6;
1193 }
1194 
/* Initializer of a constant whose declaration line is missing from this listing
 * (presumably secp256k1_const_modinfo_fe, of type secp256k1_modinv32_modinfo, per the
 * cross-reference index at the end).
 * First member: nine limbs of weight 2^(30*i); -0x3D1 - 4*2^30 + 65536*2^240
 * equals 2^256 - 2^32 - 977. */
1196  {{-0x3D1, -4, 0, 0, 0, 0, 0, 0, 65536}},
/* Second member: 0x2DDACACF times that modulus is congruent to 1 modulo 2^30.
 * NOTE(review): presumably the "modular inverse mod 2^30" field of the modinfo struct -
 * confirm against its definition. */
1197  0x2DDACACFL
1198 };
1199 
/* Visible part of an inversion routine: works on a local copy of *x, normalizes it and
 * converts it to the signed 30-bit limb form in s.
 * NOTE(review): this listing omits the signature line and numbered lines 1202, 1206 and
 * 1207, so the declaration of s and whatever happens after the conversion are not shown.
 * Presumably this is secp256k1_fe_impl_inv(r, x), which would then call
 * secp256k1_modinv32 and secp256k1_fe_from_signed30 (both named in the cross-reference
 * index) - confirm against the real source. */
1201  secp256k1_fe tmp = *x;
1203 
1204  secp256k1_fe_normalize(&tmp);
1205  secp256k1_fe_to_signed30(&s, &tmp);
1208 }
1209 
/* Visible part of a second inversion routine: works on a local copy of *x and converts
 * it to the signed 30-bit limb form in s.
 * NOTE(review): this listing omits the signature line and numbered lines 1212, 1214,
 * 1216 and 1217, so the declaration of s, any step before the conversion and everything
 * after it are not shown. Presumably this is secp256k1_fe_impl_inv_var(r, x), built
 * around secp256k1_modinv32_var (named in the cross-reference index) - confirm against
 * the real source. */
1211  secp256k1_fe tmp = *x;
1213 
1215  secp256k1_fe_to_signed30(&s, &tmp);
1218 }
1219 
/* Square test on a local copy of *x. Returns 1 when the copy is zero; otherwise the
 * result comes from the value jac, with a square-root computation as fallback when
 * jac is 0.
 * NOTE(review): this listing omits the signature line and numbered lines 1222, 1226 and
 * 1230, so the declaration of s, any step between the copy and the zero test, and the
 * assignment of jac are not shown. Presumably this is
 * secp256k1_fe_impl_is_square_var(x), with jac coming from secp256k1_jacobi32_maybe_var
 * as the comments below indicate - confirm against the real source. */
1221  secp256k1_fe tmp;
1223  int jac, ret;
1224 
1225  tmp = *x;
1227  /* secp256k1_jacobi32_maybe_var cannot deal with input 0. */
/* Zero is reported as a square (return value 1) before any conversion is attempted. */
1228  if (secp256k1_fe_is_zero(&tmp)) return 1;
1229  secp256k1_fe_to_signed30(&s, &tmp);
1231  if (jac == 0) {
1232  /* secp256k1_jacobi32_maybe_var failed to compute the Jacobi symbol. Fall back
1233  * to computing a square root. This should be extremely rare with random
1234  * input (except in VERIFY mode, where a lower iteration count is used). */
/* Only the return value of secp256k1_fe_sqrt is used; the root written to dummy is discarded. */
1235  secp256k1_fe dummy;
1236  ret = secp256k1_fe_sqrt(&dummy, &tmp);
1237  } else {
/* jac is non-zero in this branch, so ret is 1 for a positive jac and 0 for a negative one. */
1238  ret = jac >= 0;
1239  }
1240  return ret;
1241 }
1242 
1243 #endif /* SECP256K1_FIELD_REPR_IMPL_H */
#define VERIFY_CHECK(cond)
Definition: util.h:143
static SECP256K1_INLINE void secp256k1_fe_impl_half(secp256k1_fe *r)
static SECP256K1_INLINE void secp256k1_fe_storage_cmov(secp256k1_fe_storage *r, const secp256k1_fe_storage *a, int flag)
This field implementation represents the value as 10 uint32_t limbs in base 2^26. ...
Definition: field_10x26.h:14
int ret
static SECP256K1_INLINE void secp256k1_fe_impl_cmov(secp256k1_fe *r, const secp256k1_fe *a, int flag)
static int secp256k1_fe_sqrt(secp256k1_fe *SECP256K1_RESTRICT r, const secp256k1_fe *SECP256K1_RESTRICT a)
Compute a square root of a field element.
static SECP256K1_INLINE void secp256k1_fe_impl_sqr(secp256k1_fe *r, const secp256k1_fe *a)
static int secp256k1_fe_impl_normalizes_to_zero_var(const secp256k1_fe *r)
#define secp256k1_fe_is_zero
Definition: field.h:85
static SECP256K1_INLINE void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t *a)
static int secp256k1_jacobi32_maybe_var(const secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo)
static int secp256k1_fe_impl_normalizes_to_zero(const secp256k1_fe *r)
static SECP256K1_INLINE void secp256k1_fe_impl_add_int(secp256k1_fe *r, int a)
static SECP256K1_INLINE void secp256k1_fe_impl_set_int(secp256k1_fe *r, int a)
static void secp256k1_fe_impl_set_b32_mod(secp256k1_fe *r, const unsigned char *a)
static void secp256k1_fe_impl_get_bounds(secp256k1_fe *r, int m)
static void secp256k1_fe_impl_normalize(secp256k1_fe *r)
static SECP256K1_INLINE int secp256k1_fe_impl_is_zero(const secp256k1_fe *a)
static void secp256k1_fe_impl_normalize_weak(secp256k1_fe *r)
static void secp256k1_fe_impl_inv_var(secp256k1_fe *r, const secp256k1_fe *x)
static SECP256K1_INLINE void secp256k1_fe_impl_from_storage(secp256k1_fe *r, const secp256k1_fe_storage *a)
static int secp256k1_fe_impl_is_square_var(const secp256k1_fe *x)
#define SECP256K1_INLINE
Definition: util.h:48
static SECP256K1_INLINE void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t *a, const uint32_t *SECP256K1_RESTRICT b)
static int secp256k1_fe_impl_cmp_var(const secp256k1_fe *a, const secp256k1_fe *b)
static SECP256K1_INLINE void secp256k1_fe_impl_mul(secp256k1_fe *r, const secp256k1_fe *a, const secp256k1_fe *SECP256K1_RESTRICT b)
static const secp256k1_modinv32_modinfo secp256k1_const_modinfo_fe
static void secp256k1_fe_impl_normalize_var(secp256k1_fe *r)
static void secp256k1_modinv32_var(secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo)
static SECP256K1_INLINE int secp256k1_fe_impl_is_odd(const secp256k1_fe *a)
static void secp256k1_fe_from_signed30(secp256k1_fe *r, const secp256k1_modinv32_signed30 *a)
#define SECP256K1_RESTRICT
Definition: util.h:176
static SECP256K1_INLINE void secp256k1_fe_impl_negate_unchecked(secp256k1_fe *r, const secp256k1_fe *a, int m)
static void secp256k1_fe_impl_to_storage(secp256k1_fe_storage *r, const secp256k1_fe *a)
uint32_t n[10]
Definition: field_10x26.h:22
static void secp256k1_fe_impl_get_b32(unsigned char *r, const secp256k1_fe *a)
Convert a field element to a 32-byte big endian value.
static void secp256k1_fe_impl_inv(secp256k1_fe *r, const secp256k1_fe *x)
static void secp256k1_modinv32(secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo)
#define VERIFY_BITS(x, n)
#define secp256k1_fe_normalize
Definition: field.h:78
#define secp256k1_fe_normalize_var
Definition: field.h:80
static SECP256K1_INLINE void secp256k1_fe_impl_add(secp256k1_fe *r, const secp256k1_fe *a)
static void secp256k1_fe_to_signed30(secp256k1_modinv32_signed30 *r, const secp256k1_fe *a)
#define SECP256K1_CHECKMEM_CHECK_VERIFY(p, len)
Definition: checkmem.h:92
unsigned char u8
static SECP256K1_INLINE void secp256k1_fe_impl_clear(secp256k1_fe *a)
static SECP256K1_INLINE void secp256k1_fe_impl_mul_int_unchecked(secp256k1_fe *r, int a)
static int secp256k1_fe_impl_set_b32_limit(secp256k1_fe *r, const unsigned char *a)