Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(56)

Side by Side Diff: mozilla/security/nss/lib/freebl/ecl/ecp_256_32.c

Issue 14249009: Change the NSS and NSPR source tree to the new directory structure to be (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/nss/
Patch Set: Created 7 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 /* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4
5 /* A 32-bit implementation of the NIST P-256 elliptic curve. */
6
7 #include <string.h>
8
9 #include "prtypes.h"
10 #include "mpi.h"
11 #include "mpi-priv.h"
12 #include "ecp.h"
13 #include "secport.h"
14
15 typedef PRUint8 u8;
16 typedef PRUint32 u32;
17 typedef PRUint64 u64;
18
19 /* Our field elements are represented as nine, unsigned 32-bit words. Freebl's
20 * MPI library calls them digits, but here they are called limbs, which is
21 * GMP's terminology.
22 *
23 * The value of an felem (field element) is:
24 * x[0] + (x[1] * 2**29) + (x[2] * 2**57) + ... + (x[8] * 2**228)
25 *
26 * That is, each limb is alternately 29 or 28-bits wide in little-endian
27 * order.
28 *
29 * This means that an felem hits 2**257, rather than 2**256 as we would like. A
30 * 28, 29, ... pattern would cause us to hit 2**256, but that causes problems
31 * when multiplying as terms end up one bit short of a limb which would require
32 * much bit-shifting to correct.
33 *
34 * Finally, the values stored in an felem are in Montgomery form. So the value
35 * |y| is stored as (y*R) mod p, where p is the P-256 prime and R is 2**257.
36 */
37 typedef u32 limb;
38 #define NLIMBS 9
39 typedef limb felem[NLIMBS];
40
41 static const limb kBottom28Bits = 0xfffffff;
42 static const limb kBottom29Bits = 0x1fffffff;
43
44 /* kOne is the number 1 as an felem. It's 2**257 mod p split up into 29 and
45 * 28-bit words.
46 */
47 static const felem kOne = {
48 2, 0, 0, 0xffff800,
49 0x1fffffff, 0xfffffff, 0x1fbfffff, 0x1ffffff,
50 0
51 };
52 static const felem kZero = {0};
53 static const felem kP = {
54 0x1fffffff, 0xfffffff, 0x1fffffff, 0x3ff,
55 0, 0, 0x200000, 0xf000000,
56 0xfffffff
57 };
58 static const felem k2P = {
59 0x1ffffffe, 0xfffffff, 0x1fffffff, 0x7ff,
60 0, 0, 0x400000, 0xe000000,
61 0x1fffffff
62 };
63
64 /* kPrecomputed contains precomputed values to aid the calculation of scalar
65 * multiples of the base point, G. It's actually two, equal length, tables
66 * concatenated.
67 *
68 * The first table contains (x,y) felem pairs for 16 multiples of the base
69 * point, G.
70 *
71 * Index | Index (binary) | Value
72 * 0 | 0000 | 0G (all zeros, omitted)
73 * 1 | 0001 | G
74 * 2 | 0010 | 2**64G
75 * 3 | 0011 | 2**64G + G
76 * 4 | 0100 | 2**128G
77 * 5 | 0101 | 2**128G + G
78 * 6 | 0110 | 2**128G + 2**64G
79 * 7 | 0111 | 2**128G + 2**64G + G
80 * 8 | 1000 | 2**192G
81 * 9 | 1001 | 2**192G + G
82 * 10 | 1010 | 2**192G + 2**64G
83 * 11 | 1011 | 2**192G + 2**64G + G
84 * 12 | 1100 | 2**192G + 2**128G
85 * 13 | 1101 | 2**192G + 2**128G + G
86 * 14 | 1110 | 2**192G + 2**128G + 2**64G
87 * 15 | 1111 | 2**192G + 2**128G + 2**64G + G
88 *
89 * The second table follows the same style, but the terms are 2**32G,
90 * 2**96G, 2**160G, 2**224G.
91 *
92 * This is ~2KB of data.
93 */
94 static const limb kPrecomputed[NLIMBS * 2 * 15 * 2] = {
95 0x11522878, 0xe730d41, 0xdb60179, 0x4afe2ff, 0x12883add, 0xcaddd88, 0x119e7e dc, 0xd4a6eab, 0x3120bee,
96 0x1d2aac15, 0xf25357c, 0x19e45cdd, 0x5c721d0, 0x1992c5a5, 0xa237487, 0x154ba 21, 0x14b10bb, 0xae3fe3,
97 0xd41a576, 0x922fc51, 0x234994f, 0x60b60d3, 0x164586ae, 0xce95f18, 0x1fe4907 3, 0x3fa36cc, 0x5ebcd2c,
98 0xb402f2f, 0x15c70bf, 0x1561925c, 0x5a26704, 0xda91e90, 0xcdc1c7f, 0x1ea1244 6, 0xe1ade1e, 0xec91f22,
99 0x26f7778, 0x566847e, 0xa0bec9e, 0x234f453, 0x1a31f21a, 0xd85e75c, 0x56c7109 , 0xa267a00, 0xb57c050,
100 0x98fb57, 0xaa837cc, 0x60c0792, 0xcfa5e19, 0x61bab9e, 0x589e39b, 0xa324c5, 0 x7d6dee7, 0x2976e4b,
101 0x1fc4124a, 0xa8c244b, 0x1ce86762, 0xcd61c7e, 0x1831c8e0, 0x75774e1, 0x1d96a 5a9, 0x843a649, 0xc3ab0fa,
102 0x6e2e7d5, 0x7673a2a, 0x178b65e8, 0x4003e9b, 0x1a1f11c2, 0x7816ea, 0xf643e11 , 0x58c43df, 0xf423fc2,
103 0x19633ffa, 0x891f2b2, 0x123c231c, 0x46add8c, 0x54700dd, 0x59e2b17, 0x172db4 0f, 0x83e277d, 0xb0dd609,
104 0xfd1da12, 0x35c6e52, 0x19ede20c, 0xd19e0c0, 0x97d0f40, 0xb015b19, 0x449e3f5 , 0xe10c9e, 0x33ab581,
105 0x56a67ab, 0x577734d, 0x1dddc062, 0xc57b10d, 0x149b39d, 0x26a9e7b, 0xc35df9f , 0x48764cd, 0x76dbcca,
106 0xca4b366, 0xe9303ab, 0x1a7480e7, 0x57e9e81, 0x1e13eb50, 0xf466cf3, 0x6f16b2 0, 0x4ba3173, 0xc168c33,
107 0x15cb5439, 0x6a38e11, 0x73658bd, 0xb29564f, 0x3f6dc5b, 0x53b97e, 0x1322c4c0 , 0x65dd7ff, 0x3a1e4f6,
108 0x14e614aa, 0x9246317, 0x1bc83aca, 0xad97eed, 0xd38ce4a, 0xf82b006, 0x341f07 7, 0xa6add89, 0x4894acd,
109 0x9f162d5, 0xf8410ef, 0x1b266a56, 0xd7f223, 0x3e0cb92, 0xe39b672, 0x6a2901a, 0x69a8556, 0x7e7c0,
110 0x9b7d8d3, 0x309a80, 0x1ad05f7f, 0xc2fb5dd, 0xcbfd41d, 0x9ceb638, 0x1051825c , 0xda0cf5b, 0x812e881,
111 0x6f35669, 0x6a56f2c, 0x1df8d184, 0x345820, 0x1477d477, 0x1645db1, 0xbe80c51 , 0xc22be3e, 0xe35e65a,
112 0x1aeb7aa0, 0xc375315, 0xf67bc99, 0x7fdd7b9, 0x191fc1be, 0x61235d, 0x2c184e9 , 0x1c5a839, 0x47a1e26,
113 0xb7cb456, 0x93e225d, 0x14f3c6ed, 0xccc1ac9, 0x17fe37f3, 0x4988989, 0x1a90c5 02, 0x2f32042, 0xa17769b,
114 0xafd8c7c, 0x8191c6e, 0x1dcdb237, 0x16200c0, 0x107b32a1, 0x66c08db, 0x10d06a 02, 0x3fc93, 0x5620023,
115 0x16722b27, 0x68b5c59, 0x270fcfc, 0xfad0ecc, 0xe5de1c2, 0xeab466b, 0x2fc513c , 0x407f75c, 0xbaab133,
116 0x9705fe9, 0xb88b8e7, 0x734c993, 0x1e1ff8f, 0x19156970, 0xabd0f00, 0x10469ea 7, 0x3293ac0, 0xcdc98aa,
117 0x1d843fd, 0xe14bfe8, 0x15be825f, 0x8b5212, 0xeb3fb67, 0x81cbd29, 0xbc62f16, 0x2b6fcc7, 0xf5a4e29,
118 0x13560b66, 0xc0b6ac2, 0x51ae690, 0xd41e271, 0xf3e9bd4, 0x1d70aab, 0x1029f72 , 0x73e1c35, 0xee70fbc,
119 0xad81baf, 0x9ecc49a, 0x86c741e, 0xfe6be30, 0x176752e7, 0x23d416, 0x1f83de85 , 0x27de188, 0x66f70b8,
120 0x181cd51f, 0x96b6e4c, 0x188f2335, 0xa5df759, 0x17a77eb6, 0xfeb0e73, 0x154ae 914, 0x2f3ec51, 0x3826b59,
121 0xb91f17d, 0x1c72949, 0x1362bf0a, 0xe23fddf, 0xa5614b0, 0xf7d8f, 0x79061, 0x 823d9d2, 0x8213f39,
122 0x1128ae0b, 0xd095d05, 0xb85c0c2, 0x1ecb2ef, 0x24ddc84, 0xe35e901, 0x18411a4 a, 0xf5ddc3d, 0x3786689,
123 0x52260e8, 0x5ae3564, 0x542b10d, 0x8d93a45, 0x19952aa4, 0x996cc41, 0x1051a72 9, 0x4be3499, 0x52b23aa,
124 0x109f307e, 0x6f5b6bb, 0x1f84e1e7, 0x77a0cfa, 0x10c4df3f, 0x25a02ea, 0xb0480 35, 0xe31de66, 0xc6ecaa3,
125 0x28ea335, 0x2886024, 0x1372f020, 0xf55d35, 0x15e4684c, 0xf2a9e17, 0x1a4a752 9, 0xcb7beb1, 0xb2a78a1,
126 0x1ab21f1f, 0x6361ccf, 0x6c9179d, 0xb135627, 0x1267b974, 0x4408bad, 0x1cbff6 58, 0xe3d6511, 0xc7d76f,
127 0x1cc7a69, 0xe7ee31b, 0x54fab4f, 0x2b914f, 0x1ad27a30, 0xcd3579e, 0xc50124c, 0x50daa90, 0xb13f72,
128 0xb06aa75, 0x70f5cc6, 0x1649e5aa, 0x84a5312, 0x329043c, 0x41c4011, 0x13d3241 1, 0xb04a838, 0xd760d2d,
129 0x1713b532, 0xbaa0c03, 0x84022ab, 0x6bcf5c1, 0x2f45379, 0x18ae070, 0x18c9e11 e, 0x20bca9a, 0x66f496b,
130 0x3eef294, 0x67500d2, 0xd7f613c, 0x2dbbeb, 0xb741038, 0xe04133f, 0x1582968d, 0xbe985f7, 0x1acbc1a,
131 0x1a6a939f, 0x33e50f6, 0xd665ed4, 0xb4b7bd6, 0x1e5a3799, 0x6b33847, 0x17fa56 ff, 0x65ef930, 0x21dc4a,
132 0x2b37659, 0x450fe17, 0xb357b65, 0xdf5efac, 0x15397bef, 0x9d35a7f, 0x112ac15 f, 0x624e62e, 0xa90ae2f,
133 0x107eecd2, 0x1f69bbe, 0x77d6bce, 0x5741394, 0x13c684fc, 0x950c910, 0x725522 b, 0xdc78583, 0x40eeabb,
134 0x1fde328a, 0xbd61d96, 0xd28c387, 0x9e77d89, 0x12550c40, 0x759cb7d, 0x367ef3 4, 0xae2a960, 0x91b8bdc,
135 0x93462a9, 0xf469ef, 0xb2e9aef, 0xd2ca771, 0x54e1f42, 0x7aaa49, 0x6316abb, 0 x2413c8e, 0x5425bf9,
136 0x1bed3e3a, 0xf272274, 0x1f5e7326, 0x6416517, 0xea27072, 0x9cedea7, 0x6e7633 , 0x7c91952, 0xd806dce,
137 0x8e2a7e1, 0xe421e1a, 0x418c9e1, 0x1dbc890, 0x1b395c36, 0xa1dc175, 0x1dc4ef7 3, 0x8956f34, 0xe4b5cf2,
138 0x1b0d3a18, 0x3194a36, 0x6c2641f, 0xe44124c, 0xa2f4eaa, 0xa8c25ba, 0xf927ed7 , 0x627b614, 0x7371cca,
139 0xba16694, 0x417bc03, 0x7c0a7e3, 0x9c35c19, 0x1168a205, 0x8b6b00d, 0x10e3edc 9, 0x9c19bf2, 0x5882229,
140 0x1b2b4162, 0xa5cef1a, 0x1543622b, 0x9bd433e, 0x364e04d, 0x7480792, 0x5c9b5b 3, 0xe85ff25, 0x408ef57,
141 0x1814cfa4, 0x121b41b, 0xd248a0f, 0x3b05222, 0x39bb16a, 0xc75966d, 0xa038113 , 0xa4a1769, 0x11fbc6c,
142 0x917e50e, 0xeec3da8, 0x169d6eac, 0x10c1699, 0xa416153, 0xf724912, 0x15cd60b 7, 0x4acbad9, 0x5efc5fa,
143 0xf150ed7, 0x122b51, 0x1104b40a, 0xcb7f442, 0xfbb28ff, 0x6ac53ca, 0x196142cc , 0x7bf0fa9, 0x957651,
144 0x4e0f215, 0xed439f8, 0x3f46bd5, 0x5ace82f, 0x110916b6, 0x6db078, 0xffd7d57, 0xf2ecaac, 0xca86dec,
145 0x15d6b2da, 0x965ecc9, 0x1c92b4c2, 0x1f3811, 0x1cb080f5, 0x2d8b804, 0x19d1c1 2d, 0xf20bd46, 0x1951fa7,
146 0xa3656c3, 0x523a425, 0xfcd0692, 0xd44ddc8, 0x131f0f5b, 0xaf80e4a, 0xcd9fc74 , 0x99bb618, 0x2db944c,
147 0xa673090, 0x1c210e1, 0x178c8d23, 0x1474383, 0x10b8743d, 0x985a55b, 0x2e7477 9, 0x576138, 0x9587927,
148 0x133130fa, 0xbe05516, 0x9f4d619, 0xbb62570, 0x99ec591, 0xd9468fe, 0x1d07782 d, 0xfc72e0b, 0x701b298,
149 0x1863863b, 0x85954b8, 0x121a0c36, 0x9e7fedf, 0xf64b429, 0x9b9d71e, 0x14e2f5 d8, 0xf858d3a, 0x942eea8,
150 0xda5b765, 0x6edafff, 0xa9d18cc, 0xc65e4ba, 0x1c747e86, 0xe4ea915, 0x1981d7a 1, 0x8395659, 0x52ed4e2,
151 0x87d43b7, 0x37ab11b, 0x19d292ce, 0xf8d4692, 0x18c3053f, 0x8863e13, 0x4c146c 0, 0x6bdf55a, 0x4e4457d,
152 0x16152289, 0xac78ec2, 0x1a59c5a2, 0x2028b97, 0x71c2d01, 0x295851f, 0x404747 b, 0x878558d, 0x7d29aa4,
153 0x13d8341f, 0x8daefd7, 0x139c972d, 0x6b7ea75, 0xd4a9dde, 0xff163d8, 0x81d55d 7, 0xa5bef68, 0xb7b30d8,
154 0xbe73d6f, 0xaa88141, 0xd976c81, 0x7e7a9cc, 0x18beb771, 0xd773cbd, 0x13f5195 1, 0x9d0c177, 0x1c49a78,
155 };
156
157 /* Field element operations:
158 */
159
160 /* NON_ZERO_TO_ALL_ONES returns:
161 * 0xffffffff for 0 < x <= 2**31
162 * 0 for x == 0 or x > 2**31.
163 *
164 * x must be a u32 or an equivalent type such as limb.
165 */
166 #define NON_ZERO_TO_ALL_ONES(x) ((((u32)(x) - 1) >> 31) - 1)
167
168 /* felem_reduce_carry adds a multiple of p in order to cancel |carry|,
169 * which is a term at 2**257.
170 *
171 * On entry: carry < 2**3, inout[0,2,...] < 2**29, inout[1,3,...] < 2**28.
172 * On exit: inout[0,2,..] < 2**30, inout[1,3,...] < 2**29.
173 */
174 static void felem_reduce_carry(felem inout, limb carry)
175 {
176 const u32 carry_mask = NON_ZERO_TO_ALL_ONES(carry);
177
178 inout[0] += carry << 1;
179 inout[3] += 0x10000000 & carry_mask;
180 /* carry < 2**3 thus (carry << 11) < 2**14 and we added 2**28 in the
181 * previous line therefore this doesn't underflow.
182 */
183 inout[3] -= carry << 11;
184 inout[4] += (0x20000000 - 1) & carry_mask;
185 inout[5] += (0x10000000 - 1) & carry_mask;
186 inout[6] += (0x20000000 - 1) & carry_mask;
187 inout[6] -= carry << 22;
188 /* This may underflow if carry is non-zero but, if so, we'll fix it in the
189 * next line.
190 */
191 inout[7] -= 1 & carry_mask;
192 inout[7] += carry << 25;
193 }
194
195 /* felem_sum sets out = in+in2.
196 *
197 * On entry, in[i]+in2[i] must not overflow a 32-bit word.
198 * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29
199 */
200 static void felem_sum(felem out, const felem in, const felem in2)
201 {
202 limb carry = 0;
203 unsigned int i;
204 for (i = 0;; i++) {
205 out[i] = in[i] + in2[i];
206 out[i] += carry;
207 carry = out[i] >> 29;
208 out[i] &= kBottom29Bits;
209
210 i++;
211 if (i == NLIMBS)
212 break;
213
214 out[i] = in[i] + in2[i];
215 out[i] += carry;
216 carry = out[i] >> 28;
217 out[i] &= kBottom28Bits;
218 }
219
220 felem_reduce_carry(out, carry);
221 }
222
223 #define two31m3 (((limb)1) << 31) - (((limb)1) << 3)
224 #define two30m2 (((limb)1) << 30) - (((limb)1) << 2)
225 #define two30p13m2 (((limb)1) << 30) + (((limb)1) << 13) - (((limb)1) << 2)
226 #define two31m2 (((limb)1) << 31) - (((limb)1) << 2)
227 #define two31p24m2 (((limb)1) << 31) + (((limb)1) << 24) - (((limb)1) << 2)
228 #define two30m27m2 (((limb)1) << 30) - (((limb)1) << 27) - (((limb)1) << 2)
229
230 /* zero31 is 0 mod p.
231 */
232 static const felem zero31 = {
233 two31m3, two30m2, two31m2, two30p13m2,
234 two31m2, two30m2, two31p24m2, two30m27m2,
235 two31m2
236 };
237
238 /* felem_diff sets out = in-in2.
239 *
240 * On entry: in[0,2,...] < 2**30, in[1,3,...] < 2**29 and
241 * in2[0,2,...] < 2**30, in2[1,3,...] < 2**29.
242 * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
243 */
244 static void felem_diff(felem out, const felem in, const felem in2)
245 {
246 limb carry = 0;
247 unsigned int i;
248
249 for (i = 0;; i++) {
250 out[i] = in[i] - in2[i];
251 out[i] += zero31[i];
252 out[i] += carry;
253 carry = out[i] >> 29;
254 out[i] &= kBottom29Bits;
255
256 i++;
257 if (i == NLIMBS)
258 break;
259
260 out[i] = in[i] - in2[i];
261 out[i] += zero31[i];
262 out[i] += carry;
263 carry = out[i] >> 28;
264 out[i] &= kBottom28Bits;
265 }
266
267 felem_reduce_carry(out, carry);
268 }
269
270 /* felem_reduce_degree sets out = tmp/R mod p where tmp contains 64-bit words
271 * with the same 29,28,... bit positions as an felem.
272 *
273 * The values in felems are in Montgomery form: x*R mod p where R = 2**257.
274 * Since we just multiplied two Montgomery values together, the result is
275 * x*y*R*R mod p. We wish to divide by R in order for the result also to be
276 * in Montgomery form.
277 *
278 * On entry: tmp[i] < 2**64
279 * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29
280 */
281 static void felem_reduce_degree(felem out, u64 tmp[17])
282 {
283 /* The following table may be helpful when reading this code:
284 *
285 * Limb number: 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10...
286 * Width (bits): 29| 28| 29| 28| 29| 28| 29| 28| 29| 28| 29
287 * Start bit: 0 | 29| 57| 86|114|143|171|200|228|257|285
288 * (odd phase): 0 | 28| 57| 85|114|142|171|199|228|256|285
289 */
290 limb tmp2[18], carry, x, xMask;
291 unsigned int i;
292
293 /* tmp contains 64-bit words with the same 29,28,29-bit positions as an
294 * felem. So the top of an element of tmp might overlap with another
295 * element two positions down. The following loop eliminates this
296 * overlap.
297 */
298 tmp2[0] = tmp[0] & kBottom29Bits;
299
300 /* In the following we use "(limb) tmp[x]" and "(limb) (tmp[x]>>32)" to try
301 * and hint to the compiler that it can do a single-word shift by selecting
302 * the right register rather than doing a double-word shift and truncating
303 * afterwards.
304 */
305 tmp2[1] = ((limb) tmp[0]) >> 29;
306 tmp2[1] |= (((limb) (tmp[0] >> 32)) << 3) & kBottom28Bits;
307 tmp2[1] += ((limb) tmp[1]) & kBottom28Bits;
308 carry = tmp2[1] >> 28;
309 tmp2[1] &= kBottom28Bits;
310
311 for (i = 2; i < 17; i++) {
312 tmp2[i] = ((limb) (tmp[i - 2] >> 32)) >> 25;
313 tmp2[i] += ((limb) (tmp[i - 1])) >> 28;
314 tmp2[i] += (((limb) (tmp[i - 1] >> 32)) << 4) & kBottom29Bits;
315 tmp2[i] += ((limb) tmp[i]) & kBottom29Bits;
316 tmp2[i] += carry;
317 carry = tmp2[i] >> 29;
318 tmp2[i] &= kBottom29Bits;
319
320 i++;
321 if (i == 17)
322 break;
323 tmp2[i] = ((limb) (tmp[i - 2] >> 32)) >> 25;
324 tmp2[i] += ((limb) (tmp[i - 1])) >> 29;
325 tmp2[i] += (((limb) (tmp[i - 1] >> 32)) << 3) & kBottom28Bits;
326 tmp2[i] += ((limb) tmp[i]) & kBottom28Bits;
327 tmp2[i] += carry;
328 carry = tmp2[i] >> 28;
329 tmp2[i] &= kBottom28Bits;
330 }
331
332 tmp2[17] = ((limb) (tmp[15] >> 32)) >> 25;
333 tmp2[17] += ((limb) (tmp[16])) >> 29;
334 tmp2[17] += (((limb) (tmp[16] >> 32)) << 3);
335 tmp2[17] += carry;
336
337 /* Montgomery elimination of terms:
338 *
339 * Since R is 2**257, we can divide by R with a bitwise shift if we can
340 * ensure that the right-most 257 bits are all zero. We can make that true
341 * by adding multiplies of p without affecting the value.
342 *
343 * So we eliminate limbs from right to left. Since the bottom 29 bits of p
344 * are all ones, then by adding tmp2[0]*p to tmp2 we'll make tmp2[0] == 0.
345 * We can do that for 8 further limbs and then right shift to eliminate the
346 * extra factor of R.
347 */
348 for (i = 0;; i += 2) {
349 tmp2[i + 1] += tmp2[i] >> 29;
350 x = tmp2[i] & kBottom29Bits;
351 xMask = NON_ZERO_TO_ALL_ONES(x);
352 tmp2[i] = 0;
353
354 /* The bounds calculations for this loop are tricky. Each iteration of
355 * the loop eliminates two words by adding values to words to their
356 * right.
357 *
358 * The following table contains the amounts added to each word (as an
359 * offset from the value of i at the top of the loop). The amounts are
360 * accounted for from the first and second half of the loop separately
361 * and are written as, for example, 28 to mean a value <2**28.
362 *
363 * Word: 3 4 5 6 7 8 9 10
364 * Added in top half: 28 11 29 21 29 28
365 * 28 29
366 * 29
367 * Added in bottom half: 29 10 28 21 28 28
368 * 29
369 *
370 * The value that is currently offset 7 will be offset 5 for the next
371 * iteration and then offset 3 for the iteration after that. Therefore
372 * the total value added will be the values added at 7, 5 and 3.
373 *
374 * The following table accumulates these values. The sums at the bottom
375 * are written as, for example, 29+28, to mean a value < 2**29+2**28.
376 *
377 * Word: 3 4 5 6 7 8 9 10 11 12 13
378 * 28 11 10 29 21 29 28 28 28 28 28
379 * 29 28 11 28 29 28 29 28 29 28
380 * 29 28 21 21 29 21 29 21
381 * 10 29 28 21 28 21 28
382 * 28 29 28 29 28 29 28
383 * 11 10 29 10 29 10
384 * 29 28 11 28 11
385 * 29 29
386 * --------------------------------------------
387 * 30+ 31+ 30+ 31+ 30+
388 * 28+ 29+ 28+ 29+ 21+
389 * 21+ 28+ 21+ 28+ 10
390 * 10 21+ 10 21+
391 * 11 11
392 *
393 * So the greatest amount is added to tmp2[10] and tmp2[12]. If
394 * tmp2[10/12] has an initial value of <2**29, then the maximum value
395 * will be < 2**31 + 2**30 + 2**28 + 2**21 + 2**11, which is < 2**32,
396 * as required.
397 */
398 tmp2[i + 3] += (x << 10) & kBottom28Bits;
399 tmp2[i + 4] += (x >> 18);
400
401 tmp2[i + 6] += (x << 21) & kBottom29Bits;
402 tmp2[i + 7] += x >> 8;
403
404 /* At position 200, which is the starting bit position for word 7, we
405 * have a factor of 0xf000000 = 2**28 - 2**24.
406 */
407 tmp2[i + 7] += 0x10000000 & xMask;
408 /* Word 7 is 28 bits wide, so the 2**28 term exactly hits word 8. */
409 tmp2[i + 8] += (x - 1) & xMask;
410 tmp2[i + 7] -= (x << 24) & kBottom28Bits;
411 tmp2[i + 8] -= x >> 4;
412
413 tmp2[i + 8] += 0x20000000 & xMask;
414 tmp2[i + 8] -= x;
415 tmp2[i + 8] += (x << 28) & kBottom29Bits;
416 tmp2[i + 9] += ((x >> 1) - 1) & xMask;
417
418 if (i+1 == NLIMBS)
419 break;
420 tmp2[i + 2] += tmp2[i + 1] >> 28;
421 x = tmp2[i + 1] & kBottom28Bits;
422 xMask = NON_ZERO_TO_ALL_ONES(x);
423 tmp2[i + 1] = 0;
424
425 tmp2[i + 4] += (x << 11) & kBottom29Bits;
426 tmp2[i + 5] += (x >> 18);
427
428 tmp2[i + 7] += (x << 21) & kBottom28Bits;
429 tmp2[i + 8] += x >> 7;
430
431 /* At position 199, which is the starting bit of the 8th word when
432 * dealing with a context starting on an odd word, we have a factor of
433 * 0x1e000000 = 2**29 - 2**25. Since we have not updated i, the 8th
434 * word from i+1 is i+8.
435 */
436 tmp2[i + 8] += 0x20000000 & xMask;
437 tmp2[i + 9] += (x - 1) & xMask;
438 tmp2[i + 8] -= (x << 25) & kBottom29Bits;
439 tmp2[i + 9] -= x >> 4;
440
441 tmp2[i + 9] += 0x10000000 & xMask;
442 tmp2[i + 9] -= x;
443 tmp2[i + 10] += (x - 1) & xMask;
444 }
445
446 /* We merge the right shift with a carry chain. The words above 2**257 have
447 * widths of 28,29,... which we need to correct when copying them down.
448 */
449 carry = 0;
450 for (i = 0; i < 8; i++) {
451 /* The maximum value of tmp2[i + 9] occurs on the first iteration and
452 * is < 2**30+2**29+2**28. Adding 2**29 (from tmp2[i + 10]) is
453 * therefore safe.
454 */
455 out[i] = tmp2[i + 9];
456 out[i] += carry;
457 out[i] += (tmp2[i + 10] << 28) & kBottom29Bits;
458 carry = out[i] >> 29;
459 out[i] &= kBottom29Bits;
460
461 i++;
462 out[i] = tmp2[i + 9] >> 1;
463 out[i] += carry;
464 carry = out[i] >> 28;
465 out[i] &= kBottom28Bits;
466 }
467
468 out[8] = tmp2[17];
469 out[8] += carry;
470 carry = out[8] >> 29;
471 out[8] &= kBottom29Bits;
472
473 felem_reduce_carry(out, carry);
474 }
475
476 /* felem_square sets out=in*in.
477 *
478 * On entry: in[0,2,...] < 2**30, in[1,3,...] < 2**29.
479 * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
480 */
481 static void felem_square(felem out, const felem in)
482 {
483 u64 tmp[17];
484
485 tmp[0] = ((u64) in[0]) * in[0];
486 tmp[1] = ((u64) in[0]) * (in[1] << 1);
487 tmp[2] = ((u64) in[0]) * (in[2] << 1) +
488 ((u64) in[1]) * (in[1] << 1);
489 tmp[3] = ((u64) in[0]) * (in[3] << 1) +
490 ((u64) in[1]) * (in[2] << 1);
491 tmp[4] = ((u64) in[0]) * (in[4] << 1) +
492 ((u64) in[1]) * (in[3] << 2) +
493 ((u64) in[2]) * in[2];
494 tmp[5] = ((u64) in[0]) * (in[5] << 1) +
495 ((u64) in[1]) * (in[4] << 1) +
496 ((u64) in[2]) * (in[3] << 1);
497 tmp[6] = ((u64) in[0]) * (in[6] << 1) +
498 ((u64) in[1]) * (in[5] << 2) +
499 ((u64) in[2]) * (in[4] << 1) +
500 ((u64) in[3]) * (in[3] << 1);
501 tmp[7] = ((u64) in[0]) * (in[7] << 1) +
502 ((u64) in[1]) * (in[6] << 1) +
503 ((u64) in[2]) * (in[5] << 1) +
504 ((u64) in[3]) * (in[4] << 1);
505 /* tmp[8] has the greatest value of 2**61 + 2**60 + 2**61 + 2**60 + 2**60,
506 * which is < 2**64 as required.
507 */
508 tmp[8] = ((u64) in[0]) * (in[8] << 1) +
509 ((u64) in[1]) * (in[7] << 2) +
510 ((u64) in[2]) * (in[6] << 1) +
511 ((u64) in[3]) * (in[5] << 2) +
512 ((u64) in[4]) * in[4];
513 tmp[9] = ((u64) in[1]) * (in[8] << 1) +
514 ((u64) in[2]) * (in[7] << 1) +
515 ((u64) in[3]) * (in[6] << 1) +
516 ((u64) in[4]) * (in[5] << 1);
517 tmp[10] = ((u64) in[2]) * (in[8] << 1) +
518 ((u64) in[3]) * (in[7] << 2) +
519 ((u64) in[4]) * (in[6] << 1) +
520 ((u64) in[5]) * (in[5] << 1);
521 tmp[11] = ((u64) in[3]) * (in[8] << 1) +
522 ((u64) in[4]) * (in[7] << 1) +
523 ((u64) in[5]) * (in[6] << 1);
524 tmp[12] = ((u64) in[4]) * (in[8] << 1) +
525 ((u64) in[5]) * (in[7] << 2) +
526 ((u64) in[6]) * in[6];
527 tmp[13] = ((u64) in[5]) * (in[8] << 1) +
528 ((u64) in[6]) * (in[7] << 1);
529 tmp[14] = ((u64) in[6]) * (in[8] << 1) +
530 ((u64) in[7]) * (in[7] << 1);
531 tmp[15] = ((u64) in[7]) * (in[8] << 1);
532 tmp[16] = ((u64) in[8]) * in[8];
533
534 felem_reduce_degree(out, tmp);
535 }
536
537 /* felem_mul sets out=in*in2.
538 *
539 * On entry: in[0,2,...] < 2**30, in[1,3,...] < 2**29 and
540 * in2[0,2,...] < 2**30, in2[1,3,...] < 2**29.
541 * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
542 */
543 static void felem_mul(felem out, const felem in, const felem in2)
544 {
545 u64 tmp[17];
546
547 tmp[0] = ((u64) in[0]) * in2[0];
548 tmp[1] = ((u64) in[0]) * (in2[1] << 0) +
549 ((u64) in[1]) * (in2[0] << 0);
550 tmp[2] = ((u64) in[0]) * (in2[2] << 0) +
551 ((u64) in[1]) * (in2[1] << 1) +
552 ((u64) in[2]) * (in2[0] << 0);
553 tmp[3] = ((u64) in[0]) * (in2[3] << 0) +
554 ((u64) in[1]) * (in2[2] << 0) +
555 ((u64) in[2]) * (in2[1] << 0) +
556 ((u64) in[3]) * (in2[0] << 0);
557 tmp[4] = ((u64) in[0]) * (in2[4] << 0) +
558 ((u64) in[1]) * (in2[3] << 1) +
559 ((u64) in[2]) * (in2[2] << 0) +
560 ((u64) in[3]) * (in2[1] << 1) +
561 ((u64) in[4]) * (in2[0] << 0);
562 tmp[5] = ((u64) in[0]) * (in2[5] << 0) +
563 ((u64) in[1]) * (in2[4] << 0) +
564 ((u64) in[2]) * (in2[3] << 0) +
565 ((u64) in[3]) * (in2[2] << 0) +
566 ((u64) in[4]) * (in2[1] << 0) +
567 ((u64) in[5]) * (in2[0] << 0);
568 tmp[6] = ((u64) in[0]) * (in2[6] << 0) +
569 ((u64) in[1]) * (in2[5] << 1) +
570 ((u64) in[2]) * (in2[4] << 0) +
571 ((u64) in[3]) * (in2[3] << 1) +
572 ((u64) in[4]) * (in2[2] << 0) +
573 ((u64) in[5]) * (in2[1] << 1) +
574 ((u64) in[6]) * (in2[0] << 0);
575 tmp[7] = ((u64) in[0]) * (in2[7] << 0) +
576 ((u64) in[1]) * (in2[6] << 0) +
577 ((u64) in[2]) * (in2[5] << 0) +
578 ((u64) in[3]) * (in2[4] << 0) +
579 ((u64) in[4]) * (in2[3] << 0) +
580 ((u64) in[5]) * (in2[2] << 0) +
581 ((u64) in[6]) * (in2[1] << 0) +
582 ((u64) in[7]) * (in2[0] << 0);
583 /* tmp[8] has the greatest value but doesn't overflow. See logic in
584 * felem_square.
585 */
586 tmp[8] = ((u64) in[0]) * (in2[8] << 0) +
587 ((u64) in[1]) * (in2[7] << 1) +
588 ((u64) in[2]) * (in2[6] << 0) +
589 ((u64) in[3]) * (in2[5] << 1) +
590 ((u64) in[4]) * (in2[4] << 0) +
591 ((u64) in[5]) * (in2[3] << 1) +
592 ((u64) in[6]) * (in2[2] << 0) +
593 ((u64) in[7]) * (in2[1] << 1) +
594 ((u64) in[8]) * (in2[0] << 0);
595 tmp[9] = ((u64) in[1]) * (in2[8] << 0) +
596 ((u64) in[2]) * (in2[7] << 0) +
597 ((u64) in[3]) * (in2[6] << 0) +
598 ((u64) in[4]) * (in2[5] << 0) +
599 ((u64) in[5]) * (in2[4] << 0) +
600 ((u64) in[6]) * (in2[3] << 0) +
601 ((u64) in[7]) * (in2[2] << 0) +
602 ((u64) in[8]) * (in2[1] << 0);
603 tmp[10] = ((u64) in[2]) * (in2[8] << 0) +
604 ((u64) in[3]) * (in2[7] << 1) +
605 ((u64) in[4]) * (in2[6] << 0) +
606 ((u64) in[5]) * (in2[5] << 1) +
607 ((u64) in[6]) * (in2[4] << 0) +
608 ((u64) in[7]) * (in2[3] << 1) +
609 ((u64) in[8]) * (in2[2] << 0);
610 tmp[11] = ((u64) in[3]) * (in2[8] << 0) +
611 ((u64) in[4]) * (in2[7] << 0) +
612 ((u64) in[5]) * (in2[6] << 0) +
613 ((u64) in[6]) * (in2[5] << 0) +
614 ((u64) in[7]) * (in2[4] << 0) +
615 ((u64) in[8]) * (in2[3] << 0);
616 tmp[12] = ((u64) in[4]) * (in2[8] << 0) +
617 ((u64) in[5]) * (in2[7] << 1) +
618 ((u64) in[6]) * (in2[6] << 0) +
619 ((u64) in[7]) * (in2[5] << 1) +
620 ((u64) in[8]) * (in2[4] << 0);
621 tmp[13] = ((u64) in[5]) * (in2[8] << 0) +
622 ((u64) in[6]) * (in2[7] << 0) +
623 ((u64) in[7]) * (in2[6] << 0) +
624 ((u64) in[8]) * (in2[5] << 0);
625 tmp[14] = ((u64) in[6]) * (in2[8] << 0) +
626 ((u64) in[7]) * (in2[7] << 1) +
627 ((u64) in[8]) * (in2[6] << 0);
628 tmp[15] = ((u64) in[7]) * (in2[8] << 0) +
629 ((u64) in[8]) * (in2[7] << 0);
630 tmp[16] = ((u64) in[8]) * (in2[8] << 0);
631
632 felem_reduce_degree(out, tmp);
633 }
634
635 static void felem_assign(felem out, const felem in)
636 {
637 memcpy(out, in, sizeof(felem));
638 }
639
640 /* felem_inv calculates |out| = |in|^{-1}
641 *
642 * Based on Fermat's Little Theorem:
643 * a^p = a (mod p)
644 * a^{p-1} = 1 (mod p)
645 * a^{p-2} = a^{-1} (mod p)
646 */
647 static void felem_inv(felem out, const felem in)
648 {
649 felem ftmp, ftmp2;
650 /* each e_I will hold |in|^{2^I - 1} */
651 felem e2, e4, e8, e16, e32, e64;
652 unsigned int i;
653
654 felem_square(ftmp, in); /* 2^1 */
655 felem_mul(ftmp, in, ftmp); /* 2^2 - 2^0 */
656 felem_assign(e2, ftmp);
657 felem_square(ftmp, ftmp); /* 2^3 - 2^1 */
658 felem_square(ftmp, ftmp); /* 2^4 - 2^2 */
659 felem_mul(ftmp, ftmp, e2); /* 2^4 - 2^0 */
660 felem_assign(e4, ftmp);
661 felem_square(ftmp, ftmp); /* 2^5 - 2^1 */
662 felem_square(ftmp, ftmp); /* 2^6 - 2^2 */
663 felem_square(ftmp, ftmp); /* 2^7 - 2^3 */
664 felem_square(ftmp, ftmp); /* 2^8 - 2^4 */
665 felem_mul(ftmp, ftmp, e4); /* 2^8 - 2^0 */
666 felem_assign(e8, ftmp);
667 for (i = 0; i < 8; i++) {
668 felem_square(ftmp, ftmp);
669 } /* 2^16 - 2^8 */
670 felem_mul(ftmp, ftmp, e8); /* 2^16 - 2^0 */
671 felem_assign(e16, ftmp);
672 for (i = 0; i < 16; i++) {
673 felem_square(ftmp, ftmp);
674 } /* 2^32 - 2^16 */
675 felem_mul(ftmp, ftmp, e16); /* 2^32 - 2^0 */
676 felem_assign(e32, ftmp);
677 for (i = 0; i < 32; i++) {
678 felem_square(ftmp, ftmp);
679 } /* 2^64 - 2^32 */
680 felem_assign(e64, ftmp);
681 felem_mul(ftmp, ftmp, in); /* 2^64 - 2^32 + 2^0 */
682 for (i = 0; i < 192; i++) {
683 felem_square(ftmp, ftmp);
684 } /* 2^256 - 2^224 + 2^192 */
685
686 felem_mul(ftmp2, e64, e32); /* 2^64 - 2^0 */
687 for (i = 0; i < 16; i++) {
688 felem_square(ftmp2, ftmp2);
689 } /* 2^80 - 2^16 */
690 felem_mul(ftmp2, ftmp2, e16); /* 2^80 - 2^0 */
691 for (i = 0; i < 8; i++) {
692 felem_square(ftmp2, ftmp2);
693 } /* 2^88 - 2^8 */
694 felem_mul(ftmp2, ftmp2, e8); /* 2^88 - 2^0 */
695 for (i = 0; i < 4; i++) {
696 felem_square(ftmp2, ftmp2);
697 } /* 2^92 - 2^4 */
698 felem_mul(ftmp2, ftmp2, e4); /* 2^92 - 2^0 */
699 felem_square(ftmp2, ftmp2); /* 2^93 - 2^1 */
700 felem_square(ftmp2, ftmp2); /* 2^94 - 2^2 */
701 felem_mul(ftmp2, ftmp2, e2); /* 2^94 - 2^0 */
702 felem_square(ftmp2, ftmp2); /* 2^95 - 2^1 */
703 felem_square(ftmp2, ftmp2); /* 2^96 - 2^2 */
704 felem_mul(ftmp2, ftmp2, in); /* 2^96 - 3 */
705
706 felem_mul(out, ftmp2, ftmp); /* 2^256 - 2^224 + 2^192 + 2^96 - 3 */
707 }
708
709 /* felem_scalar_3 sets out=3*out.
710 *
711 * On entry: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
712 * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
713 */
714 static void felem_scalar_3(felem out)
715 {
716 limb carry = 0;
717 unsigned int i;
718
719 for (i = 0;; i++) {
720 out[i] *= 3;
721 out[i] += carry;
722 carry = out[i] >> 29;
723 out[i] &= kBottom29Bits;
724
725 i++;
726 if (i == NLIMBS)
727 break;
728
729 out[i] *= 3;
730 out[i] += carry;
731 carry = out[i] >> 28;
732 out[i] &= kBottom28Bits;
733 }
734
735 felem_reduce_carry(out, carry);
736 }
737
738 /* felem_scalar_4 sets out=4*out.
739 *
740 * On entry: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
741 * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
742 */
743 static void felem_scalar_4(felem out)
744 {
745 limb carry = 0, next_carry;
746 unsigned int i;
747
748 for (i = 0;; i++) {
749 next_carry = out[i] >> 27;
750 out[i] <<= 2;
751 out[i] &= kBottom29Bits;
752 out[i] += carry;
753 carry = next_carry + (out[i] >> 29);
754 out[i] &= kBottom29Bits;
755
756 i++;
757 if (i == NLIMBS)
758 break;
759 next_carry = out[i] >> 26;
760 out[i] <<= 2;
761 out[i] &= kBottom28Bits;
762 out[i] += carry;
763 carry = next_carry + (out[i] >> 28);
764 out[i] &= kBottom28Bits;
765 }
766
767 felem_reduce_carry(out, carry);
768 }
769
770 /* felem_scalar_8 sets out=8*out.
771 *
772 * On entry: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
773 * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
774 */
775 static void felem_scalar_8(felem out)
776 {
777 limb carry = 0, next_carry;
778 unsigned int i;
779
780 for (i = 0;; i++) {
781 next_carry = out[i] >> 26;
782 out[i] <<= 3;
783 out[i] &= kBottom29Bits;
784 out[i] += carry;
785 carry = next_carry + (out[i] >> 29);
786 out[i] &= kBottom29Bits;
787
788 i++;
789 if (i == NLIMBS)
790 break;
791 next_carry = out[i] >> 25;
792 out[i] <<= 3;
793 out[i] &= kBottom28Bits;
794 out[i] += carry;
795 carry = next_carry + (out[i] >> 28);
796 out[i] &= kBottom28Bits;
797 }
798
799 felem_reduce_carry(out, carry);
800 }
801
802 /* felem_is_zero_vartime returns 1 iff |in| == 0. It takes a variable amount of
803 * time depending on the value of |in|.
804 */
805 static char felem_is_zero_vartime(const felem in)
806 {
807 limb carry;
808 int i;
809 limb tmp[NLIMBS];
810 felem_assign(tmp, in);
811
812 /* First, reduce tmp to a minimal form.
813 */
814 do {
815 carry = 0;
816 for (i = 0;; i++) {
817 tmp[i] += carry;
818 carry = tmp[i] >> 29;
819 tmp[i] &= kBottom29Bits;
820
821 i++;
822 if (i == NLIMBS)
823 break;
824
825 tmp[i] += carry;
826 carry = tmp[i] >> 28;
827 tmp[i] &= kBottom28Bits;
828 }
829
830 felem_reduce_carry(tmp, carry);
831 } while (carry);
832
833 /* tmp < 2**257, so the only possible zero values are 0, p and 2p.
834 */
835 return memcmp(tmp, kZero, sizeof(tmp)) == 0 ||
836 memcmp(tmp, kP, sizeof(tmp)) == 0 ||
837 memcmp(tmp, k2P, sizeof(tmp)) == 0;
838 }
839
840 /* Group operations:
841 *
842 * Elements of the elliptic curve group are represented in Jacobian
843 * coordinates: (x, y, z). An affine point (x', y') is x'=x/z**2, y'=y/z**3 in
844 * Jacobian form.
845 */
846
847 /* point_double sets {x_out,y_out,z_out} = 2*{x,y,z}.
848 *
849 * See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#doubling -dbl-2009-l
850 */
851 static void point_double(felem x_out, felem y_out, felem z_out,
852 const felem x, const felem y, const felem z)
853 {
854 felem delta, gamma, alpha, beta, tmp, tmp2;
855
856 felem_square(delta, z);
857 felem_square(gamma, y);
858 felem_mul(beta, x, gamma);
859
860 felem_sum(tmp, x, delta);
861 felem_diff(tmp2, x, delta);
862 felem_mul(alpha, tmp, tmp2);
863 felem_scalar_3(alpha);
864
865 felem_sum(tmp, y, z);
866 felem_square(tmp, tmp);
867 felem_diff(tmp, tmp, gamma);
868 felem_diff(z_out, tmp, delta);
869
870 felem_scalar_4(beta);
871 felem_square(x_out, alpha);
872 felem_diff(x_out, x_out, beta);
873 felem_diff(x_out, x_out, beta);
874
875 felem_diff(tmp, beta, x_out);
876 felem_mul(tmp, alpha, tmp);
877 felem_square(tmp2, gamma);
878 felem_scalar_8(tmp2);
879 felem_diff(y_out, tmp, tmp2);
880 }
881
882 /* point_add_mixed sets {x_out,y_out,z_out} = {x1,y1,z1} + {x2,y2,1}.
883 * (i.e. the second point is affine.)
884 *
885 * See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition -add-2007-bl
886 *
887 * Note that this function does not handle P+P, infinity+P nor P+infinity
888 * correctly.
889 */
890 static void point_add_mixed(felem x_out, felem y_out, felem z_out,
891 const felem x1, const felem y1, const felem z1,
892 const felem x2, const felem y2)
893 {
894 felem z1z1, z1z1z1, s2, u2, h, i, j, r, rr, v, tmp;
895
896 felem_square(z1z1, z1);
897 felem_sum(tmp, z1, z1);
898
899 felem_mul(u2, x2, z1z1);
900 felem_mul(z1z1z1, z1, z1z1);
901 felem_mul(s2, y2, z1z1z1);
902 felem_diff(h, u2, x1);
903 felem_sum(i, h, h);
904 felem_square(i, i);
905 felem_mul(j, h, i);
906 felem_diff(r, s2, y1);
907 felem_sum(r, r, r);
908 felem_mul(v, x1, i);
909
910 felem_mul(z_out, tmp, h);
911 felem_square(rr, r);
912 felem_diff(x_out, rr, j);
913 felem_diff(x_out, x_out, v);
914 felem_diff(x_out, x_out, v);
915
916 felem_diff(tmp, v, x_out);
917 felem_mul(y_out, tmp, r);
918 felem_mul(tmp, y1, j);
919 felem_diff(y_out, y_out, tmp);
920 felem_diff(y_out, y_out, tmp);
921 }
922
923 /* point_add sets {x_out,y_out,z_out} = {x1,y1,z1} + {x2,y2,z2}.
924 *
925 * See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition -add-2007-bl
926 *
927 * Note that this function does not handle P+P, infinity+P nor P+infinity
928 * correctly.
929 */
930 static void point_add(felem x_out, felem y_out, felem z_out,
931 const felem x1, const felem y1, const felem z1,
932 const felem x2, const felem y2, const felem z2)
933 {
934 felem z1z1, z1z1z1, z2z2, z2z2z2, s1, s2, u1, u2, h, i, j, r, rr, v, tmp;
935
936 felem_square(z1z1, z1);
937 felem_square(z2z2, z2);
938 felem_mul(u1, x1, z2z2);
939
940 felem_sum(tmp, z1, z2);
941 felem_square(tmp, tmp);
942 felem_diff(tmp, tmp, z1z1);
943 felem_diff(tmp, tmp, z2z2);
944
945 felem_mul(z2z2z2, z2, z2z2);
946 felem_mul(s1, y1, z2z2z2);
947
948 felem_mul(u2, x2, z1z1);
949 felem_mul(z1z1z1, z1, z1z1);
950 felem_mul(s2, y2, z1z1z1);
951 felem_diff(h, u2, u1);
952 felem_sum(i, h, h);
953 felem_square(i, i);
954 felem_mul(j, h, i);
955 felem_diff(r, s2, s1);
956 felem_sum(r, r, r);
957 felem_mul(v, u1, i);
958
959 felem_mul(z_out, tmp, h);
960 felem_square(rr, r);
961 felem_diff(x_out, rr, j);
962 felem_diff(x_out, x_out, v);
963 felem_diff(x_out, x_out, v);
964
965 felem_diff(tmp, v, x_out);
966 felem_mul(y_out, tmp, r);
967 felem_mul(tmp, s1, j);
968 felem_diff(y_out, y_out, tmp);
969 felem_diff(y_out, y_out, tmp);
970 }
971
972 /* point_add_or_double_vartime sets {x_out,y_out,z_out} = {x1,y1,z1} +
973 * {x2,y2,z2}.
974 *
975 * See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition -add-2007-bl
976 *
977 * This function handles the case where {x1,y1,z1}={x2,y2,z2}.
978 */
979 static void point_add_or_double_vartime(
980 felem x_out, felem y_out, felem z_out,
981 const felem x1, const felem y1, const felem z1,
982 const felem x2, const felem y2, const felem z2)
983 {
984 felem z1z1, z1z1z1, z2z2, z2z2z2, s1, s2, u1, u2, h, i, j, r, rr, v, tmp;
985 char x_equal, y_equal;
986
987 felem_square(z1z1, z1);
988 felem_square(z2z2, z2);
989 felem_mul(u1, x1, z2z2);
990
991 felem_sum(tmp, z1, z2);
992 felem_square(tmp, tmp);
993 felem_diff(tmp, tmp, z1z1);
994 felem_diff(tmp, tmp, z2z2);
995
996 felem_mul(z2z2z2, z2, z2z2);
997 felem_mul(s1, y1, z2z2z2);
998
999 felem_mul(u2, x2, z1z1);
1000 felem_mul(z1z1z1, z1, z1z1);
1001 felem_mul(s2, y2, z1z1z1);
1002 felem_diff(h, u2, u1);
1003 x_equal = felem_is_zero_vartime(h);
1004 felem_sum(i, h, h);
1005 felem_square(i, i);
1006 felem_mul(j, h, i);
1007 felem_diff(r, s2, s1);
1008 y_equal = felem_is_zero_vartime(r);
1009 if (x_equal && y_equal) {
1010 point_double(x_out, y_out, z_out, x1, y1, z1);
1011 return;
1012 }
1013 felem_sum(r, r, r);
1014 felem_mul(v, u1, i);
1015
1016 felem_mul(z_out, tmp, h);
1017 felem_square(rr, r);
1018 felem_diff(x_out, rr, j);
1019 felem_diff(x_out, x_out, v);
1020 felem_diff(x_out, x_out, v);
1021
1022 felem_diff(tmp, v, x_out);
1023 felem_mul(y_out, tmp, r);
1024 felem_mul(tmp, s1, j);
1025 felem_diff(y_out, y_out, tmp);
1026 felem_diff(y_out, y_out, tmp);
1027 }
1028
1029 /* copy_conditional sets out=in if mask = 0xffffffff in constant time.
1030 *
1031 * On entry: mask is either 0 or 0xffffffff.
1032 */
1033 static void copy_conditional(felem out, const felem in, limb mask)
1034 {
1035 int i;
1036
1037 for (i = 0; i < NLIMBS; i++) {
1038 const limb tmp = mask & (in[i] ^ out[i]);
1039 out[i] ^= tmp;
1040 }
1041 }
1042
1043 /* select_affine_point sets {out_x,out_y} to the index'th entry of table.
1044 * On entry: index < 16, table[0] must be zero.
1045 */
1046 static void select_affine_point(felem out_x, felem out_y,
1047 const limb *table, limb index)
1048 {
1049 limb i, j;
1050
1051 memset(out_x, 0, sizeof(felem));
1052 memset(out_y, 0, sizeof(felem));
1053
1054 for (i = 1; i < 16; i++) {
1055 limb mask = i ^ index;
1056 mask |= mask >> 2;
1057 mask |= mask >> 1;
1058 mask &= 1;
1059 mask--;
1060 for (j = 0; j < NLIMBS; j++, table++) {
1061 out_x[j] |= *table & mask;
1062 }
1063 for (j = 0; j < NLIMBS; j++, table++) {
1064 out_y[j] |= *table & mask;
1065 }
1066 }
1067 }
1068
1069 /* select_jacobian_point sets {out_x,out_y,out_z} to the index'th entry of
1070 * table. On entry: index < 16, table[0] must be zero.
1071 */
1072 static void select_jacobian_point(felem out_x, felem out_y, felem out_z,
1073 const limb *table, limb index)
1074 {
1075 limb i, j;
1076
1077 memset(out_x, 0, sizeof(felem));
1078 memset(out_y, 0, sizeof(felem));
1079 memset(out_z, 0, sizeof(felem));
1080
1081 /* The implicit value at index 0 is all zero. We don't need to perform that
1082 * iteration of the loop because we already set out_* to zero.
1083 */
1084 table += 3*NLIMBS;
1085
1086 for (i = 1; i < 16; i++) {
1087 limb mask = i ^ index;
1088 mask |= mask >> 2;
1089 mask |= mask >> 1;
1090 mask &= 1;
1091 mask--;
1092 for (j = 0; j < NLIMBS; j++, table++) {
1093 out_x[j] |= *table & mask;
1094 }
1095 for (j = 0; j < NLIMBS; j++, table++) {
1096 out_y[j] |= *table & mask;
1097 }
1098 for (j = 0; j < NLIMBS; j++, table++) {
1099 out_z[j] |= *table & mask;
1100 }
1101 }
1102 }
1103
1104 /* get_bit returns the bit'th bit of scalar. */
1105 static char get_bit(const u8 scalar[32], int bit)
1106 {
1107 return ((scalar[bit >> 3]) >> (bit & 7)) & 1;
1108 }
1109
1110 /* scalar_base_mult sets {nx,ny,nz} = scalar*G where scalar is a little-endian
1111 * number. Note that the value of scalar must be less than the order of the
1112 * group.
1113 */
1114 static void scalar_base_mult(felem nx, felem ny, felem nz, const u8 scalar[32])
1115 {
1116 int i, j;
1117 limb n_is_infinity_mask = -1, p_is_noninfinite_mask, mask;
1118 u32 table_offset;
1119
1120 felem px, py;
1121 felem tx, ty, tz;
1122
1123 memset(nx, 0, sizeof(felem));
1124 memset(ny, 0, sizeof(felem));
1125 memset(nz, 0, sizeof(felem));
1126
1127 /* The loop adds bits at positions 0, 64, 128 and 192, followed by
1128 * positions 32,96,160 and 224 and does this 32 times.
1129 */
1130 for (i = 0; i < 32; i++) {
1131 if (i) {
1132 point_double(nx, ny, nz, nx, ny, nz);
1133 }
1134 table_offset = 0;
1135 for (j = 0; j <= 32; j += 32) {
1136 char bit0 = get_bit(scalar, 31 - i + j);
1137 char bit1 = get_bit(scalar, 95 - i + j);
1138 char bit2 = get_bit(scalar, 159 - i + j);
1139 char bit3 = get_bit(scalar, 223 - i + j);
1140 limb index = bit0 | (bit1 << 1) | (bit2 << 2) | (bit3 << 3);
1141
1142 select_affine_point(px, py, kPrecomputed + table_offset, index);
1143 table_offset += 30 * NLIMBS;
1144
1145 /* Since scalar is less than the order of the group, we know that
1146 * {nx,ny,nz} != {px,py,1}, unless both are zero, which we handle
1147 * below.
1148 */
1149 point_add_mixed(tx, ty, tz, nx, ny, nz, px, py);
1150 /* The result of point_add_mixed is incorrect if {nx,ny,nz} is zero
1151 * (a.k.a. the point at infinity). We handle that situation by
1152 * copying the point from the table.
1153 */
1154 copy_conditional(nx, px, n_is_infinity_mask);
1155 copy_conditional(ny, py, n_is_infinity_mask);
1156 copy_conditional(nz, kOne, n_is_infinity_mask);
1157
1158 /* Equally, the result is also wrong if the point from the table is
1159 * zero, which happens when the index is zero. We handle that by
1160 * only copying from {tx,ty,tz} to {nx,ny,nz} if index != 0.
1161 */
1162 p_is_noninfinite_mask = NON_ZERO_TO_ALL_ONES(index);
1163 mask = p_is_noninfinite_mask & ~n_is_infinity_mask;
1164 copy_conditional(nx, tx, mask);
1165 copy_conditional(ny, ty, mask);
1166 copy_conditional(nz, tz, mask);
1167 /* If p was not zero, then n is now non-zero. */
1168 n_is_infinity_mask &= ~p_is_noninfinite_mask;
1169 }
1170 }
1171 }
1172
1173 /* point_to_affine converts a Jacobian point to an affine point. If the input
1174 * is the point at infinity then it returns (0, 0) in constant time.
1175 */
1176 static void point_to_affine(felem x_out, felem y_out,
1177 const felem nx, const felem ny, const felem nz) {
1178 felem z_inv, z_inv_sq;
1179 felem_inv(z_inv, nz);
1180 felem_square(z_inv_sq, z_inv);
1181 felem_mul(x_out, nx, z_inv_sq);
1182 felem_mul(z_inv, z_inv, z_inv_sq);
1183 felem_mul(y_out, ny, z_inv);
1184 }
1185
1186 /* scalar_mult sets {nx,ny,nz} = scalar*{x,y}. */
1187 static void scalar_mult(felem nx, felem ny, felem nz,
1188 const felem x, const felem y, const u8 scalar[32])
1189 {
1190 int i;
1191 felem px, py, pz, tx, ty, tz;
1192 felem precomp[16][3];
1193 limb n_is_infinity_mask, index, p_is_noninfinite_mask, mask;
1194
1195 /* We precompute 0,1,2,... times {x,y}. */
1196 memset(precomp, 0, sizeof(felem) * 3);
1197 memcpy(&precomp[1][0], x, sizeof(felem));
1198 memcpy(&precomp[1][1], y, sizeof(felem));
1199 memcpy(&precomp[1][2], kOne, sizeof(felem));
1200
1201 for (i = 2; i < 16; i += 2) {
1202 point_double(precomp[i][0], precomp[i][1], precomp[i][2],
1203 precomp[i / 2][0], precomp[i / 2][1], precomp[i / 2][2]);
1204
1205 point_add_mixed(precomp[i + 1][0], precomp[i + 1][1], precomp[i + 1][2],
1206 precomp[i][0], precomp[i][1], precomp[i][2], x, y);
1207 }
1208
1209 memset(nx, 0, sizeof(felem));
1210 memset(ny, 0, sizeof(felem));
1211 memset(nz, 0, sizeof(felem));
1212 n_is_infinity_mask = -1;
1213
1214 /* We add in a window of four bits each iteration and do this 64 times. */
1215 for (i = 0; i < 64; i++) {
1216 if (i) {
1217 point_double(nx, ny, nz, nx, ny, nz);
1218 point_double(nx, ny, nz, nx, ny, nz);
1219 point_double(nx, ny, nz, nx, ny, nz);
1220 point_double(nx, ny, nz, nx, ny, nz);
1221 }
1222
1223 index = scalar[31 - i / 2];
1224 if ((i & 1) == 1) {
1225 index &= 15;
1226 } else {
1227 index >>= 4;
1228 }
1229
1230 /* See the comments in scalar_base_mult about handling infinities. */
1231 select_jacobian_point(px, py, pz, precomp[0][0], index);
1232 point_add(tx, ty, tz, nx, ny, nz, px, py, pz);
1233 copy_conditional(nx, px, n_is_infinity_mask);
1234 copy_conditional(ny, py, n_is_infinity_mask);
1235 copy_conditional(nz, pz, n_is_infinity_mask);
1236
1237 p_is_noninfinite_mask = NON_ZERO_TO_ALL_ONES(index);
1238 mask = p_is_noninfinite_mask & ~n_is_infinity_mask;
1239 copy_conditional(nx, tx, mask);
1240 copy_conditional(ny, ty, mask);
1241 copy_conditional(nz, tz, mask);
1242 n_is_infinity_mask &= ~p_is_noninfinite_mask;
1243 }
1244 }
1245
1246 /* Interface with Freebl: */
1247
1248 /* BYTESWAP_MP_DIGIT_TO_LE swaps the bytes of a mp_digit to
1249 * little-endian order.
1250 */
1251 #ifdef IS_BIG_ENDIAN
1252 #ifdef __APPLE__
1253 #include <libkern/OSByteOrder.h>
1254 #define BYTESWAP32(x) OSSwapInt32(x)
1255 #define BYTESWAP64(x) OSSwapInt64(x)
1256 #else
1257 #define BYTESWAP32(x) \
1258 ((x) >> 24 | (x) >> 8 & 0xff00 | ((x) & 0xff00) << 8 | (x) << 24)
1259 #define BYTESWAP64(x) \
1260 ((x) >> 56 | (x) >> 40 & 0xff00 | \
1261 (x) >> 24 & 0xff0000 | (x) >> 8 & 0xff000000 | \
1262 ((x) & 0xff000000) << 8 | ((x) & 0xff0000) << 24 | \
1263 ((x) & 0xff00) << 40 | (x) << 56)
1264 #endif
1265
1266 #ifdef MP_USE_UINT_DIGIT
1267 #define BYTESWAP_MP_DIGIT_TO_LE(x) BYTESWAP32(x)
1268 #else
1269 #define BYTESWAP_MP_DIGIT_TO_LE(x) BYTESWAP64(x)
1270 #endif
1271 #endif /* IS_BIG_ENDIAN */
1272
1273 #ifdef MP_USE_UINT_DIGIT
1274 static const mp_digit kRInvDigits[8] = {
1275 0x80000000, 1, 0xffffffff, 0,
1276 0x80000001, 0xfffffffe, 1, 0x7fffffff
1277 };
1278 #else
1279 static const mp_digit kRInvDigits[4] = {
1280 PR_UINT64(0x180000000), 0xffffffff,
1281 PR_UINT64(0xfffffffe80000001), PR_UINT64(0x7fffffff00000001)
1282 };
1283 #endif
1284 #define MP_DIGITS_IN_256_BITS (32/sizeof(mp_digit))
1285 static const mp_int kRInv = {
1286 MP_ZPOS,
1287 MP_DIGITS_IN_256_BITS,
1288 MP_DIGITS_IN_256_BITS,
1289 (mp_digit*) kRInvDigits
1290 };
1291
1292 static const limb kTwo28 = 0x10000000;
1293 static const limb kTwo29 = 0x20000000;
1294
1295 /* to_montgomery sets out = R*in. */
1296 static mp_err to_montgomery(felem out, const mp_int *in, const ECGroup *group)
1297 {
1298 /* There are no MPI functions for bitshift operations and we wish to shift
1299 * in 257 bits left so we move the digits 256-bits left and then multiply
1300 * by two.
1301 */
1302 mp_int in_shifted;
1303 int i;
1304 mp_err res;
1305
1306 mp_init(&in_shifted);
1307 s_mp_pad(&in_shifted, MP_USED(in) + MP_DIGITS_IN_256_BITS);
1308 memcpy(&MP_DIGIT(&in_shifted, MP_DIGITS_IN_256_BITS),
1309 MP_DIGITS(in),
1310 MP_USED(in)*sizeof(mp_digit));
1311 mp_mul_2(&in_shifted, &in_shifted);
1312 MP_CHECKOK(group->meth->field_mod(&in_shifted, &in_shifted, group->meth));
1313
1314 for (i = 0;; i++) {
1315 out[i] = MP_DIGIT(&in_shifted, 0) & kBottom29Bits;
1316 mp_div_d(&in_shifted, kTwo29, &in_shifted, NULL);
1317
1318 i++;
1319 if (i == NLIMBS)
1320 break;
1321 out[i] = MP_DIGIT(&in_shifted, 0) & kBottom28Bits;
1322 mp_div_d(&in_shifted, kTwo28, &in_shifted, NULL);
1323 }
1324
1325 CLEANUP:
1326 mp_clear(&in_shifted);
1327 return res;
1328 }
1329
1330 /* from_montgomery sets out=in/R. */
1331 static mp_err from_montgomery(mp_int *out, const felem in,
1332 const ECGroup *group)
1333 {
1334 mp_int result, tmp;
1335 mp_err res;
1336 int i;
1337
1338 mp_init(&result);
1339 mp_init(&tmp);
1340
1341 MP_CHECKOK(mp_add_d(&tmp, in[NLIMBS-1], &result));
1342 for (i = NLIMBS-2; i >= 0; i--) {
1343 if ((i & 1) == 0) {
1344 MP_CHECKOK(mp_mul_d(&result, kTwo29, &tmp));
1345 } else {
1346 MP_CHECKOK(mp_mul_d(&result, kTwo28, &tmp));
1347 }
1348 MP_CHECKOK(mp_add_d(&tmp, in[i], &result));
1349 }
1350
1351 MP_CHECKOK(mp_mul(&result, &kRInv, out));
1352 MP_CHECKOK(group->meth->field_mod(out, out, group->meth));
1353
1354 CLEANUP:
1355 mp_clear(&result);
1356 mp_clear(&tmp);
1357 return res;
1358 }
1359
1360 /* scalar_from_mp_int sets out_scalar=n, where n < the group order. */
1361 static void scalar_from_mp_int(u8 out_scalar[32], const mp_int *n)
1362 {
1363 /* We require that |n| is less than the order of the group and therefore it
1364 * will fit into |out_scalar|. However, these is a timing side-channel here
1365 * that we cannot avoid: if |n| is sufficiently small it may be one or more
1366 * words too short and we'll copy less data.
1367 */
1368 PORT_Assert(MP_USED(n) * sizeof(mp_digit) <= 32);
1369 memset(out_scalar, 0, 32);
1370 #ifdef IS_LITTLE_ENDIAN
1371 memcpy(out_scalar, MP_DIGITS(n), MP_USED(n) * sizeof(mp_digit));
1372 #else
1373 {
1374 mp_size i;
1375 mp_digit swapped[MP_DIGITS_IN_256_BITS];
1376 for (i = 0; i < MP_USED(n); i++) {
1377 swapped[i] = BYTESWAP_MP_DIGIT_TO_LE(MP_DIGIT(n, i));
1378 }
1379 memcpy(out_scalar, swapped, MP_USED(n) * sizeof(mp_digit));
1380 }
1381 #endif
1382 }
1383
1384 /* ec_GFp_nistp256_base_point_mul sets {out_x,out_y} = nG, where n is < the
1385 * order of the group.
1386 */
1387 static mp_err ec_GFp_nistp256_base_point_mul(const mp_int *n,
1388 mp_int *out_x, mp_int *out_y,
1389 const ECGroup *group)
1390 {
1391 u8 scalar[32];
1392 felem x, y, z, x_affine, y_affine;
1393 mp_err res;
1394
1395 /* FIXME(agl): test that n < order. */
1396
1397 scalar_from_mp_int(scalar, n);
1398 scalar_base_mult(x, y, z, scalar);
1399 point_to_affine(x_affine, y_affine, x, y, z);
1400 MP_CHECKOK(from_montgomery(out_x, x_affine, group));
1401 MP_CHECKOK(from_montgomery(out_y, y_affine, group));
1402
1403 CLEANUP:
1404 return res;
1405 }
1406
1407 /* ec_GFp_nistp256_point_mul sets {out_x,out_y} = n*{in_x,in_y}, where n is <
1408 * the order of the group.
1409 */
1410 static mp_err ec_GFp_nistp256_point_mul(const mp_int *n,
1411 const mp_int *in_x, const mp_int *in_y,
1412 mp_int *out_x, mp_int *out_y,
1413 const ECGroup *group)
1414 {
1415 u8 scalar[32];
1416 felem x, y, z, x_affine, y_affine, px, py;
1417 mp_err res;
1418
1419 scalar_from_mp_int(scalar, n);
1420
1421 MP_CHECKOK(to_montgomery(px, in_x, group));
1422 MP_CHECKOK(to_montgomery(py, in_y, group));
1423
1424 scalar_mult(x, y, z, px, py, scalar);
1425 point_to_affine(x_affine, y_affine, x, y, z);
1426 MP_CHECKOK(from_montgomery(out_x, x_affine, group));
1427 MP_CHECKOK(from_montgomery(out_y, y_affine, group));
1428
1429 CLEANUP:
1430 return res;
1431 }
1432
1433 /* ec_GFp_nistp256_point_mul_vartime sets {out_x,out_y} = n1*G +
1434 * n2*{in_x,in_y}, where n1 and n2 are < the order of the group.
1435 *
1436 * As indicated by the name, this function operates in variable time. This
1437 * is safe because it's used for signature validation which doesn't deal
1438 * with secrets.
1439 */
1440 static mp_err ec_GFp_nistp256_points_mul_vartime(
1441 const mp_int *n1, const mp_int *n2,
1442 const mp_int *in_x, const mp_int *in_y,
1443 mp_int *out_x, mp_int *out_y,
1444 const ECGroup *group)
1445 {
1446 u8 scalar1[32], scalar2[32];
1447 felem x1, y1, z1, x2, y2, z2, x_affine, y_affine, px, py;
1448 mp_err res = MP_OKAY;
1449
1450 /* If n2 == NULL, this is just a base-point multiplication. */
1451 if (n2 == NULL) {
1452 return ec_GFp_nistp256_base_point_mul(n1, out_x, out_y, group);
1453 }
1454
1455 /* If n1 == nULL, this is just an arbitary-point multiplication. */
1456 if (n1 == NULL) {
1457 return ec_GFp_nistp256_point_mul(n2, in_x, in_y, out_x, out_y, group);
1458 }
1459
1460 /* If both scalars are zero, then the result is the point at infinity. */
1461 if (mp_cmp_z(n1) == 0 && mp_cmp_z(n2) == 0) {
1462 mp_zero(out_x);
1463 mp_zero(out_y);
1464 return res;
1465 }
1466
1467 scalar_from_mp_int(scalar1, n1);
1468 scalar_from_mp_int(scalar2, n2);
1469
1470 MP_CHECKOK(to_montgomery(px, in_x, group));
1471 MP_CHECKOK(to_montgomery(py, in_y, group));
1472 scalar_base_mult(x1, y1, z1, scalar1);
1473 scalar_mult(x2, y2, z2, px, py, scalar2);
1474
1475 if (mp_cmp_z(n2) == 0) {
1476 /* If n2 == 0, then {x2,y2,z2} is zero and the result is just
1477 * {x1,y1,z1}. */
1478 } else if (mp_cmp_z(n1) == 0) {
1479 /* If n1 == 0, then {x1,y1,z1} is zero and the result is just
1480 * {x2,y2,z2}. */
1481 memcpy(x1, x2, sizeof(x2));
1482 memcpy(y1, y2, sizeof(y2));
1483 memcpy(z1, z2, sizeof(z2));
1484 } else {
1485 /* This function handles the case where {x1,y1,z1} == {x2,y2,z2}. */
1486 point_add_or_double_vartime(x1, y1, z1, x1, y1, z1, x2, y2, z2);
1487 }
1488
1489 point_to_affine(x_affine, y_affine, x1, y1, z1);
1490 MP_CHECKOK(from_montgomery(out_x, x_affine, group));
1491 MP_CHECKOK(from_montgomery(out_y, y_affine, group));
1492
1493 CLEANUP:
1494 return res;
1495 }
1496
1497 /* Wire in fast point multiplication for named curves. */
1498 mp_err ec_group_set_gfp256_32(ECGroup *group, ECCurveName name)
1499 {
1500 if (name == ECCurve_NIST_P256) {
1501 group->base_point_mul = &ec_GFp_nistp256_base_point_mul;
1502 group->point_mul = &ec_GFp_nistp256_point_mul;
1503 group->points_mul = &ec_GFp_nistp256_points_mul_vartime;
1504 }
1505 return MP_OKAY;
1506 }
OLDNEW
« no previous file with comments | « mozilla/security/nss/lib/freebl/ecl/ecp.h ('k') | mozilla/security/nss/lib/freebl/ecl/ecp_aff.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698