Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(92)

Side by Side Diff: nss/lib/freebl/ecl/ecp_256_32.c

Issue 2078763002: Delete bundled copy of NSS and replace with README. (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/nss@master
Patch Set: Delete bundled copy of NSS and replace with README. Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « nss/lib/freebl/ecl/ecp_256.c ('k') | nss/lib/freebl/ecl/ecp_384.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 /* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4
5 /* A 32-bit implementation of the NIST P-256 elliptic curve. */
6
7 #include <string.h>
8
9 #include "prtypes.h"
10 #include "mpi.h"
11 #include "mpi-priv.h"
12 #include "ecp.h"
13
14 typedef PRUint8 u8;
15 typedef PRUint32 u32;
16 typedef PRUint64 u64;
17
18 /* Our field elements are represented as nine, unsigned 32-bit words. Freebl's
19 * MPI library calls them digits, but here they are called limbs, which is
20 * GMP's terminology.
21 *
22 * The value of an felem (field element) is:
23 * x[0] + (x[1] * 2**29) + (x[2] * 2**57) + ... + (x[8] * 2**228)
24 *
25 * That is, each limb is alternately 29 or 28-bits wide in little-endian
26 * order.
27 *
28 * This means that an felem hits 2**257, rather than 2**256 as we would like. A
29 * 28, 29, ... pattern would cause us to hit 2**256, but that causes problems
30 * when multiplying as terms end up one bit short of a limb which would require
31 * much bit-shifting to correct.
32 *
33 * Finally, the values stored in an felem are in Montgomery form. So the value
34 * |y| is stored as (y*R) mod p, where p is the P-256 prime and R is 2**257.
35 */
36 typedef u32 limb;
37 #define NLIMBS 9
38 typedef limb felem[NLIMBS];
39
40 static const limb kBottom28Bits = 0xfffffff;
41 static const limb kBottom29Bits = 0x1fffffff;
42
43 /* kOne is the number 1 as an felem. It's 2**257 mod p split up into 29 and
44 * 28-bit words.
45 */
46 static const felem kOne = {
47 2, 0, 0, 0xffff800,
48 0x1fffffff, 0xfffffff, 0x1fbfffff, 0x1ffffff,
49 0
50 };
51 static const felem kZero = {0};
52 static const felem kP = {
53 0x1fffffff, 0xfffffff, 0x1fffffff, 0x3ff,
54 0, 0, 0x200000, 0xf000000,
55 0xfffffff
56 };
57 static const felem k2P = {
58 0x1ffffffe, 0xfffffff, 0x1fffffff, 0x7ff,
59 0, 0, 0x400000, 0xe000000,
60 0x1fffffff
61 };
62
63 /* kPrecomputed contains precomputed values to aid the calculation of scalar
64 * multiples of the base point, G. It's actually two, equal length, tables
65 * concatenated.
66 *
67 * The first table contains (x,y) felem pairs for 16 multiples of the base
68 * point, G.
69 *
70 * Index | Index (binary) | Value
71 * 0 | 0000 | 0G (all zeros, omitted)
72 * 1 | 0001 | G
73 * 2 | 0010 | 2**64G
74 * 3 | 0011 | 2**64G + G
75 * 4 | 0100 | 2**128G
76 * 5 | 0101 | 2**128G + G
77 * 6 | 0110 | 2**128G + 2**64G
78 * 7 | 0111 | 2**128G + 2**64G + G
79 * 8 | 1000 | 2**192G
80 * 9 | 1001 | 2**192G + G
81 * 10 | 1010 | 2**192G + 2**64G
82 * 11 | 1011 | 2**192G + 2**64G + G
83 * 12 | 1100 | 2**192G + 2**128G
84 * 13 | 1101 | 2**192G + 2**128G + G
85 * 14 | 1110 | 2**192G + 2**128G + 2**64G
86 * 15 | 1111 | 2**192G + 2**128G + 2**64G + G
87 *
88 * The second table follows the same style, but the terms are 2**32G,
89 * 2**96G, 2**160G, 2**224G.
90 *
91 * This is ~2KB of data.
92 */
93 static const limb kPrecomputed[NLIMBS * 2 * 15 * 2] = {
94 0x11522878, 0xe730d41, 0xdb60179, 0x4afe2ff, 0x12883add, 0xcaddd88, 0x119e7e dc, 0xd4a6eab, 0x3120bee,
95 0x1d2aac15, 0xf25357c, 0x19e45cdd, 0x5c721d0, 0x1992c5a5, 0xa237487, 0x154ba 21, 0x14b10bb, 0xae3fe3,
96 0xd41a576, 0x922fc51, 0x234994f, 0x60b60d3, 0x164586ae, 0xce95f18, 0x1fe4907 3, 0x3fa36cc, 0x5ebcd2c,
97 0xb402f2f, 0x15c70bf, 0x1561925c, 0x5a26704, 0xda91e90, 0xcdc1c7f, 0x1ea1244 6, 0xe1ade1e, 0xec91f22,
98 0x26f7778, 0x566847e, 0xa0bec9e, 0x234f453, 0x1a31f21a, 0xd85e75c, 0x56c7109 , 0xa267a00, 0xb57c050,
99 0x98fb57, 0xaa837cc, 0x60c0792, 0xcfa5e19, 0x61bab9e, 0x589e39b, 0xa324c5, 0 x7d6dee7, 0x2976e4b,
100 0x1fc4124a, 0xa8c244b, 0x1ce86762, 0xcd61c7e, 0x1831c8e0, 0x75774e1, 0x1d96a 5a9, 0x843a649, 0xc3ab0fa,
101 0x6e2e7d5, 0x7673a2a, 0x178b65e8, 0x4003e9b, 0x1a1f11c2, 0x7816ea, 0xf643e11 , 0x58c43df, 0xf423fc2,
102 0x19633ffa, 0x891f2b2, 0x123c231c, 0x46add8c, 0x54700dd, 0x59e2b17, 0x172db4 0f, 0x83e277d, 0xb0dd609,
103 0xfd1da12, 0x35c6e52, 0x19ede20c, 0xd19e0c0, 0x97d0f40, 0xb015b19, 0x449e3f5 , 0xe10c9e, 0x33ab581,
104 0x56a67ab, 0x577734d, 0x1dddc062, 0xc57b10d, 0x149b39d, 0x26a9e7b, 0xc35df9f , 0x48764cd, 0x76dbcca,
105 0xca4b366, 0xe9303ab, 0x1a7480e7, 0x57e9e81, 0x1e13eb50, 0xf466cf3, 0x6f16b2 0, 0x4ba3173, 0xc168c33,
106 0x15cb5439, 0x6a38e11, 0x73658bd, 0xb29564f, 0x3f6dc5b, 0x53b97e, 0x1322c4c0 , 0x65dd7ff, 0x3a1e4f6,
107 0x14e614aa, 0x9246317, 0x1bc83aca, 0xad97eed, 0xd38ce4a, 0xf82b006, 0x341f07 7, 0xa6add89, 0x4894acd,
108 0x9f162d5, 0xf8410ef, 0x1b266a56, 0xd7f223, 0x3e0cb92, 0xe39b672, 0x6a2901a, 0x69a8556, 0x7e7c0,
109 0x9b7d8d3, 0x309a80, 0x1ad05f7f, 0xc2fb5dd, 0xcbfd41d, 0x9ceb638, 0x1051825c , 0xda0cf5b, 0x812e881,
110 0x6f35669, 0x6a56f2c, 0x1df8d184, 0x345820, 0x1477d477, 0x1645db1, 0xbe80c51 , 0xc22be3e, 0xe35e65a,
111 0x1aeb7aa0, 0xc375315, 0xf67bc99, 0x7fdd7b9, 0x191fc1be, 0x61235d, 0x2c184e9 , 0x1c5a839, 0x47a1e26,
112 0xb7cb456, 0x93e225d, 0x14f3c6ed, 0xccc1ac9, 0x17fe37f3, 0x4988989, 0x1a90c5 02, 0x2f32042, 0xa17769b,
113 0xafd8c7c, 0x8191c6e, 0x1dcdb237, 0x16200c0, 0x107b32a1, 0x66c08db, 0x10d06a 02, 0x3fc93, 0x5620023,
114 0x16722b27, 0x68b5c59, 0x270fcfc, 0xfad0ecc, 0xe5de1c2, 0xeab466b, 0x2fc513c , 0x407f75c, 0xbaab133,
115 0x9705fe9, 0xb88b8e7, 0x734c993, 0x1e1ff8f, 0x19156970, 0xabd0f00, 0x10469ea 7, 0x3293ac0, 0xcdc98aa,
116 0x1d843fd, 0xe14bfe8, 0x15be825f, 0x8b5212, 0xeb3fb67, 0x81cbd29, 0xbc62f16, 0x2b6fcc7, 0xf5a4e29,
117 0x13560b66, 0xc0b6ac2, 0x51ae690, 0xd41e271, 0xf3e9bd4, 0x1d70aab, 0x1029f72 , 0x73e1c35, 0xee70fbc,
118 0xad81baf, 0x9ecc49a, 0x86c741e, 0xfe6be30, 0x176752e7, 0x23d416, 0x1f83de85 , 0x27de188, 0x66f70b8,
119 0x181cd51f, 0x96b6e4c, 0x188f2335, 0xa5df759, 0x17a77eb6, 0xfeb0e73, 0x154ae 914, 0x2f3ec51, 0x3826b59,
120 0xb91f17d, 0x1c72949, 0x1362bf0a, 0xe23fddf, 0xa5614b0, 0xf7d8f, 0x79061, 0x 823d9d2, 0x8213f39,
121 0x1128ae0b, 0xd095d05, 0xb85c0c2, 0x1ecb2ef, 0x24ddc84, 0xe35e901, 0x18411a4 a, 0xf5ddc3d, 0x3786689,
122 0x52260e8, 0x5ae3564, 0x542b10d, 0x8d93a45, 0x19952aa4, 0x996cc41, 0x1051a72 9, 0x4be3499, 0x52b23aa,
123 0x109f307e, 0x6f5b6bb, 0x1f84e1e7, 0x77a0cfa, 0x10c4df3f, 0x25a02ea, 0xb0480 35, 0xe31de66, 0xc6ecaa3,
124 0x28ea335, 0x2886024, 0x1372f020, 0xf55d35, 0x15e4684c, 0xf2a9e17, 0x1a4a752 9, 0xcb7beb1, 0xb2a78a1,
125 0x1ab21f1f, 0x6361ccf, 0x6c9179d, 0xb135627, 0x1267b974, 0x4408bad, 0x1cbff6 58, 0xe3d6511, 0xc7d76f,
126 0x1cc7a69, 0xe7ee31b, 0x54fab4f, 0x2b914f, 0x1ad27a30, 0xcd3579e, 0xc50124c, 0x50daa90, 0xb13f72,
127 0xb06aa75, 0x70f5cc6, 0x1649e5aa, 0x84a5312, 0x329043c, 0x41c4011, 0x13d3241 1, 0xb04a838, 0xd760d2d,
128 0x1713b532, 0xbaa0c03, 0x84022ab, 0x6bcf5c1, 0x2f45379, 0x18ae070, 0x18c9e11 e, 0x20bca9a, 0x66f496b,
129 0x3eef294, 0x67500d2, 0xd7f613c, 0x2dbbeb, 0xb741038, 0xe04133f, 0x1582968d, 0xbe985f7, 0x1acbc1a,
130 0x1a6a939f, 0x33e50f6, 0xd665ed4, 0xb4b7bd6, 0x1e5a3799, 0x6b33847, 0x17fa56 ff, 0x65ef930, 0x21dc4a,
131 0x2b37659, 0x450fe17, 0xb357b65, 0xdf5efac, 0x15397bef, 0x9d35a7f, 0x112ac15 f, 0x624e62e, 0xa90ae2f,
132 0x107eecd2, 0x1f69bbe, 0x77d6bce, 0x5741394, 0x13c684fc, 0x950c910, 0x725522 b, 0xdc78583, 0x40eeabb,
133 0x1fde328a, 0xbd61d96, 0xd28c387, 0x9e77d89, 0x12550c40, 0x759cb7d, 0x367ef3 4, 0xae2a960, 0x91b8bdc,
134 0x93462a9, 0xf469ef, 0xb2e9aef, 0xd2ca771, 0x54e1f42, 0x7aaa49, 0x6316abb, 0 x2413c8e, 0x5425bf9,
135 0x1bed3e3a, 0xf272274, 0x1f5e7326, 0x6416517, 0xea27072, 0x9cedea7, 0x6e7633 , 0x7c91952, 0xd806dce,
136 0x8e2a7e1, 0xe421e1a, 0x418c9e1, 0x1dbc890, 0x1b395c36, 0xa1dc175, 0x1dc4ef7 3, 0x8956f34, 0xe4b5cf2,
137 0x1b0d3a18, 0x3194a36, 0x6c2641f, 0xe44124c, 0xa2f4eaa, 0xa8c25ba, 0xf927ed7 , 0x627b614, 0x7371cca,
138 0xba16694, 0x417bc03, 0x7c0a7e3, 0x9c35c19, 0x1168a205, 0x8b6b00d, 0x10e3edc 9, 0x9c19bf2, 0x5882229,
139 0x1b2b4162, 0xa5cef1a, 0x1543622b, 0x9bd433e, 0x364e04d, 0x7480792, 0x5c9b5b 3, 0xe85ff25, 0x408ef57,
140 0x1814cfa4, 0x121b41b, 0xd248a0f, 0x3b05222, 0x39bb16a, 0xc75966d, 0xa038113 , 0xa4a1769, 0x11fbc6c,
141 0x917e50e, 0xeec3da8, 0x169d6eac, 0x10c1699, 0xa416153, 0xf724912, 0x15cd60b 7, 0x4acbad9, 0x5efc5fa,
142 0xf150ed7, 0x122b51, 0x1104b40a, 0xcb7f442, 0xfbb28ff, 0x6ac53ca, 0x196142cc , 0x7bf0fa9, 0x957651,
143 0x4e0f215, 0xed439f8, 0x3f46bd5, 0x5ace82f, 0x110916b6, 0x6db078, 0xffd7d57, 0xf2ecaac, 0xca86dec,
144 0x15d6b2da, 0x965ecc9, 0x1c92b4c2, 0x1f3811, 0x1cb080f5, 0x2d8b804, 0x19d1c1 2d, 0xf20bd46, 0x1951fa7,
145 0xa3656c3, 0x523a425, 0xfcd0692, 0xd44ddc8, 0x131f0f5b, 0xaf80e4a, 0xcd9fc74 , 0x99bb618, 0x2db944c,
146 0xa673090, 0x1c210e1, 0x178c8d23, 0x1474383, 0x10b8743d, 0x985a55b, 0x2e7477 9, 0x576138, 0x9587927,
147 0x133130fa, 0xbe05516, 0x9f4d619, 0xbb62570, 0x99ec591, 0xd9468fe, 0x1d07782 d, 0xfc72e0b, 0x701b298,
148 0x1863863b, 0x85954b8, 0x121a0c36, 0x9e7fedf, 0xf64b429, 0x9b9d71e, 0x14e2f5 d8, 0xf858d3a, 0x942eea8,
149 0xda5b765, 0x6edafff, 0xa9d18cc, 0xc65e4ba, 0x1c747e86, 0xe4ea915, 0x1981d7a 1, 0x8395659, 0x52ed4e2,
150 0x87d43b7, 0x37ab11b, 0x19d292ce, 0xf8d4692, 0x18c3053f, 0x8863e13, 0x4c146c 0, 0x6bdf55a, 0x4e4457d,
151 0x16152289, 0xac78ec2, 0x1a59c5a2, 0x2028b97, 0x71c2d01, 0x295851f, 0x404747 b, 0x878558d, 0x7d29aa4,
152 0x13d8341f, 0x8daefd7, 0x139c972d, 0x6b7ea75, 0xd4a9dde, 0xff163d8, 0x81d55d 7, 0xa5bef68, 0xb7b30d8,
153 0xbe73d6f, 0xaa88141, 0xd976c81, 0x7e7a9cc, 0x18beb771, 0xd773cbd, 0x13f5195 1, 0x9d0c177, 0x1c49a78,
154 };
155
156 /* Field element operations:
157 */
158
159 /* NON_ZERO_TO_ALL_ONES returns:
160 * 0xffffffff for 0 < x <= 2**31
161 * 0 for x == 0 or x > 2**31.
162 *
163 * x must be a u32 or an equivalent type such as limb.
164 */
165 #define NON_ZERO_TO_ALL_ONES(x) ((((u32)(x) - 1) >> 31) - 1)
166
167 /* felem_reduce_carry adds a multiple of p in order to cancel |carry|,
168 * which is a term at 2**257.
169 *
170 * On entry: carry < 2**3, inout[0,2,...] < 2**29, inout[1,3,...] < 2**28.
171 * On exit: inout[0,2,..] < 2**30, inout[1,3,...] < 2**29.
172 */
173 static void felem_reduce_carry(felem inout, limb carry)
174 {
175 const u32 carry_mask = NON_ZERO_TO_ALL_ONES(carry);
176
177 inout[0] += carry << 1;
178 inout[3] += 0x10000000 & carry_mask;
179 /* carry < 2**3 thus (carry << 11) < 2**14 and we added 2**28 in the
180 * previous line therefore this doesn't underflow.
181 */
182 inout[3] -= carry << 11;
183 inout[4] += (0x20000000 - 1) & carry_mask;
184 inout[5] += (0x10000000 - 1) & carry_mask;
185 inout[6] += (0x20000000 - 1) & carry_mask;
186 inout[6] -= carry << 22;
187 /* This may underflow if carry is non-zero but, if so, we'll fix it in the
188 * next line.
189 */
190 inout[7] -= 1 & carry_mask;
191 inout[7] += carry << 25;
192 }
193
194 /* felem_sum sets out = in+in2.
195 *
196 * On entry, in[i]+in2[i] must not overflow a 32-bit word.
197 * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29
198 */
199 static void felem_sum(felem out, const felem in, const felem in2)
200 {
201 limb carry = 0;
202 unsigned int i;
203 for (i = 0;; i++) {
204 out[i] = in[i] + in2[i];
205 out[i] += carry;
206 carry = out[i] >> 29;
207 out[i] &= kBottom29Bits;
208
209 i++;
210 if (i == NLIMBS)
211 break;
212
213 out[i] = in[i] + in2[i];
214 out[i] += carry;
215 carry = out[i] >> 28;
216 out[i] &= kBottom28Bits;
217 }
218
219 felem_reduce_carry(out, carry);
220 }
221
222 #define two31m3 (((limb)1) << 31) - (((limb)1) << 3)
223 #define two30m2 (((limb)1) << 30) - (((limb)1) << 2)
224 #define two30p13m2 (((limb)1) << 30) + (((limb)1) << 13) - (((limb)1) << 2)
225 #define two31m2 (((limb)1) << 31) - (((limb)1) << 2)
226 #define two31p24m2 (((limb)1) << 31) + (((limb)1) << 24) - (((limb)1) << 2)
227 #define two30m27m2 (((limb)1) << 30) - (((limb)1) << 27) - (((limb)1) << 2)
228
229 /* zero31 is 0 mod p.
230 */
231 static const felem zero31 = {
232 two31m3, two30m2, two31m2, two30p13m2,
233 two31m2, two30m2, two31p24m2, two30m27m2,
234 two31m2
235 };
236
237 /* felem_diff sets out = in-in2.
238 *
239 * On entry: in[0,2,...] < 2**30, in[1,3,...] < 2**29 and
240 * in2[0,2,...] < 2**30, in2[1,3,...] < 2**29.
241 * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
242 */
243 static void felem_diff(felem out, const felem in, const felem in2)
244 {
245 limb carry = 0;
246 unsigned int i;
247
248 for (i = 0;; i++) {
249 out[i] = in[i] - in2[i];
250 out[i] += zero31[i];
251 out[i] += carry;
252 carry = out[i] >> 29;
253 out[i] &= kBottom29Bits;
254
255 i++;
256 if (i == NLIMBS)
257 break;
258
259 out[i] = in[i] - in2[i];
260 out[i] += zero31[i];
261 out[i] += carry;
262 carry = out[i] >> 28;
263 out[i] &= kBottom28Bits;
264 }
265
266 felem_reduce_carry(out, carry);
267 }
268
269 /* felem_reduce_degree sets out = tmp/R mod p where tmp contains 64-bit words
270 * with the same 29,28,... bit positions as an felem.
271 *
272 * The values in felems are in Montgomery form: x*R mod p where R = 2**257.
273 * Since we just multiplied two Montgomery values together, the result is
274 * x*y*R*R mod p. We wish to divide by R in order for the result also to be
275 * in Montgomery form.
276 *
277 * On entry: tmp[i] < 2**64
278 * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29
279 */
280 static void felem_reduce_degree(felem out, u64 tmp[17])
281 {
282 /* The following table may be helpful when reading this code:
283 *
284 * Limb number: 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10...
285 * Width (bits): 29| 28| 29| 28| 29| 28| 29| 28| 29| 28| 29
286 * Start bit: 0 | 29| 57| 86|114|143|171|200|228|257|285
287 * (odd phase): 0 | 28| 57| 85|114|142|171|199|228|256|285
288 */
289 limb tmp2[18], carry, x, xMask;
290 unsigned int i;
291
292 /* tmp contains 64-bit words with the same 29,28,29-bit positions as an
293 * felem. So the top of an element of tmp might overlap with another
294 * element two positions down. The following loop eliminates this
295 * overlap.
296 */
297 tmp2[0] = tmp[0] & kBottom29Bits;
298
299 /* In the following we use "(limb) tmp[x]" and "(limb) (tmp[x]>>32)" to try
300 * and hint to the compiler that it can do a single-word shift by selecting
301 * the right register rather than doing a double-word shift and truncating
302 * afterwards.
303 */
304 tmp2[1] = ((limb) tmp[0]) >> 29;
305 tmp2[1] |= (((limb) (tmp[0] >> 32)) << 3) & kBottom28Bits;
306 tmp2[1] += ((limb) tmp[1]) & kBottom28Bits;
307 carry = tmp2[1] >> 28;
308 tmp2[1] &= kBottom28Bits;
309
310 for (i = 2; i < 17; i++) {
311 tmp2[i] = ((limb) (tmp[i - 2] >> 32)) >> 25;
312 tmp2[i] += ((limb) (tmp[i - 1])) >> 28;
313 tmp2[i] += (((limb) (tmp[i - 1] >> 32)) << 4) & kBottom29Bits;
314 tmp2[i] += ((limb) tmp[i]) & kBottom29Bits;
315 tmp2[i] += carry;
316 carry = tmp2[i] >> 29;
317 tmp2[i] &= kBottom29Bits;
318
319 i++;
320 if (i == 17)
321 break;
322 tmp2[i] = ((limb) (tmp[i - 2] >> 32)) >> 25;
323 tmp2[i] += ((limb) (tmp[i - 1])) >> 29;
324 tmp2[i] += (((limb) (tmp[i - 1] >> 32)) << 3) & kBottom28Bits;
325 tmp2[i] += ((limb) tmp[i]) & kBottom28Bits;
326 tmp2[i] += carry;
327 carry = tmp2[i] >> 28;
328 tmp2[i] &= kBottom28Bits;
329 }
330
331 tmp2[17] = ((limb) (tmp[15] >> 32)) >> 25;
332 tmp2[17] += ((limb) (tmp[16])) >> 29;
333 tmp2[17] += (((limb) (tmp[16] >> 32)) << 3);
334 tmp2[17] += carry;
335
336 /* Montgomery elimination of terms:
337 *
338 * Since R is 2**257, we can divide by R with a bitwise shift if we can
339 * ensure that the right-most 257 bits are all zero. We can make that true
340 * by adding multiplies of p without affecting the value.
341 *
342 * So we eliminate limbs from right to left. Since the bottom 29 bits of p
343 * are all ones, then by adding tmp2[0]*p to tmp2 we'll make tmp2[0] == 0.
344 * We can do that for 8 further limbs and then right shift to eliminate the
345 * extra factor of R.
346 */
347 for (i = 0;; i += 2) {
348 tmp2[i + 1] += tmp2[i] >> 29;
349 x = tmp2[i] & kBottom29Bits;
350 xMask = NON_ZERO_TO_ALL_ONES(x);
351 tmp2[i] = 0;
352
353 /* The bounds calculations for this loop are tricky. Each iteration of
354 * the loop eliminates two words by adding values to words to their
355 * right.
356 *
357 * The following table contains the amounts added to each word (as an
358 * offset from the value of i at the top of the loop). The amounts are
359 * accounted for from the first and second half of the loop separately
360 * and are written as, for example, 28 to mean a value <2**28.
361 *
362 * Word: 3 4 5 6 7 8 9 10
363 * Added in top half: 28 11 29 21 29 28
364 * 28 29
365 * 29
366 * Added in bottom half: 29 10 28 21 28 28
367 * 29
368 *
369 * The value that is currently offset 7 will be offset 5 for the next
370 * iteration and then offset 3 for the iteration after that. Therefore
371 * the total value added will be the values added at 7, 5 and 3.
372 *
373 * The following table accumulates these values. The sums at the bottom
374 * are written as, for example, 29+28, to mean a value < 2**29+2**28.
375 *
376 * Word: 3 4 5 6 7 8 9 10 11 12 13
377 * 28 11 10 29 21 29 28 28 28 28 28
378 * 29 28 11 28 29 28 29 28 29 28
379 * 29 28 21 21 29 21 29 21
380 * 10 29 28 21 28 21 28
381 * 28 29 28 29 28 29 28
382 * 11 10 29 10 29 10
383 * 29 28 11 28 11
384 * 29 29
385 * --------------------------------------------
386 * 30+ 31+ 30+ 31+ 30+
387 * 28+ 29+ 28+ 29+ 21+
388 * 21+ 28+ 21+ 28+ 10
389 * 10 21+ 10 21+
390 * 11 11
391 *
392 * So the greatest amount is added to tmp2[10] and tmp2[12]. If
393 * tmp2[10/12] has an initial value of <2**29, then the maximum value
394 * will be < 2**31 + 2**30 + 2**28 + 2**21 + 2**11, which is < 2**32,
395 * as required.
396 */
397 tmp2[i + 3] += (x << 10) & kBottom28Bits;
398 tmp2[i + 4] += (x >> 18);
399
400 tmp2[i + 6] += (x << 21) & kBottom29Bits;
401 tmp2[i + 7] += x >> 8;
402
403 /* At position 200, which is the starting bit position for word 7, we
404 * have a factor of 0xf000000 = 2**28 - 2**24.
405 */
406 tmp2[i + 7] += 0x10000000 & xMask;
407 /* Word 7 is 28 bits wide, so the 2**28 term exactly hits word 8. */
408 tmp2[i + 8] += (x - 1) & xMask;
409 tmp2[i + 7] -= (x << 24) & kBottom28Bits;
410 tmp2[i + 8] -= x >> 4;
411
412 tmp2[i + 8] += 0x20000000 & xMask;
413 tmp2[i + 8] -= x;
414 tmp2[i + 8] += (x << 28) & kBottom29Bits;
415 tmp2[i + 9] += ((x >> 1) - 1) & xMask;
416
417 if (i+1 == NLIMBS)
418 break;
419 tmp2[i + 2] += tmp2[i + 1] >> 28;
420 x = tmp2[i + 1] & kBottom28Bits;
421 xMask = NON_ZERO_TO_ALL_ONES(x);
422 tmp2[i + 1] = 0;
423
424 tmp2[i + 4] += (x << 11) & kBottom29Bits;
425 tmp2[i + 5] += (x >> 18);
426
427 tmp2[i + 7] += (x << 21) & kBottom28Bits;
428 tmp2[i + 8] += x >> 7;
429
430 /* At position 199, which is the starting bit of the 8th word when
431 * dealing with a context starting on an odd word, we have a factor of
432 * 0x1e000000 = 2**29 - 2**25. Since we have not updated i, the 8th
433 * word from i+1 is i+8.
434 */
435 tmp2[i + 8] += 0x20000000 & xMask;
436 tmp2[i + 9] += (x - 1) & xMask;
437 tmp2[i + 8] -= (x << 25) & kBottom29Bits;
438 tmp2[i + 9] -= x >> 4;
439
440 tmp2[i + 9] += 0x10000000 & xMask;
441 tmp2[i + 9] -= x;
442 tmp2[i + 10] += (x - 1) & xMask;
443 }
444
445 /* We merge the right shift with a carry chain. The words above 2**257 have
446 * widths of 28,29,... which we need to correct when copying them down.
447 */
448 carry = 0;
449 for (i = 0; i < 8; i++) {
450 /* The maximum value of tmp2[i + 9] occurs on the first iteration and
451 * is < 2**30+2**29+2**28. Adding 2**29 (from tmp2[i + 10]) is
452 * therefore safe.
453 */
454 out[i] = tmp2[i + 9];
455 out[i] += carry;
456 out[i] += (tmp2[i + 10] << 28) & kBottom29Bits;
457 carry = out[i] >> 29;
458 out[i] &= kBottom29Bits;
459
460 i++;
461 out[i] = tmp2[i + 9] >> 1;
462 out[i] += carry;
463 carry = out[i] >> 28;
464 out[i] &= kBottom28Bits;
465 }
466
467 out[8] = tmp2[17];
468 out[8] += carry;
469 carry = out[8] >> 29;
470 out[8] &= kBottom29Bits;
471
472 felem_reduce_carry(out, carry);
473 }
474
475 /* felem_square sets out=in*in.
476 *
477 * On entry: in[0,2,...] < 2**30, in[1,3,...] < 2**29.
478 * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
479 */
480 static void felem_square(felem out, const felem in)
481 {
482 u64 tmp[17];
483
484 tmp[0] = ((u64) in[0]) * in[0];
485 tmp[1] = ((u64) in[0]) * (in[1] << 1);
486 tmp[2] = ((u64) in[0]) * (in[2] << 1) +
487 ((u64) in[1]) * (in[1] << 1);
488 tmp[3] = ((u64) in[0]) * (in[3] << 1) +
489 ((u64) in[1]) * (in[2] << 1);
490 tmp[4] = ((u64) in[0]) * (in[4] << 1) +
491 ((u64) in[1]) * (in[3] << 2) +
492 ((u64) in[2]) * in[2];
493 tmp[5] = ((u64) in[0]) * (in[5] << 1) +
494 ((u64) in[1]) * (in[4] << 1) +
495 ((u64) in[2]) * (in[3] << 1);
496 tmp[6] = ((u64) in[0]) * (in[6] << 1) +
497 ((u64) in[1]) * (in[5] << 2) +
498 ((u64) in[2]) * (in[4] << 1) +
499 ((u64) in[3]) * (in[3] << 1);
500 tmp[7] = ((u64) in[0]) * (in[7] << 1) +
501 ((u64) in[1]) * (in[6] << 1) +
502 ((u64) in[2]) * (in[5] << 1) +
503 ((u64) in[3]) * (in[4] << 1);
504 /* tmp[8] has the greatest value of 2**61 + 2**60 + 2**61 + 2**60 + 2**60,
505 * which is < 2**64 as required.
506 */
507 tmp[8] = ((u64) in[0]) * (in[8] << 1) +
508 ((u64) in[1]) * (in[7] << 2) +
509 ((u64) in[2]) * (in[6] << 1) +
510 ((u64) in[3]) * (in[5] << 2) +
511 ((u64) in[4]) * in[4];
512 tmp[9] = ((u64) in[1]) * (in[8] << 1) +
513 ((u64) in[2]) * (in[7] << 1) +
514 ((u64) in[3]) * (in[6] << 1) +
515 ((u64) in[4]) * (in[5] << 1);
516 tmp[10] = ((u64) in[2]) * (in[8] << 1) +
517 ((u64) in[3]) * (in[7] << 2) +
518 ((u64) in[4]) * (in[6] << 1) +
519 ((u64) in[5]) * (in[5] << 1);
520 tmp[11] = ((u64) in[3]) * (in[8] << 1) +
521 ((u64) in[4]) * (in[7] << 1) +
522 ((u64) in[5]) * (in[6] << 1);
523 tmp[12] = ((u64) in[4]) * (in[8] << 1) +
524 ((u64) in[5]) * (in[7] << 2) +
525 ((u64) in[6]) * in[6];
526 tmp[13] = ((u64) in[5]) * (in[8] << 1) +
527 ((u64) in[6]) * (in[7] << 1);
528 tmp[14] = ((u64) in[6]) * (in[8] << 1) +
529 ((u64) in[7]) * (in[7] << 1);
530 tmp[15] = ((u64) in[7]) * (in[8] << 1);
531 tmp[16] = ((u64) in[8]) * in[8];
532
533 felem_reduce_degree(out, tmp);
534 }
535
536 /* felem_mul sets out=in*in2.
537 *
538 * On entry: in[0,2,...] < 2**30, in[1,3,...] < 2**29 and
539 * in2[0,2,...] < 2**30, in2[1,3,...] < 2**29.
540 * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
541 */
542 static void felem_mul(felem out, const felem in, const felem in2)
543 {
544 u64 tmp[17];
545
546 tmp[0] = ((u64) in[0]) * in2[0];
547 tmp[1] = ((u64) in[0]) * (in2[1] << 0) +
548 ((u64) in[1]) * (in2[0] << 0);
549 tmp[2] = ((u64) in[0]) * (in2[2] << 0) +
550 ((u64) in[1]) * (in2[1] << 1) +
551 ((u64) in[2]) * (in2[0] << 0);
552 tmp[3] = ((u64) in[0]) * (in2[3] << 0) +
553 ((u64) in[1]) * (in2[2] << 0) +
554 ((u64) in[2]) * (in2[1] << 0) +
555 ((u64) in[3]) * (in2[0] << 0);
556 tmp[4] = ((u64) in[0]) * (in2[4] << 0) +
557 ((u64) in[1]) * (in2[3] << 1) +
558 ((u64) in[2]) * (in2[2] << 0) +
559 ((u64) in[3]) * (in2[1] << 1) +
560 ((u64) in[4]) * (in2[0] << 0);
561 tmp[5] = ((u64) in[0]) * (in2[5] << 0) +
562 ((u64) in[1]) * (in2[4] << 0) +
563 ((u64) in[2]) * (in2[3] << 0) +
564 ((u64) in[3]) * (in2[2] << 0) +
565 ((u64) in[4]) * (in2[1] << 0) +
566 ((u64) in[5]) * (in2[0] << 0);
567 tmp[6] = ((u64) in[0]) * (in2[6] << 0) +
568 ((u64) in[1]) * (in2[5] << 1) +
569 ((u64) in[2]) * (in2[4] << 0) +
570 ((u64) in[3]) * (in2[3] << 1) +
571 ((u64) in[4]) * (in2[2] << 0) +
572 ((u64) in[5]) * (in2[1] << 1) +
573 ((u64) in[6]) * (in2[0] << 0);
574 tmp[7] = ((u64) in[0]) * (in2[7] << 0) +
575 ((u64) in[1]) * (in2[6] << 0) +
576 ((u64) in[2]) * (in2[5] << 0) +
577 ((u64) in[3]) * (in2[4] << 0) +
578 ((u64) in[4]) * (in2[3] << 0) +
579 ((u64) in[5]) * (in2[2] << 0) +
580 ((u64) in[6]) * (in2[1] << 0) +
581 ((u64) in[7]) * (in2[0] << 0);
582 /* tmp[8] has the greatest value but doesn't overflow. See logic in
583 * felem_square.
584 */
585 tmp[8] = ((u64) in[0]) * (in2[8] << 0) +
586 ((u64) in[1]) * (in2[7] << 1) +
587 ((u64) in[2]) * (in2[6] << 0) +
588 ((u64) in[3]) * (in2[5] << 1) +
589 ((u64) in[4]) * (in2[4] << 0) +
590 ((u64) in[5]) * (in2[3] << 1) +
591 ((u64) in[6]) * (in2[2] << 0) +
592 ((u64) in[7]) * (in2[1] << 1) +
593 ((u64) in[8]) * (in2[0] << 0);
594 tmp[9] = ((u64) in[1]) * (in2[8] << 0) +
595 ((u64) in[2]) * (in2[7] << 0) +
596 ((u64) in[3]) * (in2[6] << 0) +
597 ((u64) in[4]) * (in2[5] << 0) +
598 ((u64) in[5]) * (in2[4] << 0) +
599 ((u64) in[6]) * (in2[3] << 0) +
600 ((u64) in[7]) * (in2[2] << 0) +
601 ((u64) in[8]) * (in2[1] << 0);
602 tmp[10] = ((u64) in[2]) * (in2[8] << 0) +
603 ((u64) in[3]) * (in2[7] << 1) +
604 ((u64) in[4]) * (in2[6] << 0) +
605 ((u64) in[5]) * (in2[5] << 1) +
606 ((u64) in[6]) * (in2[4] << 0) +
607 ((u64) in[7]) * (in2[3] << 1) +
608 ((u64) in[8]) * (in2[2] << 0);
609 tmp[11] = ((u64) in[3]) * (in2[8] << 0) +
610 ((u64) in[4]) * (in2[7] << 0) +
611 ((u64) in[5]) * (in2[6] << 0) +
612 ((u64) in[6]) * (in2[5] << 0) +
613 ((u64) in[7]) * (in2[4] << 0) +
614 ((u64) in[8]) * (in2[3] << 0);
615 tmp[12] = ((u64) in[4]) * (in2[8] << 0) +
616 ((u64) in[5]) * (in2[7] << 1) +
617 ((u64) in[6]) * (in2[6] << 0) +
618 ((u64) in[7]) * (in2[5] << 1) +
619 ((u64) in[8]) * (in2[4] << 0);
620 tmp[13] = ((u64) in[5]) * (in2[8] << 0) +
621 ((u64) in[6]) * (in2[7] << 0) +
622 ((u64) in[7]) * (in2[6] << 0) +
623 ((u64) in[8]) * (in2[5] << 0);
624 tmp[14] = ((u64) in[6]) * (in2[8] << 0) +
625 ((u64) in[7]) * (in2[7] << 1) +
626 ((u64) in[8]) * (in2[6] << 0);
627 tmp[15] = ((u64) in[7]) * (in2[8] << 0) +
628 ((u64) in[8]) * (in2[7] << 0);
629 tmp[16] = ((u64) in[8]) * (in2[8] << 0);
630
631 felem_reduce_degree(out, tmp);
632 }
633
634 static void felem_assign(felem out, const felem in)
635 {
636 memcpy(out, in, sizeof(felem));
637 }
638
639 /* felem_inv calculates |out| = |in|^{-1}
640 *
641 * Based on Fermat's Little Theorem:
642 * a^p = a (mod p)
643 * a^{p-1} = 1 (mod p)
644 * a^{p-2} = a^{-1} (mod p)
645 */
646 static void felem_inv(felem out, const felem in)
647 {
648 felem ftmp, ftmp2;
649 /* each e_I will hold |in|^{2^I - 1} */
650 felem e2, e4, e8, e16, e32, e64;
651 unsigned int i;
652
653 felem_square(ftmp, in); /* 2^1 */
654 felem_mul(ftmp, in, ftmp); /* 2^2 - 2^0 */
655 felem_assign(e2, ftmp);
656 felem_square(ftmp, ftmp); /* 2^3 - 2^1 */
657 felem_square(ftmp, ftmp); /* 2^4 - 2^2 */
658 felem_mul(ftmp, ftmp, e2); /* 2^4 - 2^0 */
659 felem_assign(e4, ftmp);
660 felem_square(ftmp, ftmp); /* 2^5 - 2^1 */
661 felem_square(ftmp, ftmp); /* 2^6 - 2^2 */
662 felem_square(ftmp, ftmp); /* 2^7 - 2^3 */
663 felem_square(ftmp, ftmp); /* 2^8 - 2^4 */
664 felem_mul(ftmp, ftmp, e4); /* 2^8 - 2^0 */
665 felem_assign(e8, ftmp);
666 for (i = 0; i < 8; i++) {
667 felem_square(ftmp, ftmp);
668 } /* 2^16 - 2^8 */
669 felem_mul(ftmp, ftmp, e8); /* 2^16 - 2^0 */
670 felem_assign(e16, ftmp);
671 for (i = 0; i < 16; i++) {
672 felem_square(ftmp, ftmp);
673 } /* 2^32 - 2^16 */
674 felem_mul(ftmp, ftmp, e16); /* 2^32 - 2^0 */
675 felem_assign(e32, ftmp);
676 for (i = 0; i < 32; i++) {
677 felem_square(ftmp, ftmp);
678 } /* 2^64 - 2^32 */
679 felem_assign(e64, ftmp);
680 felem_mul(ftmp, ftmp, in); /* 2^64 - 2^32 + 2^0 */
681 for (i = 0; i < 192; i++) {
682 felem_square(ftmp, ftmp);
683 } /* 2^256 - 2^224 + 2^192 */
684
685 felem_mul(ftmp2, e64, e32); /* 2^64 - 2^0 */
686 for (i = 0; i < 16; i++) {
687 felem_square(ftmp2, ftmp2);
688 } /* 2^80 - 2^16 */
689 felem_mul(ftmp2, ftmp2, e16); /* 2^80 - 2^0 */
690 for (i = 0; i < 8; i++) {
691 felem_square(ftmp2, ftmp2);
692 } /* 2^88 - 2^8 */
693 felem_mul(ftmp2, ftmp2, e8); /* 2^88 - 2^0 */
694 for (i = 0; i < 4; i++) {
695 felem_square(ftmp2, ftmp2);
696 } /* 2^92 - 2^4 */
697 felem_mul(ftmp2, ftmp2, e4); /* 2^92 - 2^0 */
698 felem_square(ftmp2, ftmp2); /* 2^93 - 2^1 */
699 felem_square(ftmp2, ftmp2); /* 2^94 - 2^2 */
700 felem_mul(ftmp2, ftmp2, e2); /* 2^94 - 2^0 */
701 felem_square(ftmp2, ftmp2); /* 2^95 - 2^1 */
702 felem_square(ftmp2, ftmp2); /* 2^96 - 2^2 */
703 felem_mul(ftmp2, ftmp2, in); /* 2^96 - 3 */
704
705 felem_mul(out, ftmp2, ftmp); /* 2^256 - 2^224 + 2^192 + 2^96 - 3 */
706 }
707
708 /* felem_scalar_3 sets out=3*out.
709 *
710 * On entry: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
711 * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
712 */
713 static void felem_scalar_3(felem out)
714 {
715 limb carry = 0;
716 unsigned int i;
717
718 for (i = 0;; i++) {
719 out[i] *= 3;
720 out[i] += carry;
721 carry = out[i] >> 29;
722 out[i] &= kBottom29Bits;
723
724 i++;
725 if (i == NLIMBS)
726 break;
727
728 out[i] *= 3;
729 out[i] += carry;
730 carry = out[i] >> 28;
731 out[i] &= kBottom28Bits;
732 }
733
734 felem_reduce_carry(out, carry);
735 }
736
737 /* felem_scalar_4 sets out=4*out.
738 *
739 * On entry: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
740 * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
741 */
742 static void felem_scalar_4(felem out)
743 {
744 limb carry = 0, next_carry;
745 unsigned int i;
746
747 for (i = 0;; i++) {
748 next_carry = out[i] >> 27;
749 out[i] <<= 2;
750 out[i] &= kBottom29Bits;
751 out[i] += carry;
752 carry = next_carry + (out[i] >> 29);
753 out[i] &= kBottom29Bits;
754
755 i++;
756 if (i == NLIMBS)
757 break;
758 next_carry = out[i] >> 26;
759 out[i] <<= 2;
760 out[i] &= kBottom28Bits;
761 out[i] += carry;
762 carry = next_carry + (out[i] >> 28);
763 out[i] &= kBottom28Bits;
764 }
765
766 felem_reduce_carry(out, carry);
767 }
768
769 /* felem_scalar_8 sets out=8*out.
770 *
771 * On entry: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
772 * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
773 */
774 static void felem_scalar_8(felem out)
775 {
776 limb carry = 0, next_carry;
777 unsigned int i;
778
779 for (i = 0;; i++) {
780 next_carry = out[i] >> 26;
781 out[i] <<= 3;
782 out[i] &= kBottom29Bits;
783 out[i] += carry;
784 carry = next_carry + (out[i] >> 29);
785 out[i] &= kBottom29Bits;
786
787 i++;
788 if (i == NLIMBS)
789 break;
790 next_carry = out[i] >> 25;
791 out[i] <<= 3;
792 out[i] &= kBottom28Bits;
793 out[i] += carry;
794 carry = next_carry + (out[i] >> 28);
795 out[i] &= kBottom28Bits;
796 }
797
798 felem_reduce_carry(out, carry);
799 }
800
801 /* felem_is_zero_vartime returns 1 iff |in| == 0. It takes a variable amount of
802 * time depending on the value of |in|.
803 */
804 static char felem_is_zero_vartime(const felem in)
805 {
806 limb carry;
807 int i;
808 limb tmp[NLIMBS];
809 felem_assign(tmp, in);
810
811 /* First, reduce tmp to a minimal form.
812 */
813 do {
814 carry = 0;
815 for (i = 0;; i++) {
816 tmp[i] += carry;
817 carry = tmp[i] >> 29;
818 tmp[i] &= kBottom29Bits;
819
820 i++;
821 if (i == NLIMBS)
822 break;
823
824 tmp[i] += carry;
825 carry = tmp[i] >> 28;
826 tmp[i] &= kBottom28Bits;
827 }
828
829 felem_reduce_carry(tmp, carry);
830 } while (carry);
831
832 /* tmp < 2**257, so the only possible zero values are 0, p and 2p.
833 */
834 return memcmp(tmp, kZero, sizeof(tmp)) == 0 ||
835 memcmp(tmp, kP, sizeof(tmp)) == 0 ||
836 memcmp(tmp, k2P, sizeof(tmp)) == 0;
837 }
838
839 /* Group operations:
840 *
841 * Elements of the elliptic curve group are represented in Jacobian
842 * coordinates: (x, y, z). An affine point (x', y') is x'=x/z**2, y'=y/z**3 in
843 * Jacobian form.
844 */
845
846 /* point_double sets {x_out,y_out,z_out} = 2*{x,y,z}.
847 *
848 * See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#doubling -dbl-2009-l
849 */
850 static void point_double(felem x_out, felem y_out, felem z_out,
851 const felem x, const felem y, const felem z)
852 {
853 felem delta, gamma, alpha, beta, tmp, tmp2;
854
855 felem_square(delta, z);
856 felem_square(gamma, y);
857 felem_mul(beta, x, gamma);
858
859 felem_sum(tmp, x, delta);
860 felem_diff(tmp2, x, delta);
861 felem_mul(alpha, tmp, tmp2);
862 felem_scalar_3(alpha);
863
864 felem_sum(tmp, y, z);
865 felem_square(tmp, tmp);
866 felem_diff(tmp, tmp, gamma);
867 felem_diff(z_out, tmp, delta);
868
869 felem_scalar_4(beta);
870 felem_square(x_out, alpha);
871 felem_diff(x_out, x_out, beta);
872 felem_diff(x_out, x_out, beta);
873
874 felem_diff(tmp, beta, x_out);
875 felem_mul(tmp, alpha, tmp);
876 felem_square(tmp2, gamma);
877 felem_scalar_8(tmp2);
878 felem_diff(y_out, tmp, tmp2);
879 }
880
881 /* point_add_mixed sets {x_out,y_out,z_out} = {x1,y1,z1} + {x2,y2,1}.
882 * (i.e. the second point is affine.)
883 *
884 * See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition -add-2007-bl
885 *
886 * Note that this function does not handle P+P, infinity+P nor P+infinity
887 * correctly.
888 */
889 static void point_add_mixed(felem x_out, felem y_out, felem z_out,
890 const felem x1, const felem y1, const felem z1,
891 const felem x2, const felem y2)
892 {
893 felem z1z1, z1z1z1, s2, u2, h, i, j, r, rr, v, tmp;
894
895 felem_square(z1z1, z1);
896 felem_sum(tmp, z1, z1);
897
898 felem_mul(u2, x2, z1z1);
899 felem_mul(z1z1z1, z1, z1z1);
900 felem_mul(s2, y2, z1z1z1);
901 felem_diff(h, u2, x1);
902 felem_sum(i, h, h);
903 felem_square(i, i);
904 felem_mul(j, h, i);
905 felem_diff(r, s2, y1);
906 felem_sum(r, r, r);
907 felem_mul(v, x1, i);
908
909 felem_mul(z_out, tmp, h);
910 felem_square(rr, r);
911 felem_diff(x_out, rr, j);
912 felem_diff(x_out, x_out, v);
913 felem_diff(x_out, x_out, v);
914
915 felem_diff(tmp, v, x_out);
916 felem_mul(y_out, tmp, r);
917 felem_mul(tmp, y1, j);
918 felem_diff(y_out, y_out, tmp);
919 felem_diff(y_out, y_out, tmp);
920 }
921
922 /* point_add sets {x_out,y_out,z_out} = {x1,y1,z1} + {x2,y2,z2}.
923 *
924 * See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition -add-2007-bl
925 *
926 * Note that this function does not handle P+P, infinity+P nor P+infinity
927 * correctly.
928 */
929 static void point_add(felem x_out, felem y_out, felem z_out,
930 const felem x1, const felem y1, const felem z1,
931 const felem x2, const felem y2, const felem z2)
932 {
933 felem z1z1, z1z1z1, z2z2, z2z2z2, s1, s2, u1, u2, h, i, j, r, rr, v, tmp;
934
935 felem_square(z1z1, z1);
936 felem_square(z2z2, z2);
937 felem_mul(u1, x1, z2z2);
938
939 felem_sum(tmp, z1, z2);
940 felem_square(tmp, tmp);
941 felem_diff(tmp, tmp, z1z1);
942 felem_diff(tmp, tmp, z2z2);
943
944 felem_mul(z2z2z2, z2, z2z2);
945 felem_mul(s1, y1, z2z2z2);
946
947 felem_mul(u2, x2, z1z1);
948 felem_mul(z1z1z1, z1, z1z1);
949 felem_mul(s2, y2, z1z1z1);
950 felem_diff(h, u2, u1);
951 felem_sum(i, h, h);
952 felem_square(i, i);
953 felem_mul(j, h, i);
954 felem_diff(r, s2, s1);
955 felem_sum(r, r, r);
956 felem_mul(v, u1, i);
957
958 felem_mul(z_out, tmp, h);
959 felem_square(rr, r);
960 felem_diff(x_out, rr, j);
961 felem_diff(x_out, x_out, v);
962 felem_diff(x_out, x_out, v);
963
964 felem_diff(tmp, v, x_out);
965 felem_mul(y_out, tmp, r);
966 felem_mul(tmp, s1, j);
967 felem_diff(y_out, y_out, tmp);
968 felem_diff(y_out, y_out, tmp);
969 }
970
971 /* point_add_or_double_vartime sets {x_out,y_out,z_out} = {x1,y1,z1} +
972 * {x2,y2,z2}.
973 *
974 * See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition -add-2007-bl
975 *
976 * This function handles the case where {x1,y1,z1}={x2,y2,z2}.
977 */
978 static void point_add_or_double_vartime(
979 felem x_out, felem y_out, felem z_out,
980 const felem x1, const felem y1, const felem z1,
981 const felem x2, const felem y2, const felem z2)
982 {
983 felem z1z1, z1z1z1, z2z2, z2z2z2, s1, s2, u1, u2, h, i, j, r, rr, v, tmp;
984 char x_equal, y_equal;
985
986 felem_square(z1z1, z1);
987 felem_square(z2z2, z2);
988 felem_mul(u1, x1, z2z2);
989
990 felem_sum(tmp, z1, z2);
991 felem_square(tmp, tmp);
992 felem_diff(tmp, tmp, z1z1);
993 felem_diff(tmp, tmp, z2z2);
994
995 felem_mul(z2z2z2, z2, z2z2);
996 felem_mul(s1, y1, z2z2z2);
997
998 felem_mul(u2, x2, z1z1);
999 felem_mul(z1z1z1, z1, z1z1);
1000 felem_mul(s2, y2, z1z1z1);
1001 felem_diff(h, u2, u1);
1002 x_equal = felem_is_zero_vartime(h);
1003 felem_sum(i, h, h);
1004 felem_square(i, i);
1005 felem_mul(j, h, i);
1006 felem_diff(r, s2, s1);
1007 y_equal = felem_is_zero_vartime(r);
1008 if (x_equal && y_equal) {
1009 point_double(x_out, y_out, z_out, x1, y1, z1);
1010 return;
1011 }
1012 felem_sum(r, r, r);
1013 felem_mul(v, u1, i);
1014
1015 felem_mul(z_out, tmp, h);
1016 felem_square(rr, r);
1017 felem_diff(x_out, rr, j);
1018 felem_diff(x_out, x_out, v);
1019 felem_diff(x_out, x_out, v);
1020
1021 felem_diff(tmp, v, x_out);
1022 felem_mul(y_out, tmp, r);
1023 felem_mul(tmp, s1, j);
1024 felem_diff(y_out, y_out, tmp);
1025 felem_diff(y_out, y_out, tmp);
1026 }
1027
1028 /* copy_conditional sets out=in if mask = 0xffffffff in constant time.
1029 *
1030 * On entry: mask is either 0 or 0xffffffff.
1031 */
1032 static void copy_conditional(felem out, const felem in, limb mask)
1033 {
1034 int i;
1035
1036 for (i = 0; i < NLIMBS; i++) {
1037 const limb tmp = mask & (in[i] ^ out[i]);
1038 out[i] ^= tmp;
1039 }
1040 }
1041
1042 /* select_affine_point sets {out_x,out_y} to the index'th entry of table.
1043 * On entry: index < 16, table[0] must be zero.
1044 */
1045 static void select_affine_point(felem out_x, felem out_y,
1046 const limb *table, limb index)
1047 {
1048 limb i, j;
1049
1050 memset(out_x, 0, sizeof(felem));
1051 memset(out_y, 0, sizeof(felem));
1052
1053 for (i = 1; i < 16; i++) {
1054 limb mask = i ^ index;
1055 mask |= mask >> 2;
1056 mask |= mask >> 1;
1057 mask &= 1;
1058 mask--;
1059 for (j = 0; j < NLIMBS; j++, table++) {
1060 out_x[j] |= *table & mask;
1061 }
1062 for (j = 0; j < NLIMBS; j++, table++) {
1063 out_y[j] |= *table & mask;
1064 }
1065 }
1066 }
1067
1068 /* select_jacobian_point sets {out_x,out_y,out_z} to the index'th entry of
1069 * table. On entry: index < 16, table[0] must be zero.
1070 */
1071 static void select_jacobian_point(felem out_x, felem out_y, felem out_z,
1072 const limb *table, limb index)
1073 {
1074 limb i, j;
1075
1076 memset(out_x, 0, sizeof(felem));
1077 memset(out_y, 0, sizeof(felem));
1078 memset(out_z, 0, sizeof(felem));
1079
1080 /* The implicit value at index 0 is all zero. We don't need to perform that
1081 * iteration of the loop because we already set out_* to zero.
1082 */
1083 table += 3*NLIMBS;
1084
1085 for (i = 1; i < 16; i++) {
1086 limb mask = i ^ index;
1087 mask |= mask >> 2;
1088 mask |= mask >> 1;
1089 mask &= 1;
1090 mask--;
1091 for (j = 0; j < NLIMBS; j++, table++) {
1092 out_x[j] |= *table & mask;
1093 }
1094 for (j = 0; j < NLIMBS; j++, table++) {
1095 out_y[j] |= *table & mask;
1096 }
1097 for (j = 0; j < NLIMBS; j++, table++) {
1098 out_z[j] |= *table & mask;
1099 }
1100 }
1101 }
1102
1103 /* get_bit returns the bit'th bit of scalar. */
1104 static char get_bit(const u8 scalar[32], int bit)
1105 {
1106 return ((scalar[bit >> 3]) >> (bit & 7)) & 1;
1107 }
1108
1109 /* scalar_base_mult sets {nx,ny,nz} = scalar*G where scalar is a little-endian
1110 * number. Note that the value of scalar must be less than the order of the
1111 * group.
1112 */
1113 static void scalar_base_mult(felem nx, felem ny, felem nz, const u8 scalar[32])
1114 {
1115 int i, j;
1116 limb n_is_infinity_mask = -1, p_is_noninfinite_mask, mask;
1117 u32 table_offset;
1118
1119 felem px, py;
1120 felem tx, ty, tz;
1121
1122 memset(nx, 0, sizeof(felem));
1123 memset(ny, 0, sizeof(felem));
1124 memset(nz, 0, sizeof(felem));
1125
1126 /* The loop adds bits at positions 0, 64, 128 and 192, followed by
1127 * positions 32,96,160 and 224 and does this 32 times.
1128 */
1129 for (i = 0; i < 32; i++) {
1130 if (i) {
1131 point_double(nx, ny, nz, nx, ny, nz);
1132 }
1133 table_offset = 0;
1134 for (j = 0; j <= 32; j += 32) {
1135 char bit0 = get_bit(scalar, 31 - i + j);
1136 char bit1 = get_bit(scalar, 95 - i + j);
1137 char bit2 = get_bit(scalar, 159 - i + j);
1138 char bit3 = get_bit(scalar, 223 - i + j);
1139 limb index = bit0 | (bit1 << 1) | (bit2 << 2) | (bit3 << 3);
1140
1141 select_affine_point(px, py, kPrecomputed + table_offset, index);
1142 table_offset += 30 * NLIMBS;
1143
1144 /* Since scalar is less than the order of the group, we know that
1145 * {nx,ny,nz} != {px,py,1}, unless both are zero, which we handle
1146 * below.
1147 */
1148 point_add_mixed(tx, ty, tz, nx, ny, nz, px, py);
1149 /* The result of point_add_mixed is incorrect if {nx,ny,nz} is zero
1150 * (a.k.a. the point at infinity). We handle that situation by
1151 * copying the point from the table.
1152 */
1153 copy_conditional(nx, px, n_is_infinity_mask);
1154 copy_conditional(ny, py, n_is_infinity_mask);
1155 copy_conditional(nz, kOne, n_is_infinity_mask);
1156
1157 /* Equally, the result is also wrong if the point from the table is
1158 * zero, which happens when the index is zero. We handle that by
1159 * only copying from {tx,ty,tz} to {nx,ny,nz} if index != 0.
1160 */
1161 p_is_noninfinite_mask = NON_ZERO_TO_ALL_ONES(index);
1162 mask = p_is_noninfinite_mask & ~n_is_infinity_mask;
1163 copy_conditional(nx, tx, mask);
1164 copy_conditional(ny, ty, mask);
1165 copy_conditional(nz, tz, mask);
1166 /* If p was not zero, then n is now non-zero. */
1167 n_is_infinity_mask &= ~p_is_noninfinite_mask;
1168 }
1169 }
1170 }
1171
1172 /* point_to_affine converts a Jacobian point to an affine point. If the input
1173 * is the point at infinity then it returns (0, 0) in constant time.
1174 */
1175 static void point_to_affine(felem x_out, felem y_out,
1176 const felem nx, const felem ny, const felem nz) {
1177 felem z_inv, z_inv_sq;
1178 felem_inv(z_inv, nz);
1179 felem_square(z_inv_sq, z_inv);
1180 felem_mul(x_out, nx, z_inv_sq);
1181 felem_mul(z_inv, z_inv, z_inv_sq);
1182 felem_mul(y_out, ny, z_inv);
1183 }
1184
1185 /* scalar_mult sets {nx,ny,nz} = scalar*{x,y}. */
1186 static void scalar_mult(felem nx, felem ny, felem nz,
1187 const felem x, const felem y, const u8 scalar[32])
1188 {
1189 int i;
1190 felem px, py, pz, tx, ty, tz;
1191 felem precomp[16][3];
1192 limb n_is_infinity_mask, index, p_is_noninfinite_mask, mask;
1193
1194 /* We precompute 0,1,2,... times {x,y}. */
1195 memset(precomp, 0, sizeof(felem) * 3);
1196 memcpy(&precomp[1][0], x, sizeof(felem));
1197 memcpy(&precomp[1][1], y, sizeof(felem));
1198 memcpy(&precomp[1][2], kOne, sizeof(felem));
1199
1200 for (i = 2; i < 16; i += 2) {
1201 point_double(precomp[i][0], precomp[i][1], precomp[i][2],
1202 precomp[i / 2][0], precomp[i / 2][1], precomp[i / 2][2]);
1203
1204 point_add_mixed(precomp[i + 1][0], precomp[i + 1][1], precomp[i + 1][2],
1205 precomp[i][0], precomp[i][1], precomp[i][2], x, y);
1206 }
1207
1208 memset(nx, 0, sizeof(felem));
1209 memset(ny, 0, sizeof(felem));
1210 memset(nz, 0, sizeof(felem));
1211 n_is_infinity_mask = -1;
1212
1213 /* We add in a window of four bits each iteration and do this 64 times. */
1214 for (i = 0; i < 64; i++) {
1215 if (i) {
1216 point_double(nx, ny, nz, nx, ny, nz);
1217 point_double(nx, ny, nz, nx, ny, nz);
1218 point_double(nx, ny, nz, nx, ny, nz);
1219 point_double(nx, ny, nz, nx, ny, nz);
1220 }
1221
1222 index = scalar[31 - i / 2];
1223 if ((i & 1) == 1) {
1224 index &= 15;
1225 } else {
1226 index >>= 4;
1227 }
1228
1229 /* See the comments in scalar_base_mult about handling infinities. */
1230 select_jacobian_point(px, py, pz, precomp[0][0], index);
1231 point_add(tx, ty, tz, nx, ny, nz, px, py, pz);
1232 copy_conditional(nx, px, n_is_infinity_mask);
1233 copy_conditional(ny, py, n_is_infinity_mask);
1234 copy_conditional(nz, pz, n_is_infinity_mask);
1235
1236 p_is_noninfinite_mask = NON_ZERO_TO_ALL_ONES(index);
1237 mask = p_is_noninfinite_mask & ~n_is_infinity_mask;
1238 copy_conditional(nx, tx, mask);
1239 copy_conditional(ny, ty, mask);
1240 copy_conditional(nz, tz, mask);
1241 n_is_infinity_mask &= ~p_is_noninfinite_mask;
1242 }
1243 }
1244
1245 /* Interface with Freebl: */
1246
1247 /* BYTESWAP_MP_DIGIT_TO_LE swaps the bytes of a mp_digit to
1248 * little-endian order.
1249 */
1250 #ifdef IS_BIG_ENDIAN
1251 #ifdef __APPLE__
1252 #include <libkern/OSByteOrder.h>
1253 #define BYTESWAP32(x) OSSwapInt32(x)
1254 #define BYTESWAP64(x) OSSwapInt64(x)
1255 #else
1256 #define BYTESWAP32(x) \
1257 (((x) >> 24) | (((x) >> 8) & 0xff00) | (((x) & 0xff00) << 8) | ((x) << 24))
1258 #define BYTESWAP64(x) \
1259 (((x) >> 56) | (((x) >> 40) & 0xff00) | \
1260 (((x) >> 24) & 0xff0000) | (((x) >> 8) & 0xff000000) | \
1261 (((x) & 0xff000000) << 8) | (((x) & 0xff0000) << 24) | \
1262 (((x) & 0xff00) << 40) | ((x) << 56))
1263 #endif
1264
1265 #ifdef MP_USE_UINT_DIGIT
1266 #define BYTESWAP_MP_DIGIT_TO_LE(x) BYTESWAP32(x)
1267 #else
1268 #define BYTESWAP_MP_DIGIT_TO_LE(x) BYTESWAP64(x)
1269 #endif
1270 #endif /* IS_BIG_ENDIAN */
1271
1272 #ifdef MP_USE_UINT_DIGIT
1273 static const mp_digit kRInvDigits[8] = {
1274 0x80000000, 1, 0xffffffff, 0,
1275 0x80000001, 0xfffffffe, 1, 0x7fffffff
1276 };
1277 #else
1278 static const mp_digit kRInvDigits[4] = {
1279 PR_UINT64(0x180000000), 0xffffffff,
1280 PR_UINT64(0xfffffffe80000001), PR_UINT64(0x7fffffff00000001)
1281 };
1282 #endif
1283 #define MP_DIGITS_IN_256_BITS (32/sizeof(mp_digit))
1284 static const mp_int kRInv = {
1285 MP_ZPOS,
1286 MP_DIGITS_IN_256_BITS,
1287 MP_DIGITS_IN_256_BITS,
1288 (mp_digit*) kRInvDigits
1289 };
1290
1291 static const limb kTwo28 = 0x10000000;
1292 static const limb kTwo29 = 0x20000000;
1293
1294 /* to_montgomery sets out = R*in. */
1295 static mp_err to_montgomery(felem out, const mp_int *in, const ECGroup *group)
1296 {
1297 /* There are no MPI functions for bitshift operations and we wish to shift
1298 * in 257 bits left so we move the digits 256-bits left and then multiply
1299 * by two.
1300 */
1301 mp_int in_shifted;
1302 int i;
1303 mp_err res;
1304
1305 MP_CHECKOK(mp_init(&in_shifted));
1306 MP_CHECKOK(s_mp_pad(&in_shifted, MP_USED(in) + MP_DIGITS_IN_256_BITS));
1307 memcpy(&MP_DIGIT(&in_shifted, MP_DIGITS_IN_256_BITS),
1308 MP_DIGITS(in),
1309 MP_USED(in)*sizeof(mp_digit));
1310 MP_CHECKOK(mp_mul_2(&in_shifted, &in_shifted));
1311 MP_CHECKOK(group->meth->field_mod(&in_shifted, &in_shifted, group->meth));
1312
1313 for (i = 0;; i++) {
1314 out[i] = MP_DIGIT(&in_shifted, 0) & kBottom29Bits;
1315 MP_CHECKOK(mp_div_d(&in_shifted, kTwo29, &in_shifted, NULL));
1316
1317 i++;
1318 if (i == NLIMBS)
1319 break;
1320 out[i] = MP_DIGIT(&in_shifted, 0) & kBottom28Bits;
1321 MP_CHECKOK(mp_div_d(&in_shifted, kTwo28, &in_shifted, NULL));
1322 }
1323
1324 CLEANUP:
1325 mp_clear(&in_shifted);
1326 return res;
1327 }
1328
1329 /* from_montgomery sets out=in/R. */
1330 static mp_err from_montgomery(mp_int *out, const felem in,
1331 const ECGroup *group)
1332 {
1333 mp_int result, tmp;
1334 mp_err res;
1335 int i;
1336
1337 MP_CHECKOK(mp_init(&result));
1338 MP_CHECKOK(mp_init(&tmp));
1339
1340 MP_CHECKOK(mp_add_d(&tmp, in[NLIMBS-1], &result));
1341 for (i = NLIMBS-2; i >= 0; i--) {
1342 if ((i & 1) == 0) {
1343 MP_CHECKOK(mp_mul_d(&result, kTwo29, &tmp));
1344 } else {
1345 MP_CHECKOK(mp_mul_d(&result, kTwo28, &tmp));
1346 }
1347 MP_CHECKOK(mp_add_d(&tmp, in[i], &result));
1348 }
1349
1350 MP_CHECKOK(mp_mul(&result, &kRInv, out));
1351 MP_CHECKOK(group->meth->field_mod(out, out, group->meth));
1352
1353 CLEANUP:
1354 mp_clear(&result);
1355 mp_clear(&tmp);
1356 return res;
1357 }
1358
1359 /* scalar_from_mp_int sets out_scalar=n, where n < the group order. */
1360 static void scalar_from_mp_int(u8 out_scalar[32], const mp_int *n)
1361 {
1362 /* We require that |n| is less than the order of the group and therefore it
1363 * will fit into |out_scalar|. However, these is a timing side-channel here
1364 * that we cannot avoid: if |n| is sufficiently small it may be one or more
1365 * words too short and we'll copy less data.
1366 */
1367 memset(out_scalar, 0, 32);
1368 #ifdef IS_LITTLE_ENDIAN
1369 memcpy(out_scalar, MP_DIGITS(n), MP_USED(n) * sizeof(mp_digit));
1370 #else
1371 {
1372 mp_size i;
1373 mp_digit swapped[MP_DIGITS_IN_256_BITS];
1374 for (i = 0; i < MP_USED(n); i++) {
1375 swapped[i] = BYTESWAP_MP_DIGIT_TO_LE(MP_DIGIT(n, i));
1376 }
1377 memcpy(out_scalar, swapped, MP_USED(n) * sizeof(mp_digit));
1378 }
1379 #endif
1380 }
1381
1382 /* ec_GFp_nistp256_base_point_mul sets {out_x,out_y} = nG, where n is < the
1383 * order of the group.
1384 */
1385 static mp_err ec_GFp_nistp256_base_point_mul(const mp_int *n,
1386 mp_int *out_x, mp_int *out_y,
1387 const ECGroup *group)
1388 {
1389 u8 scalar[32];
1390 felem x, y, z, x_affine, y_affine;
1391 mp_err res;
1392
1393 /* FIXME(agl): test that n < order. */
1394
1395 scalar_from_mp_int(scalar, n);
1396 scalar_base_mult(x, y, z, scalar);
1397 point_to_affine(x_affine, y_affine, x, y, z);
1398 MP_CHECKOK(from_montgomery(out_x, x_affine, group));
1399 MP_CHECKOK(from_montgomery(out_y, y_affine, group));
1400
1401 CLEANUP:
1402 return res;
1403 }
1404
1405 /* ec_GFp_nistp256_point_mul sets {out_x,out_y} = n*{in_x,in_y}, where n is <
1406 * the order of the group.
1407 */
1408 static mp_err ec_GFp_nistp256_point_mul(const mp_int *n,
1409 const mp_int *in_x, const mp_int *in_y,
1410 mp_int *out_x, mp_int *out_y,
1411 const ECGroup *group)
1412 {
1413 u8 scalar[32];
1414 felem x, y, z, x_affine, y_affine, px, py;
1415 mp_err res;
1416
1417 scalar_from_mp_int(scalar, n);
1418
1419 MP_CHECKOK(to_montgomery(px, in_x, group));
1420 MP_CHECKOK(to_montgomery(py, in_y, group));
1421
1422 scalar_mult(x, y, z, px, py, scalar);
1423 point_to_affine(x_affine, y_affine, x, y, z);
1424 MP_CHECKOK(from_montgomery(out_x, x_affine, group));
1425 MP_CHECKOK(from_montgomery(out_y, y_affine, group));
1426
1427 CLEANUP:
1428 return res;
1429 }
1430
1431 /* ec_GFp_nistp256_point_mul_vartime sets {out_x,out_y} = n1*G +
1432 * n2*{in_x,in_y}, where n1 and n2 are < the order of the group.
1433 *
1434 * As indicated by the name, this function operates in variable time. This
1435 * is safe because it's used for signature validation which doesn't deal
1436 * with secrets.
1437 */
1438 static mp_err ec_GFp_nistp256_points_mul_vartime(
1439 const mp_int *n1, const mp_int *n2,
1440 const mp_int *in_x, const mp_int *in_y,
1441 mp_int *out_x, mp_int *out_y,
1442 const ECGroup *group)
1443 {
1444 u8 scalar1[32], scalar2[32];
1445 felem x1, y1, z1, x2, y2, z2, x_affine, y_affine, px, py;
1446 mp_err res = MP_OKAY;
1447
1448 /* If n2 == NULL, this is just a base-point multiplication. */
1449 if (n2 == NULL) {
1450 return ec_GFp_nistp256_base_point_mul(n1, out_x, out_y, group);
1451 }
1452
1453 /* If n1 == nULL, this is just an arbitary-point multiplication. */
1454 if (n1 == NULL) {
1455 return ec_GFp_nistp256_point_mul(n2, in_x, in_y, out_x, out_y, group);
1456 }
1457
1458 /* If both scalars are zero, then the result is the point at infinity. */
1459 if (mp_cmp_z(n1) == 0 && mp_cmp_z(n2) == 0) {
1460 mp_zero(out_x);
1461 mp_zero(out_y);
1462 return res;
1463 }
1464
1465 scalar_from_mp_int(scalar1, n1);
1466 scalar_from_mp_int(scalar2, n2);
1467
1468 MP_CHECKOK(to_montgomery(px, in_x, group));
1469 MP_CHECKOK(to_montgomery(py, in_y, group));
1470 scalar_base_mult(x1, y1, z1, scalar1);
1471 scalar_mult(x2, y2, z2, px, py, scalar2);
1472
1473 if (mp_cmp_z(n2) == 0) {
1474 /* If n2 == 0, then {x2,y2,z2} is zero and the result is just
1475 * {x1,y1,z1}. */
1476 } else if (mp_cmp_z(n1) == 0) {
1477 /* If n1 == 0, then {x1,y1,z1} is zero and the result is just
1478 * {x2,y2,z2}. */
1479 memcpy(x1, x2, sizeof(x2));
1480 memcpy(y1, y2, sizeof(y2));
1481 memcpy(z1, z2, sizeof(z2));
1482 } else {
1483 /* This function handles the case where {x1,y1,z1} == {x2,y2,z2}. */
1484 point_add_or_double_vartime(x1, y1, z1, x1, y1, z1, x2, y2, z2);
1485 }
1486
1487 point_to_affine(x_affine, y_affine, x1, y1, z1);
1488 MP_CHECKOK(from_montgomery(out_x, x_affine, group));
1489 MP_CHECKOK(from_montgomery(out_y, y_affine, group));
1490
1491 CLEANUP:
1492 return res;
1493 }
1494
1495 /* Wire in fast point multiplication for named curves. */
1496 mp_err ec_group_set_gfp256_32(ECGroup *group, ECCurveName name)
1497 {
1498 if (name == ECCurve_NIST_P256) {
1499 group->base_point_mul = &ec_GFp_nistp256_base_point_mul;
1500 group->point_mul = &ec_GFp_nistp256_point_mul;
1501 group->points_mul = &ec_GFp_nistp256_points_mul_vartime;
1502 }
1503 return MP_OKAY;
1504 }
OLDNEW
« no previous file with comments | « nss/lib/freebl/ecl/ecp_256.c ('k') | nss/lib/freebl/ecl/ecp_384.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698