Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(386)

Side by Side Diff: mozilla/security/nss/lib/freebl/ecl/ecp_256_32.c

Issue 12137003: Support big-endian processors. Eliminate the dependency on the (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/nss/
Patch Set: Eliminate other signed right-shifts. Add PORT_Assert to check n digits before memcpy Created 7 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* This Source Code Form is subject to the terms of the Mozilla Public 1 /* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this 2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4 4
5 /* A 32-bit implementation of the NIST P-256 elliptic curve. */ 5 /* A 32-bit implementation of the NIST P-256 elliptic curve. */
6 6
7 #include <string.h> 7 #include <string.h>
8 8
9 #include "prtypes.h" 9 #include "prtypes.h"
10 #include "mpi.h" 10 #include "mpi.h"
11 #include "mpi-priv.h" 11 #include "mpi-priv.h"
12 #include "ecp.h" 12 #include "ecp.h"
13 #include "secport.h"
13 14
14 typedef PRUint8 u8; 15 typedef PRUint8 u8;
15 typedef PRUint32 u32; 16 typedef PRUint32 u32;
16 typedef PRInt32 s32;
17 typedef PRUint64 u64; 17 typedef PRUint64 u64;
18 18
19 /* Our field elements are represented as nine, unsigned 32-bit words. Freebl's 19 /* Our field elements are represented as nine, unsigned 32-bit words. Freebl's
20 * MPI library calls them digits, but here they are called limbs, which is 20 * MPI library calls them digits, but here they are called limbs, which is
21 * GMP's terminology. 21 * GMP's terminology.
22 * 22 *
23 * The value of an felem (field element) is: 23 * The value of an felem (field element) is:
24 * x[0] + (x[1] * 2**29) + (x[2] * 2**57) + ... + (x[8] * 2**228) 24 * x[0] + (x[1] * 2**29) + (x[2] * 2**57) + ... + (x[8] * 2**228)
25 * 25 *
26 * That is, each limb is alternately 29 or 28-bits wide in little-endian 26 * That is, each limb is alternately 29 or 28-bits wide in little-endian
(...skipping 127 matching lines...) Expand 10 before | Expand all | Expand 10 after
154 0xbe73d6f, 0xaa88141, 0xd976c81, 0x7e7a9cc, 0x18beb771, 0xd773cbd, 0x13f5195 1, 0x9d0c177, 0x1c49a78, 154 0xbe73d6f, 0xaa88141, 0xd976c81, 0x7e7a9cc, 0x18beb771, 0xd773cbd, 0x13f5195 1, 0x9d0c177, 0x1c49a78,
155 }; 155 };
156 156
157 /* Field element operations: 157 /* Field element operations:
158 */ 158 */
159 159
160 /* NON_ZERO_TO_ALL_ONES returns: 160 /* NON_ZERO_TO_ALL_ONES returns:
161 * 0xffffffff for 0 < x <= 2**31 161 * 0xffffffff for 0 < x <= 2**31
162 * 0 for x == 0 or x > 2**31. 162 * 0 for x == 0 or x > 2**31.
163 * 163 *
164 * This macro assumes that right-shifting a signed number shifts in the MSB on 164 * x must be a u32 or an equivalent type such as limb.
165 * the left. This is not ensured by the C standard, but is true on the CPUs
 166 * that we're targeting with this code (x86 and ARM).
167 */ 165 */
168 #define NON_ZERO_TO_ALL_ONES(x) (~((u32) (((s32) ((x)-1)) >> 31))) 166 #define NON_ZERO_TO_ALL_ONES(x) ((((u32)(x) - 1) >> 31) - 1)
169 167
170 /* felem_reduce_carry adds a multiple of p in order to cancel |carry|, 168 /* felem_reduce_carry adds a multiple of p in order to cancel |carry|,
171 * which is a term at 2**257. 169 * which is a term at 2**257.
172 * 170 *
173 * On entry: carry < 2**3, inout[0,2,...] < 2**29, inout[1,3,...] < 2**28. 171 * On entry: carry < 2**3, inout[0,2,...] < 2**29, inout[1,3,...] < 2**28.
174 * On exit: inout[0,2,..] < 2**30, inout[1,3,...] < 2**29. 172 * On exit: inout[0,2,..] < 2**30, inout[1,3,...] < 2**29.
175 */ 173 */
176 static void felem_reduce_carry(felem inout, limb carry) 174 static void felem_reduce_carry(felem inout, limb carry)
177 { 175 {
178 const u32 carry_mask = NON_ZERO_TO_ALL_ONES(carry); 176 const u32 carry_mask = NON_ZERO_TO_ALL_ONES(carry);
(...skipping 947 matching lines...) Expand 10 before | Expand all | Expand 10 after
1126 memset(ny, 0, sizeof(felem)); 1124 memset(ny, 0, sizeof(felem));
1127 memset(nz, 0, sizeof(felem)); 1125 memset(nz, 0, sizeof(felem));
1128 1126
1129 /* The loop adds bits at positions 0, 64, 128 and 192, followed by 1127 /* The loop adds bits at positions 0, 64, 128 and 192, followed by
1130 * positions 32,96,160 and 224 and does this 32 times. 1128 * positions 32,96,160 and 224 and does this 32 times.
1131 */ 1129 */
1132 for (i = 0; i < 32; i++) { 1130 for (i = 0; i < 32; i++) {
1133 if (i) { 1131 if (i) {
1134 point_double(nx, ny, nz, nx, ny, nz); 1132 point_double(nx, ny, nz, nx, ny, nz);
1135 } 1133 }
1134 table_offset = 0;
1136 for (j = 0; j <= 32; j += 32) { 1135 for (j = 0; j <= 32; j += 32) {
1137 char bit0 = get_bit(scalar, 31 - i + j); 1136 char bit0 = get_bit(scalar, 31 - i + j);
1138 char bit1 = get_bit(scalar, 95 - i + j); 1137 char bit1 = get_bit(scalar, 95 - i + j);
1139 char bit2 = get_bit(scalar, 159 - i + j); 1138 char bit2 = get_bit(scalar, 159 - i + j);
1140 char bit3 = get_bit(scalar, 223 - i + j); 1139 char bit3 = get_bit(scalar, 223 - i + j);
1141 limb index = bit0 | (bit1 << 1) | (bit2 << 2) | (bit3 << 3); 1140 limb index = bit0 | (bit1 << 1) | (bit2 << 2) | (bit3 << 3);
1142 1141
1143 table_offset = ((((s32)j) << (32-6)) >> 31) & (30*NLIMBS);
1144 select_affine_point(px, py, kPrecomputed + table_offset, index); 1142 select_affine_point(px, py, kPrecomputed + table_offset, index);
1143 table_offset += 30 * NLIMBS;
1145 1144
1146 /* Since scalar is less than the order of the group, we know that 1145 /* Since scalar is less than the order of the group, we know that
1147 * {nx,ny,nz} != {px,py,1}, unless both are zero, which we handle 1146 * {nx,ny,nz} != {px,py,1}, unless both are zero, which we handle
1148 * below. 1147 * below.
1149 */ 1148 */
1150 point_add_mixed(tx, ty, tz, nx, ny, nz, px, py); 1149 point_add_mixed(tx, ty, tz, nx, ny, nz, px, py);
1151 /* The result of point_add_mixed is incorrect if {nx,ny,nz} is zero 1150 /* The result of point_add_mixed is incorrect if {nx,ny,nz} is zero
1152 * (a.k.a. the point at infinity). We handle that situation by 1151 * (a.k.a. the point at infinity). We handle that situation by
1153 * copying the point from the table. 1152 * copying the point from the table.
1154 */ 1153 */
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after
1222 } 1221 }
1223 1222
1224 index = scalar[31 - i / 2]; 1223 index = scalar[31 - i / 2];
1225 if ((i & 1) == 1) { 1224 if ((i & 1) == 1) {
1226 index &= 15; 1225 index &= 15;
1227 } else { 1226 } else {
1228 index >>= 4; 1227 index >>= 4;
1229 } 1228 }
1230 1229
1231 /* See the comments in scalar_base_mult about handling infinities. */ 1230 /* See the comments in scalar_base_mult about handling infinities. */
1232 » select_jacobian_point(px, py, pz, (limb *) precomp, index); 1231 » select_jacobian_point(px, py, pz, precomp[0][0], index);
1233 point_add(tx, ty, tz, nx, ny, nz, px, py, pz); 1232 point_add(tx, ty, tz, nx, ny, nz, px, py, pz);
1234 copy_conditional(nx, px, n_is_infinity_mask); 1233 copy_conditional(nx, px, n_is_infinity_mask);
1235 copy_conditional(ny, py, n_is_infinity_mask); 1234 copy_conditional(ny, py, n_is_infinity_mask);
1236 copy_conditional(nz, pz, n_is_infinity_mask); 1235 copy_conditional(nz, pz, n_is_infinity_mask);
1237 1236
1238 » p_is_noninfinite_mask = ((s32) ~ (index - 1)) >> 31; 1237 » p_is_noninfinite_mask = NON_ZERO_TO_ALL_ONES(index);
1239 mask = p_is_noninfinite_mask & ~n_is_infinity_mask; 1238 mask = p_is_noninfinite_mask & ~n_is_infinity_mask;
1240 copy_conditional(nx, tx, mask); 1239 copy_conditional(nx, tx, mask);
1241 copy_conditional(ny, ty, mask); 1240 copy_conditional(ny, ty, mask);
1242 copy_conditional(nz, tz, mask); 1241 copy_conditional(nz, tz, mask);
1243 n_is_infinity_mask &= ~p_is_noninfinite_mask; 1242 n_is_infinity_mask &= ~p_is_noninfinite_mask;
1244 } 1243 }
1245 } 1244 }
1246 1245
1247 /* Interface with Freebl: */ 1246 /* Interface with Freebl: */
1248 1247
1248 /* BYTESWAP_MP_DIGIT_TO_LE swaps the bytes of a mp_digit to
1249 * little-endian order.
1250 */
1249 #ifdef IS_BIG_ENDIAN 1251 #ifdef IS_BIG_ENDIAN
1250 #error "This code needs a little-endian processor" 1252 #ifdef __APPLE__
1253 #include <libkern/OSByteOrder.h>
1254 #define BYTESWAP32(x) OSSwapInt32(x)
1255 #define BYTESWAP64(x) OSSwapInt64(x)
1256 #else
1257 #define BYTESWAP32(x) \
agl 2013/02/02 20:34:26 This only works if x is unsigned, but I believe th
1258 ((x) >> 24 | (x) >> 8 & 0xff00 | ((x) & 0xff00) << 8 | (x) << 24)
1259 #define BYTESWAP64(x) \
1260 ((x) >> 56 | (x) >> 40 & 0xff00 | \
1261 (x) >> 24 & 0xff0000 | (x) >> 8 & 0xff000000 | \
1262 ((x) & 0xff000000) << 8 | ((x) & 0xff0000) << 24 | \
1263 ((x) & 0xff00) << 40 | (x) << 56)
1251 #endif 1264 #endif
1252 1265
1253 static const u32 kRInvDigits[8] = { 1266 #ifdef MP_USE_UINT_DIGIT
1267 #define BYTESWAP_MP_DIGIT_TO_LE(x) BYTESWAP32(BYTESWAP32(x))
1268 #else
1269 #define BYTESWAP_MP_DIGIT_TO_LE(x) BYTESWAP64(BYTESWAP64(x))
1270 #endif
1271 #endif /* IS_BIG_ENDIAN */
1272
1273 #ifdef MP_USE_UINT_DIGIT
1274 static const mp_digit kRInvDigits[8] = {
1254 0x80000000, 1, 0xffffffff, 0, 1275 0x80000000, 1, 0xffffffff, 0,
1255 0x80000001, 0xfffffffe, 1, 0x7fffffff 1276 0x80000001, 0xfffffffe, 1, 0x7fffffff
1256 }; 1277 };
1278 #else
1279 static const mp_digit kRInvDigits[4] = {
1280 PR_UINT64(0x180000000), 0xffffffff,
1281 PR_UINT64(0xfffffffe80000001), PR_UINT64(0x7fffffff00000001)
1282 };
1283 #endif
1257 #define MP_DIGITS_IN_256_BITS (32/sizeof(mp_digit)) 1284 #define MP_DIGITS_IN_256_BITS (32/sizeof(mp_digit))
1258 static const mp_int kRInv = { 1285 static const mp_int kRInv = {
1259 MP_ZPOS, 1286 MP_ZPOS,
1260 MP_DIGITS_IN_256_BITS, 1287 MP_DIGITS_IN_256_BITS,
1261 MP_DIGITS_IN_256_BITS, 1288 MP_DIGITS_IN_256_BITS,
1262 /* Because we are running on a little-endian processor, this cast works for
1263 * both 32 and 64-bit processors.
1264 */
1265 (mp_digit*) kRInvDigits 1289 (mp_digit*) kRInvDigits
1266 }; 1290 };
1267 1291
1268 static const limb kTwo28 = 0x10000000; 1292 static const limb kTwo28 = 0x10000000;
1269 static const limb kTwo29 = 0x20000000; 1293 static const limb kTwo29 = 0x20000000;
1270 1294
1271 /* to_montgomery sets out = R*in. */ 1295 /* to_montgomery sets out = R*in. */
1272 static mp_err to_montgomery(felem out, const mp_int *in, const ECGroup *group) 1296 static mp_err to_montgomery(felem out, const mp_int *in, const ECGroup *group)
1273 { 1297 {
1274 /* There are no MPI functions for bitshift operations and we wish to shift 1298 /* There are no MPI functions for bitshift operations and we wish to shift
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after
1330 CLEANUP: 1354 CLEANUP:
1331 mp_clear(&result); 1355 mp_clear(&result);
1332 mp_clear(&tmp); 1356 mp_clear(&tmp);
1333 return res; 1357 return res;
1334 } 1358 }
1335 1359
1336 /* scalar_from_mp_int sets out_scalar=n, where n < the group order. */ 1360 /* scalar_from_mp_int sets out_scalar=n, where n < the group order. */
1337 static void scalar_from_mp_int(u8 out_scalar[32], const mp_int *n) 1361 static void scalar_from_mp_int(u8 out_scalar[32], const mp_int *n)
1338 { 1362 {
1339 /* We require that |n| is less than the order of the group and therefore it 1363 /* We require that |n| is less than the order of the group and therefore it
 1340 * will fit into |scalar|. However, there is a timing side-channel here that 1364 * will fit into |out_scalar|. However, there is a timing side-channel here
1341 * we cannot avoid: if |n| is sufficiently small it may be one or more words 1365 * that we cannot avoid: if |n| is sufficiently small it may be one or more
1342 * too short and we'll copy less data. 1366 * words too short and we'll copy less data.
1343 */ 1367 */
1368 PORT_Assert(MP_USED(n) * sizeof(mp_digit) <= 32);
1344 memset(out_scalar, 0, 32); 1369 memset(out_scalar, 0, 32);
1370 #ifdef IS_LITTLE_ENDIAN
1345 memcpy(out_scalar, MP_DIGITS(n), MP_USED(n) * sizeof(mp_digit)); 1371 memcpy(out_scalar, MP_DIGITS(n), MP_USED(n) * sizeof(mp_digit));
1372 #else
1373 {
1374 mp_size i;
1375 mp_digit swapped[MP_DIGITS_IN_256_BITS];
1376 for (i = 0; i < MP_USED(n); i++) {
1377 swapped[i] = BYTESWAP_MP_DIGIT_TO_LE(MP_DIGIT(n, i));
1378 }
1379 memcpy(out_scalar, swapped, MP_USED(n) * sizeof(mp_digit));
1380 }
1381 #endif
1346 } 1382 }
1347 1383
1348 /* ec_GFp_nistp256_base_point_mul sets {out_x,out_y} = nG, where n is < the 1384 /* ec_GFp_nistp256_base_point_mul sets {out_x,out_y} = nG, where n is < the
1349 * order of the group. 1385 * order of the group.
1350 */ 1386 */
1351 static mp_err ec_GFp_nistp256_base_point_mul(const mp_int *n, 1387 static mp_err ec_GFp_nistp256_base_point_mul(const mp_int *n,
1352 mp_int *out_x, mp_int *out_y, 1388 mp_int *out_x, mp_int *out_y,
1353 const ECGroup *group) 1389 const ECGroup *group)
1354 { 1390 {
1355 u8 scalar[32]; 1391 u8 scalar[32];
1356 felem x, y, z, x_affine, y_affine; 1392 felem x, y, z, x_affine, y_affine;
1357 mp_err res; 1393 mp_err res;
1358 1394
1359 /* FIXME(agl): test that n < order. */
agl 2013/02/02 20:34:26 I think this TODO should remain. I'll write someth
1360
1361 scalar_from_mp_int(scalar, n); 1395 scalar_from_mp_int(scalar, n);
1362 scalar_base_mult(x, y, z, scalar); 1396 scalar_base_mult(x, y, z, scalar);
1363 point_to_affine(x_affine, y_affine, x, y, z); 1397 point_to_affine(x_affine, y_affine, x, y, z);
1364 MP_CHECKOK(from_montgomery(out_x, x_affine, group)); 1398 MP_CHECKOK(from_montgomery(out_x, x_affine, group));
1365 MP_CHECKOK(from_montgomery(out_y, y_affine, group)); 1399 MP_CHECKOK(from_montgomery(out_y, y_affine, group));
1366 1400
1367 CLEANUP: 1401 CLEANUP:
1368 return res; 1402 return res;
1369 } 1403 }
1370 1404
(...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after
1461 /* Wire in fast point multiplication for named curves. */ 1495 /* Wire in fast point multiplication for named curves. */
1462 mp_err ec_group_set_gfp256_32(ECGroup *group, ECCurveName name) 1496 mp_err ec_group_set_gfp256_32(ECGroup *group, ECCurveName name)
1463 { 1497 {
1464 if (name == ECCurve_NIST_P256) { 1498 if (name == ECCurve_NIST_P256) {
1465 group->base_point_mul = &ec_GFp_nistp256_base_point_mul; 1499 group->base_point_mul = &ec_GFp_nistp256_base_point_mul;
1466 group->point_mul = &ec_GFp_nistp256_point_mul; 1500 group->point_mul = &ec_GFp_nistp256_point_mul;
1467 group->points_mul = &ec_GFp_nistp256_points_mul_vartime; 1501 group->points_mul = &ec_GFp_nistp256_points_mul_vartime;
1468 } 1502 }
1469 return MP_OKAY; 1503 return MP_OKAY;
1470 } 1504 }
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698