Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(386)

Side by Side Diff: mozilla/security/nss/lib/freebl/ecl/ecp_256_32.c

Issue 12137003: Support big-endian processors. Eliminate the dependency on the (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/nss/
Patch Set: Eliminate other signed right-shifts. Add PORT_Assert to check n digits before memcpy Created 7 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* This Source Code Form is subject to the terms of the Mozilla Public 1 /* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this 2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4 4
5 /* A 32-bit implementation of the NIST P-256 elliptic curve. */ 5 /* A 32-bit implementation of the NIST P-256 elliptic curve. */
6 6
7 #include <string.h> 7 #include <string.h>
8 8
9 #include "prtypes.h" 9 #include "prtypes.h"
10 #include "mpi.h" 10 #include "mpi.h"
11 #include "mpi-priv.h" 11 #include "mpi-priv.h"
12 #include "ecp.h" 12 #include "ecp.h"
13 #include "secport.h"
13 14
14 typedef PRUint8 u8; 15 typedef PRUint8 u8;
15 typedef PRUint32 u32; 16 typedef PRUint32 u32;
16 typedef PRInt32 s32;
17 typedef PRUint64 u64; 17 typedef PRUint64 u64;
18 18
19 /* Our field elements are represented as nine, unsigned 32-bit words. Freebl's 19 /* Our field elements are represented as nine, unsigned 32-bit words. Freebl's
20 * MPI library calls them digits, but here they are called limbs, which is 20 * MPI library calls them digits, but here they are called limbs, which is
21 * GMP's terminology. 21 * GMP's terminology.
22 * 22 *
23 * The value of an felem (field element) is: 23 * The value of an felem (field element) is:
24 * x[0] + (x[1] * 2**29) + (x[2] * 2**57) + ... + (x[8] * 2**228) 24 * x[0] + (x[1] * 2**29) + (x[2] * 2**57) + ... + (x[8] * 2**228)
25 * 25 *
26 * That is, each limb is alternately 29 or 28-bits wide in little-endian 26 * That is, each limb is alternately 29 or 28-bits wide in little-endian
(...skipping 127 matching lines...) Expand 10 before | Expand all | Expand 10 after
154 0xbe73d6f, 0xaa88141, 0xd976c81, 0x7e7a9cc, 0x18beb771, 0xd773cbd, 0x13f5195 1, 0x9d0c177, 0x1c49a78, 154 0xbe73d6f, 0xaa88141, 0xd976c81, 0x7e7a9cc, 0x18beb771, 0xd773cbd, 0x13f5195 1, 0x9d0c177, 0x1c49a78,
155 }; 155 };
156 156
157 /* Field element operations: 157 /* Field element operations:
158 */ 158 */
159 159
160 /* NON_ZERO_TO_ALL_ONES returns: 160 /* NON_ZERO_TO_ALL_ONES returns:
161 * 0xffffffff for 0 < x <= 2**31 161 * 0xffffffff for 0 < x <= 2**31
162 * 0 for x == 0 or x > 2**31. 162 * 0 for x == 0 or x > 2**31.
163 * 163 *
164 * This macro assumes that right-shifting a signed number shifts in the MSB on 164 * x must be a u32 or an equivalent type such as limb.
165 * the left. This is not ensured by the C standard, but is true on the CPUs
 166 * that we're targeting with this code (x86 and ARM).
167 */ 165 */
168 #define NON_ZERO_TO_ALL_ONES(x) (~((u32) (((s32) ((x)-1)) >> 31))) 166 #define NON_ZERO_TO_ALL_ONES(x) ((((u32)(x) - 1) >> 31) - 1)
169 167
170 /* felem_reduce_carry adds a multiple of p in order to cancel |carry|, 168 /* felem_reduce_carry adds a multiple of p in order to cancel |carry|,
171 * which is a term at 2**257. 169 * which is a term at 2**257.
172 * 170 *
173 * On entry: carry < 2**3, inout[0,2,...] < 2**29, inout[1,3,...] < 2**28. 171 * On entry: carry < 2**3, inout[0,2,...] < 2**29, inout[1,3,...] < 2**28.
174 * On exit: inout[0,2,..] < 2**30, inout[1,3,...] < 2**29. 172 * On exit: inout[0,2,..] < 2**30, inout[1,3,...] < 2**29.
175 */ 173 */
176 static void felem_reduce_carry(felem inout, limb carry) 174 static void felem_reduce_carry(felem inout, limb carry)
177 { 175 {
178 const u32 carry_mask = NON_ZERO_TO_ALL_ONES(carry); 176 const u32 carry_mask = NON_ZERO_TO_ALL_ONES(carry);
(...skipping 947 matching lines...) Expand 10 before | Expand all | Expand 10 after
1126 memset(ny, 0, sizeof(felem)); 1124 memset(ny, 0, sizeof(felem));
1127 memset(nz, 0, sizeof(felem)); 1125 memset(nz, 0, sizeof(felem));
1128 1126
1129 /* The loop adds bits at positions 0, 64, 128 and 192, followed by 1127 /* The loop adds bits at positions 0, 64, 128 and 192, followed by
1130 * positions 32,96,160 and 224 and does this 32 times. 1128 * positions 32,96,160 and 224 and does this 32 times.
1131 */ 1129 */
1132 for (i = 0; i < 32; i++) { 1130 for (i = 0; i < 32; i++) {
1133 if (i) { 1131 if (i) {
1134 point_double(nx, ny, nz, nx, ny, nz); 1132 point_double(nx, ny, nz, nx, ny, nz);
1135 } 1133 }
1134 table_offset = 0;
1136 for (j = 0; j <= 32; j += 32) { 1135 for (j = 0; j <= 32; j += 32) {
1137 char bit0 = get_bit(scalar, 31 - i + j); 1136 char bit0 = get_bit(scalar, 31 - i + j);
1138 char bit1 = get_bit(scalar, 95 - i + j); 1137 char bit1 = get_bit(scalar, 95 - i + j);
1139 char bit2 = get_bit(scalar, 159 - i + j); 1138 char bit2 = get_bit(scalar, 159 - i + j);
1140 char bit3 = get_bit(scalar, 223 - i + j); 1139 char bit3 = get_bit(scalar, 223 - i + j);
1141 limb index = bit0 | (bit1 << 1) | (bit2 << 2) | (bit3 << 3); 1140 limb index = bit0 | (bit1 << 1) | (bit2 << 2) | (bit3 << 3);
1142 1141
1143 table_offset = ((((s32)j) << (32-6)) >> 31) & (30*NLIMBS);
1144 select_affine_point(px, py, kPrecomputed + table_offset, index); 1142 select_affine_point(px, py, kPrecomputed + table_offset, index);
1143 table_offset += 30 * NLIMBS;
1145 1144
1146 /* Since scalar is less than the order of the group, we know that 1145 /* Since scalar is less than the order of the group, we know that
1147 * {nx,ny,nz} != {px,py,1}, unless both are zero, which we handle 1146 * {nx,ny,nz} != {px,py,1}, unless both are zero, which we handle
1148 * below. 1147 * below.
1149 */ 1148 */
1150 point_add_mixed(tx, ty, tz, nx, ny, nz, px, py); 1149 point_add_mixed(tx, ty, tz, nx, ny, nz, px, py);
1151 /* The result of point_add_mixed is incorrect if {nx,ny,nz} is zero 1150 /* The result of point_add_mixed is incorrect if {nx,ny,nz} is zero
1152 * (a.k.a. the point at infinity). We handle that situation by 1151 * (a.k.a. the point at infinity). We handle that situation by
1153 * copying the point from the table. 1152 * copying the point from the table.
1154 */ 1153 */
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after
1222 } 1221 }
1223 1222
1224 index = scalar[31 - i / 2]; 1223 index = scalar[31 - i / 2];
1225 if ((i & 1) == 1) { 1224 if ((i & 1) == 1) {
1226 index &= 15; 1225 index &= 15;
1227 } else { 1226 } else {
1228 index >>= 4; 1227 index >>= 4;
1229 } 1228 }
1230 1229
1231 /* See the comments in scalar_base_mult about handling infinities. */ 1230 /* See the comments in scalar_base_mult about handling infinities. */
1232 » select_jacobian_point(px, py, pz, (limb *) precomp, index); 1231 » select_jacobian_point(px, py, pz, precomp[0][0], index);
1233 point_add(tx, ty, tz, nx, ny, nz, px, py, pz); 1232 point_add(tx, ty, tz, nx, ny, nz, px, py, pz);
1234 copy_conditional(nx, px, n_is_infinity_mask); 1233 copy_conditional(nx, px, n_is_infinity_mask);
1235 copy_conditional(ny, py, n_is_infinity_mask); 1234 copy_conditional(ny, py, n_is_infinity_mask);
1236 copy_conditional(nz, pz, n_is_infinity_mask); 1235 copy_conditional(nz, pz, n_is_infinity_mask);
1237 1236
1238 » p_is_noninfinite_mask = ((s32) ~ (index - 1)) >> 31; 1237 » p_is_noninfinite_mask = NON_ZERO_TO_ALL_ONES(index);
1239 mask = p_is_noninfinite_mask & ~n_is_infinity_mask; 1238 mask = p_is_noninfinite_mask & ~n_is_infinity_mask;
1240 copy_conditional(nx, tx, mask); 1239 copy_conditional(nx, tx, mask);
1241 copy_conditional(ny, ty, mask); 1240 copy_conditional(ny, ty, mask);
1242 copy_conditional(nz, tz, mask); 1241 copy_conditional(nz, tz, mask);
1243 n_is_infinity_mask &= ~p_is_noninfinite_mask; 1242 n_is_infinity_mask &= ~p_is_noninfinite_mask;
1244 } 1243 }
1245 } 1244 }
1246 1245
1247 /* Interface with Freebl: */ 1246 /* Interface with Freebl: */
1248 1247
1248 /* BYTESWAP_MP_DIGIT_TO_LE swaps the bytes of a mp_digit to
1249 * little-endian order.
1250 */
1249 #ifdef IS_BIG_ENDIAN 1251 #ifdef IS_BIG_ENDIAN
1250 #error "This code needs a little-endian processor" 1252 #ifdef __APPLE__
1253 #include <libkern/OSByteOrder.h>
1254 #define BYTESWAP32(x) OSSwapInt32(x)
1255 #define BYTESWAP64(x) OSSwapInt64(x)
1256 #else
1257 #define BYTESWAP32(x) \
agl 2013/02/02 20:34:26 This only works if x is unsigned, but I believe th
1258 ((x) >> 24 | (x) >> 8 & 0xff00 | ((x) & 0xff00) << 8 | (x) << 24)
1259 #define BYTESWAP64(x) \
1260 ((x) >> 56 | (x) >> 40 & 0xff00 | \
1261 (x) >> 24 & 0xff0000 | (x) >> 8 & 0xff000000 | \
1262 ((x) & 0xff000000) << 8 | ((x) & 0xff0000) << 24 | \
1263 ((x) & 0xff00) << 40 | (x) << 56)
1251 #endif 1264 #endif
1252 1265
1253 static const u32 kRInvDigits[8] = { 1266 #ifdef MP_USE_UINT_DIGIT
1267 #define BYTESWAP_MP_DIGIT_TO_LE(x) BYTESWAP32(BYTESWAP32(x))
1268 #else
1269 #define BYTESWAP_MP_DIGIT_TO_LE(x) BYTESWAP64(BYTESWAP64(x))
1270 #endif
1271 #endif /* IS_BIG_ENDIAN */
1272
1273 #ifdef MP_USE_UINT_DIGIT
1274 static const mp_digit kRInvDigits[8] = {
1254 0x80000000, 1, 0xffffffff, 0, 1275 0x80000000, 1, 0xffffffff, 0,
1255 0x80000001, 0xfffffffe, 1, 0x7fffffff 1276 0x80000001, 0xfffffffe, 1, 0x7fffffff
1256 }; 1277 };
1278 #else
1279 static const mp_digit kRInvDigits[4] = {
1280 PR_UINT64(0x180000000), 0xffffffff,
1281 PR_UINT64(0xfffffffe80000001), PR_UINT64(0x7fffffff00000001)
1282 };
1283 #endif
1257 #define MP_DIGITS_IN_256_BITS (32/sizeof(mp_digit)) 1284 #define MP_DIGITS_IN_256_BITS (32/sizeof(mp_digit))
1258 static const mp_int kRInv = { 1285 static const mp_int kRInv = {
1259 MP_ZPOS, 1286 MP_ZPOS,
1260 MP_DIGITS_IN_256_BITS, 1287 MP_DIGITS_IN_256_BITS,
1261 MP_DIGITS_IN_256_BITS, 1288 MP_DIGITS_IN_256_BITS,
1262 /* Because we are running on a little-endian processor, this cast works for
1263 * both 32 and 64-bit processors.
1264 */
1265 (mp_digit*) kRInvDigits 1289 (mp_digit*) kRInvDigits
1266 }; 1290 };
1267 1291
1268 static const limb kTwo28 = 0x10000000; 1292 static const limb kTwo28 = 0x10000000;
1269 static const limb kTwo29 = 0x20000000; 1293 static const limb kTwo29 = 0x20000000;
1270 1294
1271 /* to_montgomery sets out = R*in. */ 1295 /* to_montgomery sets out = R*in. */
1272 static mp_err to_montgomery(felem out, const mp_int *in, const ECGroup *group) 1296 static mp_err to_montgomery(felem out, const mp_int *in, const ECGroup *group)
1273 { 1297 {
1274 /* There are no MPI functions for bitshift operations and we wish to shift 1298 /* There are no MPI functions for bitshift operations and we wish to shift
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after
1330 CLEANUP: 1354 CLEANUP:
1331 mp_clear(&result); 1355 mp_clear(&result);
1332 mp_clear(&tmp); 1356 mp_clear(&tmp);
1333 return res; 1357 return res;
1334 } 1358 }
1335 1359
1336 /* scalar_from_mp_int sets out_scalar=n, where n < the group order. */ 1360 /* scalar_from_mp_int sets out_scalar=n, where n < the group order. */
1337 static void scalar_from_mp_int(u8 out_scalar[32], const mp_int *n) 1361 static void scalar_from_mp_int(u8 out_scalar[32], const mp_int *n)
1338 { 1362 {
1339 /* We require that |n| is less than the order of the group and therefore it 1363 /* We require that |n| is less than the order of the group and therefore it
 1340 * will fit into |scalar|. However, there is a timing side-channel here that 1364 * will fit into |out_scalar|. However, there is a timing side-channel here
1341 * we cannot avoid: if |n| is sufficiently small it may be one or more words 1365 * that we cannot avoid: if |n| is sufficiently small it may be one or more
1342 * too short and we'll copy less data. 1366 * words too short and we'll copy less data.
1343 */ 1367 */
1368 PORT_Assert(MP_USED(n) * sizeof(mp_digit) <= 32);
1344 memset(out_scalar, 0, 32); 1369 memset(out_scalar, 0, 32);
1370 #ifdef IS_LITTLE_ENDIAN
1345 memcpy(out_scalar, MP_DIGITS(n), MP_USED(n) * sizeof(mp_digit)); 1371 memcpy(out_scalar, MP_DIGITS(n), MP_USED(n) * sizeof(mp_digit));
1372 #else
1373 {
1374 mp_size i;
1375 mp_digit swapped[MP_DIGITS_IN_256_BITS];
1376 for (i = 0; i < MP_USED(n); i++) {
1377 swapped[i] = BYTESWAP_MP_DIGIT_TO_LE(MP_DIGIT(n, i));
1378 }
1379 memcpy(out_scalar, swapped, MP_USED(n) * sizeof(mp_digit));
1380 }
1381 #endif
1346 } 1382 }
1347 1383
1348 /* ec_GFp_nistp256_base_point_mul sets {out_x,out_y} = nG, where n is < the 1384 /* ec_GFp_nistp256_base_point_mul sets {out_x,out_y} = nG, where n is < the
1349 * order of the group. 1385 * order of the group.
1350 */ 1386 */
1351 static mp_err ec_GFp_nistp256_base_point_mul(const mp_int *n, 1387 static mp_err ec_GFp_nistp256_base_point_mul(const mp_int *n,
1352 mp_int *out_x, mp_int *out_y, 1388 mp_int *out_x, mp_int *out_y,
1353 const ECGroup *group) 1389 const ECGroup *group)
1354 { 1390 {
1355 u8 scalar[32]; 1391 u8 scalar[32];
1356 felem x, y, z, x_affine, y_affine; 1392 felem x, y, z, x_affine, y_affine;
1357 mp_err res; 1393 mp_err res;
1358 1394
1359 /* FIXME(agl): test that n < order. */
agl 2013/02/02 20:34:26 I think this TODO should remain. I'll write someth
1360
1361 scalar_from_mp_int(scalar, n); 1395 scalar_from_mp_int(scalar, n);
1362 scalar_base_mult(x, y, z, scalar); 1396 scalar_base_mult(x, y, z, scalar);
1363 point_to_affine(x_affine, y_affine, x, y, z); 1397 point_to_affine(x_affine, y_affine, x, y, z);
1364 MP_CHECKOK(from_montgomery(out_x, x_affine, group)); 1398 MP_CHECKOK(from_montgomery(out_x, x_affine, group));
1365 MP_CHECKOK(from_montgomery(out_y, y_affine, group)); 1399 MP_CHECKOK(from_montgomery(out_y, y_affine, group));
1366 1400
1367 CLEANUP: 1401 CLEANUP:
1368 return res; 1402 return res;
1369 } 1403 }
1370 1404
(...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after
1461 /* Wire in fast point multiplication for named curves. */ 1495 /* Wire in fast point multiplication for named curves. */
1462 mp_err ec_group_set_gfp256_32(ECGroup *group, ECCurveName name) 1496 mp_err ec_group_set_gfp256_32(ECGroup *group, ECCurveName name)
1463 { 1497 {
1464 if (name == ECCurve_NIST_P256) { 1498 if (name == ECCurve_NIST_P256) {
1465 group->base_point_mul = &ec_GFp_nistp256_base_point_mul; 1499 group->base_point_mul = &ec_GFp_nistp256_base_point_mul;
1466 group->point_mul = &ec_GFp_nistp256_point_mul; 1500 group->point_mul = &ec_GFp_nistp256_point_mul;
1467 group->points_mul = &ec_GFp_nistp256_points_mul_vartime; 1501 group->points_mul = &ec_GFp_nistp256_points_mul_vartime;
1468 } 1502 }
1469 return MP_OKAY; 1503 return MP_OKAY;
1470 } 1504 }
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698