Chromium Code Reviews
| Index: mozilla/security/nss/lib/freebl/ecl/ecp_256_32.c |
| =================================================================== |
| --- mozilla/security/nss/lib/freebl/ecl/ecp_256_32.c (revision 179928) |
| +++ mozilla/security/nss/lib/freebl/ecl/ecp_256_32.c (working copy) |
| @@ -10,10 +10,10 @@ |
| #include "mpi.h" |
| #include "mpi-priv.h" |
| #include "ecp.h" |
| +#include "secport.h" |
| typedef PRUint8 u8; |
| typedef PRUint32 u32; |
| -typedef PRInt32 s32; |
| typedef PRUint64 u64; |
| /* Our field elements are represented as nine, unsigned 32-bit words. Freebl's |
| @@ -161,11 +161,9 @@ |
| * 0xffffffff for 0 < x <= 2**31 |
| * 0 for x == 0 or x > 2**31. |
| * |
| - * This macro assumes that right-shifting a signed number shifts in the MSB on |
| - * the left. This is not ensured by the C standard, but is true on the CPUs |
| - * that we're targetting with this code (x86 and ARM). |
| + * x must be a u32 or an equivalent type such as limb. |
| */ |
| -#define NON_ZERO_TO_ALL_ONES(x) (~((u32) (((s32) ((x)-1)) >> 31))) |
| +#define NON_ZERO_TO_ALL_ONES(x) ((((u32)(x) - 1) >> 31) - 1) |
| /* felem_reduce_carry adds a multiple of p in order to cancel |carry|, |
| * which is a term at 2**257. |
| @@ -1133,6 +1131,7 @@ |
| if (i) { |
| point_double(nx, ny, nz, nx, ny, nz); |
| } |
| + table_offset = 0; |
| for (j = 0; j <= 32; j += 32) { |
| char bit0 = get_bit(scalar, 31 - i + j); |
| char bit1 = get_bit(scalar, 95 - i + j); |
| @@ -1140,8 +1139,8 @@ |
| char bit3 = get_bit(scalar, 223 - i + j); |
| limb index = bit0 | (bit1 << 1) | (bit2 << 2) | (bit3 << 3); |
| - table_offset = ((((s32)j) << (32-6)) >> 31) & (30*NLIMBS); |
| select_affine_point(px, py, kPrecomputed + table_offset, index); |
| + table_offset += 30 * NLIMBS; |
| /* Since scalar is less than the order of the group, we know that |
| * {nx,ny,nz} != {px,py,1}, unless both are zero, which we handle |
| @@ -1229,13 +1228,13 @@ |
| } |
| /* See the comments in scalar_base_mult about handling infinities. */ |
| - select_jacobian_point(px, py, pz, (limb *) precomp, index); |
| + select_jacobian_point(px, py, pz, precomp[0][0], index); |
| point_add(tx, ty, tz, nx, ny, nz, px, py, pz); |
| copy_conditional(nx, px, n_is_infinity_mask); |
| copy_conditional(ny, py, n_is_infinity_mask); |
| copy_conditional(nz, pz, n_is_infinity_mask); |
| - p_is_noninfinite_mask = ((s32) ~ (index - 1)) >> 31; |
| + p_is_noninfinite_mask = NON_ZERO_TO_ALL_ONES(index); |
| mask = p_is_noninfinite_mask & ~n_is_infinity_mask; |
| copy_conditional(nx, tx, mask); |
| copy_conditional(ny, ty, mask); |
| @@ -1246,22 +1245,47 @@ |
| /* Interface with Freebl: */ |
| +/* BYTESWAP_MP_DIGIT_TO_LE swaps the bytes of a mp_digit to |
| + * little-endian order. |
| + */ |
| #ifdef IS_BIG_ENDIAN |
| -#error "This code needs a little-endian processor" |
| +#ifdef __APPLE__ |
| +#include <libkern/OSByteOrder.h> |
| +#define BYTESWAP32(x) OSSwapInt32(x) |
| +#define BYTESWAP64(x) OSSwapInt64(x) |
| +#else |
| +#define BYTESWAP32(x) \ |
|
agl
2013/02/02 20:34:26
This only works if x is unsigned, but I believe that is the case for all callers here, so this should be fine.
|
| + ((x) >> 24 | (x) >> 8 & 0xff00 | ((x) & 0xff00) << 8 | (x) << 24) |
| +#define BYTESWAP64(x) \ |
| + ((x) >> 56 | (x) >> 40 & 0xff00 | \ |
| + (x) >> 24 & 0xff0000 | (x) >> 8 & 0xff000000 | \ |
| + ((x) & 0xff000000) << 8 | ((x) & 0xff0000) << 24 | \ |
| + ((x) & 0xff00) << 40 | (x) << 56) |
| #endif |
| -static const u32 kRInvDigits[8] = { |
| +#ifdef MP_USE_UINT_DIGIT |
| +#define BYTESWAP_MP_DIGIT_TO_LE(x) BYTESWAP32(x) |
| +#else |
| +#define BYTESWAP_MP_DIGIT_TO_LE(x) BYTESWAP64(x) |
| +#endif |
| +#endif /* IS_BIG_ENDIAN */ |
| + |
| +#ifdef MP_USE_UINT_DIGIT |
| +static const mp_digit kRInvDigits[8] = { |
| 0x80000000, 1, 0xffffffff, 0, |
| 0x80000001, 0xfffffffe, 1, 0x7fffffff |
| }; |
| +#else |
| +static const mp_digit kRInvDigits[4] = { |
| + PR_UINT64(0x180000000), 0xffffffff, |
| + PR_UINT64(0xfffffffe80000001), PR_UINT64(0x7fffffff00000001) |
| +}; |
| +#endif |
| #define MP_DIGITS_IN_256_BITS (32/sizeof(mp_digit)) |
| static const mp_int kRInv = { |
| MP_ZPOS, |
| MP_DIGITS_IN_256_BITS, |
| MP_DIGITS_IN_256_BITS, |
| - /* Because we are running on a little-endian processor, this cast works for |
| - * both 32 and 64-bit processors. |
| - */ |
| (mp_digit*) kRInvDigits |
| }; |
| @@ -1337,12 +1361,24 @@ |
| static void scalar_from_mp_int(u8 out_scalar[32], const mp_int *n) |
| { |
| /* We require that |n| is less than the order of the group and therefore it |
| - * will fit into |scalar|. However, these is a timing side-channel here that |
| - * we cannot avoid: if |n| is sufficiently small it may be one or more words |
| - * too short and we'll copy less data. |
| + * will fit into |out_scalar|. However, there is a timing side-channel here |
| + * that we cannot avoid: if |n| is sufficiently small it may be one or more |
| + * words too short and we'll copy less data. |
| */ |
| + PORT_Assert(MP_USED(n) * sizeof(mp_digit) <= 32); |
| memset(out_scalar, 0, 32); |
| +#ifdef IS_LITTLE_ENDIAN |
| memcpy(out_scalar, MP_DIGITS(n), MP_USED(n) * sizeof(mp_digit)); |
| +#else |
| + { |
| + mp_size i; |
| + mp_digit swapped[MP_DIGITS_IN_256_BITS]; |
| + for (i = 0; i < MP_USED(n); i++) { |
| + swapped[i] = BYTESWAP_MP_DIGIT_TO_LE(MP_DIGIT(n, i)); |
| + } |
| + memcpy(out_scalar, swapped, MP_USED(n) * sizeof(mp_digit)); |
| + } |
| +#endif |
| } |
| /* ec_GFp_nistp256_base_point_mul sets {out_x,out_y} = nG, where n is < the |
| @@ -1356,8 +1392,6 @@ |
| felem x, y, z, x_affine, y_affine; |
| mp_err res; |
| - /* FIXME(agl): test that n < order. */ |
|
agl
2013/02/02 20:34:26
I think this TODO should remain. I'll write something to check that n is less than the order in a follow-up.
|
| - |
| scalar_from_mp_int(scalar, n); |
| scalar_base_mult(x, y, z, scalar); |
| point_to_affine(x_affine, y_affine, x, y, z); |