| Index: mozilla/security/nss/lib/freebl/ecl/ecp_256_32.c
|
| ===================================================================
|
| --- mozilla/security/nss/lib/freebl/ecl/ecp_256_32.c (revision 179928)
|
| +++ mozilla/security/nss/lib/freebl/ecl/ecp_256_32.c (working copy)
|
| @@ -10,10 +10,10 @@
|
| #include "mpi.h"
|
| #include "mpi-priv.h"
|
| #include "ecp.h"
|
| +#include "secport.h"
|
|
|
| typedef PRUint8 u8;
|
| typedef PRUint32 u32;
|
| -typedef PRInt32 s32;
|
| typedef PRUint64 u64;
|
|
|
| /* Our field elements are represented as nine, unsigned 32-bit words. Freebl's
|
| @@ -161,11 +161,9 @@
|
| * 0xffffffff for 0 < x <= 2**31
|
| * 0 for x == 0 or x > 2**31.
|
| *
|
| - * This macro assumes that right-shifting a signed number shifts in the MSB on
|
| - * the left. This is not ensured by the C standard, but is true on the CPUs
|
| - * that we're targetting with this code (x86 and ARM).
|
| + * x must be a u32 or an equivalent type such as limb.
|
| */
|
| -#define NON_ZERO_TO_ALL_ONES(x) (~((u32) (((s32) ((x)-1)) >> 31)))
|
| +#define NON_ZERO_TO_ALL_ONES(x) ((((u32)(x) - 1) >> 31) - 1)
|
|
|
| /* felem_reduce_carry adds a multiple of p in order to cancel |carry|,
|
| * which is a term at 2**257.
|
| @@ -1133,6 +1131,7 @@
|
| if (i) {
|
| point_double(nx, ny, nz, nx, ny, nz);
|
| }
|
| + table_offset = 0;
|
| for (j = 0; j <= 32; j += 32) {
|
| char bit0 = get_bit(scalar, 31 - i + j);
|
| char bit1 = get_bit(scalar, 95 - i + j);
|
| @@ -1140,8 +1139,8 @@
|
| char bit3 = get_bit(scalar, 223 - i + j);
|
| limb index = bit0 | (bit1 << 1) | (bit2 << 2) | (bit3 << 3);
|
|
|
| - table_offset = ((((s32)j) << (32-6)) >> 31) & (30*NLIMBS);
|
| select_affine_point(px, py, kPrecomputed + table_offset, index);
|
| + table_offset += 30 * NLIMBS;
|
|
|
| /* Since scalar is less than the order of the group, we know that
|
| * {nx,ny,nz} != {px,py,1}, unless both are zero, which we handle
|
| @@ -1229,13 +1228,13 @@
|
| }
|
|
|
| /* See the comments in scalar_base_mult about handling infinities. */
|
| - select_jacobian_point(px, py, pz, (limb *) precomp, index);
|
| + select_jacobian_point(px, py, pz, precomp[0][0], index);
|
| point_add(tx, ty, tz, nx, ny, nz, px, py, pz);
|
| copy_conditional(nx, px, n_is_infinity_mask);
|
| copy_conditional(ny, py, n_is_infinity_mask);
|
| copy_conditional(nz, pz, n_is_infinity_mask);
|
|
|
| - p_is_noninfinite_mask = ((s32) ~ (index - 1)) >> 31;
|
| + p_is_noninfinite_mask = NON_ZERO_TO_ALL_ONES(index);
|
| mask = p_is_noninfinite_mask & ~n_is_infinity_mask;
|
| copy_conditional(nx, tx, mask);
|
| copy_conditional(ny, ty, mask);
|
| @@ -1246,22 +1245,47 @@
|
|
|
| /* Interface with Freebl: */
|
|
|
| +/* BYTESWAP_MP_DIGIT_TO_LE swaps the bytes of a mp_digit to
|
| + * little-endian order.
|
| + */
|
| #ifdef IS_BIG_ENDIAN
|
| -#error "This code needs a little-endian processor"
|
| +#ifdef __APPLE__
|
| +#include <libkern/OSByteOrder.h>
|
| +#define BYTESWAP32(x) OSSwapInt32(x)
|
| +#define BYTESWAP64(x) OSSwapInt64(x)
|
| +#else
|
| +#define BYTESWAP32(x) \
|
| + ((x) >> 24 | (x) >> 8 & 0xff00 | ((x) & 0xff00) << 8 | (x) << 24)
|
| +#define BYTESWAP64(x) \
|
| + ((x) >> 56 | (x) >> 40 & 0xff00 | \
|
| + (x) >> 24 & 0xff0000 | (x) >> 8 & 0xff000000 | \
|
| + ((x) & 0xff000000) << 8 | ((x) & 0xff0000) << 24 | \
|
| + ((x) & 0xff00) << 40 | (x) << 56)
|
| #endif
|
|
|
| -static const u32 kRInvDigits[8] = {
|
| +#ifdef MP_USE_UINT_DIGIT
|
| +#define BYTESWAP_MP_DIGIT_TO_LE(x) BYTESWAP32(x)
|
| +#else
|
| +#define BYTESWAP_MP_DIGIT_TO_LE(x) BYTESWAP64(x)
|
| +#endif
|
| +#endif /* IS_BIG_ENDIAN */
|
| +
|
| +#ifdef MP_USE_UINT_DIGIT
|
| +static const mp_digit kRInvDigits[8] = {
|
| 0x80000000, 1, 0xffffffff, 0,
|
| 0x80000001, 0xfffffffe, 1, 0x7fffffff
|
| };
|
| +#else
|
| +static const mp_digit kRInvDigits[4] = {
|
| + PR_UINT64(0x180000000), 0xffffffff,
|
| + PR_UINT64(0xfffffffe80000001), PR_UINT64(0x7fffffff00000001)
|
| +};
|
| +#endif
|
| #define MP_DIGITS_IN_256_BITS (32/sizeof(mp_digit))
|
| static const mp_int kRInv = {
|
| MP_ZPOS,
|
| MP_DIGITS_IN_256_BITS,
|
| MP_DIGITS_IN_256_BITS,
|
| - /* Because we are running on a little-endian processor, this cast works for
|
| - * both 32 and 64-bit processors.
|
| - */
|
| (mp_digit*) kRInvDigits
|
| };
|
|
|
| @@ -1337,12 +1361,24 @@
|
| static void scalar_from_mp_int(u8 out_scalar[32], const mp_int *n)
|
| {
|
| /* We require that |n| is less than the order of the group and therefore it
|
| - * will fit into |scalar|. However, these is a timing side-channel here that
|
| - * we cannot avoid: if |n| is sufficiently small it may be one or more words
|
| - * too short and we'll copy less data.
|
| + * will fit into |out_scalar|. However, there is a timing side-channel here
|
| + * that we cannot avoid: if |n| is sufficiently small it may be one or more
|
| + * words too short and we'll copy less data.
|
| */
|
| + PORT_Assert(MP_USED(n) * sizeof(mp_digit) <= 32);
|
| memset(out_scalar, 0, 32);
|
| +#ifdef IS_LITTLE_ENDIAN
|
| memcpy(out_scalar, MP_DIGITS(n), MP_USED(n) * sizeof(mp_digit));
|
| +#else
|
| + {
|
| + mp_size i;
|
| + mp_digit swapped[MP_DIGITS_IN_256_BITS];
|
| + for (i = 0; i < MP_USED(n); i++) {
|
| + swapped[i] = BYTESWAP_MP_DIGIT_TO_LE(MP_DIGIT(n, i));
|
| + }
|
| + memcpy(out_scalar, swapped, MP_USED(n) * sizeof(mp_digit));
|
| + }
|
| +#endif
|
| }
|
|
|
| /* ec_GFp_nistp256_base_point_mul sets {out_x,out_y} = nG, where n is < the
|
|
|