OLD | NEW |
---|---|
1 /* This Source Code Form is subject to the terms of the Mozilla Public | 1 /* This Source Code Form is subject to the terms of the Mozilla Public |
2 * License, v. 2.0. If a copy of the MPL was not distributed with this | 2 * License, v. 2.0. If a copy of the MPL was not distributed with this |
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ | 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
4 | 4 |
5 /* A 32-bit implementation of the NIST P-256 elliptic curve. */ | 5 /* A 32-bit implementation of the NIST P-256 elliptic curve. */ |
6 | 6 |
7 #include <string.h> | 7 #include <string.h> |
8 | 8 |
9 #include "prtypes.h" | 9 #include "prtypes.h" |
10 #include "mpi.h" | 10 #include "mpi.h" |
(...skipping 142 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
153 0x13d8341f, 0x8daefd7, 0x139c972d, 0x6b7ea75, 0xd4a9dde, 0xff163d8, 0x81d55d 7, 0xa5bef68, 0xb7b30d8, | 153 0x13d8341f, 0x8daefd7, 0x139c972d, 0x6b7ea75, 0xd4a9dde, 0xff163d8, 0x81d55d 7, 0xa5bef68, 0xb7b30d8, |
154 0xbe73d6f, 0xaa88141, 0xd976c81, 0x7e7a9cc, 0x18beb771, 0xd773cbd, 0x13f5195 1, 0x9d0c177, 0x1c49a78, | 154 0xbe73d6f, 0xaa88141, 0xd976c81, 0x7e7a9cc, 0x18beb771, 0xd773cbd, 0x13f5195 1, 0x9d0c177, 0x1c49a78, |
155 }; | 155 }; |
156 | 156 |
157 /* Field element operations: | 157 /* Field element operations: |
158 */ | 158 */ |
159 | 159 |
160 /* NON_ZERO_TO_ALL_ONES returns: | 160 /* NON_ZERO_TO_ALL_ONES returns: |
161 * 0xffffffff for 0 < x <= 2**31 | 161 * 0xffffffff for 0 < x <= 2**31 |
162 * 0 for x == 0 or x > 2**31. | 162 * 0 for x == 0 or x > 2**31. |
163 * | |
164 * This macro assumes that right-shifting a signed number shifts in the MSB on | |
165 * the left. This is not ensured by the C standard, but is true on the CPUs | |
166 * that we're targeting with this code (x86 and ARM). | |
167 */ | 163 */ |
168 #define NON_ZERO_TO_ALL_ONES(x) (~((u32) (((s32) ((x)-1)) >> 31))) | 164 #define NON_ZERO_TO_ALL_ONES(x) ((((x) - 1) >> 31) - 1) |
Ryan Sleevi
2013/01/31 22:53:38
Suggestion: Add u32 casts to make sure that the un
wtc
2013/02/01 05:28:04
Thank you for the suggestion. The argument |x| is
| |
169 | 165 |
170 /* felem_reduce_carry adds a multiple of p in order to cancel |carry|, | 166 /* felem_reduce_carry adds a multiple of p in order to cancel |carry|, |
171 * which is a term at 2**257. | 167 * which is a term at 2**257. |
172 * | 168 * |
173 * On entry: carry < 2**3, inout[0,2,...] < 2**29, inout[1,3,...] < 2**28. | 169 * On entry: carry < 2**3, inout[0,2,...] < 2**29, inout[1,3,...] < 2**28. |
174 * On exit: inout[0,2,..] < 2**30, inout[1,3,...] < 2**29. | 170 * On exit: inout[0,2,..] < 2**30, inout[1,3,...] < 2**29. |
175 */ | 171 */ |
176 static void felem_reduce_carry(felem inout, limb carry) | 172 static void felem_reduce_carry(felem inout, limb carry) |
177 { | 173 { |
178 const u32 carry_mask = NON_ZERO_TO_ALL_ONES(carry); | 174 const u32 carry_mask = NON_ZERO_TO_ALL_ONES(carry); |
(...skipping 954 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1133 if (i) { | 1129 if (i) { |
1134 point_double(nx, ny, nz, nx, ny, nz); | 1130 point_double(nx, ny, nz, nx, ny, nz); |
1135 } | 1131 } |
1136 for (j = 0; j <= 32; j += 32) { | 1132 for (j = 0; j <= 32; j += 32) { |
1137 char bit0 = get_bit(scalar, 31 - i + j); | 1133 char bit0 = get_bit(scalar, 31 - i + j); |
1138 char bit1 = get_bit(scalar, 95 - i + j); | 1134 char bit1 = get_bit(scalar, 95 - i + j); |
1139 char bit2 = get_bit(scalar, 159 - i + j); | 1135 char bit2 = get_bit(scalar, 159 - i + j); |
1140 char bit3 = get_bit(scalar, 223 - i + j); | 1136 char bit3 = get_bit(scalar, 223 - i + j); |
1141 limb index = bit0 | (bit1 << 1) | (bit2 << 2) | (bit3 << 3); | 1137 limb index = bit0 | (bit1 << 1) | (bit2 << 2) | (bit3 << 3); |
1142 | 1138 |
1143 table_offset = ((((s32)j) << (32-6)) >> 31) & (30*NLIMBS); | 1139 table_offset = ((((s32)j) << (32-6)) >> 31) & (30*NLIMBS); |
wtc
2013/02/01 05:28:04
Here j is 0 or 32 (1 << 5). So this can be simplif
wtc
2013/02/01 05:35:40
A better solution:
table_offset = 0;
for
agl
2013/02/01 18:45:17
I like this, thanks.
| |
1144 select_affine_point(px, py, kPrecomputed + table_offset, index); | 1140 select_affine_point(px, py, kPrecomputed + table_offset, index); |
1145 | 1141 |
1146 /* Since scalar is less than the order of the group, we know that | 1142 /* Since scalar is less than the order of the group, we know that |
1147 * {nx,ny,nz} != {px,py,1}, unless both are zero, which we handle | 1143 * {nx,ny,nz} != {px,py,1}, unless both are zero, which we handle |
1148 * below. | 1144 * below. |
1149 */ | 1145 */ |
1150 point_add_mixed(tx, ty, tz, nx, ny, nz, px, py); | 1146 point_add_mixed(tx, ty, tz, nx, ny, nz, px, py); |
1151 /* The result of point_add_mixed is incorrect if {nx,ny,nz} is zero | 1147 /* The result of point_add_mixed is incorrect if {nx,ny,nz} is zero |
1152 * (a.k.a. the point at infinity). We handle that situation by | 1148 * (a.k.a. the point at infinity). We handle that situation by |
1153 * copying the point from the table. | 1149 * copying the point from the table. |
(...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1222 } | 1218 } |
1223 | 1219 |
1224 index = scalar[31 - i / 2]; | 1220 index = scalar[31 - i / 2]; |
1225 if ((i & 1) == 1) { | 1221 if ((i & 1) == 1) { |
1226 index &= 15; | 1222 index &= 15; |
1227 } else { | 1223 } else { |
1228 index >>= 4; | 1224 index >>= 4; |
1229 } | 1225 } |
1230 | 1226 |
1231 /* See the comments in scalar_base_mult about handling infinities. */ | 1227 /* See the comments in scalar_base_mult about handling infinities. */ |
1232 select_jacobian_point(px, py, pz, (limb *) precomp, index); | 1228 select_jacobian_point(px, py, pz, (limb *) precomp, index); |
wtc
2013/01/31 22:46:05
Why is this (limb *) cast necessary?
Is there som
agl
2013/02/01 18:45:17
ecp_256_32.c:1233:2: warning: passing argument 4 o
wtc
2013/02/01 19:28:35
Thanks for the compiler warning message. I changed
| |
1233 point_add(tx, ty, tz, nx, ny, nz, px, py, pz); | 1229 point_add(tx, ty, tz, nx, ny, nz, px, py, pz); |
1234 copy_conditional(nx, px, n_is_infinity_mask); | 1230 copy_conditional(nx, px, n_is_infinity_mask); |
1235 copy_conditional(ny, py, n_is_infinity_mask); | 1231 copy_conditional(ny, py, n_is_infinity_mask); |
1236 copy_conditional(nz, pz, n_is_infinity_mask); | 1232 copy_conditional(nz, pz, n_is_infinity_mask); |
1237 | 1233 |
1238 p_is_noninfinite_mask = ((s32) ~ (index - 1)) >> 31; | 1234 p_is_noninfinite_mask = ((s32) ~ (index - 1)) >> 31; |
wtc
2013/02/01 05:28:04
Here |index| is 0, ..., 15. So this can be rewritt
agl
2013/02/01 18:45:17
Agreed.
| |
1239 mask = p_is_noninfinite_mask & ~n_is_infinity_mask; | 1235 mask = p_is_noninfinite_mask & ~n_is_infinity_mask; |
1240 copy_conditional(nx, tx, mask); | 1236 copy_conditional(nx, tx, mask); |
1241 copy_conditional(ny, ty, mask); | 1237 copy_conditional(ny, ty, mask); |
1242 copy_conditional(nz, tz, mask); | 1238 copy_conditional(nz, tz, mask); |
1243 n_is_infinity_mask &= ~p_is_noninfinite_mask; | 1239 n_is_infinity_mask &= ~p_is_noninfinite_mask; |
1244 } | 1240 } |
1245 } | 1241 } |
1246 | 1242 |
1247 /* Interface with Freebl: */ | 1243 /* Interface with Freebl: */ |
1248 | 1244 |
1245 /* BYTESWAP_MP_DIGIT_TO_LE swaps the bytes of a mp_digit to | |
1246 * little-endian order. | |
1247 */ | |
1249 #ifdef IS_BIG_ENDIAN | 1248 #ifdef IS_BIG_ENDIAN |
1250 #error "This code needs a little-endian processor" | 1249 #ifdef __APPLE__ |
1250 #include <libkern/OSByteOrder.h> | |
1251 #define BYTESWAP32(x) OSSwapInt32(x) | |
1252 #define BYTESWAP64(x) OSSwapInt64(x) | |
1253 #else | |
1254 #define BYTESWAP32(x) \ | |
1255 ((x) >> 24 | (x) >> 8 & 0xff00 | ((x) & 0xff00) << 8 | (x) << 24) | |
1256 #define BYTESWAP64(x) \ | |
1257 ((x) >> 56 | (x) >> 40 & 0xff00 | \ | |
1258 (x) >> 24 & 0xff0000 | (x) >> 8 & 0xff000000 | \ | |
1259 ((x) & 0xff000000) << 8 | ((x) & 0xff0000) << 24 | \ | |
1260 ((x) & 0xff00) << 40 | (x) << 56) | |
1251 #endif | 1261 #endif |
1252 | 1262 |
1253 static const u32 kRInvDigits[8] = { | 1263 #ifdef MP_USE_UINT_DIGIT |
wtc
2013/01/31 22:46:05
If this macro is defined, mp_digit is 32-bit. Othe
| |
1264 #define BYTESWAP_MP_DIGIT_TO_LE(x) BYTESWAP32(BYTESWAP32(x)) | |
1265 #else | |
1266 #define BYTESWAP_MP_DIGIT_TO_LE(x) BYTESWAP64(BYTESWAP64(x)) | |
1267 #endif | |
1268 #endif /* IS_BIG_ENDIAN */ | |
1269 | |
1270 #ifdef MP_USE_UINT_DIGIT | |
1271 static const mp_digit kRInvDigits[8] = { | |
1254 0x80000000, 1, 0xffffffff, 0, | 1272 0x80000000, 1, 0xffffffff, 0, |
1255 0x80000001, 0xfffffffe, 1, 0x7fffffff | 1273 0x80000001, 0xfffffffe, 1, 0x7fffffff |
1256 }; | 1274 }; |
1275 #else | |
1276 static const mp_digit kRInvDigits[4] = { | |
1277 PR_UINT64(0x180000000), 0xffffffff, | |
1278 PR_UINT64(0xfffffffe80000001), PR_UINT64(0x7fffffff00000001) | |
1279 }; | |
1280 #endif | |
1257 #define MP_DIGITS_IN_256_BITS (32/sizeof(mp_digit)) | 1281 #define MP_DIGITS_IN_256_BITS (32/sizeof(mp_digit)) |
1258 static const mp_int kRInv = { | 1282 static const mp_int kRInv = { |
1259 MP_ZPOS, | 1283 MP_ZPOS, |
1260 MP_DIGITS_IN_256_BITS, | 1284 MP_DIGITS_IN_256_BITS, |
1261 MP_DIGITS_IN_256_BITS, | 1285 MP_DIGITS_IN_256_BITS, |
1262 /* Because we are running on a little-endian processor, this cast works for | |
1263 * both 32 and 64-bit processors. | |
1264 */ | |
1265 (mp_digit*) kRInvDigits | 1286 (mp_digit*) kRInvDigits |
1266 }; | 1287 }; |
1267 | 1288 |
1268 static const limb kTwo28 = 0x10000000; | 1289 static const limb kTwo28 = 0x10000000; |
1269 static const limb kTwo29 = 0x20000000; | 1290 static const limb kTwo29 = 0x20000000; |
1270 | 1291 |
1271 /* to_montgomery sets out = R*in. */ | 1292 /* to_montgomery sets out = R*in. */ |
1272 static mp_err to_montgomery(felem out, const mp_int *in, const ECGroup *group) | 1293 static mp_err to_montgomery(felem out, const mp_int *in, const ECGroup *group) |
1273 { | 1294 { |
1274 /* There are no MPI functions for bitshift operations and we wish to shift | 1295 /* There are no MPI functions for bitshift operations and we wish to shift |
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1335 | 1356 |
1336 /* scalar_from_mp_int sets out_scalar=n, where n < the group order. */ | 1357 /* scalar_from_mp_int sets out_scalar=n, where n < the group order. */ |
1337 static void scalar_from_mp_int(u8 out_scalar[32], const mp_int *n) | 1358 static void scalar_from_mp_int(u8 out_scalar[32], const mp_int *n) |
1338 { | 1359 { |
1339 /* We require that |n| is less than the order of the group and therefore it | 1360 * will fit into |scalar|. However, there is a timing side-channel here that |
1340 * will fit into |scalar|. However, there is a timing side-channel here that | 1361 * we cannot avoid: if |n| is sufficiently small it may be one or more words |
1341 * we cannot avoid: if |n| is sufficiently small it may be one or more words | 1362 * we cannot avoid: if |n| is sufficiently small it may be one or more words |
1342 * too short and we'll copy less data. | 1363 * too short and we'll copy less data. |
1343 */ | 1364 */ |
1344 memset(out_scalar, 0, 32); | 1365 memset(out_scalar, 0, 32); |
1366 #ifdef IS_LITTLE_ENDIAN | |
1345 memcpy(out_scalar, MP_DIGITS(n), MP_USED(n) * sizeof(mp_digit)); | 1367 memcpy(out_scalar, MP_DIGITS(n), MP_USED(n) * sizeof(mp_digit)); |
wtc
2013/01/31 22:46:05
I found that this memcpy is done without checking
agl
2013/02/01 18:45:17
Yes. I had forgotten about that.
Is there a #defi
wtc
2013/02/01 19:28:35
NSS uses both DEBUG and NDEBUG. DEBUG is more comm
| |
1368 #else | |
1369 { | |
1370 mp_size i; | |
1371 mp_digit swapped[MP_DIGITS_IN_256_BITS]; | |
1372 for (i = 0; i < MP_USED(n); i++) { | |
1373 swapped[i] = BYTESWAP_MP_DIGIT_TO_LE(MP_DIGIT(n, i)); | |
1374 } | |
1375 memcpy(out_scalar, swapped, MP_USED(n) * sizeof(mp_digit)); | |
1376 } | |
1377 #endif | |
1346 } | 1378 } |
1347 | 1379 |
1348 /* ec_GFp_nistp256_base_point_mul sets {out_x,out_y} = nG, where n is < the | 1380 /* ec_GFp_nistp256_base_point_mul sets {out_x,out_y} = nG, where n is < the |
1349 * order of the group. | 1381 * order of the group. |
1350 */ | 1382 */ |
1351 static mp_err ec_GFp_nistp256_base_point_mul(const mp_int *n, | 1383 static mp_err ec_GFp_nistp256_base_point_mul(const mp_int *n, |
1352 mp_int *out_x, mp_int *out_y, | 1384 mp_int *out_x, mp_int *out_y, |
1353 const ECGroup *group) | 1385 const ECGroup *group) |
1354 { | 1386 { |
1355 u8 scalar[32]; | 1387 u8 scalar[32]; |
1356 felem x, y, z, x_affine, y_affine; | 1388 felem x, y, z, x_affine, y_affine; |
1357 mp_err res; | 1389 mp_err res; |
1358 | 1390 |
1359 /* FIXME(agl): test that n < order. */ | 1391 /* FIXME(agl): test that n < order. */ |
wtc
2013/02/01 19:28:35
I added an assertion to scalar_from_mp_int to ensu
| |
1360 | 1392 |
1361 scalar_from_mp_int(scalar, n); | 1393 scalar_from_mp_int(scalar, n); |
1362 scalar_base_mult(x, y, z, scalar); | 1394 scalar_base_mult(x, y, z, scalar); |
1363 point_to_affine(x_affine, y_affine, x, y, z); | 1395 point_to_affine(x_affine, y_affine, x, y, z); |
1364 MP_CHECKOK(from_montgomery(out_x, x_affine, group)); | 1396 MP_CHECKOK(from_montgomery(out_x, x_affine, group)); |
1365 MP_CHECKOK(from_montgomery(out_y, y_affine, group)); | 1397 MP_CHECKOK(from_montgomery(out_y, y_affine, group)); |
1366 | 1398 |
1367 CLEANUP: | 1399 CLEANUP: |
1368 return res; | 1400 return res; |
1369 } | 1401 } |
(...skipping 91 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1461 /* Wire in fast point multiplication for named curves. */ | 1493 /* Wire in fast point multiplication for named curves. */ |
1462 mp_err ec_group_set_gfp256_32(ECGroup *group, ECCurveName name) | 1494 mp_err ec_group_set_gfp256_32(ECGroup *group, ECCurveName name) |
1463 { | 1495 { |
1464 if (name == ECCurve_NIST_P256) { | 1496 if (name == ECCurve_NIST_P256) { |
1465 group->base_point_mul = &ec_GFp_nistp256_base_point_mul; | 1497 group->base_point_mul = &ec_GFp_nistp256_base_point_mul; |
1466 group->point_mul = &ec_GFp_nistp256_point_mul; | 1498 group->point_mul = &ec_GFp_nistp256_point_mul; |
1467 group->points_mul = &ec_GFp_nistp256_points_mul_vartime; | 1499 group->points_mul = &ec_GFp_nistp256_points_mul_vartime; |
1468 } | 1500 } |
1469 return MP_OKAY; | 1501 return MP_OKAY; |
1470 } | 1502 } |
OLD | NEW |