| OLD | NEW |
| (Empty) |
| 1 /* This Source Code Form is subject to the terms of the Mozilla Public | |
| 2 * License, v. 2.0. If a copy of the MPL was not distributed with this | |
| 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ | |
| 4 | |
| 5 /* This file implements modular exponentiation using Montgomery's | |
| 6 * method for modular reduction. This file implements the method | |
| 7 * described as "Improvement 2" in the paper "A Cryptographic Library for | |
| 8 * the Motorola DSP56000" by Stephen R. Dusse' and Burton S. Kaliski Jr. | |
| 9 * published in "Advances in Cryptology: Proceedings of EUROCRYPT '90" | |
| 10 * "Lecture Notes in Computer Science" volume 473, 1991, pg 230-244, | |
| 11 * published by Springer Verlag. | |
| 12 */ | |
| 13 | |
| 14 #define MP_USING_CACHE_SAFE_MOD_EXP 1 | |
| 15 #include <string.h> | |
| 16 #include "mpi-priv.h" | |
| 17 #include "mplogic.h" | |
| 18 #include "mpprime.h" | |
| 19 #ifdef MP_USING_MONT_MULF | |
| 20 #include "montmulf.h" | |
| 21 #endif | |
| 22 #include <stddef.h> /* ptrdiff_t */ | |
| 23 | |
| 24 /* if MP_CHAR_STORE_SLOW is defined, we */ | |
| 25 /* need to know endianness of this platform. */ | |
| 26 #ifdef MP_CHAR_STORE_SLOW | |
| 27 #if !defined(MP_IS_BIG_ENDIAN) && !defined(MP_IS_LITTLE_ENDIAN) | |
| 28 #error "You must define MP_IS_BIG_ENDIAN or MP_IS_LITTLE_ENDIAN\n" \ | |
| 29 " if you define MP_CHAR_STORE_SLOW." | |
| 30 #endif | |
| 31 #endif | |
| 32 | |
| 33 #define STATIC | |
| 34 | |
| 35 #define MAX_ODD_INTS 32 /* 2 ** (WINDOW_BITS - 1) */ | |
| 36 | |
| 37 /*! computes T = REDC(T), 2^b == R | |
| 38 \param T < RN | |
| 39 */ | |
/*! computes T = REDC(T), 2^b == R
    \param T < RN
    Montgomery reduction: divides T by R (mod N) without a real division.
    Each pass adds a multiple of N chosen so the i'th digit of T becomes
    zero; after MP_USED(N) passes the low digits are all zero and the
    shift by MP_USED(N) digits performs the division by R exactly.
*/
mp_err s_mp_redc(mp_int *T, mp_mont_modulus *mmm)
{
  mp_err res;
  mp_size i;

  /* Make room for the worst-case intermediate: 2*len(N)+1 digits. */
  i = (MP_USED(&mmm->N) << 1) + 1;
  MP_CHECKOK( s_mp_pad(T, i) );
  for (i = 0; i < MP_USED(&mmm->N); ++i ) {
    /* m_i is chosen (via n0' == -1/N mod RADIX) so that adding N*m_i
     * at offset i zeroes digit i of T. */
    mp_digit m_i = MP_DIGIT(T, i) * mmm->n0prime;
    /* T += N * m_i * (MP_RADIX ** i); */
    s_mp_mul_d_add_offset(&mmm->N, m_i, T, i);
  }
  s_mp_clamp(T);

  /* T /= R */
  s_mp_rshd( T, MP_USED(&mmm->N) );

  /* The result may still be one subtraction above the modulus. */
  if ((res = s_mp_cmp(T, &mmm->N)) >= 0) {
    /* T = T - N */
    MP_CHECKOK( s_mp_sub(T, &mmm->N) );
#ifdef DEBUG
    /* Sanity check: a single subtraction must have been enough. */
    if ((res = mp_cmp(T, &mmm->N)) >= 0) {
      res = MP_UNDEF;
      goto CLEANUP;
    }
#endif
  }
  res = MP_OKAY;
CLEANUP:
  return res;
}
| 71 | |
| 72 #if !defined(MP_MONT_USE_MP_MUL) | |
| 73 | |
| 74 /*! c <- REDC( a * b ) mod N | |
| 75 \param a < N i.e. "reduced" | |
| 76 \param b < N i.e. "reduced" | |
| 77 \param mmm modulus N and n0' of N | |
| 78 */ | |
/*! c <- REDC( a * b ) mod N
    \param a < N  i.e. "reduced"
    \param b < N  i.e. "reduced"
    \param mmm    modulus N and n0' of N
    Fused Montgomery multiply: interleaves the schoolbook product a*b with
    the per-digit reduction passes of s_mp_redc, so the intermediate never
    needs a separate full-width reduction afterwards.
*/
mp_err s_mp_mul_mont(const mp_int *a, const mp_int *b, mp_int *c,
                     mp_mont_modulus *mmm)
{
  mp_digit *pb;
  mp_digit m_i;
  mp_err   res;
  mp_size  ib; /* "index b": index of current digit of B */
  mp_size  useda, usedb;

  ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);

  if (MP_USED(a) < MP_USED(b)) {
    const mp_int *xch = b;  /* switch a and b, to do fewer outer loops */
    b = a;
    a = xch;
  }

  /* Reset c and size it for the worst-case intermediate (2*len(N)+1). */
  MP_USED(c) = 1; MP_DIGIT(c, 0) = 0;
  ib = (MP_USED(&mmm->N) << 1) + 1;
  if((res = s_mp_pad(c, ib)) != MP_OKAY)
    goto CLEANUP;

  /* First partial product (b's digit 0), then zero the tail of c and do
   * the first reduction pass to clear digit 0. */
  useda = MP_USED(a);
  pb = MP_DIGITS(b);
  s_mpv_mul_d(MP_DIGITS(a), useda, *pb++, MP_DIGITS(c));
  s_mp_setz(MP_DIGITS(c) + useda + 1, ib - (useda + 1));
  m_i = MP_DIGIT(c, 0) * mmm->n0prime;
  s_mp_mul_d_add_offset(&mmm->N, m_i, c, 0);

  /* Outer loop:  Digits of b */
  usedb = MP_USED(b);
  for (ib = 1; ib < usedb; ib++) {
    mp_digit b_i    = *pb++;

    /* Inner product:  Digits of a */
    if (b_i)
      s_mpv_mul_d_add_prop(MP_DIGITS(a), useda, b_i, MP_DIGITS(c) + ib);
    /* Reduction pass to clear digit ib of c. */
    m_i = MP_DIGIT(c, ib) * mmm->n0prime;
    s_mp_mul_d_add_offset(&mmm->N, m_i, c, ib);
  }
  /* If b was shorter than N, finish the remaining reduction passes. */
  if (usedb < MP_USED(&mmm->N)) {
    for (usedb = MP_USED(&mmm->N); ib < usedb; ++ib ) {
      m_i = MP_DIGIT(c, ib) * mmm->n0prime;
      s_mp_mul_d_add_offset(&mmm->N, m_i, c, ib);
    }
  }
  s_mp_clamp(c);
  s_mp_rshd( c, MP_USED(&mmm->N) ); /* c /= R */
  /* Conditionally subtract N once to bring c back under the modulus. */
  if (s_mp_cmp(c, &mmm->N) >= 0) {
    MP_CHECKOK( s_mp_sub(c, &mmm->N) );
  }
  res = MP_OKAY;

CLEANUP:
  return res;
}
| 135 #endif | |
| 136 | |
STATIC
/* Convert x into Montgomery form: xMont = x * R mod N, where
 * R = RADIX ** MP_USED(N).  In-place use (x == xMont) is exercised by
 * callers in this file.  Returns an MPI error code.
 */
mp_err s_mp_to_mont(const mp_int *x, mp_mont_modulus *mmm, mp_int *xMont)
{
  mp_err res;

  /* xMont = x * R mod N   where  N is modulus */
  MP_CHECKOK( mp_copy( x, xMont ) );
  MP_CHECKOK( s_mp_lshd( xMont, MP_USED(&mmm->N) ) ); /* xMont = x << b */
  MP_CHECKOK( mp_div(xMont, &mmm->N, 0, xMont) );     /*  mod N */
CLEANUP:
  return res;
}
| 149 | |
#ifdef MP_USING_MONT_MULF

/* the floating point multiply is already cache safe,
 * don't turn on cache safe unless we specifically
 * force it */
#ifndef MP_FORCE_CACHE_SAFE
#undef MP_USING_CACHE_SAFE_MOD_EXP
#endif

/* Runtime-visible flag: non-zero in builds that use the floating-point
 * Montgomery multiply path (montmulf.h). */
unsigned int mp_using_mont_mulf = 1;

/* computes montgomery square of the integer in mResult */
/* NOTE: both macros expand names (dm1, d16Tmp, mResult, nLen, dTmp, dn,
 * modulus, dn0, oddPowers) that must be in scope at the expansion site;
 * they are only used inside mp_exptmod_f below. */
#define SQR \
 conv_i32_to_d32_and_d16(dm1, d16Tmp, mResult, nLen); \
 mont_mulf_noconv(mResult, dm1, d16Tmp, \
      dTmp, dn, MP_DIGITS(modulus), nLen, dn0)

/* computes montgomery product of x and the integer in mResult */
#define MUL(x) \
 conv_i32_to_d32(dm1, mResult, nLen); \
 mont_mulf_noconv(mResult, dm1, oddPowers[x], \
      dTmp, dn, MP_DIGITS(modulus), nLen, dn0)
| 172 | |
| 173 /* Do modular exponentiation using floating point multiply code. */ | |
| 174 mp_err mp_exptmod_f(const mp_int * montBase, | |
| 175 const mp_int * exponent, | |
| 176 const mp_int * modulus, | |
| 177 mp_int * result, | |
| 178 mp_mont_modulus *mmm, | |
| 179 int nLen, | |
| 180 mp_size bits_in_exponent, | |
| 181 mp_size window_bits, | |
| 182 mp_size odd_ints) | |
| 183 { | |
| 184 mp_digit *mResult; | |
| 185 double *dBuf = 0, *dm1, *dn, *dSqr, *d16Tmp, *dTmp; | |
| 186 double dn0; | |
| 187 mp_size i; | |
| 188 mp_err res; | |
| 189 int expOff; | |
| 190 int dSize = 0, oddPowSize, dTmpSize; | |
| 191 mp_int accum1; | |
| 192 double *oddPowers[MAX_ODD_INTS]; | |
| 193 | |
| 194 /* function for computing n0prime only works if n0 is odd */ | |
| 195 | |
| 196 MP_DIGITS(&accum1) = 0; | |
| 197 | |
| 198 for (i = 0; i < MAX_ODD_INTS; ++i) | |
| 199 oddPowers[i] = 0; | |
| 200 | |
| 201 MP_CHECKOK( mp_init_size(&accum1, 3 * nLen + 2) ); | |
| 202 | |
| 203 mp_set(&accum1, 1); | |
| 204 MP_CHECKOK( s_mp_to_mont(&accum1, mmm, &accum1) ); | |
| 205 MP_CHECKOK( s_mp_pad(&accum1, nLen) ); | |
| 206 | |
| 207 oddPowSize = 2 * nLen + 1; | |
| 208 dTmpSize = 2 * oddPowSize; | |
| 209 dSize = sizeof(double) * (nLen * 4 + 1 + | |
| 210 ((odd_ints + 1) * oddPowSize) + dTmpSize); | |
| 211 dBuf = (double *)malloc(dSize); | |
| 212 dm1 = dBuf; /* array of d32 */ | |
| 213 dn = dBuf + nLen; /* array of d32 */ | |
| 214 dSqr = dn + nLen; /* array of d32 */ | |
| 215 d16Tmp = dSqr + nLen; /* array of d16 */ | |
| 216 dTmp = d16Tmp + oddPowSize; | |
| 217 | |
| 218 for (i = 0; i < odd_ints; ++i) { | |
| 219 oddPowers[i] = dTmp; | |
| 220 dTmp += oddPowSize; | |
| 221 } | |
| 222 mResult = (mp_digit *)(dTmp + dTmpSize); /* size is nLen + 1 */ | |
| 223 | |
| 224 /* Make dn and dn0 */ | |
| 225 conv_i32_to_d32(dn, MP_DIGITS(modulus), nLen); | |
| 226 dn0 = (double)(mmm->n0prime & 0xffff); | |
| 227 | |
| 228 /* Make dSqr */ | |
| 229 conv_i32_to_d32_and_d16(dm1, oddPowers[0], MP_DIGITS(montBase), nLen); | |
| 230 mont_mulf_noconv(mResult, dm1, oddPowers[0], | |
| 231 dTmp, dn, MP_DIGITS(modulus), nLen, dn0); | |
| 232 conv_i32_to_d32(dSqr, mResult, nLen); | |
| 233 | |
| 234 for (i = 1; i < odd_ints; ++i) { | |
| 235 mont_mulf_noconv(mResult, dSqr, oddPowers[i - 1], | |
| 236 dTmp, dn, MP_DIGITS(modulus), nLen, dn0); | |
| 237 conv_i32_to_d16(oddPowers[i], mResult, nLen); | |
| 238 } | |
| 239 | |
| 240 s_mp_copy(MP_DIGITS(&accum1), mResult, nLen); /* from, to, len */ | |
| 241 | |
| 242 for (expOff = bits_in_exponent - window_bits; expOff >= 0; expOff -= window_bi
ts) { | |
| 243 mp_size smallExp; | |
| 244 MP_CHECKOK( mpl_get_bits(exponent, expOff, window_bits) ); | |
| 245 smallExp = (mp_size)res; | |
| 246 | |
| 247 if (window_bits == 1) { | |
| 248 if (!smallExp) { | |
| 249 SQR; | |
| 250 } else if (smallExp & 1) { | |
| 251 SQR; MUL(0); | |
| 252 } else { | |
| 253 abort(); | |
| 254 } | |
| 255 } else if (window_bits == 4) { | |
| 256 if (!smallExp) { | |
| 257 SQR; SQR; SQR; SQR; | |
| 258 } else if (smallExp & 1) { | |
| 259 SQR; SQR; SQR; SQR; MUL(smallExp/2); | |
| 260 } else if (smallExp & 2) { | |
| 261 SQR; SQR; SQR; MUL(smallExp/4); SQR; | |
| 262 } else if (smallExp & 4) { | |
| 263 SQR; SQR; MUL(smallExp/8); SQR; SQR; | |
| 264 } else if (smallExp & 8) { | |
| 265 SQR; MUL(smallExp/16); SQR; SQR; SQR; | |
| 266 } else { | |
| 267 abort(); | |
| 268 } | |
| 269 } else if (window_bits == 5) { | |
| 270 if (!smallExp) { | |
| 271 SQR; SQR; SQR; SQR; SQR; | |
| 272 } else if (smallExp & 1) { | |
| 273 SQR; SQR; SQR; SQR; SQR; MUL(smallExp/2); | |
| 274 } else if (smallExp & 2) { | |
| 275 SQR; SQR; SQR; SQR; MUL(smallExp/4); SQR; | |
| 276 } else if (smallExp & 4) { | |
| 277 SQR; SQR; SQR; MUL(smallExp/8); SQR; SQR; | |
| 278 } else if (smallExp & 8) { | |
| 279 SQR; SQR; MUL(smallExp/16); SQR; SQR; SQR; | |
| 280 } else if (smallExp & 0x10) { | |
| 281 SQR; MUL(smallExp/32); SQR; SQR; SQR; SQR; | |
| 282 } else { | |
| 283 abort(); | |
| 284 } | |
| 285 } else if (window_bits == 6) { | |
| 286 if (!smallExp) { | |
| 287 SQR; SQR; SQR; SQR; SQR; SQR; | |
| 288 } else if (smallExp & 1) { | |
| 289 SQR; SQR; SQR; SQR; SQR; SQR; MUL(smallExp/2); | |
| 290 } else if (smallExp & 2) { | |
| 291 SQR; SQR; SQR; SQR; SQR; MUL(smallExp/4); SQR; | |
| 292 } else if (smallExp & 4) { | |
| 293 SQR; SQR; SQR; SQR; MUL(smallExp/8); SQR; SQR; | |
| 294 } else if (smallExp & 8) { | |
| 295 SQR; SQR; SQR; MUL(smallExp/16); SQR; SQR; SQR; | |
| 296 } else if (smallExp & 0x10) { | |
| 297 SQR; SQR; MUL(smallExp/32); SQR; SQR; SQR; SQR; | |
| 298 } else if (smallExp & 0x20) { | |
| 299 SQR; MUL(smallExp/64); SQR; SQR; SQR; SQR; SQR; | |
| 300 } else { | |
| 301 abort(); | |
| 302 } | |
| 303 } else { | |
| 304 abort(); | |
| 305 } | |
| 306 } | |
| 307 | |
| 308 s_mp_copy(mResult, MP_DIGITS(&accum1), nLen); /* from, to, len */ | |
| 309 | |
| 310 res = s_mp_redc(&accum1, mmm); | |
| 311 mp_exch(&accum1, result); | |
| 312 | |
| 313 CLEANUP: | |
| 314 mp_clear(&accum1); | |
| 315 if (dBuf) { | |
| 316 if (dSize) | |
| 317 memset(dBuf, 0, dSize); | |
| 318 free(dBuf); | |
| 319 } | |
| 320 | |
| 321 return res; | |
| 322 } | |
| 323 #undef SQR | |
| 324 #undef MUL | |
| 325 #endif | |
| 326 | |
/* Square-and-reduce / multiply-and-reduce helpers for mp_exptmod_i.
 * They expand `mmm`, `res`, `oddPowers`, `pa1`, `pa2`, `ptmp` and the
 * CLEANUP label, all of which must be in scope at the expansion site. */
#define SQR(a,b) \
  MP_CHECKOK( mp_sqr(a, b) );\
  MP_CHECKOK( s_mp_redc(b, mmm) )

#if defined(MP_MONT_USE_MP_MUL)
/* Plain multiply followed by an explicit Montgomery reduction. */
#define MUL(x,a,b) \
  MP_CHECKOK( mp_mul(a, oddPowers + (x), b) ); \
  MP_CHECKOK( s_mp_redc(b, mmm) )
#else
/* Fused Montgomery multiply (see s_mp_mul_mont above). */
#define MUL(x,a,b) \
  MP_CHECKOK( s_mp_mul_mont(a, oddPowers + (x), b, mmm) )
#endif

/* Swap the two ping-pong accumulator pointers. */
#define SWAPPA ptmp = pa1; pa1 = pa2; pa2 = ptmp
| 341 | |
| 342 /* Do modular exponentiation using integer multiply code. */ | |
/* Do modular exponentiation using integer multiply code.
 *
 * Computes result = montBase ** exponent (mod modulus) with fixed-window
 * exponentiation (window_bits in {1,4,5,6}; anything else abort()s) over
 * a table of odd powers of the Montgomery-form base.  The two
 * accumulators accum1/accum2 are used ping-pong style via pa1/pa2 so
 * each SQR writes into the "other" buffer; SWAPPA keeps pa1 pointing at
 * the current value.  The multiply in each window is placed among the
 * squares so that the window's trailing zero bits become the squares
 * executed after it.
 */
mp_err mp_exptmod_i(const mp_int *   montBase,
                    const mp_int *   exponent,
                    const mp_int *   modulus,
                    mp_int *         result,
                    mp_mont_modulus *mmm,
                    int              nLen,
                    mp_size          bits_in_exponent,
                    mp_size          window_bits,
                    mp_size          odd_ints)
{
  mp_int *pa1, *pa2, *ptmp;
  mp_size i;
  mp_err  res;
  int     expOff;
  mp_int  accum1, accum2, power2, oddPowers[MAX_ODD_INTS];

  /* power2 = base ** 2; oddPowers[i] = base ** (2*i + 1); */
  /* oddPowers[i] = base ** (2*i + 1); */

  /* Zero all digit pointers first so CLEANUP may safely mp_clear
   * everything even if an early init fails. */
  MP_DIGITS(&accum1) = 0;
  MP_DIGITS(&accum2) = 0;
  MP_DIGITS(&power2) = 0;
  for (i = 0; i < MAX_ODD_INTS; ++i) {
    MP_DIGITS(oddPowers + i) = 0;
  }

  MP_CHECKOK( mp_init_size(&accum1, 3 * nLen + 2) );
  MP_CHECKOK( mp_init_size(&accum2, 3 * nLen + 2) );

  MP_CHECKOK( mp_init_copy(&oddPowers[0], montBase) );

  MP_CHECKOK( mp_init_size(&power2, nLen + 2 * MP_USED(montBase) + 2) );
  MP_CHECKOK( mp_sqr(montBase, &power2) ); /* power2 = montBase ** 2 */
  MP_CHECKOK( s_mp_redc(&power2, mmm) );

  /* Build the odd-power table by repeated multiplication by power2. */
  for (i = 1; i < odd_ints; ++i) {
    MP_CHECKOK( mp_init_size(oddPowers + i, nLen + 2 * MP_USED(&power2) + 2) );
    MP_CHECKOK( mp_mul(oddPowers + (i - 1), &power2, oddPowers + i) );
    MP_CHECKOK( s_mp_redc(oddPowers + i, mmm) );
  }

  /* set accumulator to montgomery residue of 1 */
  mp_set(&accum1, 1);
  MP_CHECKOK( s_mp_to_mont(&accum1, mmm, &accum1) );
  pa1 = &accum1;
  pa2 = &accum2;

  /* Walk the exponent top-down, one window per iteration; assumes
   * bits_in_exponent is a multiple of window_bits. */
  for (expOff = bits_in_exponent - window_bits; expOff >= 0;
       expOff -= window_bits) {
    mp_size smallExp;
    MP_CHECKOK( mpl_get_bits(exponent, expOff, window_bits) );
    smallExp = (mp_size)res;  /* mpl_get_bits returns the bits via res */

    if (window_bits == 1) {
      if (!smallExp) {
        SQR(pa1,pa2); SWAPPA;
      } else if (smallExp & 1) {
        SQR(pa1,pa2); MUL(0,pa2,pa1);
      } else {
        abort();
      }
    } else if (window_bits == 4) {
      if (!smallExp) {
        SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
      } else if (smallExp & 1) {
        SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
        MUL(smallExp/2, pa1,pa2); SWAPPA;
      } else if (smallExp & 2) {
        SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2);
        MUL(smallExp/4,pa2,pa1); SQR(pa1,pa2); SWAPPA;
      } else if (smallExp & 4) {
        SQR(pa1,pa2); SQR(pa2,pa1); MUL(smallExp/8,pa1,pa2);
        SQR(pa2,pa1); SQR(pa1,pa2); SWAPPA;
      } else if (smallExp & 8) {
        SQR(pa1,pa2); MUL(smallExp/16,pa2,pa1); SQR(pa1,pa2);
        SQR(pa2,pa1); SQR(pa1,pa2); SWAPPA;
      } else {
        abort();
      }
    } else if (window_bits == 5) {
      if (!smallExp) {
        SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
        SQR(pa1,pa2); SWAPPA;
      } else if (smallExp & 1) {
        SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
        SQR(pa1,pa2); MUL(smallExp/2,pa2,pa1);
      } else if (smallExp & 2) {
        SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
        MUL(smallExp/4,pa1,pa2); SQR(pa2,pa1);
      } else if (smallExp & 4) {
        SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2);
        MUL(smallExp/8,pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
      } else if (smallExp & 8) {
        SQR(pa1,pa2); SQR(pa2,pa1); MUL(smallExp/16,pa1,pa2);
        SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
      } else if (smallExp & 0x10) {
        SQR(pa1,pa2); MUL(smallExp/32,pa2,pa1); SQR(pa1,pa2);
        SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
      } else {
        abort();
      }
    } else if (window_bits == 6) {
      if (!smallExp) {
        SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
        SQR(pa1,pa2); SQR(pa2,pa1);
      } else if (smallExp & 1) {
        SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
        SQR(pa1,pa2); SQR(pa2,pa1); MUL(smallExp/2,pa1,pa2); SWAPPA;
      } else if (smallExp & 2) {
        SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
        SQR(pa1,pa2); MUL(smallExp/4,pa2,pa1); SQR(pa1,pa2); SWAPPA;
      } else if (smallExp & 4) {
        SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
        MUL(smallExp/8,pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SWAPPA;
      } else if (smallExp & 8) {
        SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2);
        MUL(smallExp/16,pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
        SQR(pa1,pa2); SWAPPA;
      } else if (smallExp & 0x10) {
        SQR(pa1,pa2); SQR(pa2,pa1); MUL(smallExp/32,pa1,pa2);
        SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SWAPPA;
      } else if (smallExp & 0x20) {
        SQR(pa1,pa2); MUL(smallExp/64,pa2,pa1); SQR(pa1,pa2);
        SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SWAPPA;
      } else {
        abort();
      }
    } else {
      abort();
    }
  }

  /* Convert out of Montgomery form and hand the value to the caller. */
  res = s_mp_redc(pa1, mmm);
  mp_exch(pa1, result);

CLEANUP:
  mp_clear(&accum1);
  mp_clear(&accum2);
  mp_clear(&power2);
  for (i = 0; i < odd_ints; ++i) {
    mp_clear(oddPowers + i);
  }
  return res;
}
| 486 #undef SQR | |
| 487 #undef MUL | |
| 488 | |
#ifdef MP_USING_CACHE_SAFE_MOD_EXP
/* Runtime switch (see mp_set_safe_modexp below): non-zero selects the
 * cache-attack-resistant exponentiation path. */
unsigned int mp_using_cache_safe_exp = 1;
#endif
| 492 | |
| 493 mp_err mp_set_safe_modexp(int value) | |
| 494 { | |
| 495 #ifdef MP_USING_CACHE_SAFE_MOD_EXP | |
| 496 mp_using_cache_safe_exp = value; | |
| 497 return MP_OKAY; | |
| 498 #else | |
| 499 if (value == 0) { | |
| 500 return MP_OKAY; | |
| 501 } | |
| 502 return MP_BADARG; | |
| 503 #endif | |
| 504 } | |
| 505 | |
| 506 #ifdef MP_USING_CACHE_SAFE_MOD_EXP | |
| 507 #define WEAVE_WORD_SIZE 4 | |
| 508 | |
| 509 #ifndef MP_CHAR_STORE_SLOW | |
| 510 /* | |
| 511 * mpi_to_weave takes an array of bignums, a matrix in which each bignum | |
| 512 * occupies all the columns of a row, and transposes it into a matrix in | |
| 513 * which each bignum occupies a column of every row. The first row of the | |
| 514 * input matrix becomes the first column of the output matrix. The n'th | |
| 515 * row of input becomes the n'th column of output. The input data is said | |
| 516 * to be "interleaved" or "woven" into the output matrix. | |
| 517 * | |
| 518 * The array of bignums is left in this woven form. Each time a single | |
| 519 * bignum value is needed, it is recreated by fetching the n'th column, | |
| 520 * forming a single row which is the new bignum. | |
| 521 * | |
| 522 * The purpose of this interleaving is make it impossible to determine which | |
| 523 * of the bignums is being used in any one operation by examining the pattern | |
| 524 * of cache misses. | |
| 525 * | |
| 526 * The weaving function does not transpose the entire input matrix in one call. | |
| 527 * It transposes 4 rows of mp_ints into their respective columns of output. | |
| 528 * | |
| 529 * There are two different implementations of the weaving and unweaving code | |
| 530 * in this file. One uses byte loads and stores. The second uses loads and | |
| 531 * stores of mp_weave_word size values. The weaved forms of these two | |
| 532 * implementations differ. Consequently, each one has its own explanation. | |
| 533 * | |
| 534 * Here is the explanation for the byte-at-a-time implementation. | |
| 535 * | |
| 536 * This implementation treats each mp_int bignum as an array of bytes, | |
| 537 * rather than as an array of mp_digits. It stores those bytes as a | |
| 538 * column of bytes in the output matrix. It doesn't care if the machine | |
| 539 * uses big-endian or little-endian byte ordering within mp_digits. | |
| 540 * The first byte of the mp_digit array becomes the first byte in the output | |
| 541 * column, regardless of whether that byte is the MSB or LSB of the mp_digit. | |
| 542 * | |
| 543 * "bignums" is an array of mp_ints. | |
| 544 * It points to four rows, four mp_ints, a subset of a larger array of mp_ints. | |
| 545 * | |
| 546 * "weaved" is the weaved output matrix. | |
| 547 * The first byte of bignums[0] is stored in weaved[0]. | |
| 548 * | |
| 549 * "nBignums" is the total number of bignums in the array of which "bignums" | |
| 550 * is a part. | |
| 551 * | |
| 552 * "nDigits" is the size in mp_digits of each mp_int in the "bignums" array. | |
| 553 * mp_ints that use less than nDigits digits are logically padded with zeros | |
| 554 * while being stored in the weaved array. | |
| 555 */ | |
| 556 mp_err mpi_to_weave(const mp_int *bignums, | |
| 557 unsigned char *weaved, | |
| 558 mp_size nDigits, /* in each mp_int of input */ | |
| 559 mp_size nBignums) /* in the entire source array */ | |
| 560 { | |
| 561 mp_size i; | |
| 562 unsigned char * endDest = weaved + (nDigits * nBignums * sizeof(mp_digit)); | |
| 563 | |
| 564 for (i=0; i < WEAVE_WORD_SIZE; i++) { | |
| 565 mp_size used = MP_USED(&bignums[i]); | |
| 566 unsigned char *pSrc = (unsigned char *)MP_DIGITS(&bignums[i]); | |
| 567 unsigned char *endSrc = pSrc + (used * sizeof(mp_digit)); | |
| 568 unsigned char *pDest = weaved + i; | |
| 569 | |
| 570 ARGCHK(MP_SIGN(&bignums[i]) == MP_ZPOS, MP_BADARG); | |
| 571 ARGCHK(used <= nDigits, MP_BADARG); | |
| 572 | |
| 573 for (; pSrc < endSrc; pSrc++) { | |
| 574 *pDest = *pSrc; | |
| 575 pDest += nBignums; | |
| 576 } | |
| 577 while (pDest < endDest) { | |
| 578 *pDest = 0; | |
| 579 pDest += nBignums; | |
| 580 } | |
| 581 } | |
| 582 | |
| 583 return MP_OKAY; | |
| 584 } | |
| 585 | |
| 586 /* Reverse the operation above for one mp_int. | |
| 587 * Reconstruct one mp_int from its column in the weaved array. | |
| 588 * "pSrc" points to the offset into the weave array of the bignum we | |
| 589 * are going to reconstruct. | |
| 590 */ | |
| 591 mp_err weave_to_mpi(mp_int *a, /* output, result */ | |
| 592 const unsigned char *pSrc, /* input, byte matrix */ | |
| 593 mp_size nDigits, /* per mp_int output */ | |
| 594 mp_size nBignums) /* bignums in weaved matrix */ | |
| 595 { | |
| 596 unsigned char *pDest = (unsigned char *)MP_DIGITS(a); | |
| 597 unsigned char *endDest = pDest + (nDigits * sizeof(mp_digit)); | |
| 598 | |
| 599 MP_SIGN(a) = MP_ZPOS; | |
| 600 MP_USED(a) = nDigits; | |
| 601 | |
| 602 for (; pDest < endDest; pSrc += nBignums, pDest++) { | |
| 603 *pDest = *pSrc; | |
| 604 } | |
| 605 s_mp_clamp(a); | |
| 606 return MP_OKAY; | |
| 607 } | |
| 608 | |
| 609 #else | |
| 610 | |
/* Need a primitive that we know is 32 bits long... */
/* this is true on all modern processors we know of today*/
/* NOTE(review): the unrolled weave code below hard-codes this width;
 * a fixed-width type like uint32_t would make the assumption explicit. */
typedef unsigned int mp_weave_word;
| 614 | |
| 615 /* | |
| 616 * on some platforms character stores into memory is very expensive since they | |
| 617 * generate a read/modify/write operation on the bus. On those platforms | |
| 618 * we need to do integer writes to the bus. Because of some unrolled code, | |
| 619 * in this current code the size of mp_weave_word must be four. The code that | |
| 620 * makes this assumption explicitly is called out. (on some platforms a write | |
| 621 * of 4 bytes still requires a single read-modify-write operation. | |
| 622 * | |
| 623 * This function takes the identical parameters as the function above, | |
| 624 * however it lays out the final array differently. Where the previous function | |
| 625 * treats the mpi_int as a byte array, this function treats it as an array of | |
| 626 * mp_digits where each digit is stored in big endian order. | |
| 627 * | |
| 628 * since we need to interleave on a byte by byte basis, we need to collect | |
| 629 * several mpi structures together into a single PRUint32 before we write. We | |
| 630 * also need to make sure the PRUint32 is arranged so that the first value of | |
| 631 * the first array winds up in b[0]. This means construction of that PRUint32 | |
| 632 * is endian specific (even though the layout of the mp_digits in the array | |
| 633 * is always big endian). | |
| 634 * | |
| 635 * The final data is stored as follows : | |
| 636 * | |
| 637 * Our same logical array p array, m is sizeof(mp_digit), | |
| 638 * N is still count and n is now b_size. If we define p[i].digit[j]0 as the | |
| 639 * most significant byte of the word p[i].digit[j], p[i].digit[j]1 as | |
| 640 * the next most significant byte of p[i].digit[j], ... and p[i].digit[j]m-1 | |
| 641 * is the least significant byte. | |
| 642 * Our array would look like: | |
| 643 * p[0].digit[0]0 p[1].digit[0]0 ... p[N-2].digit[0]0 p[N-1].digit[0]
0 | |
| 644 * p[0].digit[0]1 p[1].digit[0]1 ... p[N-2].digit[0]1 p[N-1].digit[0]
1 | |
| 645 * . . | |
| 646 * p[0].digit[0]m-1 p[1].digit[0]m-1 ... p[N-2].digit[0]m-1 p[N-1].digit[0]
m-1 | |
| 647 * p[0].digit[1]0 p[1].digit[1]0 ... p[N-2].digit[1]0 p[N-1].digit[1]
0 | |
| 648 * . . | |
| 649 * . . | |
| 650 * p[0].digit[n-1]m-2 p[1].digit[n-1]m-2 ... p[N-2].digit[n-1]m-2 p[N-1].digit[n
-1]m-2 | |
| 651 * p[0].digit[n-1]m-1 p[1].digit[n-1]m-1 ... p[N-2].digit[n-1]m-1 p[N-1].digit[n
-1]m-1 | |
| 652 * | |
| 653 */ | |
/* Word-store variant of mpi_to_weave (see the long comment above).
 * Interleaves 4 bignums into `b` using 32-bit stores, laying each digit
 * out big-endian byte by byte.  `b_size` is digits per bignum; `count`
 * is the number of bignums in the whole weaved matrix. */
mp_err mpi_to_weave(const mp_int *a, unsigned char *b,
                    mp_size b_size, mp_size count)
{
  mp_size i;
  mp_digit *digitsa0;
  mp_digit *digitsa1;
  mp_digit *digitsa2;
  mp_digit *digitsa3;
  mp_size   useda0;
  mp_size   useda1;
  mp_size   useda2;
  mp_size   useda3;
  mp_weave_word *weaved = (mp_weave_word *)b;

  /* convert the output stride from bytes to 32-bit words */
  count = count/sizeof(mp_weave_word);

  /* this code pretty much depends on this ! */
#if MP_ARGCHK == 2
  assert(WEAVE_WORD_SIZE == 4);
  assert(sizeof(mp_weave_word) == 4);
#endif

  digitsa0 = MP_DIGITS(&a[0]);
  digitsa1 = MP_DIGITS(&a[1]);
  digitsa2 = MP_DIGITS(&a[2]);
  digitsa3 = MP_DIGITS(&a[3]);
  useda0 = MP_USED(&a[0]);
  useda1 = MP_USED(&a[1]);
  useda2 = MP_USED(&a[2]);
  useda3 = MP_USED(&a[3]);

  ARGCHK(MP_SIGN(&a[0]) == MP_ZPOS, MP_BADARG);
  ARGCHK(MP_SIGN(&a[1]) == MP_ZPOS, MP_BADARG);
  ARGCHK(MP_SIGN(&a[2]) == MP_ZPOS, MP_BADARG);
  ARGCHK(MP_SIGN(&a[3]) == MP_ZPOS, MP_BADARG);
  ARGCHK(useda0 <= b_size, MP_BADARG);
  ARGCHK(useda1 <= b_size, MP_BADARG);
  ARGCHK(useda2 <= b_size, MP_BADARG);
  ARGCHK(useda3 <= b_size, MP_BADARG);

/* Read digit `word` of a bignum, treating digits beyond `used` as zero
 * (this is how shorter bignums are logically zero-padded). */
#define SAFE_FETCH(digit, used, word) ((word) < (used) ? (digit[word]) : 0)

  for (i=0; i < b_size; i++) {
    mp_digit d0 = SAFE_FETCH(digitsa0,useda0,i);
    mp_digit d1 = SAFE_FETCH(digitsa1,useda1,i);
    mp_digit d2 = SAFE_FETCH(digitsa2,useda2,i);
    mp_digit d3 = SAFE_FETCH(digitsa3,useda3,i);
    register mp_weave_word acc;

/*
 * ONE_STEP takes the MSB of each of our current digits and places that
 * byte in the appropriate position for writing to the weaved array.
 *  On little endian:
 *   b3 b2 b1 b0
 *  On big endian:
 *   b0 b1 b2 b3
 * When the data is written it would always wind up:
 *   b[0] = b0
 *   b[1] = b1
 *   b[2] = b2
 *   b[3] = b3
 *
 * Once we've written the MSB, we shift the whole digit up left one
 * byte, putting the Next Most Significant Byte in the MSB position,
 * so when we repeat the next one step that byte will be written.
 * NOTE: This code assumes sizeof(mp_weave_word) and MP_WEAVE_WORD_SIZE
 * is 4.
 */
#ifdef MP_IS_LITTLE_ENDIAN
#define MPI_WEAVE_ONE_STEP \
    acc  = (d0 >> (MP_DIGIT_BIT-8))  & 0x000000ff; d0 <<= 8; /*b0*/ \
    acc |= (d1 >> (MP_DIGIT_BIT-16)) & 0x0000ff00; d1 <<= 8; /*b1*/ \
    acc |= (d2 >> (MP_DIGIT_BIT-24)) & 0x00ff0000; d2 <<= 8; /*b2*/ \
    acc |= (d3 >> (MP_DIGIT_BIT-32)) & 0xff000000; d3 <<= 8; /*b3*/ \
    *weaved = acc; weaved += count;
#else
#define MPI_WEAVE_ONE_STEP \
    acc  = (d0 >> (MP_DIGIT_BIT-32)) & 0xff000000; d0 <<= 8; /*b0*/ \
    acc |= (d1 >> (MP_DIGIT_BIT-24)) & 0x00ff0000; d1 <<= 8; /*b1*/ \
    acc |= (d2 >> (MP_DIGIT_BIT-16)) & 0x0000ff00; d2 <<= 8; /*b2*/ \
    acc |= (d3 >> (MP_DIGIT_BIT-8))  & 0x000000ff; d3 <<= 8; /*b3*/ \
    *weaved = acc; weaved += count;
#endif
   /* Unrolled: each fallthrough group below emits exactly
    * sizeof(mp_digit) ONE_STEPs in total (16+8+4+2+1+1 down the chain),
    * one per byte of the digit, most significant byte first. */
   switch (sizeof(mp_digit)) {
   case 32:
    MPI_WEAVE_ONE_STEP
    MPI_WEAVE_ONE_STEP
    MPI_WEAVE_ONE_STEP
    MPI_WEAVE_ONE_STEP
    MPI_WEAVE_ONE_STEP
    MPI_WEAVE_ONE_STEP
    MPI_WEAVE_ONE_STEP
    MPI_WEAVE_ONE_STEP
    MPI_WEAVE_ONE_STEP
    MPI_WEAVE_ONE_STEP
    MPI_WEAVE_ONE_STEP
    MPI_WEAVE_ONE_STEP
    MPI_WEAVE_ONE_STEP
    MPI_WEAVE_ONE_STEP
    MPI_WEAVE_ONE_STEP
    MPI_WEAVE_ONE_STEP
    /* FALLTHRU */
   case 16:
    MPI_WEAVE_ONE_STEP
    MPI_WEAVE_ONE_STEP
    MPI_WEAVE_ONE_STEP
    MPI_WEAVE_ONE_STEP
    MPI_WEAVE_ONE_STEP
    MPI_WEAVE_ONE_STEP
    MPI_WEAVE_ONE_STEP
    MPI_WEAVE_ONE_STEP
    /* FALLTHRU */
   case 8:
    MPI_WEAVE_ONE_STEP
    MPI_WEAVE_ONE_STEP
    MPI_WEAVE_ONE_STEP
    MPI_WEAVE_ONE_STEP
    /* FALLTHRU */
   case 4:
    MPI_WEAVE_ONE_STEP
    MPI_WEAVE_ONE_STEP
    /* FALLTHRU */
   case 2:
    MPI_WEAVE_ONE_STEP
    /* FALLTHRU */
   case 1:
    MPI_WEAVE_ONE_STEP
    break;
   }
  }

  return MP_OKAY;
}
| 782 | |
| 783 /* reverse the operation above for one entry. | |
| 784 * b points to the offset into the weave array of the power we are | |
| 785 * calculating */ | |
/* reverse the operation above for one entry.
 * b points to the offset into the weave array of the power we are
 * calculating.  Each output digit is reassembled big-endian from
 * sizeof(mp_digit) bytes read with stride `count`: one byte before the
 * switch, sizeof(mp_digit)-2 inside it (fallthrough chain), and one
 * final byte after it. */
mp_err weave_to_mpi(mp_int *a, const unsigned char *b,
                    mp_size b_size, mp_size count)
{
  mp_digit *pb = MP_DIGITS(a);
  mp_digit *end = &pb[b_size];

  MP_SIGN(a) = MP_ZPOS;
  MP_USED(a) = b_size;

  for (; pb < end; pb++) {
    register mp_digit digit;

    /* most significant byte first */
    digit = *b << 8; b += count;
#define MPI_UNWEAVE_ONE_STEP digit |= *b; b += count; digit = digit << 8;
    switch (sizeof(mp_digit)) {
    case 32:
        MPI_UNWEAVE_ONE_STEP
        MPI_UNWEAVE_ONE_STEP
        MPI_UNWEAVE_ONE_STEP
        MPI_UNWEAVE_ONE_STEP
        MPI_UNWEAVE_ONE_STEP
        MPI_UNWEAVE_ONE_STEP
        MPI_UNWEAVE_ONE_STEP
        MPI_UNWEAVE_ONE_STEP
        MPI_UNWEAVE_ONE_STEP
        MPI_UNWEAVE_ONE_STEP
        MPI_UNWEAVE_ONE_STEP
        MPI_UNWEAVE_ONE_STEP
        MPI_UNWEAVE_ONE_STEP
        MPI_UNWEAVE_ONE_STEP
        MPI_UNWEAVE_ONE_STEP
        MPI_UNWEAVE_ONE_STEP
        /* FALLTHRU */
    case 16:
        MPI_UNWEAVE_ONE_STEP
        MPI_UNWEAVE_ONE_STEP
        MPI_UNWEAVE_ONE_STEP
        MPI_UNWEAVE_ONE_STEP
        MPI_UNWEAVE_ONE_STEP
        MPI_UNWEAVE_ONE_STEP
        MPI_UNWEAVE_ONE_STEP
        MPI_UNWEAVE_ONE_STEP
        /* FALLTHRU */
    case 8:
        MPI_UNWEAVE_ONE_STEP
        MPI_UNWEAVE_ONE_STEP
        MPI_UNWEAVE_ONE_STEP
        MPI_UNWEAVE_ONE_STEP
        /* FALLTHRU */
    case 4:
        MPI_UNWEAVE_ONE_STEP
        MPI_UNWEAVE_ONE_STEP
        /* FALLTHRU */
    case 2:
        break;
    }
    /* least significant byte */
    digit |= *b; b += count;

    *pb = digit;
  }
  s_mp_clamp(a);
  return MP_OKAY;
}
| 845 #endif | |
| 846 | |
| 847 | |
/* Helpers for mp_exptmod_safe_i below.  They expand `mmm`, `res`, `tmp`,
 * `powers`, `nLen`, `num_powers`, `pa1`, `pa2`, `ptmp` and the CLEANUP
 * label, all of which must be in scope at the expansion site. */
#define SQR(a,b) \
  MP_CHECKOK( mp_sqr(a, b) );\
  MP_CHECKOK( s_mp_redc(b, mmm) )

#if defined(MP_MONT_USE_MP_MUL)
/* Plain multiply followed by an explicit Montgomery reduction. */
#define MUL_NOWEAVE(x,a,b) \
  MP_CHECKOK( mp_mul(a, x, b) ); \
  MP_CHECKOK( s_mp_redc(b, mmm) )
#else
/* Fused Montgomery multiply (see s_mp_mul_mont above). */
#define MUL_NOWEAVE(x,a,b) \
  MP_CHECKOK( s_mp_mul_mont(a, x, b, mmm) )
#endif

/* Reconstruct power x from the weaved table into tmp, then multiply.
 * Fetching the whole column keeps the cache-access pattern independent
 * of x. */
#define MUL(x,a,b) \
  MP_CHECKOK( weave_to_mpi(&tmp, powers + (x), nLen, num_powers) ); \
  MUL_NOWEAVE(&tmp,a,b)

/* Swap the two ping-pong accumulator pointers. */
#define SWAPPA ptmp = pa1; pa1 = pa2; pa2 = ptmp
/* Round x up to the next multiple of y (y a power of two). */
#define MP_ALIGN(x,y) ((((ptrdiff_t)(x))+((y)-1))&(((ptrdiff_t)0)-(y)))
| 867 | |
/* Do modular exponentiation using integer multiply code.
 *
 * Cache-safe variant: all 2**window_bits window powers are kept in a
 * byte-interleaved ("weave") array so every table lookup touches the
 * same cache lines regardless of the secret exponent bits, blunting
 * cache-timing attacks.
 *
 * montBase         - the base, already in Montgomery form
 * exponent         - the (typically secret) exponent
 * modulus          - the odd modulus N
 * result           - receives base**exponent mod N (normal form)
 * mmm              - Montgomery context for N
 * nLen             - number of mp_digits in the modulus
 * bits_in_exponent - bit length, already rounded up by the caller to a
 *                    multiple of window_bits
 * window_bits      - exponent window size (callers pass 1, 4, 5 or 6)
 * num_powers       - 2**window_bits, size of the power table
 */
mp_err mp_exptmod_safe_i(const mp_int * montBase,
                         const mp_int * exponent,
                         const mp_int * modulus,
                         mp_int * result,
                         mp_mont_modulus *mmm,
                         int nLen,
                         mp_size bits_in_exponent,
                         mp_size window_bits,
                         mp_size num_powers)
{
  mp_int *pa1, *pa2, *ptmp;   /* ping-pong accumulators for SQR/MUL */
  mp_size i;
  mp_size first_window;
  mp_err res;
  int expOff;
  mp_int accum1, accum2, accum[WEAVE_WORD_SIZE];
  mp_int tmp;
  unsigned char *powersArray = NULL;
  unsigned char *powers = NULL;

  /* Zero all digit pointers so mp_clear in CLEANUP is safe even if we
   * bail out before the corresponding mp_init_size. */
  MP_DIGITS(&accum1) = 0;
  MP_DIGITS(&accum2) = 0;
  MP_DIGITS(&accum[0]) = 0;
  MP_DIGITS(&accum[1]) = 0;
  MP_DIGITS(&accum[2]) = 0;
  MP_DIGITS(&accum[3]) = 0;
  MP_DIGITS(&tmp) = 0;

  /* grab the first window value. This allows us to preload accumulator1
   * and save a conversion, some squares and a multiple*/
  /* NOTE: MP_CHECKOK assigns the call's return into the local 'res',
   * which for mpl_get_bits is the (non-negative) extracted bit value. */
  MP_CHECKOK( mpl_get_bits(exponent,
                           bits_in_exponent-window_bits, window_bits) );
  first_window = (mp_size)res;

  MP_CHECKOK( mp_init_size(&accum1, 3 * nLen + 2) );
  MP_CHECKOK( mp_init_size(&accum2, 3 * nLen + 2) );

  /* build the first WEAVE_WORD powers inline */
  /* if WEAVE_WORD_SIZE is not 4, this code will have to change */
  if (num_powers > 2) {
    MP_CHECKOK( mp_init_size(&accum[0], 3 * nLen + 2) );
    MP_CHECKOK( mp_init_size(&accum[1], 3 * nLen + 2) );
    MP_CHECKOK( mp_init_size(&accum[2], 3 * nLen + 2) );
    MP_CHECKOK( mp_init_size(&accum[3], 3 * nLen + 2) );
    /* accum[0..3] = montgomery(1), base, base**2, base**3 */
    mp_set(&accum[0], 1);
    MP_CHECKOK( s_mp_to_mont(&accum[0], mmm, &accum[0]) );
    MP_CHECKOK( mp_copy(montBase, &accum[1]) );
    SQR(montBase, &accum[2]);
    MUL_NOWEAVE(montBase, &accum[2], &accum[3]);
    /* +1 byte per power leaves room to align the table start below */
    powersArray = (unsigned char *)malloc(num_powers*(nLen*sizeof(mp_digit)+1));
    if (!powersArray) {
      res = MP_MEM;
      goto CLEANUP;
    }
    /* NOTE(review): the trailing backslashes on the next two lines are
     * apparently leftover line continuations (harmless — they merely
     * splice the statements into one logical line). */
    /* powers[i] = base ** (i); */ \
    powers = (unsigned char *)MP_ALIGN(powersArray,num_powers); \
    MP_CHECKOK( mpi_to_weave(accum, powers, nLen, num_powers) );
    if (first_window < 4) {
      MP_CHECKOK( mp_copy(&accum[first_window], &accum1) );
      first_window = num_powers;  /* sentinel: accum1 is loaded */
    }
  } else {
    /* window_bits == 1: no table needed, the window is 0 or 1 */
    if (first_window == 0) {
      mp_set(&accum1, 1);
      MP_CHECKOK( s_mp_to_mont(&accum1, mmm, &accum1) );
    } else {
      /* assert first_window == 1? */
      MP_CHECKOK( mp_copy(montBase, &accum1) );
    }
  }

  /*
   * calculate all the powers in the powers array.
   * this adds 2**(k-1)-2 square operations over just calculating the
   * odd powers where k is the window size in the two other mp_modexpt
   * implementations in this file.  We will get some of that
   * back by not needing the first 'k' squares and one multiply for the
   * first window.
   * Given the value of 4 for WEAVE_WORD_SIZE, this loop will only execute if
   * num_powers > 2, in which case powers will have been allocated.
   */
  for (i = WEAVE_WORD_SIZE; i < num_powers; i++) {
    int acc_index = i & (WEAVE_WORD_SIZE-1); /* i % WEAVE_WORD_SIZE */
    if ( i & 1 ) {
      /* odd power: previous accum entry times the base */
      MUL_NOWEAVE(montBase, &accum[acc_index-1] , &accum[acc_index]);
      /* we've filled the array do our 'per array' processing */
      if (acc_index == (WEAVE_WORD_SIZE-1)) {
        MP_CHECKOK( mpi_to_weave(accum, powers + i - (WEAVE_WORD_SIZE-1),
                                 nLen, num_powers) );

        if (first_window <= i) {
          MP_CHECKOK( mp_copy(&accum[first_window & (WEAVE_WORD_SIZE-1)],
                              &accum1) );
          first_window = num_powers;
        }
      }
    } else {
      /* even power: square power i/2 */
      /* up to 8 we can find 2^i-1 in the accum array, but at 8 we our source
       * and target are the same so we need to copy.. After that, the
       * value is overwritten, so we need to fetch it from the stored
       * weave array */
      if (i > 2* WEAVE_WORD_SIZE) {
        MP_CHECKOK(weave_to_mpi(&accum2, powers+i/2, nLen, num_powers));
        SQR(&accum2, &accum[acc_index]);
      } else {
        int half_power_index = (i/2) & (WEAVE_WORD_SIZE-1);
        if (half_power_index == acc_index) {
          /* copy is cheaper than weave_to_mpi */
          MP_CHECKOK(mp_copy(&accum[half_power_index], &accum2));
          SQR(&accum2,&accum[acc_index]);
        } else {
          SQR(&accum[half_power_index],&accum[acc_index]);
        }
      }
    }
  }
  /* if the accum1 isn't set, Then there is something wrong with our logic
   * above and is an internal programming error.
   */
#if MP_ARGCHK == 2
  assert(MP_USED(&accum1) != 0);
#endif

  /* set accumulator to montgomery residue of 1 */
  pa1 = &accum1;
  pa2 = &accum2;

  /* tmp is not used if window_bits == 1. */
  if (window_bits != 1) {
    MP_CHECKOK( mp_init_size(&tmp, 3 * nLen + 2) );
  }

  /* main square-and-multiply loop; the first window was consumed above,
   * so start at the second window from the top of the exponent. */
  for (expOff = bits_in_exponent - window_bits*2; expOff >= 0; expOff -= window_bits) {
    mp_size smallExp;
    MP_CHECKOK( mpl_get_bits(exponent, expOff, window_bits) );
    smallExp = (mp_size)res;

    /* handle unroll the loops */
    switch (window_bits) {
    case 1:
      if (!smallExp) {
        SQR(pa1,pa2); SWAPPA;
      } else if (smallExp & 1) {
        SQR(pa1,pa2); MUL_NOWEAVE(montBase,pa2,pa1);
      } else {
        abort();  /* 1-bit window can only be 0 or 1 */
      }
      break;
    case 6:
      SQR(pa1,pa2); SQR(pa2,pa1);
      /* fall through */
    case 4:
      SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
      MUL(smallExp, pa1,pa2); SWAPPA;
      break;
    case 5:
      SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
      SQR(pa1,pa2); MUL(smallExp,pa2,pa1);
      break;
    default:
      abort(); /* could do a loop? */
    }
  }

  /* convert back out of Montgomery form */
  res = s_mp_redc(pa1, mmm);
  mp_exch(pa1, result);

CLEANUP:
  mp_clear(&accum1);
  mp_clear(&accum2);
  mp_clear(&accum[0]);
  mp_clear(&accum[1]);
  mp_clear(&accum[2]);
  mp_clear(&accum[3]);
  mp_clear(&tmp);
  /* NOTE(review): the power table holds secret-dependent data but is
   * freed without zeroization (the memset below is commented out) —
   * consider scrubbing before free. */
  /* PORT_Memset(powers,0,num_powers*nLen*sizeof(mp_digit)); */
  free(powersArray);
  return res;
}
| 1048 #undef SQR | |
| 1049 #undef MUL | |
| 1050 #endif | |
| 1051 | |
/* Top-level modular exponentiation: result = inBase**exponent mod modulus.
 *
 * For odd moduli, sets up a Montgomery context, picks an exponent window
 * size based on exponent length (and, when cache-safe mode is enabled,
 * clamps it to the processor cache line size), then dispatches to one of
 * the specialized implementations.  Even moduli fall back to the generic
 * s_mp_exptmod.
 */
mp_err mp_exptmod(const mp_int *inBase, const mp_int *exponent,
                  const mp_int *modulus, mp_int *result)
{
  const mp_int *base;
  mp_size bits_in_exponent, i, window_bits, odd_ints;
  mp_err res;
  int nLen;
  mp_int montBase, goodBase;
  mp_mont_modulus mmm;
#ifdef MP_USING_CACHE_SAFE_MOD_EXP
  /* lazily initialized on first call from the cache line size.
   * NOTE(review): written without synchronization — presumably benign
   * because all racing writers compute the same value; confirm. */
  static unsigned int max_window_bits;
#endif

  /* function for computing n0prime only works if n0 is odd */
  if (!mp_isodd(modulus))
    return s_mp_exptmod(inBase, exponent, modulus, result);

  /* zero digit pointers so mp_clear in CLEANUP is always safe */
  MP_DIGITS(&montBase) = 0;
  MP_DIGITS(&goodBase) = 0;

  /* reduce the base below the modulus if necessary */
  if (mp_cmp(inBase, modulus) < 0) {
    base = inBase;
  } else {
    MP_CHECKOK( mp_init(&goodBase) );
    base = &goodBase;
    MP_CHECKOK( mp_mod(inBase, modulus, &goodBase) );
  }

  nLen = MP_USED(modulus);
  MP_CHECKOK( mp_init_size(&montBase, 2 * nLen + 2) );

  mmm.N = *modulus;  /* a copy of the mp_int struct */

  /* compute n0', given n0, n0' = -(n0 ** -1) mod MP_RADIX
  ** where n0 = least significant mp_digit of N, the modulus.
  */
  mmm.n0prime = 0 - s_mp_invmod_radix( MP_DIGIT(modulus, 0) );

  /* convert the base into Montgomery form */
  MP_CHECKOK( s_mp_to_mont(base, &mmm, &montBase) );

  bits_in_exponent = mpl_significant_bits(exponent);
#ifdef MP_USING_CACHE_SAFE_MOD_EXP
  /* cache-safe tables are bigger (all powers, not just odd ones), so
   * the thresholds for growing the window are higher here */
  if (mp_using_cache_safe_exp) {
    if (bits_in_exponent > 780)
      window_bits = 6;
    else if (bits_in_exponent > 256)
      window_bits = 5;
    else if (bits_in_exponent > 20)
      window_bits = 4;
    /* RSA public key exponents are typically under 20 bits (common values
     * are: 3, 17, 65537) and a 4-bit window is inefficient
     */
    else
      window_bits = 1;
  } else
#endif
  if (bits_in_exponent > 480)
    window_bits = 6;
  else if (bits_in_exponent > 160)
    window_bits = 5;
  else if (bits_in_exponent > 20)
    window_bits = 4;
  /* RSA public key exponents are typically under 20 bits (common values
   * are: 3, 17, 65537) and a 4-bit window is inefficient
   */
  else
    window_bits = 1;

#ifdef MP_USING_CACHE_SAFE_MOD_EXP
  /*
   * clamp the window size based on
   * the cache line size.
   */
  if (!max_window_bits) {
    unsigned long cache_size = s_mpi_getProcessorLineSize();
    /* processor has no cache, use 'fast' code always */
    if (cache_size == 0) {
      mp_using_cache_safe_exp = 0;
    }
    if ((cache_size == 0) || (cache_size >= 64)) {
      max_window_bits = 6;
    } else if (cache_size >= 32) {
      max_window_bits = 5;
    } else if (cache_size >= 16) {
      max_window_bits = 4;
    } else max_window_bits = 1; /* should this be an assert? */
  }

  /* clamp the window size down before we calculate bits_in_exponent */
  if (mp_using_cache_safe_exp) {
    if (window_bits > max_window_bits) {
      window_bits = max_window_bits;
    }
  }
#endif

  odd_ints = 1 << (window_bits - 1);
  /* round the exponent bit count up to a whole number of windows */
  i = bits_in_exponent % window_bits;
  if (i != 0) {
    bits_in_exponent += window_bits - i;
  }

  /* dispatch: floating-point Montgomery, cache-safe, or plain integer */
#ifdef MP_USING_MONT_MULF
  if (mp_using_mont_mulf) {
    MP_CHECKOK( s_mp_pad(&montBase, nLen) );
    res = mp_exptmod_f(&montBase, exponent, modulus, result, &mmm, nLen,
                       bits_in_exponent, window_bits, odd_ints);
  } else
#endif
#ifdef MP_USING_CACHE_SAFE_MOD_EXP
  if (mp_using_cache_safe_exp) {
    res = mp_exptmod_safe_i(&montBase, exponent, modulus, result, &mmm, nLen,
                            bits_in_exponent, window_bits, 1 << window_bits);
  } else
#endif
  res = mp_exptmod_i(&montBase, exponent, modulus, result, &mmm, nLen,
                     bits_in_exponent, window_bits, odd_ints);

CLEANUP:
  mp_clear(&montBase);
  mp_clear(&goodBase);
  /* Don't mp_clear mmm.N because it is merely a copy of modulus.
  ** Just zap it.
  */
  memset(&mmm, 0, sizeof mmm);
  return res;
}
| OLD | NEW |