net/third_party/nss/ssl/mpi/mpmontg.c - Issue 6804032: Add TLS-SRP (RFC 5054) support

Side by Side Diff: net/third_party/nss/ssl/mpi/mpmontg.c

Issue 6804032: Add TLS-SRP (RFC 5054) support Base URL: http://git.chromium.org/git/chromium.git@trunk

Patch Set: Created 9 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
(Empty)
	1 /* *** BEGIN LICENSE BLOCK ***

	2 * Version: MPL 1.1/GPL 2.0/LGPL 2.1

	3 *

	4 * The contents of this file are subject to the Mozilla Public License Version

	5 * 1.1 (the "License"); you may not use this file except in compliance with

	6 * the License. You may obtain a copy of the License at

	7 * http://www.mozilla.org/MPL/

	8 *

	9 * Software distributed under the License is distributed on an "AS IS" basis,

	10 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License

	11 * for the specific language governing rights and limitations under the

	12 * License.

	13 *

	14 * The Original Code is the Netscape security libraries.

	15 *

	16 * The Initial Developer of the Original Code is

	17 * Netscape Communications Corporation.

	18 * Portions created by the Initial Developer are Copyright (C) 2000

	19 * the Initial Developer. All Rights Reserved.

	20 *

	21 * Contributor(s):

	22 * Sheueling Chang Shantz <sheueling.chang@sun.com>,

	23 * Stephen Fung <stephen.fung@sun.com>, and

	24 * Douglas Stebila <douglas@stebila.ca> of Sun Laboratories.

	25 *

	26 * Alternatively, the contents of this file may be used under the terms of

	27 * either the GNU General Public License Version 2 or later (the "GPL"), or

	28 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),

	29 * in which case the provisions of the GPL or the LGPL are applicable instead

	30 * of those above. If you wish to allow use of your version of this file only

	31 * under the terms of either the GPL or the LGPL, and not to allow others to

	32 * use your version of this file under the terms of the MPL, indicate your

	33 * decision by deleting the provisions above and replace them with the notice

	34 * and other provisions required by the GPL or the LGPL. If you do not delete

	35 * the provisions above, a recipient may use your version of this file under

	36 * the terms of any one of the MPL, the GPL or the LGPL.

	37 *

	38 * *** END LICENSE BLOCK *** */

	39 /* $Id: mpmontg.c,v 1.22 2010/05/02 22:36:41 nelson%bolyard.com Exp $ */

	40

	41 /* This file implements modular exponentiation using Montgomery's

	42 * method for modular reduction. This file implements the method

	43 * described as "Improvement 1" in the paper "A Cryptographic Library for

	44 * the Motorola DSP56000" by Stephen R. Dusse' and Burton S. Kaliski Jr.

	45 * published in "Advances in Cryptology: Proceedings of EUROCRYPT '90"

	46 * "Lecture Notes in Computer Science" volume 473, 1991, pg 230-244,

	47 * published by Springer Verlag.

	48 */

	49

	50 #define MP_API_COMPATIBLE 1

	51 #define MP_USING_CACHE_SAFE_MOD_EXP 1

	52 #include <string.h>

	53 #include "mpi-priv.h"

	54 #include "mplogic.h"

	55 #include "mpprime.h"

	56 #ifdef MP_USING_MONT_MULF

	57 #include "montmulf.h"

	58 #endif

	59 #include <stddef.h> /* ptrdiff_t */

	60

	61 /* if MP_CHAR_STORE_SLOW is defined, we */

	62 /* need to know endianness of this platform. */

	63 #ifdef MP_CHAR_STORE_SLOW

	64 #if !defined(MP_IS_BIG_ENDIAN) && !defined(MP_IS_LITTLE_ENDIAN)

	65 #error "You must define MP_IS_BIG_ENDIAN or MP_IS_LITTLE_ENDIAN\n" \

	66 " if you define MP_CHAR_STORE_SLOW."

	67 #endif

	68 #endif

	69

	70 #define STATIC

	71

	72 #define MAX_ODD_INTS 32 /* 2 ** (WINDOW_BITS - 1) */

	73

	74 #if defined(_WIN32_WCE)

	75 #define ABORT res = MP_UNDEF; goto CLEANUP

	76 #else

	77 #define ABORT abort()

	78 #endif

	79

	80 /* computes T = REDC(T), 2^b == R */

	81 mp_err s_mp_redc(mp_int T, mp_mont_modulus mmm)

	82 {

	83 mp_err res;

	84 mp_size i;

	85

	86 i = MP_USED(T) + MP_USED(&mmm->N) + 2;

	87 MP_CHECKOK( s_mp_pad(T, i) );

	88 for (i = 0; i < MP_USED(&mmm->N); ++i ) {

	89 mp_digit m_i = MP_DIGIT(T, i) * mmm->n0prime;

	90 /* T += N * m_i * (MP_RADIX ** i); */

	91 MP_CHECKOK( s_mp_mul_d_add_offset(&mmm->N, m_i, T, i) );

	92 }

	93 s_mp_clamp(T);

	94

	95 /* T /= R */

	96 s_mp_div_2d(T, mmm->b);

	97

	98 if ((res = s_mp_cmp(T, &mmm->N)) >= 0) {

	99 /* T = T - N */

	100 MP_CHECKOK( s_mp_sub(T, &mmm->N) );

	101 #ifdef DEBUG

	102 if ((res = mp_cmp(T, &mmm->N)) >= 0) {

	103 res = MP_UNDEF;

	104 goto CLEANUP;

	105 }

	106 #endif

	107 }

	108 res = MP_OKAY;

	109 CLEANUP:

	110 return res;

	111 }

	112

	113 #if !defined(MP_ASSEMBLY_MUL_MONT) && !defined(MP_MONT_USE_MP_MUL)

	114 mp_err s_mp_mul_mont(const mp_int a, const mp_int b, mp_int *c,

	115 mp_mont_modulus *mmm)

	116 {

	117 mp_digit *pb;

	118 mp_digit m_i;

	119 mp_err res;

	120 mp_size ib;

	121 mp_size useda, usedb;

	122

	123 ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);

	124

	125 if (MP_USED(a) < MP_USED(b)) {

	126 const mp_int xch = b; / switch a and b, to do fewer outer loops */

	127 b = a;

	128 a = xch;

	129 }

	130

	131 MP_USED(c) = 1; MP_DIGIT(c, 0) = 0;

	132 ib = MP_USED(a) + MP_MAX(MP_USED(b), MP_USED(&mmm->N)) + 2;

	133 if((res = s_mp_pad(c, ib)) != MP_OKAY)

	134 goto CLEANUP;

	135

	136 useda = MP_USED(a);

	137 pb = MP_DIGITS(b);

	138 s_mpv_mul_d(MP_DIGITS(a), useda, *pb++, MP_DIGITS(c));

	139 s_mp_setz(MP_DIGITS(c) + useda + 1, ib - (useda + 1));

	140 m_i = MP_DIGIT(c, 0) * mmm->n0prime;

	141 s_mp_mul_d_add_offset(&mmm->N, m_i, c, 0);

	142

	143 /* Outer loop: Digits of b */

	144 usedb = MP_USED(b);

	145 for (ib = 1; ib < usedb; ib++) {

	146 mp_digit b_i = *pb++;

	147

	148 /* Inner product: Digits of a */

	149 if (b_i)

	150 s_mpv_mul_d_add_prop(MP_DIGITS(a), useda, b_i, MP_DIGITS(c) + ib);

	151 m_i = MP_DIGIT(c, ib) * mmm->n0prime;

	152 s_mp_mul_d_add_offset(&mmm->N, m_i, c, ib);

	153 }

	154 if (usedb < MP_USED(&mmm->N)) {

	155 for (usedb = MP_USED(&mmm->N); ib < usedb; ++ib ) {

	156 m_i = MP_DIGIT(c, ib) * mmm->n0prime;

	157 s_mp_mul_d_add_offset(&mmm->N, m_i, c, ib);

	158 }

	159 }

	160 s_mp_clamp(c);

	161 s_mp_div_2d(c, mmm->b);

	162 if (s_mp_cmp(c, &mmm->N) >= 0) {

	163 MP_CHECKOK( s_mp_sub(c, &mmm->N) );

	164 }

	165 res = MP_OKAY;

	166

	167 CLEANUP:

	168 return res;

	169 }

	170 #endif

	171

	172 STATIC

	173 mp_err s_mp_to_mont(const mp_int x, mp_mont_modulus mmm, mp_int *xMont)

	174 {

	175 mp_err res;

	176

	177 /* xMont = x * R mod N where N is modulus */

	178 MP_CHECKOK( mpl_lsh(x, xMont, mmm->b) ); /* xMont = x << b */

	179 MP_CHECKOK( mp_div(xMont, &mmm->N, 0, xMont) ); /* mod N */

	180 CLEANUP:

	181 return res;

	182 }

	183

	184 #ifdef MP_USING_MONT_MULF

	185

	186 /* the floating point multiply is already cache safe,

	187 * don't turn on cache safe unless we specifically

	188 * force it */

	189 #ifndef MP_FORCE_CACHE_SAFE

	190 #undef MP_USING_CACHE_SAFE_MOD_EXP

	191 #endif

	192

	193 unsigned int mp_using_mont_mulf = 1;

	194

	195 /* computes montgomery square of the integer in mResult */

	196 #define SQR \

	197 conv_i32_to_d32_and_d16(dm1, d16Tmp, mResult, nLen); \

	198 mont_mulf_noconv(mResult, dm1, d16Tmp, \

	199 dTmp, dn, MP_DIGITS(modulus), nLen, dn0)

	200

	201 /* computes montgomery product of x and the integer in mResult */

	202 #define MUL(x) \

	203 conv_i32_to_d32(dm1, mResult, nLen); \

	204 mont_mulf_noconv(mResult, dm1, oddPowers[x], \

	205 dTmp, dn, MP_DIGITS(modulus), nLen, dn0)

	206

	207 /* Do modular exponentiation using floating point multiply code. */

	208 mp_err mp_exptmod_f(const mp_int * montBase,

	209 const mp_int * exponent,

	210 const mp_int * modulus,

	211 mp_int * result,

	212 mp_mont_modulus *mmm,

	213 int nLen,

	214 mp_size bits_in_exponent,

	215 mp_size window_bits,

	216 mp_size odd_ints)

	217 {

	218 mp_digit *mResult;

	219 double dBuf = 0, dm1, dn, dSqr, d16Tmp, dTmp;

	220 double dn0;

	221 mp_size i;

	222 mp_err res;

	223 int expOff;

	224 int dSize = 0, oddPowSize, dTmpSize;

	225 mp_int accum1;

	226 double *oddPowers[MAX_ODD_INTS];

	227

	228 /* function for computing n0prime only works if n0 is odd */

	229

	230 MP_DIGITS(&accum1) = 0;

	231

	232 for (i = 0; i < MAX_ODD_INTS; ++i)

	233 oddPowers[i] = 0;

	234

	235 MP_CHECKOK( mp_init_size(&accum1, 3 * nLen + 2) );

	236

	237 mp_set(&accum1, 1);

	238 MP_CHECKOK( s_mp_to_mont(&accum1, mmm, &accum1) );

	239 MP_CHECKOK( s_mp_pad(&accum1, nLen) );

	240

	241 oddPowSize = 2 * nLen + 1;

	242 dTmpSize = 2 * oddPowSize;

	243 dSize = sizeof(double) * (nLen * 4 + 1 +

	244 ((odd_ints + 1) * oddPowSize) + dTmpSize);

	245 dBuf = (double *)malloc(dSize);

	246 dm1 = dBuf; /* array of d32 */

	247 dn = dBuf + nLen; /* array of d32 */

	248 dSqr = dn + nLen; /* array of d32 */

	249 d16Tmp = dSqr + nLen; /* array of d16 */

	250 dTmp = d16Tmp + oddPowSize;

	251

	252 for (i = 0; i < odd_ints; ++i) {

	253 oddPowers[i] = dTmp;

	254 dTmp += oddPowSize;

	255 }

	256 mResult = (mp_digit )(dTmp + dTmpSize); / size is nLen + 1 */

	257

	258 /* Make dn and dn0 */

	259 conv_i32_to_d32(dn, MP_DIGITS(modulus), nLen);

	260 dn0 = (double)(mmm->n0prime & 0xffff);

	261

	262 /* Make dSqr */

	263 conv_i32_to_d32_and_d16(dm1, oddPowers[0], MP_DIGITS(montBase), nLen);

	264 mont_mulf_noconv(mResult, dm1, oddPowers[0],

	265 dTmp, dn, MP_DIGITS(modulus), nLen, dn0);

	266 conv_i32_to_d32(dSqr, mResult, nLen);

	267

	268 for (i = 1; i < odd_ints; ++i) {

	269 mont_mulf_noconv(mResult, dSqr, oddPowers[i - 1],

	270 dTmp, dn, MP_DIGITS(modulus), nLen, dn0);

	271 conv_i32_to_d16(oddPowers[i], mResult, nLen);

	272 }

	273

	274 s_mp_copy(MP_DIGITS(&accum1), mResult, nLen); /* from, to, len */

	275

	276 for (expOff = bits_in_exponent - window_bits; expOff >= 0; expOff -= window_bi ts) {

	277 mp_size smallExp;

	278 MP_CHECKOK( mpl_get_bits(exponent, expOff, window_bits) );

	279 smallExp = (mp_size)res;

	280

	281 if (window_bits == 1) {

	282 if (!smallExp) {

	283 SQR;

	284 } else if (smallExp & 1) {

	285 SQR; MUL(0);

	286 } else {

	287 ABORT;

	288 }

	289 } else if (window_bits == 4) {

	290 if (!smallExp) {

	291 SQR; SQR; SQR; SQR;

	292 } else if (smallExp & 1) {

	293 SQR; SQR; SQR; SQR; MUL(smallExp/2);

	294 } else if (smallExp & 2) {

	295 SQR; SQR; SQR; MUL(smallExp/4); SQR;

	296 } else if (smallExp & 4) {

	297 SQR; SQR; MUL(smallExp/8); SQR; SQR;

	298 } else if (smallExp & 8) {

	299 SQR; MUL(smallExp/16); SQR; SQR; SQR;

	300 } else {

	301 ABORT;

	302 }

	303 } else if (window_bits == 5) {

	304 if (!smallExp) {

	305 SQR; SQR; SQR; SQR; SQR;

	306 } else if (smallExp & 1) {

	307 SQR; SQR; SQR; SQR; SQR; MUL(smallExp/2);

	308 } else if (smallExp & 2) {

	309 SQR; SQR; SQR; SQR; MUL(smallExp/4); SQR;

	310 } else if (smallExp & 4) {

	311 SQR; SQR; SQR; MUL(smallExp/8); SQR; SQR;

	312 } else if (smallExp & 8) {

	313 SQR; SQR; MUL(smallExp/16); SQR; SQR; SQR;

	314 } else if (smallExp & 0x10) {

	315 SQR; MUL(smallExp/32); SQR; SQR; SQR; SQR;

	316 } else {

	317 ABORT;

	318 }

	319 } else if (window_bits == 6) {

	320 if (!smallExp) {

	321 SQR; SQR; SQR; SQR; SQR; SQR;

	322 } else if (smallExp & 1) {

	323 SQR; SQR; SQR; SQR; SQR; SQR; MUL(smallExp/2);

	324 } else if (smallExp & 2) {

	325 SQR; SQR; SQR; SQR; SQR; MUL(smallExp/4); SQR;

	326 } else if (smallExp & 4) {

	327 SQR; SQR; SQR; SQR; MUL(smallExp/8); SQR; SQR;

	328 } else if (smallExp & 8) {

	329 SQR; SQR; SQR; MUL(smallExp/16); SQR; SQR; SQR;

	330 } else if (smallExp & 0x10) {

	331 SQR; SQR; MUL(smallExp/32); SQR; SQR; SQR; SQR;

	332 } else if (smallExp & 0x20) {

	333 SQR; MUL(smallExp/64); SQR; SQR; SQR; SQR; SQR;

	334 } else {

	335 ABORT;

	336 }

	337 } else {

	338 ABORT;

	339 }

	340 }

	341

	342 s_mp_copy(mResult, MP_DIGITS(&accum1), nLen); /* from, to, len */

	343

	344 res = s_mp_redc(&accum1, mmm);

	345 mp_exch(&accum1, result);

	346

	347 CLEANUP:

	348 mp_clear(&accum1);

	349 if (dBuf) {

	350 if (dSize)

	351 memset(dBuf, 0, dSize);

	352 free(dBuf);

	353 }

	354

	355 return res;

	356 }

	357 #undef SQR

	358 #undef MUL

	359 #endif

	360

	361 #define SQR(a,b) \

	362 MP_CHECKOK( mp_sqr(a, b) );\

	363 MP_CHECKOK( s_mp_redc(b, mmm) )

	364

	365 #if defined(MP_MONT_USE_MP_MUL)

	366 #define MUL(x,a,b) \

	367 MP_CHECKOK( mp_mul(a, oddPowers + (x), b) ); \

	368 MP_CHECKOK( s_mp_redc(b, mmm) )

	369 #else

	370 #define MUL(x,a,b) \

	371 MP_CHECKOK( s_mp_mul_mont(a, oddPowers + (x), b, mmm) )

	372 #endif

	373

	374 #define SWAPPA ptmp = pa1; pa1 = pa2; pa2 = ptmp

	375

	376 /* Do modular exponentiation using integer multiply code. */

	377 mp_err mp_exptmod_i(const mp_int * montBase,

	378 const mp_int * exponent,

	379 const mp_int * modulus,

	380 mp_int * result,

	381 mp_mont_modulus *mmm,

	382 int nLen,

	383 mp_size bits_in_exponent,

	384 mp_size window_bits,

	385 mp_size odd_ints)

	386 {

	387 mp_int pa1, pa2, *ptmp;

	388 mp_size i;

	389 mp_err res;

	390 int expOff;

	391 mp_int accum1, accum2, power2, oddPowers[MAX_ODD_INTS];

	392

	393 /* power2 = base 2; oddPowers[i] = base (2i + 1); /

	394 /* oddPowers[i] = base ** (2i + 1); /

	395

	396 MP_DIGITS(&accum1) = 0;

	397 MP_DIGITS(&accum2) = 0;

	398 MP_DIGITS(&power2) = 0;

	399 for (i = 0; i < MAX_ODD_INTS; ++i) {

	400 MP_DIGITS(oddPowers + i) = 0;

	401 }

	402

	403 MP_CHECKOK( mp_init_size(&accum1, 3 * nLen + 2) );

	404 MP_CHECKOK( mp_init_size(&accum2, 3 * nLen + 2) );

	405

	406 MP_CHECKOK( mp_init_copy(&oddPowers[0], montBase) );

	407

	408 mp_init_size(&power2, nLen + 2 * MP_USED(montBase) + 2);

	409 MP_CHECKOK( mp_sqr(montBase, &power2) ); /* power2 = montBase ** 2 */

	410 MP_CHECKOK( s_mp_redc(&power2, mmm) );

	411

	412 for (i = 1; i < odd_ints; ++i) {

	413 mp_init_size(oddPowers + i, nLen + 2 * MP_USED(&power2) + 2);

	414 MP_CHECKOK( mp_mul(oddPowers + (i - 1), &power2, oddPowers + i) );

	415 MP_CHECKOK( s_mp_redc(oddPowers + i, mmm) );

	416 }

	417

	418 /* set accumulator to montgomery residue of 1 */

	419 mp_set(&accum1, 1);

	420 MP_CHECKOK( s_mp_to_mont(&accum1, mmm, &accum1) );

	421 pa1 = &accum1;

	422 pa2 = &accum2;

	423

	424 for (expOff = bits_in_exponent - window_bits; expOff >= 0; expOff -= window_bi ts) {

	425 mp_size smallExp;

	426 MP_CHECKOK( mpl_get_bits(exponent, expOff, window_bits) );

	427 smallExp = (mp_size)res;

	428

	429 if (window_bits == 1) {

	430 if (!smallExp) {

	431 SQR(pa1,pa2); SWAPPA;

	432 } else if (smallExp & 1) {

	433 SQR(pa1,pa2); MUL(0,pa2,pa1);

	434 } else {

	435 ABORT;

	436 }

	437 } else if (window_bits == 4) {

	438 if (!smallExp) {

	439 SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);

	440 } else if (smallExp & 1) {

	441 SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);

	442 MUL(smallExp/2, pa1,pa2); SWAPPA;

	443 } else if (smallExp & 2) {

	444 SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2);

	445 MUL(smallExp/4,pa2,pa1); SQR(pa1,pa2); SWAPPA;

	446 } else if (smallExp & 4) {

	447 SQR(pa1,pa2); SQR(pa2,pa1); MUL(smallExp/8,pa1,pa2);

	448 SQR(pa2,pa1); SQR(pa1,pa2); SWAPPA;

	449 } else if (smallExp & 8) {

	450 SQR(pa1,pa2); MUL(smallExp/16,pa2,pa1); SQR(pa1,pa2);

	451 SQR(pa2,pa1); SQR(pa1,pa2); SWAPPA;

	452 } else {

	453 ABORT;

	454 }

	455 } else if (window_bits == 5) {

	456 if (!smallExp) {

	457 SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);

	458 SQR(pa1,pa2); SWAPPA;

	459 } else if (smallExp & 1) {

	460 SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);

	461 SQR(pa1,pa2); MUL(smallExp/2,pa2,pa1);

	462 } else if (smallExp & 2) {

	463 SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);

	464 MUL(smallExp/4,pa1,pa2); SQR(pa2,pa1);

	465 } else if (smallExp & 4) {

	466 SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2);

	467 MUL(smallExp/8,pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);

	468 } else if (smallExp & 8) {

	469 SQR(pa1,pa2); SQR(pa2,pa1); MUL(smallExp/16,pa1,pa2);

	470 SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);

	471 } else if (smallExp & 0x10) {

	472 SQR(pa1,pa2); MUL(smallExp/32,pa2,pa1); SQR(pa1,pa2);

	473 SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);

	474 } else {

	475 ABORT;

	476 }

	477 } else if (window_bits == 6) {

	478 if (!smallExp) {

	479 SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);

	480 SQR(pa1,pa2); SQR(pa2,pa1);

	481 } else if (smallExp & 1) {

	482 SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);

	483 SQR(pa1,pa2); SQR(pa2,pa1); MUL(smallExp/2,pa1,pa2); SWAPPA;

	484 } else if (smallExp & 2) {

	485 SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);

	486 SQR(pa1,pa2); MUL(smallExp/4,pa2,pa1); SQR(pa1,pa2); SWAPPA;

	487 } else if (smallExp & 4) {

	488 SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);

	489 MUL(smallExp/8,pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SWAPPA;

	490 } else if (smallExp & 8) {

	491 SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2);

	492 MUL(smallExp/16,pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);

	493 SQR(pa1,pa2); SWAPPA;

	494 } else if (smallExp & 0x10) {

	495 SQR(pa1,pa2); SQR(pa2,pa1); MUL(smallExp/32,pa1,pa2);

	496 SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SWAPPA;

	497 } else if (smallExp & 0x20) {

	498 SQR(pa1,pa2); MUL(smallExp/64,pa2,pa1); SQR(pa1,pa2);

	499 SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SWAPPA;

	500 } else {

	501 ABORT;

	502 }

	503 } else {

	504 ABORT;

	505 }

	506 }

	507

	508 res = s_mp_redc(pa1, mmm);

	509 mp_exch(pa1, result);

	510

	511 CLEANUP:

	512 mp_clear(&accum1);

	513 mp_clear(&accum2);

	514 mp_clear(&power2);

	515 for (i = 0; i < odd_ints; ++i) {

	516 mp_clear(oddPowers + i);

	517 }

	518 return res;

	519 }

	520 #undef SQR

	521 #undef MUL

	522

	523 #ifdef MP_USING_CACHE_SAFE_MOD_EXP

	524 unsigned int mp_using_cache_safe_exp = 1;

	525 #endif

	526

	527 mp_err mp_set_safe_modexp(int value)

	528 {

	529 #ifdef MP_USING_CACHE_SAFE_MOD_EXP

	530 mp_using_cache_safe_exp = value;

	531 return MP_OKAY;

	532 #else

	533 if (value == 0) {

	534 return MP_OKAY;

	535 }

	536 return MP_BADARG;

	537 #endif

	538 }

	539

	540 #ifdef MP_USING_CACHE_SAFE_MOD_EXP

	541 #define WEAVE_WORD_SIZE 4

	542

	543 #ifndef MP_CHAR_STORE_SLOW

	544 /*

	545 * mpi_to_weave takes an array of bignums, a matrix in which each bignum

	546 * occupies all the columns of a row, and transposes it into a matrix in

	547 * which each bignum occupies a column of every row. The first row of the

	548 * input matrix becomes the first column of the output matrix. The n'th

	549 * row of input becomes the n'th column of output. The input data is said

	550 * to be "interleaved" or "woven" into the output matrix.

	551 *

	552 * The array of bignums is left in this woven form. Each time a single

	553 * bignum value is needed, it is recreated by fetching the n'th column,

	554 * forming a single row which is the new bignum.

	555 *

	556 * The purpose of this interleaving is make it impossible to determine which

	557 * of the bignums is being used in any one operation by examining the pattern

	558 * of cache misses.

	559 *

	560 * The weaving function does not transpose the entire input matrix in one call.

	561 * It transposes 4 rows of mp_ints into their respective columns of output.

	562 *

	563 * There are two different implementations of the weaving and unweaving code

	564 * in this file. One uses byte loads and stores. The second uses loads and

	565 * stores of mp_weave_word size values. The weaved forms of these two

	566 * implementations differ. Consequently, each one has its own explanation.

	567 *

	568 * Here is the explanation for the byte-at-a-time implementation.

	569 *

	570 * This implementation treats each mp_int bignum as an array of bytes,

	571 * rather than as an array of mp_digits. It stores those bytes as a

	572 * column of bytes in the output matrix. It doesn't care if the machine

	573 * uses big-endian or little-endian byte ordering within mp_digits.

	574 * The first byte of the mp_digit array becomes the first byte in the output

	575 * column, regardless of whether that byte is the MSB or LSB of the mp_digit.

	576 *

	577 * "bignums" is an array of mp_ints.

	578 * It points to four rows, four mp_ints, a subset of a larger array of mp_ints.

	579 *

	580 * "weaved" is the weaved output matrix.

	581 * The first byte of bignums[0] is stored in weaved[0].

	582 *

	583 * "nBignums" is the total number of bignums in the array of which "bignums"

	584 * is a part.

	585 *

	586 * "nDigits" is the size in mp_digits of each mp_int in the "bignums" array.

	587 * mp_ints that use less than nDigits digits are logically padded with zeros

	588 * while being stored in the weaved array.

	589 */

	590 mp_err mpi_to_weave(const mp_int *bignums,

	591 unsigned char *weaved,

	592 mp_size nDigits, /* in each mp_int of input */

	593 mp_size nBignums) /* in the entire source array */

	594 {

	595 mp_size i;

	596 unsigned char * endDest = weaved + (nDigits * nBignums * sizeof(mp_digit));

	597

	598 for (i=0; i < WEAVE_WORD_SIZE; i++) {

	599 mp_size used = MP_USED(&bignums[i]);

	600 unsigned char pSrc = (unsigned char )MP_DIGITS(&bignums[i]);

	601 unsigned char endSrc = pSrc + (used sizeof(mp_digit));

	602 unsigned char *pDest = weaved + i;

	603

	604 ARGCHK(MP_SIGN(&bignums[i]) == MP_ZPOS, MP_BADARG);

	605 ARGCHK(used <= nDigits, MP_BADARG);

	606

	607 for (; pSrc < endSrc; pSrc++) {

	608 pDest = pSrc;

	609 pDest += nBignums;

	610 }

	611 while (pDest < endDest) {

	612 *pDest = 0;

	613 pDest += nBignums;

	614 }

	615 }

	616

	617 return MP_OKAY;

	618 }

	619

	620 /* Reverse the operation above for one mp_int.

	621 * Reconstruct one mp_int from its column in the weaved array.

	622 * "pSrc" points to the offset into the weave array of the bignum we

	623 * are going to reconstruct.

	624 */

	625 mp_err weave_to_mpi(mp_int a, / output, result */

	626 const unsigned char pSrc, / input, byte matrix */

	627 mp_size nDigits, /* per mp_int output */

	628 mp_size nBignums) /* bignums in weaved matrix */

	629 {

	630 unsigned char pDest = (unsigned char )MP_DIGITS(a);

	631 unsigned char endDest = pDest + (nDigits sizeof(mp_digit));

	632

	633 MP_SIGN(a) = MP_ZPOS;

	634 MP_USED(a) = nDigits;

	635

	636 for (; pDest < endDest; pSrc += nBignums, pDest++) {

	637 pDest = pSrc;

	638 }

	639 s_mp_clamp(a);

	640 return MP_OKAY;

	641 }

	642

	643 #else

	644

	645 /* Need a primitive that we know is 32 bits long... */

	646 /* this is true on all modern processors we know of today*/

	647 typedef unsigned int mp_weave_word;

	648

	649 /*

	650 * on some platforms character stores into memory is very expensive since they

	651 * generate a read/modify/write operation on the bus. On those platforms

	652 * we need to do integer writes to the bus. Because of some unrolled code,

	653 * in this current code the size of mp_weave_word must be four. The code that

	654 * makes this assumption explicitly is called out. (on some platforms a write

	655 * of 4 bytes still requires a single read-modify-write operation.)

	656 *

	657 * This function takes the identical parameters as the function above,

	658 * however it lays out the final array differently. Where the previous function

	659 * treats the mpi_int as a byte array, this function treats it as an array of

	660 * mp_digits where each digit is stored in big endian order.

	661 *

	662 * since we need to interleave on a byte by byte basis, we need to collect

	663 * several mpi structures together into a single uint32 before we write. We

	664 * also need to make sure the uint32 is arranged so that the first value of

	665 * the first array winds up in b[0]. This means construction of that uint32

	666 * is endian specific (even though the layout of the mp_digits in the array

	667 * is always big endian).

	668 *

	669 * The final data is stored as follows :

	670 *

	671 * Our same logical array p array, m is sizeof(mp_digit),

	672 * N is still count and n is now b_size. If we define p[i].digit[j]0 as the

	673 * most significant byte of the word p[i].digit[j], p[i].digit[j]1 as

	674 * the next most significant byte of p[i].digit[j], ... and p[i].digit[j]m-1

	675 * is the least significant byte.

	676 * Our array would look like:

	677 * p[0].digit[0]0 p[1].digit[0]0 ... p[N-2].digit[0]0 p[N-1].digit[0] 0

	678 * p[0].digit[0]1 p[1].digit[0]1 ... p[N-2].digit[0]1 p[N-1].digit[0] 1

	679 * . .

	680 * p[0].digit[0]m-1 p[1].digit[0]m-1 ... p[N-2].digit[0]m-1 p[N-1].digit[0] m-1

	681 * p[0].digit[1]0 p[1].digit[1]0 ... p[N-2].digit[1]0 p[N-1].digit[1] 0

	682 * . .

	683 * . .

	684 * p[0].digit[n-1]m-2 p[1].digit[n-1]m-2 ... p[N-2].digit[n-1]m-2 p[N-1].digit[n -1]m-2

	685 * p[0].digit[n-1]m-1 p[1].digit[n-1]m-1 ... p[N-2].digit[n-1]m-1 p[N-1].digit[n -1]m-1

	686 *

	687 */

	688 mp_err mpi_to_weave(const mp_int a, unsigned char b,

	689 mp_size b_size, mp_size count)

	690 {

	691 mp_size i;

	692 mp_digit *digitsa0;

	693 mp_digit *digitsa1;

	694 mp_digit *digitsa2;

	695 mp_digit *digitsa3;

	696 mp_size useda0;

	697 mp_size useda1;

	698 mp_size useda2;

	699 mp_size useda3;

	700 mp_weave_word weaved = (mp_weave_word )b;

	701

	702 count = count/sizeof(mp_weave_word);

	703

	704 /* this code pretty much depends on this ! */

	705 #if MP_ARGCHK == 2

	706 assert(WEAVE_WORD_SIZE == 4);

	707 assert(sizeof(mp_weave_word) == 4);

	708 #endif

	709

	710 digitsa0 = MP_DIGITS(&a[0]);

	711 digitsa1 = MP_DIGITS(&a[1]);

	712 digitsa2 = MP_DIGITS(&a[2]);

	713 digitsa3 = MP_DIGITS(&a[3]);

	714 useda0 = MP_USED(&a[0]);

	715 useda1 = MP_USED(&a[1]);

	716 useda2 = MP_USED(&a[2]);

	717 useda3 = MP_USED(&a[3]);

	718

	719 ARGCHK(MP_SIGN(&a[0]) == MP_ZPOS, MP_BADARG);

	720 ARGCHK(MP_SIGN(&a[1]) == MP_ZPOS, MP_BADARG);

	721 ARGCHK(MP_SIGN(&a[2]) == MP_ZPOS, MP_BADARG);

	722 ARGCHK(MP_SIGN(&a[3]) == MP_ZPOS, MP_BADARG);

	723 ARGCHK(useda0 <= b_size, MP_BADARG);

	724 ARGCHK(useda1 <= b_size, MP_BADARG);

	725 ARGCHK(useda2 <= b_size, MP_BADARG);

	726 ARGCHK(useda3 <= b_size, MP_BADARG);

	727

	728 #define SAFE_FETCH(digit, used, word) ((word) < (used) ? (digit[word]) : 0)

	729

	730 for (i=0; i < b_size; i++) {

	731 mp_digit d0 = SAFE_FETCH(digitsa0,useda0,i);

	732 mp_digit d1 = SAFE_FETCH(digitsa1,useda1,i);

	733 mp_digit d2 = SAFE_FETCH(digitsa2,useda2,i);

	734 mp_digit d3 = SAFE_FETCH(digitsa3,useda3,i);

	735 register mp_weave_word acc;

	736

	737 /*

	738 * ONE_STEP takes the MSB of each of our current digits and places that

	739 * byte in the appropriate position for writing to the weaved array.

	740 * On little endian:

	741 * b3 b2 b1 b0

	742 * On big endian:

	743 * b0 b1 b2 b3

	744 * When the data is written it would always wind up:

	745 * b[0] = b0

	746 * b[1] = b1

	747 * b[2] = b2

	748 * b[3] = b3

	749 *

	750 * Once we've written the MSB, we shift the whole digit up left one

	751 * byte, putting the Next Most Significant Byte in the MSB position,

	752 * so we we repeat the next one step that byte will be written.

	753 * NOTE: This code assumes sizeof(mp_weave_word) and MP_WEAVE_WORD_SIZE

	754 * is 4.

	755 */

	756 #ifdef MP_IS_LITTLE_ENDIAN

	757 #define MPI_WEAVE_ONE_STEP \

	758 acc = (d0 >> (MP_DIGIT_BIT-8)) & 0x000000ff; d0 <<= 8; /b0/ \

	759 acc \|= (d1 >> (MP_DIGIT_BIT-16)) & 0x0000ff00; d1 <<= 8; /b1/ \

	760 acc \|= (d2 >> (MP_DIGIT_BIT-24)) & 0x00ff0000; d2 <<= 8; /b2/ \

	761 acc \|= (d3 >> (MP_DIGIT_BIT-32)) & 0xff000000; d3 <<= 8; /b3/ \

	762 *weaved = acc; weaved += count;

	763 #else

	764 #define MPI_WEAVE_ONE_STEP \

	765 acc = (d0 >> (MP_DIGIT_BIT-32)) & 0xff000000; d0 <<= 8; /b0/ \

	766 acc \|= (d1 >> (MP_DIGIT_BIT-24)) & 0x00ff0000; d1 <<= 8; /b1/ \

	767 acc \|= (d2 >> (MP_DIGIT_BIT-16)) & 0x0000ff00; d2 <<= 8; /b2/ \

	768 acc \|= (d3 >> (MP_DIGIT_BIT-8)) & 0x000000ff; d3 <<= 8; /b3/ \

	769 *weaved = acc; weaved += count;

	770 #endif

	771 switch (sizeof(mp_digit)) {

	772 case 32:

	773 MPI_WEAVE_ONE_STEP

	774 MPI_WEAVE_ONE_STEP

	775 MPI_WEAVE_ONE_STEP

	776 MPI_WEAVE_ONE_STEP

	777 MPI_WEAVE_ONE_STEP

	778 MPI_WEAVE_ONE_STEP

	779 MPI_WEAVE_ONE_STEP

	780 MPI_WEAVE_ONE_STEP

	781 MPI_WEAVE_ONE_STEP

	782 MPI_WEAVE_ONE_STEP

	783 MPI_WEAVE_ONE_STEP

	784 MPI_WEAVE_ONE_STEP

	785 MPI_WEAVE_ONE_STEP

	786 MPI_WEAVE_ONE_STEP

	787 MPI_WEAVE_ONE_STEP

	788 MPI_WEAVE_ONE_STEP

	789 case 16:

	790 MPI_WEAVE_ONE_STEP

	791 MPI_WEAVE_ONE_STEP

	792 MPI_WEAVE_ONE_STEP

	793 MPI_WEAVE_ONE_STEP

	794 MPI_WEAVE_ONE_STEP

	795 MPI_WEAVE_ONE_STEP

	796 MPI_WEAVE_ONE_STEP

	797 MPI_WEAVE_ONE_STEP

	798 case 8:

	799 MPI_WEAVE_ONE_STEP

	800 MPI_WEAVE_ONE_STEP

	801 MPI_WEAVE_ONE_STEP

	802 MPI_WEAVE_ONE_STEP

	803 case 4:

	804 MPI_WEAVE_ONE_STEP

	805 MPI_WEAVE_ONE_STEP

	806 case 2:

	807 MPI_WEAVE_ONE_STEP

	808 case 1:

	809 MPI_WEAVE_ONE_STEP

	810 break;

	811 }

	812 }

	813

	814 return MP_OKAY;

	815 }

	816

	817 /* reverse the operation above for one entry.

	818 * b points to the offset into the weave array of the power we are

	819 * calculating */

	820 mp_err weave_to_mpi(mp_int a, const unsigned char b,

	821 mp_size b_size, mp_size count)

	822 {

	823 mp_digit *pb = MP_DIGITS(a);

	824 mp_digit *end = &pb[b_size];

	825

	826 MP_SIGN(a) = MP_ZPOS;

	827 MP_USED(a) = b_size;

	828

	829 for (; pb < end; pb++) {

	830 register mp_digit digit;

	831

	832 digit = *b << 8; b += count;

	833 #define MPI_UNWEAVE_ONE_STEP digit \|= *b; b += count; digit = digit << 8;

	834 switch (sizeof(mp_digit)) {

	835 case 32:

	836 MPI_UNWEAVE_ONE_STEP

	837 MPI_UNWEAVE_ONE_STEP

	838 MPI_UNWEAVE_ONE_STEP

	839 MPI_UNWEAVE_ONE_STEP

	840 MPI_UNWEAVE_ONE_STEP

	841 MPI_UNWEAVE_ONE_STEP

	842 MPI_UNWEAVE_ONE_STEP

	843 MPI_UNWEAVE_ONE_STEP

	844 MPI_UNWEAVE_ONE_STEP

	845 MPI_UNWEAVE_ONE_STEP

	846 MPI_UNWEAVE_ONE_STEP

	847 MPI_UNWEAVE_ONE_STEP

	848 MPI_UNWEAVE_ONE_STEP

	849 MPI_UNWEAVE_ONE_STEP

	850 MPI_UNWEAVE_ONE_STEP

	851 MPI_UNWEAVE_ONE_STEP

	852 case 16:

	853 MPI_UNWEAVE_ONE_STEP

	854 MPI_UNWEAVE_ONE_STEP

	855 MPI_UNWEAVE_ONE_STEP

	856 MPI_UNWEAVE_ONE_STEP

	857 MPI_UNWEAVE_ONE_STEP

	858 MPI_UNWEAVE_ONE_STEP

	859 MPI_UNWEAVE_ONE_STEP

	860 MPI_UNWEAVE_ONE_STEP

	861 case 8:

	862 MPI_UNWEAVE_ONE_STEP

	863 MPI_UNWEAVE_ONE_STEP

	864 MPI_UNWEAVE_ONE_STEP

	865 MPI_UNWEAVE_ONE_STEP

	866 case 4:

	867 MPI_UNWEAVE_ONE_STEP

	868 MPI_UNWEAVE_ONE_STEP

	869 case 2:

	870 break;

	871 }

	872 digit \|= *b; b += count;

	873

	874 *pb = digit;

	875 }

	876 s_mp_clamp(a);

	877 return MP_OKAY;

	878 }

	879 #endif

	880

	881

	882 #define SQR(a,b) \

	883 MP_CHECKOK( mp_sqr(a, b) );\

	884 MP_CHECKOK( s_mp_redc(b, mmm) )

	885

	886 #if defined(MP_MONT_USE_MP_MUL)

	887 #define MUL_NOWEAVE(x,a,b) \

	888 MP_CHECKOK( mp_mul(a, x, b) ); \

	889 MP_CHECKOK( s_mp_redc(b, mmm) )

	890 #else

	891 #define MUL_NOWEAVE(x,a,b) \

	892 MP_CHECKOK( s_mp_mul_mont(a, x, b, mmm) )

	893 #endif

	894

	895 #define MUL(x,a,b) \

	896 MP_CHECKOK( weave_to_mpi(&tmp, powers + (x), nLen, num_powers) ); \

	897 MUL_NOWEAVE(&tmp,a,b)

	898

	899 #define SWAPPA ptmp = pa1; pa1 = pa2; pa2 = ptmp

	900 #define MP_ALIGN(x,y) ((((ptrdiff_t)(x))+((y)-1))&(((ptrdiff_t)0)-(y)))

	901

	902 /* Do modular exponentiation using integer multiply code. */

	903 mp_err mp_exptmod_safe_i(const mp_int * montBase,

	904 const mp_int * exponent,

	905 const mp_int * modulus,

	906 mp_int * result,

	907 mp_mont_modulus *mmm,

	908 int nLen,

	909 mp_size bits_in_exponent,

	910 mp_size window_bits,

	911 mp_size num_powers)

	912 {

	913 mp_int pa1, pa2, *ptmp;

	914 mp_size i;

	915 mp_size first_window;

	916 mp_err res;

	917 int expOff;

	918 mp_int accum1, accum2, accum[WEAVE_WORD_SIZE];

	919 mp_int tmp;

	920 unsigned char *powersArray;

	921 unsigned char *powers;

	922

	923 MP_DIGITS(&accum1) = 0;

	924 MP_DIGITS(&accum2) = 0;

	925 MP_DIGITS(&accum[0]) = 0;

	926 MP_DIGITS(&accum[1]) = 0;

	927 MP_DIGITS(&accum[2]) = 0;

	928 MP_DIGITS(&accum[3]) = 0;

	929 MP_DIGITS(&tmp) = 0;

	930

	931 powersArray = (unsigned char )malloc(num_powers(nLen*sizeof(mp_digit)+1));

	932 if (powersArray == NULL) {

	933 res = MP_MEM;

	934 goto CLEANUP;

	935 }

	936

	937 /* powers[i] = base ** (i); */

	938 powers = (unsigned char *)MP_ALIGN(powersArray,num_powers);

	939

	940 /* grab the first window value. This allows us to preload accumulator1

	941 * and save a conversion, some squares and a multiple*/

	942 MP_CHECKOK( mpl_get_bits(exponent,

	943 bits_in_exponent-window_bits, window_bits) );

	944 first_window = (mp_size)res;

	945

	946 MP_CHECKOK( mp_init_size(&accum1, 3 * nLen + 2) );

	947 MP_CHECKOK( mp_init_size(&accum2, 3 * nLen + 2) );

	948 MP_CHECKOK( mp_init_size(&tmp, 3 * nLen + 2) );

	949

	950 /* build the first WEAVE_WORD powers inline */

	951 /* if WEAVE_WORD_SIZE is not 4, this code will have to change */

	952 if (num_powers > 2) {

	953 MP_CHECKOK( mp_init_size(&accum[0], 3 * nLen + 2) );

	954 MP_CHECKOK( mp_init_size(&accum[1], 3 * nLen + 2) );

	955 MP_CHECKOK( mp_init_size(&accum[2], 3 * nLen + 2) );

	956 MP_CHECKOK( mp_init_size(&accum[3], 3 * nLen + 2) );

	957 mp_set(&accum[0], 1);

	958 MP_CHECKOK( s_mp_to_mont(&accum[0], mmm, &accum[0]) );

	959 MP_CHECKOK( mp_copy(montBase, &accum[1]) );

	960 SQR(montBase, &accum[2]);

	961 MUL_NOWEAVE(montBase, &accum[2], &accum[3]);

	962 MP_CHECKOK( mpi_to_weave(accum, powers, nLen, num_powers) );

	963 if (first_window < 4) {

	964 MP_CHECKOK( mp_copy(&accum[first_window], &accum1) );

	965 first_window = num_powers;

	966 }

	967 } else {

	968 if (first_window == 0) {

	969 mp_set(&accum1, 1);

	970 MP_CHECKOK( s_mp_to_mont(&accum1, mmm, &accum1) );

	971 } else {

	972 /* assert first_window == 1? */

	973 MP_CHECKOK( mp_copy(montBase, &accum1) );

	974 }

	975 }

	976

	977 /*

	978 * calculate all the powers in the powers array.

	979 * this adds 2**(k-1)-2 square operations over just calculating the

	980 * odd powers where k is the window size in the two other mp_modexpt

	981 * implementations in this file. We will get some of that

	982 * back by not needing the first 'k' squares and one multiply for the

	983 * first window */

	984 for (i = WEAVE_WORD_SIZE; i < num_powers; i++) {

	985 int acc_index = i & (WEAVE_WORD_SIZE-1); /* i % WEAVE_WORD_SIZE */

	986 if ( i & 1 ) {

	987 MUL_NOWEAVE(montBase, &accum[acc_index-1] , &accum[acc_index]);

	988 /* we've filled the array do our 'per array' processing */

	989 if (acc_index == (WEAVE_WORD_SIZE-1)) {

	990 MP_CHECKOK( mpi_to_weave(accum, powers + i - (WEAVE_WORD_SIZE-1),

	991 nLen, num_powers) );

	992

	993 if (first_window <= i) {

	994 MP_CHECKOK( mp_copy(&accum[first_window & (WEAVE_WORD_SIZE-1)],

	995 &accum1) );

	996 first_window = num_powers;

	997 }

	998 }

	999 } else {

	1000 /* up to 8 we can find 2^i-1 in the accum array, but at 8 we our source

	1001 * and target are the same so we need to copy.. After that, the

	1002 * value is overwritten, so we need to fetch it from the stored

	1003 * weave array */

	1004 if (i > 2* WEAVE_WORD_SIZE) {

	1005 MP_CHECKOK(weave_to_mpi(&accum2, powers+i/2, nLen, num_powers));

	1006 SQR(&accum2, &accum[acc_index]);

	1007 } else {

	1008 int half_power_index = (i/2) & (WEAVE_WORD_SIZE-1);

	1009 if (half_power_index == acc_index) {

	1010 /* copy is cheaper than weave_to_mpi */

	1011 MP_CHECKOK(mp_copy(&accum[half_power_index], &accum2));

	1012 SQR(&accum2,&accum[acc_index]);

	1013 } else {

	1014 SQR(&accum[half_power_index],&accum[acc_index]);

	1015 }

	1016 }

	1017 }

	1018 }

	1019 /* if the accum1 isn't set, Then there is something wrong with our logic

	1020 * above and is an internal programming error.

	1021 */

	1022 #if MP_ARGCHK == 2

	1023 assert(MP_USED(&accum1) != 0);

	1024 #endif

	1025

	1026 /* set accumulator to montgomery residue of 1 */

	1027 pa1 = &accum1;

	1028 pa2 = &accum2;

	1029

	1030 for (expOff = bits_in_exponent - window_bits*2; expOff >= 0; expOff -= window_ bits) {

	1031 mp_size smallExp;

	1032 MP_CHECKOK( mpl_get_bits(exponent, expOff, window_bits) );

	1033 smallExp = (mp_size)res;

	1034

	1035 /* handle unroll the loops */

	1036 switch (window_bits) {

	1037 case 1:

	1038 if (!smallExp) {

	1039 SQR(pa1,pa2); SWAPPA;

	1040 } else if (smallExp & 1) {

	1041 SQR(pa1,pa2); MUL_NOWEAVE(montBase,pa2,pa1);

	1042 } else {

	1043 ABORT;

	1044 }

	1045 break;

	1046 case 6:

	1047 SQR(pa1,pa2); SQR(pa2,pa1);

	1048 /* fall through */

	1049 case 4:

	1050 SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);

	1051 MUL(smallExp, pa1,pa2); SWAPPA;

	1052 break;

	1053 case 5:

	1054 SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);

	1055 SQR(pa1,pa2); MUL(smallExp,pa2,pa1);

	1056 break;

	1057 default:

	1058 ABORT; /* could do a loop? */

	1059 }

	1060 }

	1061

	1062 res = s_mp_redc(pa1, mmm);

	1063 mp_exch(pa1, result);

	1064

	1065 CLEANUP:

	1066 mp_clear(&accum1);

	1067 mp_clear(&accum2);

	1068 mp_clear(&accum[0]);

	1069 mp_clear(&accum[1]);

	1070 mp_clear(&accum[2]);

	1071 mp_clear(&accum[3]);

	1072 mp_clear(&tmp);

	1073 /* PORT_Memset(powers,0,num_powersnLensizeof(mp_digit)); */

	1074 free(powersArray);

	1075 return res;

	1076 }

	1077 #undef SQR

	1078 #undef MUL

	1079 #endif

	1080

	1081 mp_err mp_exptmod(const mp_int inBase, const mp_int exponent,

	1082 const mp_int modulus, mp_int result)

	1083 {

	1084 const mp_int *base;

	1085 mp_size bits_in_exponent, i, window_bits, odd_ints;

	1086 mp_err res;

	1087 int nLen;

	1088 mp_int montBase, goodBase;

	1089 mp_mont_modulus mmm;

	1090 #ifdef MP_USING_CACHE_SAFE_MOD_EXP

	1091 static unsigned int max_window_bits;

	1092 #endif

	1093

	1094 /* function for computing n0prime only works if n0 is odd */

	1095 if (!mp_isodd(modulus))

	1096 return s_mp_exptmod(inBase, exponent, modulus, result);

	1097

	1098 MP_DIGITS(&montBase) = 0;

	1099 MP_DIGITS(&goodBase) = 0;

	1100

	1101 if (mp_cmp(inBase, modulus) < 0) {

	1102 base = inBase;

	1103 } else {

	1104 MP_CHECKOK( mp_init(&goodBase) );

	1105 base = &goodBase;

	1106 MP_CHECKOK( mp_mod(inBase, modulus, &goodBase) );

	1107 }

	1108

	1109 nLen = MP_USED(modulus);

	1110 MP_CHECKOK( mp_init_size(&montBase, 2 * nLen + 2) );

	1111

	1112 mmm.N = modulus; / a copy of the mp_int struct */

	1113 i = mpl_significant_bits(modulus);

	1114 i += MP_DIGIT_BIT - 1;

	1115 mmm.b = i - i % MP_DIGIT_BIT;

	1116

	1117 /* compute n0', given n0, n0' = -(n0 ** -1) mod MP_RADIX

	1118 ** where n0 = least significant mp_digit of N, the modulus.

	1119 */

	1120 mmm.n0prime = 0 - s_mp_invmod_radix( MP_DIGIT(modulus, 0) );

	1121

	1122 MP_CHECKOK( s_mp_to_mont(base, &mmm, &montBase) );

	1123

	1124 bits_in_exponent = mpl_significant_bits(exponent);

	1125 #ifdef MP_USING_CACHE_SAFE_MOD_EXP

	1126 if (mp_using_cache_safe_exp) {

	1127 if (bits_in_exponent > 780)

	1128 window_bits = 6;

	1129 else if (bits_in_exponent > 256)

	1130 window_bits = 5;

	1131 else if (bits_in_exponent > 20)

	1132 window_bits = 4;

	1133 /* RSA public key exponents are typically under 20 bits (common values

	1134 * are: 3, 17, 65537) and a 4-bit window is inefficient

	1135 */

	1136 else

	1137 window_bits = 1;

	1138 } else

	1139 #endif

	1140 if (bits_in_exponent > 480)

	1141 window_bits = 6;

	1142 else if (bits_in_exponent > 160)

	1143 window_bits = 5;

	1144 else if (bits_in_exponent > 20)

	1145 window_bits = 4;

	1146 /* RSA public key exponents are typically under 20 bits (common values

	1147 * are: 3, 17, 65537) and a 4-bit window is inefficient

	1148 */

	1149 else

	1150 window_bits = 1;

	1151

	1152 #ifdef MP_USING_CACHE_SAFE_MOD_EXP

	1153 /*

	1154 * clamp the window size based on

	1155 * the cache line size.

	1156 */

	1157 if (!max_window_bits) {

	1158 unsigned long cache_size = s_mpi_getProcessorLineSize();

	1159 /* processor has no cache, use 'fast' code always */

	1160 if (cache_size == 0) {

	1161 mp_using_cache_safe_exp = 0;

	1162 }

	1163 if ((cache_size == 0) \|\| (cache_size >= 64)) {

	1164 max_window_bits = 6;

	1165 } else if (cache_size >= 32) {

	1166 max_window_bits = 5;

	1167 } else if (cache_size >= 16) {

	1168 max_window_bits = 4;

	1169 } else max_window_bits = 1; /* should this be an assert? */

	1170 }

	1171

	1172 /* clamp the window size down before we caclulate bits_in_exponent */

	1173 if (mp_using_cache_safe_exp) {

	1174 if (window_bits > max_window_bits) {

	1175 window_bits = max_window_bits;

	1176 }

	1177 }

	1178 #endif

	1179

	1180 odd_ints = 1 << (window_bits - 1);

	1181 i = bits_in_exponent % window_bits;

	1182 if (i != 0) {

	1183 bits_in_exponent += window_bits - i;

	1184 }

	1185

	1186 #ifdef MP_USING_MONT_MULF

	1187 if (mp_using_mont_mulf) {

	1188 MP_CHECKOK( s_mp_pad(&montBase, nLen) );

	1189 res = mp_exptmod_f(&montBase, exponent, modulus, result, &mmm, nLen,

	1190 bits_in_exponent, window_bits, odd_ints);

	1191 } else

	1192 #endif

	1193 #ifdef MP_USING_CACHE_SAFE_MOD_EXP

	1194 if (mp_using_cache_safe_exp) {

	1195 res = mp_exptmod_safe_i(&montBase, exponent, modulus, result, &mmm, nLen,

	1196 bits_in_exponent, window_bits, 1 << window_bits);

	1197 } else

	1198 #endif

	1199 res = mp_exptmod_i(&montBase, exponent, modulus, result, &mmm, nLen,

	1200 bits_in_exponent, window_bits, odd_ints);

	1201

	1202 CLEANUP:

	1203 mp_clear(&montBase);

	1204 mp_clear(&goodBase);

	1205 /* Don't mp_clear mmm.N because it is merely a copy of modulus.

	1206 ** Just zap it.

	1207 */

	1208 memset(&mmm, 0, sizeof mmm);

	1209 return res;

	1210 }

OLD	NEW

« no previous file with comments | « net/third_party/nss/ssl/mpi/mplogic.c ('k') | net/third_party/nss/ssl/mpi/mpprime.h » ('j') | ppapi/c/dev/ppb_opengles_dev.h » ('J')