openssl/crypto/bn/bn_asm.c - Issue 9254031: Upgrade chrome's OpenSSL to same version Android ships with.

Unified Diff: openssl/crypto/bn/bn_asm.c

Issue 9254031: Upgrade chrome's OpenSSL to same version Android ships with. (Closed) Base URL: http://src.chromium.org/svn/trunk/deps/third_party/openssl/

Patch Set: '' Created 8 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: openssl/crypto/bn/bn_asm.c

===================================================================

--- openssl/crypto/bn/bn_asm.c (revision 105093)

+++ openssl/crypto/bn/bn_asm.c (working copy)

@@ -75,6 +75,7 @@

assert(num >= 0);

if (num <= 0) return(c1);

+#ifndef OPENSSL_SMALL_FOOTPRINT

while (num&~3)

{

mul_add(rp[0],ap[0],w,c1);

@@ -83,11 +84,11 @@

mul_add(rp[3],ap[3],w,c1);

ap+=4; rp+=4; num-=4;

}

- if (num)

+#endif

+ while (num)

{

- mul_add(rp[0],ap[0],w,c1); if (--num==0) return c1;

- mul_add(rp[1],ap[1],w,c1); if (--num==0) return c1;

- mul_add(rp[2],ap[2],w,c1); return c1;

+ mul_add(rp[0],ap[0],w,c1);

+ ap++; rp++; num--;

}

return(c1);

@@ -100,6 +101,7 @@

assert(num >= 0);

if (num <= 0) return(c1);

+#ifndef OPENSSL_SMALL_FOOTPRINT

while (num&~3)

{

mul(rp[0],ap[0],w,c1);

@@ -108,11 +110,11 @@

mul(rp[3],ap[3],w,c1);

ap+=4; rp+=4; num-=4;

}

- if (num)

+#endif

+ while (num)

{

- mul(rp[0],ap[0],w,c1); if (--num == 0) return c1;

- mul(rp[1],ap[1],w,c1); if (--num == 0) return c1;

- mul(rp[2],ap[2],w,c1);

+ mul(rp[0],ap[0],w,c1);

+ ap++; rp++; num--;

}

return(c1);

}

@@ -121,6 +123,8 @@

{

assert(n >= 0);

if (n <= 0) return;

+#ifndef OPENSSL_SMALL_FOOTPRINT

while (n&~3)

{

sqr(r[0],r[1],a[0]);

@@ -129,11 +133,11 @@

sqr(r[6],r[7],a[3]);

a+=4; r+=8; n-=4;

}

- if (n)

+#endif

+ while (n)

{

- sqr(r[0],r[1],a[0]); if (--n == 0) return;

- sqr(r[2],r[3],a[1]); if (--n == 0) return;

- sqr(r[4],r[5],a[2]);

+ sqr(r[0],r[1],a[0]);

+ a++; r+=2; n--;

}

@@ -150,19 +154,21 @@

bl=LBITS(w);

bh=HBITS(w);

- for (;;)

+#ifndef OPENSSL_SMALL_FOOTPRINT

+ while (num&~3)

{

mul_add(rp[0],ap[0],bl,bh,c);

- if (--num == 0) break;

mul_add(rp[1],ap[1],bl,bh,c);

- if (--num == 0) break;

mul_add(rp[2],ap[2],bl,bh,c);

- if (--num == 0) break;

mul_add(rp[3],ap[3],bl,bh,c);

- if (--num == 0) break;

- ap+=4;

- rp+=4;

+ ap+=4; rp+=4; num-=4;

}

+#endif

+ while (num)

+ {

+ mul_add(rp[0],ap[0],bl,bh,c);

+ ap++; rp++; num--;

+ }

return(c);

}

@@ -177,19 +183,21 @@

bl=LBITS(w);

bh=HBITS(w);

- for (;;)

+#ifndef OPENSSL_SMALL_FOOTPRINT

+ while (num&~3)

{

mul(rp[0],ap[0],bl,bh,carry);

- if (--num == 0) break;

mul(rp[1],ap[1],bl,bh,carry);

- if (--num == 0) break;

mul(rp[2],ap[2],bl,bh,carry);

- if (--num == 0) break;

mul(rp[3],ap[3],bl,bh,carry);

- if (--num == 0) break;

- ap+=4;

- rp+=4;

+ ap+=4; rp+=4; num-=4;

}

+#endif

+ while (num)

+ {

+ mul(rp[0],ap[0],bl,bh,carry);

+ ap++; rp++; num--;

+ }

return(carry);

}

@@ -197,23 +205,22 @@

{

assert(n >= 0);

if (n <= 0) return;

- for (;;)

+#ifndef OPENSSL_SMALL_FOOTPRINT

+ while (n&~3)

{

sqr64(r[0],r[1],a[0]);

- if (--n == 0) break;

sqr64(r[2],r[3],a[1]);

- if (--n == 0) break;

sqr64(r[4],r[5],a[2]);

- if (--n == 0) break;

sqr64(r[6],r[7],a[3]);

- if (--n == 0) break;

- a+=4;

- r+=8;

+ a+=4; r+=8; n-=4;

}

+#endif

+ while (n)

+ {

+ sqr64(r[0],r[1],a[0]);

+ a++; r+=2; n--;

+ }

}

#endif /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */

@@ -303,32 +310,31 @@

assert(n >= 0);

if (n <= 0) return((BN_ULONG)0);

- for (;;)

+#ifndef OPENSSL_SMALL_FOOTPRINT

+ while (n&~3)

{

ll+=(BN_ULLONG)a[0]+b[0];

r[0]=(BN_ULONG)ll&BN_MASK2;

ll>>=BN_BITS2;

- if (--n <= 0) break;

ll+=(BN_ULLONG)a[1]+b[1];

r[1]=(BN_ULONG)ll&BN_MASK2;

ll>>=BN_BITS2;

- if (--n <= 0) break;

ll+=(BN_ULLONG)a[2]+b[2];

r[2]=(BN_ULONG)ll&BN_MASK2;

ll>>=BN_BITS2;

- if (--n <= 0) break;

ll+=(BN_ULLONG)a[3]+b[3];

r[3]=(BN_ULONG)ll&BN_MASK2;

ll>>=BN_BITS2;

- if (--n <= 0) break;

- a+=4;

- b+=4;

- r+=4;

+ a+=4; b+=4; r+=4; n-=4;

}

+#endif

+ while (n)

+ {

+ ll+=(BN_ULLONG)a[0]+b[0];

+ r[0]=(BN_ULONG)ll&BN_MASK2;

+ ll>>=BN_BITS2;

+ a++; b++; r++; n--;

+ }

return((BN_ULONG)ll);

}

#else /* !BN_LLONG */

@@ -340,7 +346,8 @@

if (n <= 0) return((BN_ULONG)0);

c=0;

- for (;;)

+#ifndef OPENSSL_SMALL_FOOTPRINT

+ while (n&~3)

{

t=a[0];

t=(t+c)&BN_MASK2;

@@ -348,36 +355,37 @@

l=(t+b[0])&BN_MASK2;

c+=(l < t);

r[0]=l;

- if (--n <= 0) break;

t=a[1];

t=(t+c)&BN_MASK2;

c=(t < c);

l=(t+b[1])&BN_MASK2;

c+=(l < t);

r[1]=l;

- if (--n <= 0) break;

t=a[2];

t=(t+c)&BN_MASK2;

c=(t < c);

l=(t+b[2])&BN_MASK2;

c+=(l < t);

r[2]=l;

- if (--n <= 0) break;

t=a[3];

t=(t+c)&BN_MASK2;

c=(t < c);

l=(t+b[3])&BN_MASK2;

c+=(l < t);

r[3]=l;

- if (--n <= 0) break;

- a+=4;

- b+=4;

- r+=4;

+ a+=4; b+=4; r+=4; n-=4;

}

+#endif

+ while(n)

+ {

+ t=a[0];

+ t=(t+c)&BN_MASK2;

+ c=(t < c);

+ l=(t+b[0])&BN_MASK2;

+ c+=(l < t);

+ r[0]=l;

+ a++; b++; r++; n--;

+ }

return((BN_ULONG)c);

}

#endif /* !BN_LLONG */

@@ -390,36 +398,35 @@

assert(n >= 0);

if (n <= 0) return((BN_ULONG)0);

- for (;;)

+#ifndef OPENSSL_SMALL_FOOTPRINT

+ while (n&~3)

{

t1=a[0]; t2=b[0];

r[0]=(t1-t2-c)&BN_MASK2;

if (t1 != t2) c=(t1 < t2);

- if (--n <= 0) break;

t1=a[1]; t2=b[1];

r[1]=(t1-t2-c)&BN_MASK2;

if (t1 != t2) c=(t1 < t2);

- if (--n <= 0) break;

t1=a[2]; t2=b[2];

r[2]=(t1-t2-c)&BN_MASK2;

if (t1 != t2) c=(t1 < t2);

- if (--n <= 0) break;

t1=a[3]; t2=b[3];

r[3]=(t1-t2-c)&BN_MASK2;

if (t1 != t2) c=(t1 < t2);

- if (--n <= 0) break;

- a+=4;

- b+=4;

- r+=4;

+ a+=4; b+=4; r+=4; n-=4;

}

+#endif

+ while (n)

+ {

+ t1=a[0]; t2=b[0];

+ r[0]=(t1-t2-c)&BN_MASK2;

+ if (t1 != t2) c=(t1 < t2);

+ a++; b++; r++; n--;

+ }

return(c);

}

-#ifdef BN_MUL_COMBA

+#if defined(BN_MUL_COMBA) && !defined(OPENSSL_SMALL_FOOTPRINT)

#undef bn_mul_comba8

#undef bn_mul_comba4

@@ -820,18 +827,134 @@

r[6]=c1;

r[7]=c2;

}

+#ifdef OPENSSL_NO_ASM

+#ifdef OPENSSL_BN_ASM_MONT

+#include <alloca.h>

+/*

+ * This is essentially a reference implementation, which may or may not

+ * result in performance improvement. E.g. on IA-32 this routine was

+ * observed to give 40% faster rsa1024 private key operations and 10%

+ * faster rsa4096 ones, while on AMD64 it improves rsa1024 sign only

+ * by 10% and *worsens* rsa4096 sign by 15%. Once again, it's a

+ * reference implementation, one to be used as starting point for

+ * platform-specific assembler. Mentioned numbers apply to compiler

+ * generated code compiled with and without -DOPENSSL_BN_ASM_MONT and

+ * can vary not only from platform to platform, but even for compiler

+ * versions. Assembler vs. assembler improvement coefficients can

+ * [and are known to] differ and are to be documented elsewhere.

+ */

+int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0p, int num)

+ {

+ BN_ULONG c0,c1,ml,*tp,n0;

+#ifdef mul64

+ BN_ULONG mh;

+#endif

+ volatile BN_ULONG *vp;

+ int i=0,j;

+#if 0 /* template for platform-specific implementation */

+ if (ap==bp) return bn_sqr_mont(rp,ap,np,n0p,num);

+#endif

+ vp = tp = alloca((num+2)*sizeof(BN_ULONG));

+ n0 = *n0p;

+ c0 = 0;

+ ml = bp[0];

+#ifdef mul64

+ mh = HBITS(ml);

+ ml = LBITS(ml);

+ for (j=0;j<num;++j)

+ mul(tp[j],ap[j],ml,mh,c0);

+#else

+ for (j=0;j<num;++j)

+ mul(tp[j],ap[j],ml,c0);

+#endif

+ tp[num] = c0;

+ tp[num+1] = 0;

+ goto enter;

+ for(i=0;i<num;i++)

+ {

+ c0 = 0;

+ ml = bp[i];

+#ifdef mul64

+ mh = HBITS(ml);

+ ml = LBITS(ml);

+ for (j=0;j<num;++j)

+ mul_add(tp[j],ap[j],ml,mh,c0);

+#else

+ for (j=0;j<num;++j)

+ mul_add(tp[j],ap[j],ml,c0);

+#endif

+ c1 = (tp[num] + c0)&BN_MASK2;

+ tp[num] = c1;

+ tp[num+1] = (c1<c0?1:0);

+ enter:

+ c1 = tp[0];

+ ml = (c1*n0)&BN_MASK2;

+ c0 = 0;

+#ifdef mul64

+ mh = HBITS(ml);

+ ml = LBITS(ml);

+ mul_add(c1,np[0],ml,mh,c0);

+#else

+ mul_add(c1,ml,np[0],c0);

+#endif

+ for(j=1;j<num;j++)

+ {

+ c1 = tp[j];

+#ifdef mul64

+ mul_add(c1,np[j],ml,mh,c0);

+#else

+ mul_add(c1,ml,np[j],c0);

+#endif

+ tp[j-1] = c1&BN_MASK2;

+ }

+ c1 = (tp[num] + c0)&BN_MASK2;

+ tp[num-1] = c1;

+ tp[num] = tp[num+1] + (c1<c0?1:0);

+ }

+ if (tp[num]!=0 || tp[num-1]>=np[num-1])

+ {

+ c0 = bn_sub_words(rp,tp,np,num);

+ if (tp[num]!=0 || c0==0)

+ {

+ for(i=0;i<num+2;i++) vp[i] = 0;

+ return 1;

+ }

+ }

+ for(i=0;i<num;i++) rp[i] = tp[i], vp[i] = 0;

+ vp[num] = 0;

+ vp[num+1] = 0;

+ return 1;

+ }

+#else

+/*

+ * Return value of 0 indicates that multiplication/convolution was not

+ * performed, signalling the caller to fall back to the alternative/original

+ * code-path.

+ */

+int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num)

+{ return 0; }

+#endif /* OPENSSL_BN_ASM_MONT */

+#endif

#else /* !BN_MUL_COMBA */

/* hmm... is it faster just to do a multiply? */

#undef bn_sqr_comba4

-void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a)

+void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)

{

BN_ULONG t[8];

bn_sqr_normal(r,a,4,t);

}

#undef bn_sqr_comba8

-void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)

+void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)

{

BN_ULONG t[16];

bn_sqr_normal(r,a,8,t);

@@ -857,4 +980,51 @@

r[15]=bn_mul_add_words(&(r[7]),a,8,b[7]);

}

+#ifdef OPENSSL_NO_ASM

+#ifdef OPENSSL_BN_ASM_MONT

+#include <alloca.h>

+int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0p, int num)

+ {

+ BN_ULONG c0,c1,*tp,n0=*n0p;

+ volatile BN_ULONG *vp;

+ int i=0,j;

+ vp = tp = alloca((num+2)*sizeof(BN_ULONG));

+ for(i=0;i<=num;i++) tp[i]=0;

+ for(i=0;i<num;i++)

+ {

+ c0 = bn_mul_add_words(tp,ap,num,bp[i]);

+ c1 = (tp[num] + c0)&BN_MASK2;

+ tp[num] = c1;

+ tp[num+1] = (c1<c0?1:0);

+ c0 = bn_mul_add_words(tp,np,num,tp[0]*n0);

+ c1 = (tp[num] + c0)&BN_MASK2;

+ tp[num] = c1;

+ tp[num+1] += (c1<c0?1:0);

+ for(j=0;j<=num;j++) tp[j]=tp[j+1];

+ }

+ if (tp[num]!=0 || tp[num-1]>=np[num-1])

+ {

+ c0 = bn_sub_words(rp,tp,np,num);

+ if (tp[num]!=0 || c0==0)

+ {

+ for(i=0;i<num+2;i++) vp[i] = 0;

+ return 1;

+ }

+ }

+ for(i=0;i<num;i++) rp[i] = tp[i], vp[i] = 0;

+ vp[num] = 0;

+ vp[num+1] = 0;

+ return 1;

+ }

+#else

+int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num)

+{ return 0; }

+#endif /* OPENSSL_BN_ASM_MONT */

+#endif

#endif /* !BN_MUL_COMBA */

« no previous file with comments | « openssl/crypto/bn/bn.h ('k') | openssl/crypto/bn/bn_blind.c » ('j') | no next file with comments »