| Index: openssl/crypto/bn/asm/alpha-mont.pl
 | 
| diff --git a/openssl/crypto/bn/asm/alpha-mont.pl b/openssl/crypto/bn/asm/alpha-mont.pl
 | 
| deleted file mode 100644
 | 
| index 03596e2014d4035d02440316cc1cf025f726d772..0000000000000000000000000000000000000000
 | 
| --- a/openssl/crypto/bn/asm/alpha-mont.pl
 | 
| +++ /dev/null
 | 
| @@ -1,321 +0,0 @@
 | 
| -#!/usr/bin/env perl
 | 
| -#
 | 
| -# ====================================================================
 | 
| -# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
 | 
| -# project. The module is, however, dual licensed under OpenSSL and
 | 
| -# CRYPTOGAMS licenses depending on where you obtain it. For further
 | 
| -# details see http://www.openssl.org/~appro/cryptogams/.
 | 
| -# ====================================================================
 | 
| -#
 | 
| -# On 21264 RSA sign performance improves by 70/35/20/15 percent for
 | 
| -# 512/1024/2048/4096 bit key lengths. This is against vendor compiler
 | 
| -# instructed to '-tune host' code with in-line assembler. Other
 | 
| -# benchmarks improve by 15-20%. To anchor it to something else, the
 | 
| -# code provides approximately the same performance per GHz as AMD64.
 | 
| -# I.e. if you compare 1GHz 21264 and 2GHz Opteron, you'll observe ~2x
 | 
| -# difference.
 | 
| -
 | 
| -# int bn_mul_mont(
 | 
| -$rp="a0";	# BN_ULONG *rp,
 | 
| -$ap="a1";	# const BN_ULONG *ap,
 | 
| -$bp="a2";	# const BN_ULONG *bp,
 | 
| -$np="a3";	# const BN_ULONG *np,
 | 
| -$n0="a4";	# const BN_ULONG *n0,
 | 
| -$num="a5";	# int num);
 | 
| -
 | 
| -$lo0="t0";
 | 
| -$hi0="t1";
 | 
| -$lo1="t2";
 | 
| -$hi1="t3";
 | 
| -$aj="t4";
 | 
| -$bi="t5";
 | 
| -$nj="t6";
 | 
| -$tp="t7";
 | 
| -$alo="t8";
 | 
| -$ahi="t9";
 | 
| -$nlo="t10";
 | 
| -$nhi="t11";
 | 
| -$tj="t12";
 | 
| -$i="s3";
 | 
| -$j="s4";
 | 
| -$m1="s5";
 | 
| -
 | 
| -$code=<<___;
 | 
| -#ifdef __linux__
 | 
| -#include <asm/regdef.h>
 | 
| -#else
 | 
| -#include <asm.h>
 | 
| -#include <regdef.h>
 | 
| -#endif
 | 
| -
 | 
| -.text
 | 
| -
 | 
| -.set	noat
 | 
| -.set	noreorder
 | 
| -
 | 
| -.globl	bn_mul_mont
 | 
| -.align	5
 | 
| -.ent	bn_mul_mont
 | 
| -bn_mul_mont:
 | 
| -	lda	sp,-48(sp)
 | 
| -	stq	ra,0(sp)
 | 
| -	stq	s3,8(sp)
 | 
| -	stq	s4,16(sp)
 | 
| -	stq	s5,24(sp)
 | 
| -	stq	fp,32(sp)
 | 
| -	mov	sp,fp
 | 
| -	.mask	0x0400f000,-48
 | 
| -	.frame	fp,48,ra
 | 
| -	.prologue 0
 | 
| -
 | 
| -	.align	4
 | 
| -	.set	reorder
 | 
| -	sextl	$num,$num
 | 
| -	mov	0,v0
 | 
| -	cmplt	$num,4,AT
 | 
| -	bne	AT,.Lexit
 | 
| -
 | 
| -	ldq	$hi0,0($ap)	# ap[0]
 | 
| -	s8addq	$num,16,AT
 | 
| -	ldq	$aj,8($ap)
 | 
| -	subq	sp,AT,sp
 | 
| -	ldq	$bi,0($bp)	# bp[0]
 | 
| -	lda	AT,-4096(zero)	# mov	-4096,AT
 | 
| -	ldq	$n0,0($n0)
 | 
| -	and	sp,AT,sp
 | 
| -
 | 
| -	mulq	$hi0,$bi,$lo0
 | 
| -	ldq	$hi1,0($np)	# np[0]
 | 
| -	umulh	$hi0,$bi,$hi0
 | 
| -	ldq	$nj,8($np)
 | 
| -
 | 
| -	mulq	$lo0,$n0,$m1
 | 
| -
 | 
| -	mulq	$hi1,$m1,$lo1
 | 
| -	umulh	$hi1,$m1,$hi1
 | 
| -
 | 
| -	addq	$lo1,$lo0,$lo1
 | 
| -	cmpult	$lo1,$lo0,AT
 | 
| -	addq	$hi1,AT,$hi1
 | 
| -
 | 
| -	mulq	$aj,$bi,$alo
 | 
| -	mov	2,$j
 | 
| -	umulh	$aj,$bi,$ahi
 | 
| -	mov	sp,$tp
 | 
| -
 | 
| -	mulq	$nj,$m1,$nlo
 | 
| -	s8addq	$j,$ap,$aj
 | 
| -	umulh	$nj,$m1,$nhi
 | 
| -	s8addq	$j,$np,$nj
 | 
| -.align	4
 | 
| -.L1st:
 | 
| -	.set	noreorder
 | 
| -	ldq	$aj,0($aj)
 | 
| -	addl	$j,1,$j
 | 
| -	ldq	$nj,0($nj)
 | 
| -	lda	$tp,8($tp)
 | 
| -
 | 
| -	addq	$alo,$hi0,$lo0
 | 
| -	mulq	$aj,$bi,$alo
 | 
| -	cmpult	$lo0,$hi0,AT
 | 
| -	addq	$nlo,$hi1,$lo1
 | 
| -
 | 
| -	mulq	$nj,$m1,$nlo
 | 
| -	addq	$ahi,AT,$hi0
 | 
| -	cmpult	$lo1,$hi1,v0
 | 
| -	cmplt	$j,$num,$tj
 | 
| -
 | 
| -	umulh	$aj,$bi,$ahi
 | 
| -	addq	$nhi,v0,$hi1
 | 
| -	addq	$lo1,$lo0,$lo1
 | 
| -	s8addq	$j,$ap,$aj
 | 
| -
 | 
| -	umulh	$nj,$m1,$nhi
 | 
| -	cmpult	$lo1,$lo0,v0
 | 
| -	addq	$hi1,v0,$hi1
 | 
| -	s8addq	$j,$np,$nj
 | 
| -
 | 
| -	stq	$lo1,-8($tp)
 | 
| -	nop
 | 
| -	unop
 | 
| -	bne	$tj,.L1st
 | 
| -	.set	reorder
 | 
| -
 | 
| -	addq	$alo,$hi0,$lo0
 | 
| -	addq	$nlo,$hi1,$lo1
 | 
| -	cmpult	$lo0,$hi0,AT
 | 
| -	cmpult	$lo1,$hi1,v0
 | 
| -	addq	$ahi,AT,$hi0
 | 
| -	addq	$nhi,v0,$hi1
 | 
| -
 | 
| -	addq	$lo1,$lo0,$lo1
 | 
| -	cmpult	$lo1,$lo0,v0
 | 
| -	addq	$hi1,v0,$hi1
 | 
| -
 | 
| -	stq	$lo1,0($tp)
 | 
| -
 | 
| -	addq	$hi1,$hi0,$hi1
 | 
| -	cmpult	$hi1,$hi0,AT
 | 
| -	stq	$hi1,8($tp)
 | 
| -	stq	AT,16($tp)
 | 
| -
 | 
| -	mov	1,$i
 | 
| -.align	4
 | 
| -.Louter:
 | 
| -	s8addq	$i,$bp,$bi
 | 
| -	ldq	$hi0,0($ap)
 | 
| -	ldq	$aj,8($ap)
 | 
| -	ldq	$bi,0($bi)
 | 
| -	ldq	$hi1,0($np)
 | 
| -	ldq	$nj,8($np)
 | 
| -	ldq	$tj,0(sp)
 | 
| -
 | 
| -	mulq	$hi0,$bi,$lo0
 | 
| -	umulh	$hi0,$bi,$hi0
 | 
| -
 | 
| -	addq	$lo0,$tj,$lo0
 | 
| -	cmpult	$lo0,$tj,AT
 | 
| -	addq	$hi0,AT,$hi0
 | 
| -
 | 
| -	mulq	$lo0,$n0,$m1
 | 
| -
 | 
| -	mulq	$hi1,$m1,$lo1
 | 
| -	umulh	$hi1,$m1,$hi1
 | 
| -
 | 
| -	addq	$lo1,$lo0,$lo1
 | 
| -	cmpult	$lo1,$lo0,AT
 | 
| -	mov	2,$j
 | 
| -	addq	$hi1,AT,$hi1
 | 
| -
 | 
| -	mulq	$aj,$bi,$alo
 | 
| -	mov	sp,$tp
 | 
| -	umulh	$aj,$bi,$ahi
 | 
| -
 | 
| -	mulq	$nj,$m1,$nlo
 | 
| -	s8addq	$j,$ap,$aj
 | 
| -	umulh	$nj,$m1,$nhi
 | 
| -.align	4
 | 
| -.Linner:
 | 
| -	.set	noreorder
 | 
| -	ldq	$tj,8($tp)	#L0
 | 
| -	nop			#U1
 | 
| -	ldq	$aj,0($aj)	#L1
 | 
| -	s8addq	$j,$np,$nj	#U0
 | 
| -
 | 
| -	ldq	$nj,0($nj)	#L0
 | 
| -	nop			#U1
 | 
| -	addq	$alo,$hi0,$lo0	#L1
 | 
| -	lda	$tp,8($tp)
 | 
| -
 | 
| -	mulq	$aj,$bi,$alo	#U1
 | 
| -	cmpult	$lo0,$hi0,AT	#L0
 | 
| -	addq	$nlo,$hi1,$lo1	#L1
 | 
| -	addl	$j,1,$j
 | 
| -
 | 
| -	mulq	$nj,$m1,$nlo	#U1
 | 
| -	addq	$ahi,AT,$hi0	#L0
 | 
| -	addq	$lo0,$tj,$lo0	#L1
 | 
| -	cmpult	$lo1,$hi1,v0	#U0
 | 
| -
 | 
| -	umulh	$aj,$bi,$ahi	#U1
 | 
| -	cmpult	$lo0,$tj,AT	#L0
 | 
| -	addq	$lo1,$lo0,$lo1	#L1
 | 
| -	addq	$nhi,v0,$hi1	#U0
 | 
| -
 | 
| -	umulh	$nj,$m1,$nhi	#U1
 | 
| -	s8addq	$j,$ap,$aj	#L0
 | 
| -	cmpult	$lo1,$lo0,v0	#L1
 | 
| -	cmplt	$j,$num,$tj	#U0	# borrow $tj
 | 
| -
 | 
| -	addq	$hi0,AT,$hi0	#L0
 | 
| -	addq	$hi1,v0,$hi1	#U1
 | 
| -	stq	$lo1,-8($tp)	#L1
 | 
| -	bne	$tj,.Linner	#U0
 | 
| -	.set	reorder
 | 
| -
 | 
| -	ldq	$tj,8($tp)
 | 
| -	addq	$alo,$hi0,$lo0
 | 
| -	addq	$nlo,$hi1,$lo1
 | 
| -	cmpult	$lo0,$hi0,AT
 | 
| -	cmpult	$lo1,$hi1,v0
 | 
| -	addq	$ahi,AT,$hi0
 | 
| -	addq	$nhi,v0,$hi1
 | 
| -
 | 
| -	addq	$lo0,$tj,$lo0
 | 
| -	cmpult	$lo0,$tj,AT
 | 
| -	addq	$hi0,AT,$hi0
 | 
| -
 | 
| -	ldq	$tj,16($tp)
 | 
| -	addq	$lo1,$lo0,$j
 | 
| -	cmpult	$j,$lo0,v0
 | 
| -	addq	$hi1,v0,$hi1
 | 
| -
 | 
| -	addq	$hi1,$hi0,$lo1
 | 
| -	stq	$j,0($tp)
 | 
| -	cmpult	$lo1,$hi0,$hi1
 | 
| -	addq	$lo1,$tj,$lo1
 | 
| -	cmpult	$lo1,$tj,AT
 | 
| -	addl	$i,1,$i
 | 
| -	addq	$hi1,AT,$hi1
 | 
| -	stq	$lo1,8($tp)
 | 
| -	cmplt	$i,$num,$tj	# borrow $tj
 | 
| -	stq	$hi1,16($tp)
 | 
| -	bne	$tj,.Louter
 | 
| -
 | 
| -	s8addq	$num,sp,$tj	# &tp[num]
 | 
| -	mov	$rp,$bp		# put rp aside
 | 
| -	mov	sp,$tp
 | 
| -	mov	sp,$ap
 | 
| -	mov	0,$hi0		# clear borrow bit
 | 
| -
 | 
| -.align	4
 | 
| -.Lsub:	ldq	$lo0,0($tp)
 | 
| -	ldq	$lo1,0($np)
 | 
| -	lda	$tp,8($tp)
 | 
| -	lda	$np,8($np)
 | 
| -	subq	$lo0,$lo1,$lo1	# tp[i]-np[i]
 | 
| -	cmpult	$lo0,$lo1,AT
 | 
| -	subq	$lo1,$hi0,$lo0
 | 
| -	cmpult	$lo1,$lo0,$hi0
 | 
| -	or	$hi0,AT,$hi0
 | 
| -	stq	$lo0,0($rp)
 | 
| -	cmpult	$tp,$tj,v0
 | 
| -	lda	$rp,8($rp)
 | 
| -	bne	v0,.Lsub
 | 
| -
 | 
| -	subq	$hi1,$hi0,$hi0	# handle upmost overflow bit
 | 
| -	mov	sp,$tp
 | 
| -	mov	$bp,$rp		# restore rp
 | 
| -
 | 
| -	and	sp,$hi0,$ap
 | 
| -	bic	$bp,$hi0,$bp
 | 
| -	bis	$bp,$ap,$ap	# ap=borrow?tp:rp
 | 
| -
 | 
| -.align	4
 | 
| -.Lcopy:	ldq	$aj,0($ap)	# copy or in-place refresh
 | 
| -	lda	$tp,8($tp)
 | 
| -	lda	$rp,8($rp)
 | 
| -	lda	$ap,8($ap)
 | 
| -	stq	zero,-8($tp)	# zap tp
 | 
| -	cmpult	$tp,$tj,AT
 | 
| -	stq	$aj,-8($rp)
 | 
| -	bne	AT,.Lcopy
 | 
| -	mov	1,v0
 | 
| -
 | 
| -.Lexit:
 | 
| -	.set	noreorder
 | 
| -	mov	fp,sp
 | 
| -	/*ldq	ra,0(sp)*/
 | 
| -	ldq	s3,8(sp)
 | 
| -	ldq	s4,16(sp)
 | 
| -	ldq	s5,24(sp)
 | 
| -	ldq	fp,32(sp)
 | 
| -	lda	sp,48(sp)
 | 
| -	ret	(ra)
 | 
| -.end	bn_mul_mont
 | 
| -.ascii	"Montgomery Multiplication for Alpha, CRYPTOGAMS by <appro\@openssl.org>"
 | 
| -.align	2
 | 
| -___
 | 
| -
 | 
| -print $code;
 | 
| -close STDOUT;
 | 
| 
 |