| Index: openssl/crypto/bn/asm/x86_64-mont.pl
 | 
| ===================================================================
 | 
| --- openssl/crypto/bn/asm/x86_64-mont.pl	(revision 105093)
 | 
| +++ openssl/crypto/bn/asm/x86_64-mont.pl	(working copy)
 | 
| @@ -15,14 +15,18 @@
 | 
|  # respectful 50%. It remains to be seen if loop unrolling and
 | 
|  # dedicated squaring routine can provide further improvement...
 | 
|  
 | 
| -$output=shift;
 | 
| +$flavour = shift;
 | 
| +$output  = shift;
 | 
| +if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }	# first argument containing a dot is taken as the output file name
 | 
|  
 | 
| +$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);	# gates emission of the SEH handler and .pdata/.xdata
 | 
| +
 | 
|  $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
 | 
|  ( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
 | 
|  ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
 | 
|  die "can't locate x86_64-xlate.pl";
 | 
|  
 | 
| -open STDOUT,"| $^X $xlate $output";
 | 
| +open STDOUT,"| $^X $xlate $flavour $output";	# pipe everything printed through x86_64-xlate.pl
 | 
|  
 | 
|  # int bn_mul_mont(
 | 
|  $rp="%rdi";	# BN_ULONG *rp,
 | 
| @@ -55,13 +59,14 @@
 | 
|  	push	%r15
 | 
|  
 | 
|  	mov	${num}d,${num}d
 | 
| -	lea	2($num),%rax
 | 
| -	mov	%rsp,%rbp
 | 
| -	neg	%rax
 | 
| -	lea	(%rsp,%rax,8),%rsp	# tp=alloca(8*(num+2))
 | 
| +	lea	2($num),%r10
 | 
| +	mov	%rsp,%r11		# remember %rsp as it is before the alloca below
 | 
| +	neg	%r10
 | 
| +	lea	(%rsp,%r10,8),%rsp	# tp=alloca(8*(num+2))
 | 
|  	and	\$-1024,%rsp		# minimize TLB usage
 | 
|  
 | 
| -	mov	%rbp,8(%rsp,$num,8)	# tp[num+1]=%rsp
 | 
| +	mov	%r11,8(%rsp,$num,8)	# tp[num+1]=pre-alloca %rsp (held in %r11)
 | 
| +.Lprologue:				# Win64 se_handler compares context->Rip against this label
 | 
|  	mov	%rdx,$bp		# $bp reassigned, remember?
 | 
|  
 | 
|  	mov	($n0),$n0		# pull n0[0] value
 | 
| @@ -197,18 +202,129 @@
 | 
|  	dec	$j
 | 
|  	jge	.Lcopy
 | 
|  
 | 
| -	mov	8(%rsp,$num,8),%rsp	# restore %rsp
 | 
| +	mov	8(%rsp,$num,8),%rsi	# fetch saved stack pointer from tp[num+1]
 | 
|  	mov	\$1,%rax
 | 
| +	mov	(%rsi),%r15		# reload saved registers via %rsi instead of popping
 | 
| +	mov	8(%rsi),%r14
 | 
| +	mov	16(%rsi),%r13
 | 
| +	mov	24(%rsi),%r12
 | 
| +	mov	32(%rsi),%rbp
 | 
| +	mov	40(%rsi),%rbx
 | 
| +	lea	48(%rsi),%rsp		# restore %rsp, stepping past the six saved slots
 | 
| +.Lepilogue:
 | 
| +	ret
 | 
| +.size	bn_mul_mont,.-bn_mul_mont
 | 
| +.asciz	"Montgomery Multiplication for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
 | 
| +.align	16
 | 
| +___
 | 
| +
 | 
| +# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
 | 
| +#		CONTEXT *context,DISPATCHER_CONTEXT *disp)
 | 
| +if ($win64) {
 | 
| +$rec="%rcx";	# the four handler arguments arrive in the Win64 argument registers
 | 
| +$frame="%rdx";
 | 
| +$context="%r8";
 | 
| +$disp="%r9";
 | 
| +
 | 
| +$code.=<<___;
 | 
| +.extern	__imp_RtlVirtualUnwind
 | 
| +.type	se_handler,\@abi-omnipotent
 | 
| +.align	16
 | 
| +se_handler:
 | 
| +	push	%rsi
 | 
| +	push	%rdi
 | 
| +	push	%rbx
 | 
| +	push	%rbp
 | 
| +	push	%r12
 | 
| +	push	%r13
 | 
| +	push	%r14
 | 
| +	push	%r15
 | 
| +	pushfq
 | 
| +	sub	\$64,%rsp		# 32-byte shadow space + arg5..arg8 slots for the call below
 | 
| +
 | 
| +	mov	120($context),%rax	# pull context->Rax
 | 
| +	mov	248($context),%rbx	# pull context->Rip
 | 
| +
 | 
| +	lea	.Lprologue(%rip),%r10
 | 
| +	cmp	%r10,%rbx		# context->Rip<.Lprologue
 | 
| +	jb	.Lin_prologue
 | 
| +
 | 
| +	mov	152($context),%rax	# pull context->Rsp
 | 
| +
 | 
| +	lea	.Lepilogue(%rip),%r10
 | 
| +	cmp	%r10,%rbx		# context->Rip>=.Lepilogue
 | 
| +	jae	.Lin_prologue
 | 
| +
 | 
| +	mov	192($context),%r10	# pull $num
 | 
| +	mov	8(%rax,%r10,8),%rax	# pull saved stack pointer
 | 
| +	lea	48(%rax),%rax		# step past the six saved registers read back below
 | 
| +
 | 
| +	mov	-8(%rax),%rbx
 | 
| +	mov	-16(%rax),%rbp
 | 
| +	mov	-24(%rax),%r12
 | 
| +	mov	-32(%rax),%r13
 | 
| +	mov	-40(%rax),%r14
 | 
| +	mov	-48(%rax),%r15
 | 
| +	mov	%rbx,144($context)	# restore context->Rbx
 | 
| +	mov	%rbp,160($context)	# restore context->Rbp
 | 
| +	mov	%r12,216($context)	# restore context->R12
 | 
| +	mov	%r13,224($context)	# restore context->R13
 | 
| +	mov	%r14,232($context)	# restore context->R14
 | 
| +	mov	%r15,240($context)	# restore context->R15
 | 
| +
 | 
| +.Lin_prologue:
 | 
| +	mov	8(%rax),%rdi		# NOTE(review): presumably %rdi/%rsi were stored at 8/16(%rax) by the xlate-generated Win64 entry code - confirm
 | 
| +	mov	16(%rax),%rsi
 | 
| +	mov	%rax,152($context)	# restore context->Rsp
 | 
| +	mov	%rsi,168($context)	# restore context->Rsi
 | 
| +	mov	%rdi,176($context)	# restore context->Rdi
 | 
| +
 | 
| +	mov	40($disp),%rdi		# disp->ContextRecord
 | 
| +	mov	$context,%rsi		# context
 | 
| +	mov	\$154,%ecx		# sizeof(CONTEXT) in quadwords, count for rep movsq
 | 
| +	.long	0xa548f3fc		# cld; rep movsq
 | 
| +
 | 
| +	mov	$disp,%rsi
 | 
| +	xor	%rcx,%rcx		# arg1, UNW_FLAG_NHANDLER
 | 
| +	mov	8(%rsi),%rdx		# arg2, disp->ImageBase
 | 
| +	mov	0(%rsi),%r8		# arg3, disp->ControlPc
 | 
| +	mov	16(%rsi),%r9		# arg4, disp->FunctionEntry
 | 
| +	mov	40(%rsi),%r10		# disp->ContextRecord
 | 
| +	lea	56(%rsi),%r11		# &disp->HandlerData
 | 
| +	lea	24(%rsi),%r12		# &disp->EstablisherFrame
 | 
| +	mov	%r10,32(%rsp)		# arg5
 | 
| +	mov	%r11,40(%rsp)		# arg6
 | 
| +	mov	%r12,48(%rsp)		# arg7
 | 
| +	mov	%rcx,56(%rsp)		# arg8, (NULL)
 | 
| +	call	*__imp_RtlVirtualUnwind(%rip)
 | 
| +
 | 
| +	mov	\$1,%eax		# ExceptionContinueSearch
 | 
| +	add	\$64,%rsp
 | 
| +	popfq
 | 
|  	pop	%r15
 | 
|  	pop	%r14
 | 
|  	pop	%r13
 | 
|  	pop	%r12
 | 
|  	pop	%rbp
 | 
|  	pop	%rbx
 | 
| +	pop	%rdi
 | 
| +	pop	%rsi
 | 
|  	ret
 | 
| -.size	bn_mul_mont,.-bn_mul_mont
 | 
| -.asciz	"Montgomery Multiplication for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
 | 
| +.size	se_handler,.-se_handler
 | 
| +
 | 
| +.section	.pdata
 | 
| +.align	4
 | 
| +	.rva	.LSEH_begin_bn_mul_mont
 | 
| +	.rva	.LSEH_end_bn_mul_mont
 | 
| +	.rva	.LSEH_info_bn_mul_mont
 | 
| +
 | 
| +.section	.xdata
 | 
| +.align	8
 | 
| +.LSEH_info_bn_mul_mont:
 | 
| +	.byte	9,0,0,0			# NOTE(review): presumably UNWIND_INFO version 1 with UNW_FLAG_EHANDLER, no prolog/unwind codes - verify
 | 
| +	.rva	se_handler
 | 
|  ___
 | 
| +}
 | 
|  
 | 
|  print $code;
 | 
|  close STDOUT;
 | 
| 
 |