Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(135)

Side by Side Diff: openssl/crypto/bn/asm/x86_64-mont.pl

Issue 9254031: Upgrade chrome's OpenSSL to same version Android ships with. (Closed) Base URL: http://src.chromium.org/svn/trunk/deps/third_party/openssl/
Patch Set: '' Created 8 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « openssl/crypto/bn/asm/x86_64-gcc.c ('k') | openssl/crypto/bn/bn.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 #!/usr/bin/env perl 1 #!/usr/bin/env perl
2 2
3 # ==================================================================== 3 # ====================================================================
4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL 4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and 5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further 6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/. 7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ==================================================================== 8 # ====================================================================
9 9
10 # October 2005. 10 # October 2005.
11 # 11 #
12 # Montgomery multiplication routine for x86_64. While it gives modest 12 # Montgomery multiplication routine for x86_64. While it gives modest
13 # 9% improvement of rsa4096 sign on Opteron, rsa512 sign runs more 13 # 9% improvement of rsa4096 sign on Opteron, rsa512 sign runs more
14 # than twice (>2x) as fast. Most common rsa1024 sign is improved by a 14 # than twice (>2x) as fast. Most common rsa1024 sign is improved by a
15 # respectable 50%. It remains to be seen if loop unrolling and 15 # respectable 50%. It remains to be seen if loop unrolling and
16 # dedicated squaring routine can provide further improvement... 16 # dedicated squaring routine can provide further improvement...
17 17
18 $output=shift; 18 $flavour = shift;
19 $output = shift;
20 if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
21
22 $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
19 23
20 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; 24 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
21 ( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or 25 ( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
22 ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or 26 ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
23 die "can't locate x86_64-xlate.pl"; 27 die "can't locate x86_64-xlate.pl";
24 28
25 open STDOUT,"| $^X $xlate $output"; 29 open STDOUT,"| $^X $xlate $flavour $output";
26 30
27 # int bn_mul_mont( 31 # int bn_mul_mont(
28 $rp="%rdi"; # BN_ULONG *rp, 32 $rp="%rdi"; # BN_ULONG *rp,
29 $ap="%rsi"; # const BN_ULONG *ap, 33 $ap="%rsi"; # const BN_ULONG *ap,
30 $bp="%rdx"; # const BN_ULONG *bp, 34 $bp="%rdx"; # const BN_ULONG *bp,
31 $np="%rcx"; # const BN_ULONG *np, 35 $np="%rcx"; # const BN_ULONG *np,
32 $n0="%r8"; # const BN_ULONG *n0, 36 $n0="%r8"; # const BN_ULONG *n0,
33 $num="%r9"; # int num); 37 $num="%r9"; # int num);
34 $lo0="%r10"; 38 $lo0="%r10";
35 $hi0="%r11"; 39 $hi0="%r11";
(...skipping 12 matching lines...) Expand all
48 .align 16 52 .align 16
49 bn_mul_mont: 53 bn_mul_mont:
50 push %rbx 54 push %rbx
51 push %rbp 55 push %rbp
52 push %r12 56 push %r12
53 push %r13 57 push %r13
54 push %r14 58 push %r14
55 push %r15 59 push %r15
56 60
57 mov ${num}d,${num}d 61 mov ${num}d,${num}d
58 » lea» 2($num),%rax 62 » lea» 2($num),%r10
59 » mov» %rsp,%rbp 63 » mov» %rsp,%r11
60 » neg» %rax 64 » neg» %r10
61 » lea» (%rsp,%rax,8),%rsp» # tp=alloca(8*(num+2)) 65 » lea» (%rsp,%r10,8),%rsp» # tp=alloca(8*(num+2))
62 and \$-1024,%rsp # minimize TLB usage 66 and \$-1024,%rsp # minimize TLB usage
63 67
64 » mov» %rbp,8(%rsp,$num,8)» # tp[num+1]=%rsp 68 » mov» %r11,8(%rsp,$num,8)» # tp[num+1]=%rsp
69 .Lprologue:
65 mov %rdx,$bp # $bp reassigned, remember? 70 mov %rdx,$bp # $bp reassigned, remember?
66 71
67 mov ($n0),$n0 # pull n0[0] value 72 mov ($n0),$n0 # pull n0[0] value
68 73
69 xor $i,$i # i=0 74 xor $i,$i # i=0
70 xor $j,$j # j=0 75 xor $j,$j # j=0
71 76
72 mov ($bp),$m0 # m0=bp[0] 77 mov ($bp),$m0 # m0=bp[0]
73 mov ($ap),%rax 78 mov ($ap),%rax
74 mulq $m0 # ap[0]*bp[0] 79 mulq $m0 # ap[0]*bp[0]
(...skipping 115 matching lines...) Expand 10 before | Expand all | Expand 10 after
190 lea -1($num),$j 195 lea -1($num),$j
191 or $np,$ap # ap=borrow?tp:rp 196 or $np,$ap # ap=borrow?tp:rp
192 .align 16 197 .align 16
193 .Lcopy: # copy or in-place refresh 198 .Lcopy: # copy or in-place refresh
194 mov ($ap,$j,8),%rax 199 mov ($ap,$j,8),%rax
195 mov %rax,($rp,$j,8) # rp[i]=tp[i] 200 mov %rax,($rp,$j,8) # rp[i]=tp[i]
196 mov $i,(%rsp,$j,8) # zap temporary vector 201 mov $i,(%rsp,$j,8) # zap temporary vector
197 dec $j 202 dec $j
198 jge .Lcopy 203 jge .Lcopy
199 204
200 » mov» 8(%rsp,$num,8),%rsp» # restore %rsp 205 » mov» 8(%rsp,$num,8),%rsi» # %rsi=tp[num+1], the saved %rsp
201 mov \$1,%rax 206 mov \$1,%rax
207 mov (%rsi),%r15
208 mov 8(%rsi),%r14
209 mov 16(%rsi),%r13
210 mov 24(%rsi),%r12
211 mov 32(%rsi),%rbp
212 mov 40(%rsi),%rbx
213 lea 48(%rsi),%rsp
214 .Lepilogue:
215 ret
216 .size bn_mul_mont,.-bn_mul_mont
217 .asciz "Montgomery Multiplication for x86_64, CRYPTOGAMS by <appro\@openssl.org >"
218 .align 16
219 ___
220
221 # EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
222 # CONTEXT *context,DISPATCHER_CONTEXT *disp)
223 if ($win64) {
224 $rec="%rcx";
225 $frame="%rdx";
226 $context="%r8";
227 $disp="%r9";
228
229 $code.=<<___;
230 .extern __imp_RtlVirtualUnwind
231 .type se_handler,\@abi-omnipotent
232 .align 16
233 se_handler:
234 push %rsi
235 push %rdi
236 push %rbx
237 push %rbp
238 push %r12
239 push %r13
240 push %r14
241 push %r15
242 pushfq
243 sub \$64,%rsp
244
245 mov 120($context),%rax # pull context->Rax
246 mov 248($context),%rbx # pull context->Rip
247
248 lea .Lprologue(%rip),%r10
249 cmp %r10,%rbx # context->Rip<.Lprologue
250 jb .Lin_prologue
251
252 mov 152($context),%rax # pull context->Rsp
253
254 lea .Lepilogue(%rip),%r10
255 cmp %r10,%rbx # context->Rip>=.Lepilogue
256 jae .Lin_prologue
257
258 mov 192($context),%r10 # pull context->R9, i.e. $num
259 mov 8(%rax,%r10,8),%rax # pull saved stack pointer
260 lea 48(%rax),%rax
261
262 mov -8(%rax),%rbx
263 mov -16(%rax),%rbp
264 mov -24(%rax),%r12
265 mov -32(%rax),%r13
266 mov -40(%rax),%r14
267 mov -48(%rax),%r15
268 mov %rbx,144($context) # restore context->Rbx
269 mov %rbp,160($context) # restore context->Rbp
270 mov %r12,216($context) # restore context->R12
271 mov %r13,224($context) # restore context->R13
272 mov %r14,232($context) # restore context->R14
273 mov %r15,240($context) # restore context->R15
274
275 .Lin_prologue:
276 mov 8(%rax),%rdi
277 mov 16(%rax),%rsi
278 mov %rax,152($context) # restore context->Rsp
279 mov %rsi,168($context) # restore context->Rsi
280 mov %rdi,176($context) # restore context->Rdi
281
282 mov 40($disp),%rdi # disp->ContextRecord
283 mov $context,%rsi # context
284 mov \$154,%ecx # sizeof(CONTEXT) in quadwords, count for rep movsq
285 .long 0xa548f3fc # cld; rep movsq
286
287 mov $disp,%rsi
288 xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER
289 mov 8(%rsi),%rdx # arg2, disp->ImageBase
290 mov 0(%rsi),%r8 # arg3, disp->ControlPc
291 mov 16(%rsi),%r9 # arg4, disp->FunctionEntry
292 mov 40(%rsi),%r10 # disp->ContextRecord
293 lea 56(%rsi),%r11 # &disp->HandlerData
294 lea 24(%rsi),%r12 # &disp->EstablisherFrame
295 mov %r10,32(%rsp) # arg5
296 mov %r11,40(%rsp) # arg6
297 mov %r12,48(%rsp) # arg7
298 mov %rcx,56(%rsp) # arg8, (NULL)
299 call *__imp_RtlVirtualUnwind(%rip)
300
301 mov \$1,%eax # ExceptionContinueSearch
302 add \$64,%rsp
303 popfq
202 pop %r15 304 pop %r15
203 pop %r14 305 pop %r14
204 pop %r13 306 pop %r13
205 pop %r12 307 pop %r12
206 pop %rbp 308 pop %rbp
207 pop %rbx 309 pop %rbx
310 pop %rdi
311 pop %rsi
208 ret 312 ret
209 .size» bn_mul_mont,.-bn_mul_mont 313 .size» se_handler,.-se_handler
210 .asciz» "Montgomery Multiplication for x86_64, CRYPTOGAMS by <appro\@openssl.org >" 314
315 .section» .pdata
316 .align» 4
317 » .rva» .LSEH_begin_bn_mul_mont
318 » .rva» .LSEH_end_bn_mul_mont
319 » .rva» .LSEH_info_bn_mul_mont
320
321 .section» .xdata
322 .align» 8
323 .LSEH_info_bn_mul_mont:
324 » .byte» 9,0,0,0
325 » .rva» se_handler
211 ___ 326 ___
327 }
212 328
213 print $code; 329 print $code;
214 close STDOUT; 330 close STDOUT;
OLDNEW
« no previous file with comments | « openssl/crypto/bn/asm/x86_64-gcc.c ('k') | openssl/crypto/bn/bn.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698