Index: third_party/boringssl/mac-x86_64/crypto/bn/rsaz-x86_64.S |
diff --git a/third_party/boringssl/mac-x86_64/crypto/bn/rsaz-x86_64.S b/third_party/boringssl/mac-x86_64/crypto/bn/rsaz-x86_64.S |
new file mode 100644 |
index 0000000000000000000000000000000000000000..5840426fa0f7253111e0430decca1c6ca6895552 |
--- /dev/null |
+++ b/third_party/boringssl/mac-x86_64/crypto/bn/rsaz-x86_64.S |
@@ -0,0 +1,1119 @@ |
+#if defined(__x86_64__) |
+.text |
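+# RSAZ-512: 512-bit (8x64-bit limb) Montgomery multiplication, squaring
+# and reduction routines for the RSA code.  This file is checked-in
+# assembler output; the perlasm source is believed to be OpenSSL's
+# crypto/bn/asm/rsaz-x86_64.pl (by Shay Gueron and Vlad Krasnov).
+# All routines operate on little-endian arrays of eight 64-bit limbs
+# and follow the SysV AMD64 calling convention.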
+ |
+ |
+ |
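+# void rsaz_512_sqr(uint64_t out[8], const uint64_t in[8],
+#                   const uint64_t mod[8], uint64_t n0, int times);
+# (prototype reconstructed from the register usage below: %rdi = out,
+# %rsi = in, %rdx = mod, %rcx = n0 = -mod[0]^-1 mod 2^64, and %r8d =
+# the number of back-to-back Montgomery squarings to perform)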
+.globl _rsaz_512_sqr |
+ |
+.p2align 5 |
+_rsaz_512_sqr: |
+ pushq %rbx |
+ pushq %rbp |
+ pushq %r12 |
+ pushq %r13 |
+ pushq %r14 |
+ pushq %r15 |
+ |
+ subq $128+24,%rsp |
+L$sqr_body: |
+ movq %rdx,%rbp |
+ movq (%rsi),%rdx |
+ movq 8(%rsi),%rax |
+ movq %rcx,128(%rsp) |
+ jmp L$oop_sqr |
+ |
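+# One pass of the loop computes a full 512x512->1024-bit square by the
+# schoolbook method: each cross product in[i]*in[j] (i < j) is computed
+# once and doubled via the shrq/leaq carry juggling, the diagonal
+# in[i]^2 terms are added, the 16-limb result is parked at 0-120(%rsp),
+# and the low half is then Montgomery-reduced for the next iteration.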
+.p2align 5 |
+L$oop_sqr: |
+ movl %r8d,128+8(%rsp) |
+ |
+ movq %rdx,%rbx |
+ mulq %rdx |
+ movq %rax,%r8 |
+ movq 16(%rsi),%rax |
+ movq %rdx,%r9 |
+ |
+ mulq %rbx |
+ addq %rax,%r9 |
+ movq 24(%rsi),%rax |
+ movq %rdx,%r10 |
+ adcq $0,%r10 |
+ |
+ mulq %rbx |
+ addq %rax,%r10 |
+ movq 32(%rsi),%rax |
+ movq %rdx,%r11 |
+ adcq $0,%r11 |
+ |
+ mulq %rbx |
+ addq %rax,%r11 |
+ movq 40(%rsi),%rax |
+ movq %rdx,%r12 |
+ adcq $0,%r12 |
+ |
+ mulq %rbx |
+ addq %rax,%r12 |
+ movq 48(%rsi),%rax |
+ movq %rdx,%r13 |
+ adcq $0,%r13 |
+ |
+ mulq %rbx |
+ addq %rax,%r13 |
+ movq 56(%rsi),%rax |
+ movq %rdx,%r14 |
+ adcq $0,%r14 |
+ |
+ mulq %rbx |
+ addq %rax,%r14 |
+ movq %rbx,%rax |
+ movq %rdx,%r15 |
+ adcq $0,%r15 |
+ |
+ addq %r8,%r8 |
+ movq %r9,%rcx |
+ adcq %r9,%r9 |
+ |
+ mulq %rax |
+ movq %rax,(%rsp) |
+ addq %rdx,%r8 |
+ adcq $0,%r9 |
+ |
+ movq %r8,8(%rsp) |
+ shrq $63,%rcx |
+ |
+ |
+ movq 8(%rsi),%r8 |
+ movq 16(%rsi),%rax |
+ mulq %r8 |
+ addq %rax,%r10 |
+ movq 24(%rsi),%rax |
+ movq %rdx,%rbx |
+ adcq $0,%rbx |
+ |
+ mulq %r8 |
+ addq %rax,%r11 |
+ movq 32(%rsi),%rax |
+ adcq $0,%rdx |
+ addq %rbx,%r11 |
+ movq %rdx,%rbx |
+ adcq $0,%rbx |
+ |
+ mulq %r8 |
+ addq %rax,%r12 |
+ movq 40(%rsi),%rax |
+ adcq $0,%rdx |
+ addq %rbx,%r12 |
+ movq %rdx,%rbx |
+ adcq $0,%rbx |
+ |
+ mulq %r8 |
+ addq %rax,%r13 |
+ movq 48(%rsi),%rax |
+ adcq $0,%rdx |
+ addq %rbx,%r13 |
+ movq %rdx,%rbx |
+ adcq $0,%rbx |
+ |
+ mulq %r8 |
+ addq %rax,%r14 |
+ movq 56(%rsi),%rax |
+ adcq $0,%rdx |
+ addq %rbx,%r14 |
+ movq %rdx,%rbx |
+ adcq $0,%rbx |
+ |
+ mulq %r8 |
+ addq %rax,%r15 |
+ movq %r8,%rax |
+ adcq $0,%rdx |
+ addq %rbx,%r15 |
+ movq %rdx,%r8 |
+ movq %r10,%rdx |
+ adcq $0,%r8 |
+ |
+ addq %rdx,%rdx |
+ leaq (%rcx,%r10,2),%r10 |
+ movq %r11,%rbx |
+ adcq %r11,%r11 |
+ |
+ mulq %rax |
+ addq %rax,%r9 |
+ adcq %rdx,%r10 |
+ adcq $0,%r11 |
+ |
+ movq %r9,16(%rsp) |
+ movq %r10,24(%rsp) |
+ shrq $63,%rbx |
+ |
+ |
+ movq 16(%rsi),%r9 |
+ movq 24(%rsi),%rax |
+ mulq %r9 |
+ addq %rax,%r12 |
+ movq 32(%rsi),%rax |
+ movq %rdx,%rcx |
+ adcq $0,%rcx |
+ |
+ mulq %r9 |
+ addq %rax,%r13 |
+ movq 40(%rsi),%rax |
+ adcq $0,%rdx |
+ addq %rcx,%r13 |
+ movq %rdx,%rcx |
+ adcq $0,%rcx |
+ |
+ mulq %r9 |
+ addq %rax,%r14 |
+ movq 48(%rsi),%rax |
+ adcq $0,%rdx |
+ addq %rcx,%r14 |
+ movq %rdx,%rcx |
+ adcq $0,%rcx |
+ |
+ mulq %r9 |
+ movq %r12,%r10 |
+ leaq (%rbx,%r12,2),%r12 |
+ addq %rax,%r15 |
+ movq 56(%rsi),%rax |
+ adcq $0,%rdx |
+ addq %rcx,%r15 |
+ movq %rdx,%rcx |
+ adcq $0,%rcx |
+ |
+ mulq %r9 |
+ shrq $63,%r10 |
+ addq %rax,%r8 |
+ movq %r9,%rax |
+ adcq $0,%rdx |
+ addq %rcx,%r8 |
+ movq %rdx,%r9 |
+ adcq $0,%r9 |
+ |
+ movq %r13,%rcx |
+ leaq (%r10,%r13,2),%r13 |
+ |
+ mulq %rax |
+ addq %rax,%r11 |
+ adcq %rdx,%r12 |
+ adcq $0,%r13 |
+ |
+ movq %r11,32(%rsp) |
+ movq %r12,40(%rsp) |
+ shrq $63,%rcx |
+ |
+ |
+ movq 24(%rsi),%r10 |
+ movq 32(%rsi),%rax |
+ mulq %r10 |
+ addq %rax,%r14 |
+ movq 40(%rsi),%rax |
+ movq %rdx,%rbx |
+ adcq $0,%rbx |
+ |
+ mulq %r10 |
+ addq %rax,%r15 |
+ movq 48(%rsi),%rax |
+ adcq $0,%rdx |
+ addq %rbx,%r15 |
+ movq %rdx,%rbx |
+ adcq $0,%rbx |
+ |
+ mulq %r10 |
+ movq %r14,%r12 |
+ leaq (%rcx,%r14,2),%r14 |
+ addq %rax,%r8 |
+ movq 56(%rsi),%rax |
+ adcq $0,%rdx |
+ addq %rbx,%r8 |
+ movq %rdx,%rbx |
+ adcq $0,%rbx |
+ |
+ mulq %r10 |
+ shrq $63,%r12 |
+ addq %rax,%r9 |
+ movq %r10,%rax |
+ adcq $0,%rdx |
+ addq %rbx,%r9 |
+ movq %rdx,%r10 |
+ adcq $0,%r10 |
+ |
+ movq %r15,%rbx |
+ leaq (%r12,%r15,2),%r15 |
+ |
+ mulq %rax |
+ addq %rax,%r13 |
+ adcq %rdx,%r14 |
+ adcq $0,%r15 |
+ |
+ movq %r13,48(%rsp) |
+ movq %r14,56(%rsp) |
+ shrq $63,%rbx |
+ |
+ |
+ movq 32(%rsi),%r11 |
+ movq 40(%rsi),%rax |
+ mulq %r11 |
+ addq %rax,%r8 |
+ movq 48(%rsi),%rax |
+ movq %rdx,%rcx |
+ adcq $0,%rcx |
+ |
+ mulq %r11 |
+ addq %rax,%r9 |
+ movq 56(%rsi),%rax |
+ adcq $0,%rdx |
+ movq %r8,%r12 |
+ leaq (%rbx,%r8,2),%r8 |
+ addq %rcx,%r9 |
+ movq %rdx,%rcx |
+ adcq $0,%rcx |
+ |
+ mulq %r11 |
+ shrq $63,%r12 |
+ addq %rax,%r10 |
+ movq %r11,%rax |
+ adcq $0,%rdx |
+ addq %rcx,%r10 |
+ movq %rdx,%r11 |
+ adcq $0,%r11 |
+ |
+ movq %r9,%rcx |
+ leaq (%r12,%r9,2),%r9 |
+ |
+ mulq %rax |
+ addq %rax,%r15 |
+ adcq %rdx,%r8 |
+ adcq $0,%r9 |
+ |
+ movq %r15,64(%rsp) |
+ movq %r8,72(%rsp) |
+ shrq $63,%rcx |
+ |
+ |
+ movq 40(%rsi),%r12 |
+ movq 48(%rsi),%rax |
+ mulq %r12 |
+ addq %rax,%r10 |
+ movq 56(%rsi),%rax |
+ movq %rdx,%rbx |
+ adcq $0,%rbx |
+ |
+ mulq %r12 |
+ addq %rax,%r11 |
+ movq %r12,%rax |
+ movq %r10,%r15 |
+ leaq (%rcx,%r10,2),%r10 |
+ adcq $0,%rdx |
+ shrq $63,%r15 |
+ addq %rbx,%r11 |
+ movq %rdx,%r12 |
+ adcq $0,%r12 |
+ |
+ movq %r11,%rbx |
+ leaq (%r15,%r11,2),%r11 |
+ |
+ mulq %rax |
+ addq %rax,%r9 |
+ adcq %rdx,%r10 |
+ adcq $0,%r11 |
+ |
+ movq %r9,80(%rsp) |
+ movq %r10,88(%rsp) |
+ |
+ |
+ movq 48(%rsi),%r13 |
+ movq 56(%rsi),%rax |
+ mulq %r13 |
+ addq %rax,%r12 |
+ movq %r13,%rax |
+ movq %rdx,%r13 |
+ adcq $0,%r13 |
+ |
+ xorq %r14,%r14 |
+ shlq $1,%rbx |
+ adcq %r12,%r12 |
+ adcq %r13,%r13 |
+ adcq %r14,%r14 |
+ |
+ mulq %rax |
+ addq %rax,%r11 |
+ adcq %rdx,%r12 |
+ adcq $0,%r13 |
+ |
+ movq %r11,96(%rsp) |
+ movq %r12,104(%rsp) |
+ |
+ |
+ movq 56(%rsi),%rax |
+ mulq %rax |
+ addq %rax,%r13 |
+ adcq $0,%rdx |
+ |
+ addq %rdx,%r14 |
+ |
+ movq %r13,112(%rsp) |
+ movq %r14,120(%rsp) |
+ |
+ movq (%rsp),%r8 |
+ movq 8(%rsp),%r9 |
+ movq 16(%rsp),%r10 |
+ movq 24(%rsp),%r11 |
+ movq 32(%rsp),%r12 |
+ movq 40(%rsp),%r13 |
+ movq 48(%rsp),%r14 |
+ movq 56(%rsp),%r15 |
+ |
+ call __rsaz_512_reduce |
+ |
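+# Fold the high half of the square into the reduced low half.  The
+# sbbq materializes the carry out of this addition as a 0/-1 mask in
+# %rcx; __rsaz_512_subtract adds (mask & -mod), i.e. subtracts mod
+# exactly when the sum overflowed 2^512.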
+ addq 64(%rsp),%r8 |
+ adcq 72(%rsp),%r9 |
+ adcq 80(%rsp),%r10 |
+ adcq 88(%rsp),%r11 |
+ adcq 96(%rsp),%r12 |
+ adcq 104(%rsp),%r13 |
+ adcq 112(%rsp),%r14 |
+ adcq 120(%rsp),%r15 |
+ sbbq %rcx,%rcx |
+ |
+ call __rsaz_512_subtract |
+ |
+ movq %r8,%rdx |
+ movq %r9,%rax |
+ movl 128+8(%rsp),%r8d |
+ movq %rdi,%rsi |
+ |
+ decl %r8d |
+ jnz L$oop_sqr |
+ |
+ leaq 128+24+48(%rsp),%rax |
+ movq -48(%rax),%r15 |
+ movq -40(%rax),%r14 |
+ movq -32(%rax),%r13 |
+ movq -24(%rax),%r12 |
+ movq -16(%rax),%rbp |
+ movq -8(%rax),%rbx |
+ leaq (%rax),%rsp |
+L$sqr_epilogue: |
+ .byte 0xf3,0xc3 # repz retq (perlasm's two-byte return)
+ |
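+# void rsaz_512_mul(uint64_t out[8], const uint64_t a[8],
+#                   const uint64_t b[8], const uint64_t mod[8],
+#                   uint64_t n0);
+# (reconstructed from the code: out and mod are parked in %xmm0/%xmm1
+# across the multiply because __rsaz_512_mul clobbers %rdi and %rbp)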
+.globl _rsaz_512_mul |
+ |
+.p2align 5 |
+_rsaz_512_mul: |
+ pushq %rbx |
+ pushq %rbp |
+ pushq %r12 |
+ pushq %r13 |
+ pushq %r14 |
+ pushq %r15 |
+ |
+ subq $128+24,%rsp |
+L$mul_body: |
+.byte 102,72,15,110,199 # movq %rdi,%xmm0 (stash out; encoded as .byte for old assemblers)
+.byte 102,72,15,110,201 # movq %rcx,%xmm1 (stash mod)
+ movq %r8,128(%rsp) |
+ movq (%rdx),%rbx |
+ movq %rdx,%rbp |
+ call __rsaz_512_mul |
+ |
+.byte 102,72,15,126,199 # movq %xmm0,%rdi (restore out)
+.byte 102,72,15,126,205 # movq %xmm1,%rbp (restore mod for __rsaz_512_reduce)
+ |
+ movq (%rsp),%r8 |
+ movq 8(%rsp),%r9 |
+ movq 16(%rsp),%r10 |
+ movq 24(%rsp),%r11 |
+ movq 32(%rsp),%r12 |
+ movq 40(%rsp),%r13 |
+ movq 48(%rsp),%r14 |
+ movq 56(%rsp),%r15 |
+ |
+ call __rsaz_512_reduce |
+ addq 64(%rsp),%r8 |
+ adcq 72(%rsp),%r9 |
+ adcq 80(%rsp),%r10 |
+ adcq 88(%rsp),%r11 |
+ adcq 96(%rsp),%r12 |
+ adcq 104(%rsp),%r13 |
+ adcq 112(%rsp),%r14 |
+ adcq 120(%rsp),%r15 |
+ sbbq %rcx,%rcx |
+ |
+ call __rsaz_512_subtract |
+ |
+ leaq 128+24+48(%rsp),%rax |
+ movq -48(%rax),%r15 |
+ movq -40(%rax),%r14 |
+ movq -32(%rax),%r13 |
+ movq -24(%rax),%r12 |
+ movq -16(%rax),%rbp |
+ movq -8(%rax),%rbx |
+ leaq (%rax),%rsp |
+L$mul_epilogue: |
+ .byte 0xf3,0xc3 # repz retq
+ |
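+# void rsaz_512_mul_gather4(uint64_t out[8], const uint64_t a[8],
+#                           const uint64_t tbl[], const uint64_t mod[8],
+#                           uint64_t n0, int power);
+# Multiplies a by table entry `power` (layout as written by
+# rsaz_512_scatter4 below: 32-bit halves, 128-byte limb stride) and
+# Montgomery-reduces.  Note the loads are indexed directly by `power`;
+# later upstream revisions switched to a masked, constant-time gather.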
+.globl _rsaz_512_mul_gather4 |
+ |
+.p2align 5 |
+_rsaz_512_mul_gather4: |
+ pushq %rbx |
+ pushq %rbp |
+ pushq %r12 |
+ pushq %r13 |
+ pushq %r14 |
+ pushq %r15 |
+ |
+ movl %r9d,%r9d |
+ subq $128+24,%rsp |
+L$mul_gather4_body: |
+ movl 64(%rdx,%r9,4),%eax |
+.byte 102,72,15,110,199 # movq %rdi,%xmm0 (stash out)
+ movl (%rdx,%r9,4),%ebx |
+.byte 102,72,15,110,201 # movq %rcx,%xmm1 (stash mod)
+ movq %r8,128(%rsp) |
+ |
+ shlq $32,%rax |
+ orq %rax,%rbx |
+ movq (%rsi),%rax |
+ movq 8(%rsi),%rcx |
+ leaq 128(%rdx,%r9,4),%rbp |
+ mulq %rbx |
+ movq %rax,(%rsp) |
+ movq %rcx,%rax |
+ movq %rdx,%r8 |
+ |
+ mulq %rbx |
+ movd (%rbp),%xmm4 |
+ addq %rax,%r8 |
+ movq 16(%rsi),%rax |
+ movq %rdx,%r9 |
+ adcq $0,%r9 |
+ |
+ mulq %rbx |
+ movd 64(%rbp),%xmm5 |
+ addq %rax,%r9 |
+ movq 24(%rsi),%rax |
+ movq %rdx,%r10 |
+ adcq $0,%r10 |
+ |
+ mulq %rbx |
+ pslldq $4,%xmm5 |
+ addq %rax,%r10 |
+ movq 32(%rsi),%rax |
+ movq %rdx,%r11 |
+ adcq $0,%r11 |
+ |
+ mulq %rbx |
+ por %xmm5,%xmm4 |
+ addq %rax,%r11 |
+ movq 40(%rsi),%rax |
+ movq %rdx,%r12 |
+ adcq $0,%r12 |
+ |
+ mulq %rbx |
+ addq %rax,%r12 |
+ movq 48(%rsi),%rax |
+ movq %rdx,%r13 |
+ adcq $0,%r13 |
+ |
+ mulq %rbx |
+ leaq 128(%rbp),%rbp |
+ addq %rax,%r13 |
+ movq 56(%rsi),%rax |
+ movq %rdx,%r14 |
+ adcq $0,%r14 |
+ |
+ mulq %rbx |
+.byte 102,72,15,126,227 # movq %xmm4,%rbx (gathered limb of b for the next pass)
+ addq %rax,%r14 |
+ movq (%rsi),%rax |
+ movq %rdx,%r15 |
+ adcq $0,%r15 |
+ |
+ leaq 8(%rsp),%rdi |
+ movl $7,%ecx |
+ jmp L$oop_mul_gather |
+ |
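+# Seven more passes: reassemble the next 64-bit limb of b from its two
+# 32-bit halves (%xmm4/%xmm5, handed over in %rbx), multiply a by it,
+# accumulate one limb lower, and stream finished low limbs out through
+# %rdi into the scratch area.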
+.p2align 5 |
+L$oop_mul_gather: |
+ mulq %rbx |
+ addq %rax,%r8 |
+ movq 8(%rsi),%rax |
+ movq %r8,(%rdi) |
+ movq %rdx,%r8 |
+ adcq $0,%r8 |
+ |
+ mulq %rbx |
+ movd (%rbp),%xmm4 |
+ addq %rax,%r9 |
+ movq 16(%rsi),%rax |
+ adcq $0,%rdx |
+ addq %r9,%r8 |
+ movq %rdx,%r9 |
+ adcq $0,%r9 |
+ |
+ mulq %rbx |
+ movd 64(%rbp),%xmm5 |
+ addq %rax,%r10 |
+ movq 24(%rsi),%rax |
+ adcq $0,%rdx |
+ addq %r10,%r9 |
+ movq %rdx,%r10 |
+ adcq $0,%r10 |
+ |
+ mulq %rbx |
+ pslldq $4,%xmm5 |
+ addq %rax,%r11 |
+ movq 32(%rsi),%rax |
+ adcq $0,%rdx |
+ addq %r11,%r10 |
+ movq %rdx,%r11 |
+ adcq $0,%r11 |
+ |
+ mulq %rbx |
+ por %xmm5,%xmm4 |
+ addq %rax,%r12 |
+ movq 40(%rsi),%rax |
+ adcq $0,%rdx |
+ addq %r12,%r11 |
+ movq %rdx,%r12 |
+ adcq $0,%r12 |
+ |
+ mulq %rbx |
+ addq %rax,%r13 |
+ movq 48(%rsi),%rax |
+ adcq $0,%rdx |
+ addq %r13,%r12 |
+ movq %rdx,%r13 |
+ adcq $0,%r13 |
+ |
+ mulq %rbx |
+ addq %rax,%r14 |
+ movq 56(%rsi),%rax |
+ adcq $0,%rdx |
+ addq %r14,%r13 |
+ movq %rdx,%r14 |
+ adcq $0,%r14 |
+ |
+ mulq %rbx |
+.byte 102,72,15,126,227 # movq %xmm4,%rbx (next gathered limb of b)
+ addq %rax,%r15 |
+ movq (%rsi),%rax |
+ adcq $0,%rdx |
+ addq %r15,%r14 |
+ movq %rdx,%r15 |
+ adcq $0,%r15 |
+ |
+ leaq 128(%rbp),%rbp |
+ leaq 8(%rdi),%rdi |
+ |
+ decl %ecx |
+ jnz L$oop_mul_gather |
+ |
+ movq %r8,(%rdi) |
+ movq %r9,8(%rdi) |
+ movq %r10,16(%rdi) |
+ movq %r11,24(%rdi) |
+ movq %r12,32(%rdi) |
+ movq %r13,40(%rdi) |
+ movq %r14,48(%rdi) |
+ movq %r15,56(%rdi) |
+ |
+.byte 102,72,15,126,199 # movq %xmm0,%rdi (restore out)
+.byte 102,72,15,126,205 # movq %xmm1,%rbp (restore mod)
+ |
+ movq (%rsp),%r8 |
+ movq 8(%rsp),%r9 |
+ movq 16(%rsp),%r10 |
+ movq 24(%rsp),%r11 |
+ movq 32(%rsp),%r12 |
+ movq 40(%rsp),%r13 |
+ movq 48(%rsp),%r14 |
+ movq 56(%rsp),%r15 |
+ |
+ call __rsaz_512_reduce |
+ addq 64(%rsp),%r8 |
+ adcq 72(%rsp),%r9 |
+ adcq 80(%rsp),%r10 |
+ adcq 88(%rsp),%r11 |
+ adcq 96(%rsp),%r12 |
+ adcq 104(%rsp),%r13 |
+ adcq 112(%rsp),%r14 |
+ adcq 120(%rsp),%r15 |
+ sbbq %rcx,%rcx |
+ |
+ call __rsaz_512_subtract |
+ |
+ leaq 128+24+48(%rsp),%rax |
+ movq -48(%rax),%r15 |
+ movq -40(%rax),%r14 |
+ movq -32(%rax),%r13 |
+ movq -24(%rax),%r12 |
+ movq -16(%rax),%rbp |
+ movq -8(%rax),%rbx |
+ leaq (%rax),%rsp |
+L$mul_gather4_epilogue: |
+ .byte 0xf3,0xc3 # repz retq
+ |
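+# void rsaz_512_mul_scatter4(uint64_t out[8], const uint64_t a[8],
+#                            const uint64_t mod[8], uint64_t n0,
+#                            uint64_t tbl[], int power);
+# Computes out = a * out (Montgomery), storing the result both to out
+# and, scattered, into table slot `power` (pointer kept in %xmm2).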
+.globl _rsaz_512_mul_scatter4 |
+ |
+.p2align 5 |
+_rsaz_512_mul_scatter4: |
+ pushq %rbx |
+ pushq %rbp |
+ pushq %r12 |
+ pushq %r13 |
+ pushq %r14 |
+ pushq %r15 |
+ |
+ movl %r9d,%r9d |
+ subq $128+24,%rsp |
+L$mul_scatter4_body: |
+ leaq (%r8,%r9,4),%r8 |
+.byte 102,72,15,110,199 # movq %rdi,%xmm0 (stash out)
+.byte 102,72,15,110,202 # movq %rdx,%xmm1 (stash mod)
+.byte 102,73,15,110,208 # movq %r8,%xmm2 (stash tbl + power*4)
+ movq %rcx,128(%rsp) |
+ |
+ movq %rdi,%rbp |
+ movq (%rdi),%rbx |
+ call __rsaz_512_mul |
+ |
+.byte 102,72,15,126,199 # movq %xmm0,%rdi (restore out)
+.byte 102,72,15,126,205 # movq %xmm1,%rbp (restore mod)
+ |
+ movq (%rsp),%r8 |
+ movq 8(%rsp),%r9 |
+ movq 16(%rsp),%r10 |
+ movq 24(%rsp),%r11 |
+ movq 32(%rsp),%r12 |
+ movq 40(%rsp),%r13 |
+ movq 48(%rsp),%r14 |
+ movq 56(%rsp),%r15 |
+ |
+ call __rsaz_512_reduce |
+ addq 64(%rsp),%r8 |
+ adcq 72(%rsp),%r9 |
+ adcq 80(%rsp),%r10 |
+ adcq 88(%rsp),%r11 |
+ adcq 96(%rsp),%r12 |
+ adcq 104(%rsp),%r13 |
+ adcq 112(%rsp),%r14 |
+ adcq 120(%rsp),%r15 |
+.byte 102,72,15,126,214 # movq %xmm2,%rsi (table slot pointer for the scatter below)
+ sbbq %rcx,%rcx |
+ |
+ call __rsaz_512_subtract |
+ |
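+# Scatter the 8-limb result into the table: low/high 32-bit halves go
+# 64 bytes apart, with a 128-byte stride between consecutive limbs
+# (%rsi already points at slot `power`).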
+ movl %r8d,0(%rsi) |
+ shrq $32,%r8 |
+ movl %r9d,128(%rsi) |
+ shrq $32,%r9 |
+ movl %r10d,256(%rsi) |
+ shrq $32,%r10 |
+ movl %r11d,384(%rsi) |
+ shrq $32,%r11 |
+ movl %r12d,512(%rsi) |
+ shrq $32,%r12 |
+ movl %r13d,640(%rsi) |
+ shrq $32,%r13 |
+ movl %r14d,768(%rsi) |
+ shrq $32,%r14 |
+ movl %r15d,896(%rsi) |
+ shrq $32,%r15 |
+ movl %r8d,64(%rsi) |
+ movl %r9d,192(%rsi) |
+ movl %r10d,320(%rsi) |
+ movl %r11d,448(%rsi) |
+ movl %r12d,576(%rsi) |
+ movl %r13d,704(%rsi) |
+ movl %r14d,832(%rsi) |
+ movl %r15d,960(%rsi) |
+ |
+ leaq 128+24+48(%rsp),%rax |
+ movq -48(%rax),%r15 |
+ movq -40(%rax),%r14 |
+ movq -32(%rax),%r13 |
+ movq -24(%rax),%r12 |
+ movq -16(%rax),%rbp |
+ movq -8(%rax),%rbx |
+ leaq (%rax),%rsp |
+L$mul_scatter4_epilogue: |
+ .byte 0xf3,0xc3 # repz retq
+ |
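+# void rsaz_512_mul_by_one(uint64_t out[8], const uint64_t in[8],
+#                          const uint64_t mod[8], uint64_t n0);
+# Converts in out of Montgomery form: out = in * R^-1 mod mod, with
+# R = 2^512, i.e. a Montgomery reduction of in with a zero high half.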
+.globl _rsaz_512_mul_by_one |
+ |
+.p2align 5 |
+_rsaz_512_mul_by_one: |
+ pushq %rbx |
+ pushq %rbp |
+ pushq %r12 |
+ pushq %r13 |
+ pushq %r14 |
+ pushq %r15 |
+ |
+ subq $128+24,%rsp |
+L$mul_by_one_body: |
+ movq %rdx,%rbp |
+ movq %rcx,128(%rsp) |
+ |
+ movq (%rsi),%r8 |
+ pxor %xmm0,%xmm0 |
+ movq 8(%rsi),%r9 |
+ movq 16(%rsi),%r10 |
+ movq 24(%rsi),%r11 |
+ movq 32(%rsi),%r12 |
+ movq 40(%rsi),%r13 |
+ movq 48(%rsi),%r14 |
+ movq 56(%rsi),%r15 |
+ |
+ movdqa %xmm0,(%rsp) |
+ movdqa %xmm0,16(%rsp) |
+ movdqa %xmm0,32(%rsp) |
+ movdqa %xmm0,48(%rsp) |
+ movdqa %xmm0,64(%rsp) |
+ movdqa %xmm0,80(%rsp) |
+ movdqa %xmm0,96(%rsp) |
+ call __rsaz_512_reduce |
+ movq %r8,(%rdi) |
+ movq %r9,8(%rdi) |
+ movq %r10,16(%rdi) |
+ movq %r11,24(%rdi) |
+ movq %r12,32(%rdi) |
+ movq %r13,40(%rdi) |
+ movq %r14,48(%rdi) |
+ movq %r15,56(%rdi) |
+ |
+ leaq 128+24+48(%rsp),%rax |
+ movq -48(%rax),%r15 |
+ movq -40(%rax),%r14 |
+ movq -32(%rax),%r13 |
+ movq -24(%rax),%r12 |
+ movq -16(%rax),%rbp |
+ movq -8(%rax),%rbx |
+ leaq (%rax),%rsp |
+L$mul_by_one_epilogue: |
+ .byte 0xf3,0xc3 # repz retq
+ |
+ |
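+# Montgomery reduction of a 16-limb value whose low half arrives in
+# %r8-%r15 (adding the high half is the caller's job).  Eight passes
+# of: m = limb0 * n0; acc = (acc + m * mod) >> 64.  Expects %rbp = mod
+# and n0 at the caller's 128(%rsp), addressed as 128+8(%rsp) here
+# because `call` pushed a return address.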
+.p2align 5 |
+__rsaz_512_reduce: |
+ movq %r8,%rbx |
+ imulq 128+8(%rsp),%rbx |
+ movq 0(%rbp),%rax |
+ movl $8,%ecx |
+ jmp L$reduction_loop |
+ |
+.p2align 5 |
+L$reduction_loop: |
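+# %rbx = m, so m*mod[0] + %r8 == 0 (mod 2^64): only the carry out of
+# that sum matters, and negq sets CF exactly when %r8 != 0, which is
+# precisely when the discarded low-limb sum would have carried.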
+ mulq %rbx |
+ movq 8(%rbp),%rax |
+ negq %r8 |
+ movq %rdx,%r8 |
+ adcq $0,%r8 |
+ |
+ mulq %rbx |
+ addq %rax,%r9 |
+ movq 16(%rbp),%rax |
+ adcq $0,%rdx |
+ addq %r9,%r8 |
+ movq %rdx,%r9 |
+ adcq $0,%r9 |
+ |
+ mulq %rbx |
+ addq %rax,%r10 |
+ movq 24(%rbp),%rax |
+ adcq $0,%rdx |
+ addq %r10,%r9 |
+ movq %rdx,%r10 |
+ adcq $0,%r10 |
+ |
+ mulq %rbx |
+ addq %rax,%r11 |
+ movq 32(%rbp),%rax |
+ adcq $0,%rdx |
+ addq %r11,%r10 |
+ movq 128+8(%rsp),%rsi |
+ |
+ |
+ adcq $0,%rdx |
+ movq %rdx,%r11 |
+ |
+ mulq %rbx |
+ addq %rax,%r12 |
+ movq 40(%rbp),%rax |
+ adcq $0,%rdx |
+ imulq %r8,%rsi |
+ addq %r12,%r11 |
+ movq %rdx,%r12 |
+ adcq $0,%r12 |
+ |
+ mulq %rbx |
+ addq %rax,%r13 |
+ movq 48(%rbp),%rax |
+ adcq $0,%rdx |
+ addq %r13,%r12 |
+ movq %rdx,%r13 |
+ adcq $0,%r13 |
+ |
+ mulq %rbx |
+ addq %rax,%r14 |
+ movq 56(%rbp),%rax |
+ adcq $0,%rdx |
+ addq %r14,%r13 |
+ movq %rdx,%r14 |
+ adcq $0,%r14 |
+ |
+ mulq %rbx |
+ movq %rsi,%rbx |
+ addq %rax,%r15 |
+ movq 0(%rbp),%rax |
+ adcq $0,%rdx |
+ addq %r15,%r14 |
+ movq %rdx,%r15 |
+ adcq $0,%r15 |
+ |
+ decl %ecx |
+ jne L$reduction_loop |
+ |
+ .byte 0xf3,0xc3 # repz retq
+ |
+ |
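+# Conditional final subtraction.  On entry %rcx is an all-zero/all-one
+# mask from the caller's sbbq, %rbp = mod, and %r8-%r15 hold the
+# candidate result.  The candidate is stored to (%rdi), then
+# (mask & -mod) is added: negq/notq build the two's complement of mod
+# limb by limb (mod[0] is odd for an RSA modulus, so no carry escapes
+# the first limb), subtracting mod exactly when the mask is set.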
+.p2align 5 |
+__rsaz_512_subtract: |
+ movq %r8,(%rdi) |
+ movq %r9,8(%rdi) |
+ movq %r10,16(%rdi) |
+ movq %r11,24(%rdi) |
+ movq %r12,32(%rdi) |
+ movq %r13,40(%rdi) |
+ movq %r14,48(%rdi) |
+ movq %r15,56(%rdi) |
+ |
+ movq 0(%rbp),%r8 |
+ movq 8(%rbp),%r9 |
+ negq %r8 |
+ notq %r9 |
+ andq %rcx,%r8 |
+ movq 16(%rbp),%r10 |
+ andq %rcx,%r9 |
+ notq %r10 |
+ movq 24(%rbp),%r11 |
+ andq %rcx,%r10 |
+ notq %r11 |
+ movq 32(%rbp),%r12 |
+ andq %rcx,%r11 |
+ notq %r12 |
+ movq 40(%rbp),%r13 |
+ andq %rcx,%r12 |
+ notq %r13 |
+ movq 48(%rbp),%r14 |
+ andq %rcx,%r13 |
+ notq %r14 |
+ movq 56(%rbp),%r15 |
+ andq %rcx,%r14 |
+ notq %r15 |
+ andq %rcx,%r15 |
+ |
+ addq (%rdi),%r8 |
+ adcq 8(%rdi),%r9 |
+ adcq 16(%rdi),%r10 |
+ adcq 24(%rdi),%r11 |
+ adcq 32(%rdi),%r12 |
+ adcq 40(%rdi),%r13 |
+ adcq 48(%rdi),%r14 |
+ adcq 56(%rdi),%r15 |
+ |
+ movq %r8,(%rdi) |
+ movq %r9,8(%rdi) |
+ movq %r10,16(%rdi) |
+ movq %r11,24(%rdi) |
+ movq %r12,32(%rdi) |
+ movq %r13,40(%rdi) |
+ movq %r14,48(%rdi) |
+ movq %r15,56(%rdi) |
+ |
+ .byte 0xf3,0xc3 # repz retq
+ |
+ |
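+# Plain schoolbook 512x512->1024-bit multiply: %rsi = a, %rbx holds the
+# first limb of b on entry, and %rbp walks the remaining limbs of b.
+# The 16-limb product lands at the caller's 0-120(%rsp) (8(%rsp) from
+# here, offset by the pushed return address).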
+.p2align 5 |
+__rsaz_512_mul: |
+ leaq 8(%rsp),%rdi |
+ |
+ movq (%rsi),%rax |
+ mulq %rbx |
+ movq %rax,(%rdi) |
+ movq 8(%rsi),%rax |
+ movq %rdx,%r8 |
+ |
+ mulq %rbx |
+ addq %rax,%r8 |
+ movq 16(%rsi),%rax |
+ movq %rdx,%r9 |
+ adcq $0,%r9 |
+ |
+ mulq %rbx |
+ addq %rax,%r9 |
+ movq 24(%rsi),%rax |
+ movq %rdx,%r10 |
+ adcq $0,%r10 |
+ |
+ mulq %rbx |
+ addq %rax,%r10 |
+ movq 32(%rsi),%rax |
+ movq %rdx,%r11 |
+ adcq $0,%r11 |
+ |
+ mulq %rbx |
+ addq %rax,%r11 |
+ movq 40(%rsi),%rax |
+ movq %rdx,%r12 |
+ adcq $0,%r12 |
+ |
+ mulq %rbx |
+ addq %rax,%r12 |
+ movq 48(%rsi),%rax |
+ movq %rdx,%r13 |
+ adcq $0,%r13 |
+ |
+ mulq %rbx |
+ addq %rax,%r13 |
+ movq 56(%rsi),%rax |
+ movq %rdx,%r14 |
+ adcq $0,%r14 |
+ |
+ mulq %rbx |
+ addq %rax,%r14 |
+ movq (%rsi),%rax |
+ movq %rdx,%r15 |
+ adcq $0,%r15 |
+ |
+ leaq 8(%rbp),%rbp |
+ leaq 8(%rdi),%rdi |
+ |
+ movl $7,%ecx |
+ jmp L$oop_mul |
+ |
+.p2align 5 |
+L$oop_mul: |
+ movq (%rbp),%rbx |
+ mulq %rbx |
+ addq %rax,%r8 |
+ movq 8(%rsi),%rax |
+ movq %r8,(%rdi) |
+ movq %rdx,%r8 |
+ adcq $0,%r8 |
+ |
+ mulq %rbx |
+ addq %rax,%r9 |
+ movq 16(%rsi),%rax |
+ adcq $0,%rdx |
+ addq %r9,%r8 |
+ movq %rdx,%r9 |
+ adcq $0,%r9 |
+ |
+ mulq %rbx |
+ addq %rax,%r10 |
+ movq 24(%rsi),%rax |
+ adcq $0,%rdx |
+ addq %r10,%r9 |
+ movq %rdx,%r10 |
+ adcq $0,%r10 |
+ |
+ mulq %rbx |
+ addq %rax,%r11 |
+ movq 32(%rsi),%rax |
+ adcq $0,%rdx |
+ addq %r11,%r10 |
+ movq %rdx,%r11 |
+ adcq $0,%r11 |
+ |
+ mulq %rbx |
+ addq %rax,%r12 |
+ movq 40(%rsi),%rax |
+ adcq $0,%rdx |
+ addq %r12,%r11 |
+ movq %rdx,%r12 |
+ adcq $0,%r12 |
+ |
+ mulq %rbx |
+ addq %rax,%r13 |
+ movq 48(%rsi),%rax |
+ adcq $0,%rdx |
+ addq %r13,%r12 |
+ movq %rdx,%r13 |
+ adcq $0,%r13 |
+ |
+ mulq %rbx |
+ addq %rax,%r14 |
+ movq 56(%rsi),%rax |
+ adcq $0,%rdx |
+ addq %r14,%r13 |
+ movq %rdx,%r14 |
+ leaq 8(%rbp),%rbp |
+ adcq $0,%r14 |
+ |
+ mulq %rbx |
+ addq %rax,%r15 |
+ movq (%rsi),%rax |
+ adcq $0,%rdx |
+ addq %r15,%r14 |
+ movq %rdx,%r15 |
+ adcq $0,%r15 |
+ |
+ leaq 8(%rdi),%rdi |
+ |
+ decl %ecx |
+ jnz L$oop_mul |
+ |
+ movq %r8,(%rdi) |
+ movq %r9,8(%rdi) |
+ movq %r10,16(%rdi) |
+ movq %r11,24(%rdi) |
+ movq %r12,32(%rdi) |
+ movq %r13,40(%rdi) |
+ movq %r14,48(%rdi) |
+ movq %r15,56(%rdi) |
+ |
+ .byte 0xf3,0xc3 # repz retq
+ |
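+# void rsaz_512_scatter4(uint64_t tbl[], const uint64_t val[8],
+#                        int power);
+# Stores val into slot `power` of a 16-slot table as 32-bit halves:
+# limb i of slot k goes to byte offsets 4*k + 128*i (low half) and
+# 4*k + 128*i + 64 (high half), interleaving all 16 slots.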
+.globl _rsaz_512_scatter4 |
+ |
+.p2align 4 |
+_rsaz_512_scatter4: |
+ leaq (%rdi,%rdx,4),%rdi |
+ movl $8,%r9d |
+ jmp L$oop_scatter |
+.p2align 4 |
+L$oop_scatter: |
+ movq (%rsi),%rax |
+ leaq 8(%rsi),%rsi |
+ movl %eax,(%rdi) |
+ shrq $32,%rax |
+ movl %eax,64(%rdi) |
+ leaq 128(%rdi),%rdi |
+ decl %r9d |
+ jnz L$oop_scatter |
+ .byte 0xf3,0xc3 # repz retq
+ |
+ |
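+# void rsaz_512_gather4(uint64_t out[8], const uint64_t tbl[],
+#                       int power);
+# Inverse of rsaz_512_scatter4: reassembles the eight limbs of slot
+# `power` from their 32-bit halves.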
+.globl _rsaz_512_gather4 |
+ |
+.p2align 4 |
+_rsaz_512_gather4: |
+ leaq (%rsi,%rdx,4),%rsi |
+ movl $8,%r9d |
+ jmp L$oop_gather |
+.p2align 4 |
+L$oop_gather: |
+ movl (%rsi),%eax |
+ movl 64(%rsi),%r8d |
+ leaq 128(%rsi),%rsi |
+ shlq $32,%r8 |
+ orq %r8,%rax |
+ movq %rax,(%rdi) |
+ leaq 8(%rdi),%rdi |
+ decl %r9d |
+ jnz L$oop_gather |
+ .byte 0xf3,0xc3 # repz retq
+ |
+#endif |