Index: third_party/boringssl/mac-x86_64/crypto/chacha/chacha-x86_64.S |
diff --git a/third_party/boringssl/mac-x86_64/crypto/chacha/chacha-x86_64.S b/third_party/boringssl/mac-x86_64/crypto/chacha/chacha-x86_64.S |
index c3554c8d13a5ee55f6293429dd382ddcf6b0523e..256a4663f096c72d61d7abd7b95644a7a5267c8b 100644 |
--- a/third_party/boringssl/mac-x86_64/crypto/chacha/chacha-x86_64.S |
+++ b/third_party/boringssl/mac-x86_64/crypto/chacha/chacha-x86_64.S |
@@ -22,6 +22,15 @@ L$rot24: |
.byte 0x3,0x0,0x1,0x2, 0x7,0x4,0x5,0x6, 0xb,0x8,0x9,0xa, 0xf,0xc,0xd,0xe |
L$sigma: |
.byte 101,120,112,97,110,100,32,51,50,45,98,121,116,101,32,107,0 |
+.p2align 6 |
+L$zeroz: |
+.long 0,0,0,0, 1,0,0,0, 2,0,0,0, 3,0,0,0 |
+L$fourz: |
+.long 4,0,0,0, 4,0,0,0, 4,0,0,0, 4,0,0,0 |
+L$incz: |
+.long 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 |
+L$sixteen: |
+.long 16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16 |
.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 |
.globl _ChaCha20_ctr32 |
.private_extern _ChaCha20_ctr32 |
@@ -315,7 +324,7 @@ L$do_sse3_after_all: |
movdqa %xmm1,16(%rsp) |
movdqa %xmm2,32(%rsp) |
movdqa %xmm3,48(%rsp) |
- movl $10,%ebp |
+ movq $10,%r8 |
jmp L$oop_ssse3 |
.p2align 5 |
@@ -325,7 +334,7 @@ L$oop_outer_ssse3: |
movdqa 16(%rsp),%xmm1 |
movdqa 32(%rsp),%xmm2 |
paddd 48(%rsp),%xmm3 |
- movl $10,%ebp |
+ movq $10,%r8 |
movdqa %xmm3,48(%rsp) |
jmp L$oop_ssse3 |
@@ -374,7 +383,7 @@ L$oop_ssse3: |
pshufd $78,%xmm2,%xmm2 |
pshufd $147,%xmm1,%xmm1 |
pshufd $57,%xmm3,%xmm3 |
- decl %ebp |
+ decq %r8 |
jnz L$oop_ssse3 |
paddd 0(%rsp),%xmm0 |
paddd 16(%rsp),%xmm1 |
@@ -411,25 +420,19 @@ L$tail_ssse3: |
movdqa %xmm1,16(%rsp) |
movdqa %xmm2,32(%rsp) |
movdqa %xmm3,48(%rsp) |
- xorq %rbx,%rbx |
+ xorq %r8,%r8 |
L$oop_tail_ssse3: |
- movzbl (%rsi,%rbx,1),%eax |
- movzbl (%rsp,%rbx,1),%ecx |
- leaq 1(%rbx),%rbx |
+ movzbl (%rsi,%r8,1),%eax |
+ movzbl (%rsp,%r8,1),%ecx |
+ leaq 1(%r8),%r8 |
xorl %ecx,%eax |
- movb %al,-1(%rdi,%rbx,1) |
+ movb %al,-1(%rdi,%r8,1) |
decq %rdx |
jnz L$oop_tail_ssse3 |
L$done_ssse3: |
- addq $64+24,%rsp |
- popq %r15 |
- popq %r14 |
- popq %r13 |
- popq %r12 |
- popq %rbp |
- popq %rbx |
+ addq $64+24+48,%rsp |
.byte 0xf3,0xc3 |