| Index: third_party/boringssl/mac-x86/crypto/aes/aesni-x86.S
|
| diff --git a/third_party/boringssl/mac-x86/crypto/aes/aesni-x86.S b/third_party/boringssl/mac-x86/crypto/aes/aesni-x86.S
|
| index 07719ba7ae8f6fc5b8e8ab41ab143ddb131ede4a..9000478d20e1a132fcb8f1b96c9cef9d627b22c1 100644
|
| --- a/third_party/boringssl/mac-x86/crypto/aes/aesni-x86.S
|
| +++ b/third_party/boringssl/mac-x86/crypto/aes/aesni-x86.S
|
| @@ -22,10 +22,7 @@ L000enc1_loop_1:
|
| leal 16(%edx),%edx
|
| jnz L000enc1_loop_1
|
| .byte 102,15,56,221,209
|
| - pxor %xmm0,%xmm0
|
| - pxor %xmm1,%xmm1
|
| movups %xmm2,(%eax)
|
| - pxor %xmm2,%xmm2
|
| ret
|
| .globl _aesni_decrypt
|
| .private_extern _aesni_decrypt
|
| @@ -48,10 +45,7 @@ L001dec1_loop_2:
|
| leal 16(%edx),%edx
|
| jnz L001dec1_loop_2
|
| .byte 102,15,56,223,209
|
| - pxor %xmm0,%xmm0
|
| - pxor %xmm1,%xmm1
|
| movups %xmm2,(%eax)
|
| - pxor %xmm2,%xmm2
|
| ret
|
| .private_extern __aesni_encrypt2
|
| .align 4
|
| @@ -258,15 +252,17 @@ __aesni_encrypt6:
|
| negl %ecx
|
| .byte 102,15,56,220,225
|
| pxor %xmm0,%xmm7
|
| - movups (%edx,%ecx,1),%xmm0
|
| addl $16,%ecx
|
| - jmp L008_aesni_encrypt6_inner
|
| +.byte 102,15,56,220,233
|
| +.byte 102,15,56,220,241
|
| +.byte 102,15,56,220,249
|
| + movups -16(%edx,%ecx,1),%xmm0
|
| + jmp L_aesni_encrypt6_enter
|
| .align 4,0x90
|
| -L009enc6_loop:
|
| +L008enc6_loop:
|
| .byte 102,15,56,220,209
|
| .byte 102,15,56,220,217
|
| .byte 102,15,56,220,225
|
| -L008_aesni_encrypt6_inner:
|
| .byte 102,15,56,220,233
|
| .byte 102,15,56,220,241
|
| .byte 102,15,56,220,249
|
| @@ -280,7 +276,7 @@ L_aesni_encrypt6_enter:
|
| .byte 102,15,56,220,240
|
| .byte 102,15,56,220,248
|
| movups -16(%edx,%ecx,1),%xmm0
|
| - jnz L009enc6_loop
|
| + jnz L008enc6_loop
|
| .byte 102,15,56,220,209
|
| .byte 102,15,56,220,217
|
| .byte 102,15,56,220,225
|
| @@ -311,15 +307,17 @@ __aesni_decrypt6:
|
| negl %ecx
|
| .byte 102,15,56,222,225
|
| pxor %xmm0,%xmm7
|
| - movups (%edx,%ecx,1),%xmm0
|
| addl $16,%ecx
|
| - jmp L010_aesni_decrypt6_inner
|
| +.byte 102,15,56,222,233
|
| +.byte 102,15,56,222,241
|
| +.byte 102,15,56,222,249
|
| + movups -16(%edx,%ecx,1),%xmm0
|
| + jmp L_aesni_decrypt6_enter
|
| .align 4,0x90
|
| -L011dec6_loop:
|
| +L009dec6_loop:
|
| .byte 102,15,56,222,209
|
| .byte 102,15,56,222,217
|
| .byte 102,15,56,222,225
|
| -L010_aesni_decrypt6_inner:
|
| .byte 102,15,56,222,233
|
| .byte 102,15,56,222,241
|
| .byte 102,15,56,222,249
|
| @@ -333,7 +331,7 @@ L_aesni_decrypt6_enter:
|
| .byte 102,15,56,222,240
|
| .byte 102,15,56,222,248
|
| movups -16(%edx,%ecx,1),%xmm0
|
| - jnz L011dec6_loop
|
| + jnz L009dec6_loop
|
| .byte 102,15,56,222,209
|
| .byte 102,15,56,222,217
|
| .byte 102,15,56,222,225
|
| @@ -362,14 +360,14 @@ L_aesni_ecb_encrypt_begin:
|
| movl 32(%esp),%edx
|
| movl 36(%esp),%ebx
|
| andl $-16,%eax
|
| - jz L012ecb_ret
|
| + jz L010ecb_ret
|
| movl 240(%edx),%ecx
|
| testl %ebx,%ebx
|
| - jz L013ecb_decrypt
|
| + jz L011ecb_decrypt
|
| movl %edx,%ebp
|
| movl %ecx,%ebx
|
| cmpl $96,%eax
|
| - jb L014ecb_enc_tail
|
| + jb L012ecb_enc_tail
|
| movdqu (%esi),%xmm2
|
| movdqu 16(%esi),%xmm3
|
| movdqu 32(%esi),%xmm4
|
| @@ -378,9 +376,9 @@ L_aesni_ecb_encrypt_begin:
|
| movdqu 80(%esi),%xmm7
|
| leal 96(%esi),%esi
|
| subl $96,%eax
|
| - jmp L015ecb_enc_loop6_enter
|
| + jmp L013ecb_enc_loop6_enter
|
| .align 4,0x90
|
| -L016ecb_enc_loop6:
|
| +L014ecb_enc_loop6:
|
| movups %xmm2,(%edi)
|
| movdqu (%esi),%xmm2
|
| movups %xmm3,16(%edi)
|
| @@ -395,12 +393,12 @@ L016ecb_enc_loop6:
|
| leal 96(%edi),%edi
|
| movdqu 80(%esi),%xmm7
|
| leal 96(%esi),%esi
|
| -L015ecb_enc_loop6_enter:
|
| +L013ecb_enc_loop6_enter:
|
| call __aesni_encrypt6
|
| movl %ebp,%edx
|
| movl %ebx,%ecx
|
| subl $96,%eax
|
| - jnc L016ecb_enc_loop6
|
| + jnc L014ecb_enc_loop6
|
| movups %xmm2,(%edi)
|
| movups %xmm3,16(%edi)
|
| movups %xmm4,32(%edi)
|
| @@ -409,18 +407,18 @@ L015ecb_enc_loop6_enter:
|
| movups %xmm7,80(%edi)
|
| leal 96(%edi),%edi
|
| addl $96,%eax
|
| - jz L012ecb_ret
|
| -L014ecb_enc_tail:
|
| + jz L010ecb_ret
|
| +L012ecb_enc_tail:
|
| movups (%esi),%xmm2
|
| cmpl $32,%eax
|
| - jb L017ecb_enc_one
|
| + jb L015ecb_enc_one
|
| movups 16(%esi),%xmm3
|
| - je L018ecb_enc_two
|
| + je L016ecb_enc_two
|
| movups 32(%esi),%xmm4
|
| cmpl $64,%eax
|
| - jb L019ecb_enc_three
|
| + jb L017ecb_enc_three
|
| movups 48(%esi),%xmm5
|
| - je L020ecb_enc_four
|
| + je L018ecb_enc_four
|
| movups 64(%esi),%xmm6
|
| xorps %xmm7,%xmm7
|
| call __aesni_encrypt6
|
| @@ -429,49 +427,49 @@ L014ecb_enc_tail:
|
| movups %xmm4,32(%edi)
|
| movups %xmm5,48(%edi)
|
| movups %xmm6,64(%edi)
|
| - jmp L012ecb_ret
|
| + jmp L010ecb_ret
|
| .align 4,0x90
|
| -L017ecb_enc_one:
|
| +L015ecb_enc_one:
|
| movups (%edx),%xmm0
|
| movups 16(%edx),%xmm1
|
| leal 32(%edx),%edx
|
| xorps %xmm0,%xmm2
|
| -L021enc1_loop_3:
|
| +L019enc1_loop_3:
|
| .byte 102,15,56,220,209
|
| decl %ecx
|
| movups (%edx),%xmm1
|
| leal 16(%edx),%edx
|
| - jnz L021enc1_loop_3
|
| + jnz L019enc1_loop_3
|
| .byte 102,15,56,221,209
|
| movups %xmm2,(%edi)
|
| - jmp L012ecb_ret
|
| + jmp L010ecb_ret
|
| .align 4,0x90
|
| -L018ecb_enc_two:
|
| +L016ecb_enc_two:
|
| call __aesni_encrypt2
|
| movups %xmm2,(%edi)
|
| movups %xmm3,16(%edi)
|
| - jmp L012ecb_ret
|
| + jmp L010ecb_ret
|
| .align 4,0x90
|
| -L019ecb_enc_three:
|
| +L017ecb_enc_three:
|
| call __aesni_encrypt3
|
| movups %xmm2,(%edi)
|
| movups %xmm3,16(%edi)
|
| movups %xmm4,32(%edi)
|
| - jmp L012ecb_ret
|
| + jmp L010ecb_ret
|
| .align 4,0x90
|
| -L020ecb_enc_four:
|
| +L018ecb_enc_four:
|
| call __aesni_encrypt4
|
| movups %xmm2,(%edi)
|
| movups %xmm3,16(%edi)
|
| movups %xmm4,32(%edi)
|
| movups %xmm5,48(%edi)
|
| - jmp L012ecb_ret
|
| + jmp L010ecb_ret
|
| .align 4,0x90
|
| -L013ecb_decrypt:
|
| +L011ecb_decrypt:
|
| movl %edx,%ebp
|
| movl %ecx,%ebx
|
| cmpl $96,%eax
|
| - jb L022ecb_dec_tail
|
| + jb L020ecb_dec_tail
|
| movdqu (%esi),%xmm2
|
| movdqu 16(%esi),%xmm3
|
| movdqu 32(%esi),%xmm4
|
| @@ -480,9 +478,9 @@ L013ecb_decrypt:
|
| movdqu 80(%esi),%xmm7
|
| leal 96(%esi),%esi
|
| subl $96,%eax
|
| - jmp L023ecb_dec_loop6_enter
|
| + jmp L021ecb_dec_loop6_enter
|
| .align 4,0x90
|
| -L024ecb_dec_loop6:
|
| +L022ecb_dec_loop6:
|
| movups %xmm2,(%edi)
|
| movdqu (%esi),%xmm2
|
| movups %xmm3,16(%edi)
|
| @@ -497,12 +495,12 @@ L024ecb_dec_loop6:
|
| leal 96(%edi),%edi
|
| movdqu 80(%esi),%xmm7
|
| leal 96(%esi),%esi
|
| -L023ecb_dec_loop6_enter:
|
| +L021ecb_dec_loop6_enter:
|
| call __aesni_decrypt6
|
| movl %ebp,%edx
|
| movl %ebx,%ecx
|
| subl $96,%eax
|
| - jnc L024ecb_dec_loop6
|
| + jnc L022ecb_dec_loop6
|
| movups %xmm2,(%edi)
|
| movups %xmm3,16(%edi)
|
| movups %xmm4,32(%edi)
|
| @@ -511,18 +509,18 @@ L023ecb_dec_loop6_enter:
|
| movups %xmm7,80(%edi)
|
| leal 96(%edi),%edi
|
| addl $96,%eax
|
| - jz L012ecb_ret
|
| -L022ecb_dec_tail:
|
| + jz L010ecb_ret
|
| +L020ecb_dec_tail:
|
| movups (%esi),%xmm2
|
| cmpl $32,%eax
|
| - jb L025ecb_dec_one
|
| + jb L023ecb_dec_one
|
| movups 16(%esi),%xmm3
|
| - je L026ecb_dec_two
|
| + je L024ecb_dec_two
|
| movups 32(%esi),%xmm4
|
| cmpl $64,%eax
|
| - jb L027ecb_dec_three
|
| + jb L025ecb_dec_three
|
| movups 48(%esi),%xmm5
|
| - je L028ecb_dec_four
|
| + je L026ecb_dec_four
|
| movups 64(%esi),%xmm6
|
| xorps %xmm7,%xmm7
|
| call __aesni_decrypt6
|
| @@ -531,51 +529,43 @@ L022ecb_dec_tail:
|
| movups %xmm4,32(%edi)
|
| movups %xmm5,48(%edi)
|
| movups %xmm6,64(%edi)
|
| - jmp L012ecb_ret
|
| + jmp L010ecb_ret
|
| .align 4,0x90
|
| -L025ecb_dec_one:
|
| +L023ecb_dec_one:
|
| movups (%edx),%xmm0
|
| movups 16(%edx),%xmm1
|
| leal 32(%edx),%edx
|
| xorps %xmm0,%xmm2
|
| -L029dec1_loop_4:
|
| +L027dec1_loop_4:
|
| .byte 102,15,56,222,209
|
| decl %ecx
|
| movups (%edx),%xmm1
|
| leal 16(%edx),%edx
|
| - jnz L029dec1_loop_4
|
| + jnz L027dec1_loop_4
|
| .byte 102,15,56,223,209
|
| movups %xmm2,(%edi)
|
| - jmp L012ecb_ret
|
| + jmp L010ecb_ret
|
| .align 4,0x90
|
| -L026ecb_dec_two:
|
| +L024ecb_dec_two:
|
| call __aesni_decrypt2
|
| movups %xmm2,(%edi)
|
| movups %xmm3,16(%edi)
|
| - jmp L012ecb_ret
|
| + jmp L010ecb_ret
|
| .align 4,0x90
|
| -L027ecb_dec_three:
|
| +L025ecb_dec_three:
|
| call __aesni_decrypt3
|
| movups %xmm2,(%edi)
|
| movups %xmm3,16(%edi)
|
| movups %xmm4,32(%edi)
|
| - jmp L012ecb_ret
|
| + jmp L010ecb_ret
|
| .align 4,0x90
|
| -L028ecb_dec_four:
|
| +L026ecb_dec_four:
|
| call __aesni_decrypt4
|
| movups %xmm2,(%edi)
|
| movups %xmm3,16(%edi)
|
| movups %xmm4,32(%edi)
|
| movups %xmm5,48(%edi)
|
| -L012ecb_ret:
|
| - pxor %xmm0,%xmm0
|
| - pxor %xmm1,%xmm1
|
| - pxor %xmm2,%xmm2
|
| - pxor %xmm3,%xmm3
|
| - pxor %xmm4,%xmm4
|
| - pxor %xmm5,%xmm5
|
| - pxor %xmm6,%xmm6
|
| - pxor %xmm7,%xmm7
|
| +L010ecb_ret:
|
| popl %edi
|
| popl %esi
|
| popl %ebx
|
| @@ -621,7 +611,7 @@ L_aesni_ccm64_encrypt_blocks_begin:
|
| leal 32(%edx,%ecx,1),%edx
|
| subl %ecx,%ebx
|
| .byte 102,15,56,0,253
|
| -L030ccm64_enc_outer:
|
| +L028ccm64_enc_outer:
|
| movups (%ebp),%xmm0
|
| movl %ebx,%ecx
|
| movups (%esi),%xmm6
|
| @@ -630,7 +620,7 @@ L030ccm64_enc_outer:
|
| xorps %xmm6,%xmm0
|
| xorps %xmm0,%xmm3
|
| movups 32(%ebp),%xmm0
|
| -L031ccm64_enc2_loop:
|
| +L029ccm64_enc2_loop:
|
| .byte 102,15,56,220,209
|
| .byte 102,15,56,220,217
|
| movups (%edx,%ecx,1),%xmm1
|
| @@ -638,7 +628,7 @@ L031ccm64_enc2_loop:
|
| .byte 102,15,56,220,208
|
| .byte 102,15,56,220,216
|
| movups -16(%edx,%ecx,1),%xmm0
|
| - jnz L031ccm64_enc2_loop
|
| + jnz L029ccm64_enc2_loop
|
| .byte 102,15,56,220,209
|
| .byte 102,15,56,220,217
|
| paddq 16(%esp),%xmm7
|
| @@ -651,18 +641,10 @@ L031ccm64_enc2_loop:
|
| movups %xmm6,(%edi)
|
| .byte 102,15,56,0,213
|
| leal 16(%edi),%edi
|
| - jnz L030ccm64_enc_outer
|
| + jnz L028ccm64_enc_outer
|
| movl 48(%esp),%esp
|
| movl 40(%esp),%edi
|
| movups %xmm3,(%edi)
|
| - pxor %xmm0,%xmm0
|
| - pxor %xmm1,%xmm1
|
| - pxor %xmm2,%xmm2
|
| - pxor %xmm3,%xmm3
|
| - pxor %xmm4,%xmm4
|
| - pxor %xmm5,%xmm5
|
| - pxor %xmm6,%xmm6
|
| - pxor %xmm7,%xmm7
|
| popl %edi
|
| popl %esi
|
| popl %ebx
|
| @@ -709,12 +691,12 @@ L_aesni_ccm64_decrypt_blocks_begin:
|
| movups 16(%edx),%xmm1
|
| leal 32(%edx),%edx
|
| xorps %xmm0,%xmm2
|
| -L032enc1_loop_5:
|
| +L030enc1_loop_5:
|
| .byte 102,15,56,220,209
|
| decl %ecx
|
| movups (%edx),%xmm1
|
| leal 16(%edx),%edx
|
| - jnz L032enc1_loop_5
|
| + jnz L030enc1_loop_5
|
| .byte 102,15,56,221,209
|
| shll $4,%ebx
|
| movl $16,%ecx
|
| @@ -724,16 +706,16 @@ L032enc1_loop_5:
|
| subl %ebx,%ecx
|
| leal 32(%ebp,%ebx,1),%edx
|
| movl %ecx,%ebx
|
| - jmp L033ccm64_dec_outer
|
| + jmp L031ccm64_dec_outer
|
| .align 4,0x90
|
| -L033ccm64_dec_outer:
|
| +L031ccm64_dec_outer:
|
| xorps %xmm2,%xmm6
|
| movdqa %xmm7,%xmm2
|
| movups %xmm6,(%edi)
|
| leal 16(%edi),%edi
|
| .byte 102,15,56,0,213
|
| subl $1,%eax
|
| - jz L034ccm64_dec_break
|
| + jz L032ccm64_dec_break
|
| movups (%ebp),%xmm0
|
| movl %ebx,%ecx
|
| movups 16(%ebp),%xmm1
|
| @@ -741,7 +723,7 @@ L033ccm64_dec_outer:
|
| xorps %xmm0,%xmm2
|
| xorps %xmm6,%xmm3
|
| movups 32(%ebp),%xmm0
|
| -L035ccm64_dec2_loop:
|
| +L033ccm64_dec2_loop:
|
| .byte 102,15,56,220,209
|
| .byte 102,15,56,220,217
|
| movups (%edx,%ecx,1),%xmm1
|
| @@ -749,7 +731,7 @@ L035ccm64_dec2_loop:
|
| .byte 102,15,56,220,208
|
| .byte 102,15,56,220,216
|
| movups -16(%edx,%ecx,1),%xmm0
|
| - jnz L035ccm64_dec2_loop
|
| + jnz L033ccm64_dec2_loop
|
| movups (%esi),%xmm6
|
| paddq 16(%esp),%xmm7
|
| .byte 102,15,56,220,209
|
| @@ -757,9 +739,9 @@ L035ccm64_dec2_loop:
|
| .byte 102,15,56,221,208
|
| .byte 102,15,56,221,216
|
| leal 16(%esi),%esi
|
| - jmp L033ccm64_dec_outer
|
| + jmp L031ccm64_dec_outer
|
| .align 4,0x90
|
| -L034ccm64_dec_break:
|
| +L032ccm64_dec_break:
|
| movl 240(%ebp),%ecx
|
| movl %ebp,%edx
|
| movups (%edx),%xmm0
|
| @@ -767,24 +749,16 @@ L034ccm64_dec_break:
|
| xorps %xmm0,%xmm6
|
| leal 32(%edx),%edx
|
| xorps %xmm6,%xmm3
|
| -L036enc1_loop_6:
|
| +L034enc1_loop_6:
|
| .byte 102,15,56,220,217
|
| decl %ecx
|
| movups (%edx),%xmm1
|
| leal 16(%edx),%edx
|
| - jnz L036enc1_loop_6
|
| + jnz L034enc1_loop_6
|
| .byte 102,15,56,221,217
|
| movl 48(%esp),%esp
|
| movl 40(%esp),%edi
|
| movups %xmm3,(%edi)
|
| - pxor %xmm0,%xmm0
|
| - pxor %xmm1,%xmm1
|
| - pxor %xmm2,%xmm2
|
| - pxor %xmm3,%xmm3
|
| - pxor %xmm4,%xmm4
|
| - pxor %xmm5,%xmm5
|
| - pxor %xmm6,%xmm6
|
| - pxor %xmm7,%xmm7
|
| popl %edi
|
| popl %esi
|
| popl %ebx
|
| @@ -809,7 +783,7 @@ L_aesni_ctr32_encrypt_blocks_begin:
|
| andl $-16,%esp
|
| movl %ebp,80(%esp)
|
| cmpl $1,%eax
|
| - je L037ctr32_one_shortcut
|
| + je L035ctr32_one_shortcut
|
| movdqu (%ebx),%xmm7
|
| movl $202182159,(%esp)
|
| movl $134810123,4(%esp)
|
| @@ -847,7 +821,7 @@ L_aesni_ctr32_encrypt_blocks_begin:
|
| pshufd $192,%xmm0,%xmm2
|
| pshufd $128,%xmm0,%xmm3
|
| cmpl $6,%eax
|
| - jb L038ctr32_tail
|
| + jb L036ctr32_tail
|
| pxor %xmm6,%xmm7
|
| shll $4,%ecx
|
| movl $16,%ebx
|
| @@ -856,9 +830,9 @@ L_aesni_ctr32_encrypt_blocks_begin:
|
| subl %ecx,%ebx
|
| leal 32(%edx,%ecx,1),%edx
|
| subl $6,%eax
|
| - jmp L039ctr32_loop6
|
| + jmp L037ctr32_loop6
|
| .align 4,0x90
|
| -L039ctr32_loop6:
|
| +L037ctr32_loop6:
|
| pshufd $64,%xmm0,%xmm4
|
| movdqa 32(%esp),%xmm0
|
| pshufd $192,%xmm1,%xmm5
|
| @@ -912,27 +886,27 @@ L039ctr32_loop6:
|
| leal 96(%edi),%edi
|
| pshufd $128,%xmm0,%xmm3
|
| subl $6,%eax
|
| - jnc L039ctr32_loop6
|
| + jnc L037ctr32_loop6
|
| addl $6,%eax
|
| - jz L040ctr32_ret
|
| + jz L038ctr32_ret
|
| movdqu (%ebp),%xmm7
|
| movl %ebp,%edx
|
| pxor 32(%esp),%xmm7
|
| movl 240(%ebp),%ecx
|
| -L038ctr32_tail:
|
| +L036ctr32_tail:
|
| por %xmm7,%xmm2
|
| cmpl $2,%eax
|
| - jb L041ctr32_one
|
| + jb L039ctr32_one
|
| pshufd $64,%xmm0,%xmm4
|
| por %xmm7,%xmm3
|
| - je L042ctr32_two
|
| + je L040ctr32_two
|
| pshufd $192,%xmm1,%xmm5
|
| por %xmm7,%xmm4
|
| cmpl $4,%eax
|
| - jb L043ctr32_three
|
| + jb L041ctr32_three
|
| pshufd $128,%xmm1,%xmm6
|
| por %xmm7,%xmm5
|
| - je L044ctr32_four
|
| + je L042ctr32_four
|
| por %xmm7,%xmm6
|
| call __aesni_encrypt6
|
| movups (%esi),%xmm1
|
| @@ -950,29 +924,29 @@ L038ctr32_tail:
|
| movups %xmm4,32(%edi)
|
| movups %xmm5,48(%edi)
|
| movups %xmm6,64(%edi)
|
| - jmp L040ctr32_ret
|
| + jmp L038ctr32_ret
|
| .align 4,0x90
|
| -L037ctr32_one_shortcut:
|
| +L035ctr32_one_shortcut:
|
| movups (%ebx),%xmm2
|
| movl 240(%edx),%ecx
|
| -L041ctr32_one:
|
| +L039ctr32_one:
|
| movups (%edx),%xmm0
|
| movups 16(%edx),%xmm1
|
| leal 32(%edx),%edx
|
| xorps %xmm0,%xmm2
|
| -L045enc1_loop_7:
|
| +L043enc1_loop_7:
|
| .byte 102,15,56,220,209
|
| decl %ecx
|
| movups (%edx),%xmm1
|
| leal 16(%edx),%edx
|
| - jnz L045enc1_loop_7
|
| + jnz L043enc1_loop_7
|
| .byte 102,15,56,221,209
|
| movups (%esi),%xmm6
|
| xorps %xmm2,%xmm6
|
| movups %xmm6,(%edi)
|
| - jmp L040ctr32_ret
|
| + jmp L038ctr32_ret
|
| .align 4,0x90
|
| -L042ctr32_two:
|
| +L040ctr32_two:
|
| call __aesni_encrypt2
|
| movups (%esi),%xmm5
|
| movups 16(%esi),%xmm6
|
| @@ -980,9 +954,9 @@ L042ctr32_two:
|
| xorps %xmm6,%xmm3
|
| movups %xmm2,(%edi)
|
| movups %xmm3,16(%edi)
|
| - jmp L040ctr32_ret
|
| + jmp L038ctr32_ret
|
| .align 4,0x90
|
| -L043ctr32_three:
|
| +L041ctr32_three:
|
| call __aesni_encrypt3
|
| movups (%esi),%xmm5
|
| movups 16(%esi),%xmm6
|
| @@ -993,9 +967,9 @@ L043ctr32_three:
|
| xorps %xmm7,%xmm4
|
| movups %xmm3,16(%edi)
|
| movups %xmm4,32(%edi)
|
| - jmp L040ctr32_ret
|
| + jmp L038ctr32_ret
|
| .align 4,0x90
|
| -L044ctr32_four:
|
| +L042ctr32_four:
|
| call __aesni_encrypt4
|
| movups (%esi),%xmm6
|
| movups 16(%esi),%xmm7
|
| @@ -1009,18 +983,7 @@ L044ctr32_four:
|
| xorps %xmm0,%xmm5
|
| movups %xmm4,32(%edi)
|
| movups %xmm5,48(%edi)
|
| -L040ctr32_ret:
|
| - pxor %xmm0,%xmm0
|
| - pxor %xmm1,%xmm1
|
| - pxor %xmm2,%xmm2
|
| - pxor %xmm3,%xmm3
|
| - pxor %xmm4,%xmm4
|
| - movdqa %xmm0,32(%esp)
|
| - pxor %xmm5,%xmm5
|
| - movdqa %xmm0,48(%esp)
|
| - pxor %xmm6,%xmm6
|
| - movdqa %xmm0,64(%esp)
|
| - pxor %xmm7,%xmm7
|
| +L038ctr32_ret:
|
| movl 80(%esp),%esp
|
| popl %edi
|
| popl %esi
|
| @@ -1044,12 +1007,12 @@ L_aesni_xts_encrypt_begin:
|
| movups 16(%edx),%xmm1
|
| leal 32(%edx),%edx
|
| xorps %xmm0,%xmm2
|
| -L046enc1_loop_8:
|
| +L044enc1_loop_8:
|
| .byte 102,15,56,220,209
|
| decl %ecx
|
| movups (%edx),%xmm1
|
| leal 16(%edx),%edx
|
| - jnz L046enc1_loop_8
|
| + jnz L044enc1_loop_8
|
| .byte 102,15,56,221,209
|
| movl 20(%esp),%esi
|
| movl 24(%esp),%edi
|
| @@ -1073,14 +1036,14 @@ L046enc1_loop_8:
|
| movl %edx,%ebp
|
| movl %ecx,%ebx
|
| subl $96,%eax
|
| - jc L047xts_enc_short
|
| + jc L045xts_enc_short
|
| shll $4,%ecx
|
| movl $16,%ebx
|
| subl %ecx,%ebx
|
| leal 32(%edx,%ecx,1),%edx
|
| - jmp L048xts_enc_loop6
|
| + jmp L046xts_enc_loop6
|
| .align 4,0x90
|
| -L048xts_enc_loop6:
|
| +L046xts_enc_loop6:
|
| pshufd $19,%xmm0,%xmm2
|
| pxor %xmm0,%xmm0
|
| movdqa %xmm1,(%esp)
|
| @@ -1169,23 +1132,23 @@ L048xts_enc_loop6:
|
| pcmpgtd %xmm1,%xmm0
|
| pxor %xmm2,%xmm1
|
| subl $96,%eax
|
| - jnc L048xts_enc_loop6
|
| + jnc L046xts_enc_loop6
|
| movl 240(%ebp),%ecx
|
| movl %ebp,%edx
|
| movl %ecx,%ebx
|
| -L047xts_enc_short:
|
| +L045xts_enc_short:
|
| addl $96,%eax
|
| - jz L049xts_enc_done6x
|
| + jz L047xts_enc_done6x
|
| movdqa %xmm1,%xmm5
|
| cmpl $32,%eax
|
| - jb L050xts_enc_one
|
| + jb L048xts_enc_one
|
| pshufd $19,%xmm0,%xmm2
|
| pxor %xmm0,%xmm0
|
| paddq %xmm1,%xmm1
|
| pand %xmm3,%xmm2
|
| pcmpgtd %xmm1,%xmm0
|
| pxor %xmm2,%xmm1
|
| - je L051xts_enc_two
|
| + je L049xts_enc_two
|
| pshufd $19,%xmm0,%xmm2
|
| pxor %xmm0,%xmm0
|
| movdqa %xmm1,%xmm6
|
| @@ -1194,7 +1157,7 @@ L047xts_enc_short:
|
| pcmpgtd %xmm1,%xmm0
|
| pxor %xmm2,%xmm1
|
| cmpl $64,%eax
|
| - jb L052xts_enc_three
|
| + jb L050xts_enc_three
|
| pshufd $19,%xmm0,%xmm2
|
| pxor %xmm0,%xmm0
|
| movdqa %xmm1,%xmm7
|
| @@ -1204,7 +1167,7 @@ L047xts_enc_short:
|
| pxor %xmm2,%xmm1
|
| movdqa %xmm5,(%esp)
|
| movdqa %xmm6,16(%esp)
|
| - je L053xts_enc_four
|
| + je L051xts_enc_four
|
| movdqa %xmm7,32(%esp)
|
| pshufd $19,%xmm0,%xmm7
|
| movdqa %xmm1,48(%esp)
|
| @@ -1236,9 +1199,9 @@ L047xts_enc_short:
|
| movups %xmm5,48(%edi)
|
| movups %xmm6,64(%edi)
|
| leal 80(%edi),%edi
|
| - jmp L054xts_enc_done
|
| + jmp L052xts_enc_done
|
| .align 4,0x90
|
| -L050xts_enc_one:
|
| +L048xts_enc_one:
|
| movups (%esi),%xmm2
|
| leal 16(%esi),%esi
|
| xorps %xmm5,%xmm2
|
| @@ -1246,20 +1209,20 @@ L050xts_enc_one:
|
| movups 16(%edx),%xmm1
|
| leal 32(%edx),%edx
|
| xorps %xmm0,%xmm2
|
| -L055enc1_loop_9:
|
| +L053enc1_loop_9:
|
| .byte 102,15,56,220,209
|
| decl %ecx
|
| movups (%edx),%xmm1
|
| leal 16(%edx),%edx
|
| - jnz L055enc1_loop_9
|
| + jnz L053enc1_loop_9
|
| .byte 102,15,56,221,209
|
| xorps %xmm5,%xmm2
|
| movups %xmm2,(%edi)
|
| leal 16(%edi),%edi
|
| movdqa %xmm5,%xmm1
|
| - jmp L054xts_enc_done
|
| + jmp L052xts_enc_done
|
| .align 4,0x90
|
| -L051xts_enc_two:
|
| +L049xts_enc_two:
|
| movaps %xmm1,%xmm6
|
| movups (%esi),%xmm2
|
| movups 16(%esi),%xmm3
|
| @@ -1273,9 +1236,9 @@ L051xts_enc_two:
|
| movups %xmm3,16(%edi)
|
| leal 32(%edi),%edi
|
| movdqa %xmm6,%xmm1
|
| - jmp L054xts_enc_done
|
| + jmp L052xts_enc_done
|
| .align 4,0x90
|
| -L052xts_enc_three:
|
| +L050xts_enc_three:
|
| movaps %xmm1,%xmm7
|
| movups (%esi),%xmm2
|
| movups 16(%esi),%xmm3
|
| @@ -1293,9 +1256,9 @@ L052xts_enc_three:
|
| movups %xmm4,32(%edi)
|
| leal 48(%edi),%edi
|
| movdqa %xmm7,%xmm1
|
| - jmp L054xts_enc_done
|
| + jmp L052xts_enc_done
|
| .align 4,0x90
|
| -L053xts_enc_four:
|
| +L051xts_enc_four:
|
| movaps %xmm1,%xmm6
|
| movups (%esi),%xmm2
|
| movups 16(%esi),%xmm3
|
| @@ -1317,28 +1280,28 @@ L053xts_enc_four:
|
| movups %xmm5,48(%edi)
|
| leal 64(%edi),%edi
|
| movdqa %xmm6,%xmm1
|
| - jmp L054xts_enc_done
|
| + jmp L052xts_enc_done
|
| .align 4,0x90
|
| -L049xts_enc_done6x:
|
| +L047xts_enc_done6x:
|
| movl 112(%esp),%eax
|
| andl $15,%eax
|
| - jz L056xts_enc_ret
|
| + jz L054xts_enc_ret
|
| movdqa %xmm1,%xmm5
|
| movl %eax,112(%esp)
|
| - jmp L057xts_enc_steal
|
| + jmp L055xts_enc_steal
|
| .align 4,0x90
|
| -L054xts_enc_done:
|
| +L052xts_enc_done:
|
| movl 112(%esp),%eax
|
| pxor %xmm0,%xmm0
|
| andl $15,%eax
|
| - jz L056xts_enc_ret
|
| + jz L054xts_enc_ret
|
| pcmpgtd %xmm1,%xmm0
|
| movl %eax,112(%esp)
|
| pshufd $19,%xmm0,%xmm5
|
| paddq %xmm1,%xmm1
|
| pand 96(%esp),%xmm5
|
| pxor %xmm1,%xmm5
|
| -L057xts_enc_steal:
|
| +L055xts_enc_steal:
|
| movzbl (%esi),%ecx
|
| movzbl -16(%edi),%edx
|
| leal 1(%esi),%esi
|
| @@ -1346,7 +1309,7 @@ L057xts_enc_steal:
|
| movb %dl,(%edi)
|
| leal 1(%edi),%edi
|
| subl $1,%eax
|
| - jnz L057xts_enc_steal
|
| + jnz L055xts_enc_steal
|
| subl 112(%esp),%edi
|
| movl %ebp,%edx
|
| movl %ebx,%ecx
|
| @@ -1356,30 +1319,16 @@ L057xts_enc_steal:
|
| movups 16(%edx),%xmm1
|
| leal 32(%edx),%edx
|
| xorps %xmm0,%xmm2
|
| -L058enc1_loop_10:
|
| +L056enc1_loop_10:
|
| .byte 102,15,56,220,209
|
| decl %ecx
|
| movups (%edx),%xmm1
|
| leal 16(%edx),%edx
|
| - jnz L058enc1_loop_10
|
| + jnz L056enc1_loop_10
|
| .byte 102,15,56,221,209
|
| xorps %xmm5,%xmm2
|
| movups %xmm2,-16(%edi)
|
| -L056xts_enc_ret:
|
| - pxor %xmm0,%xmm0
|
| - pxor %xmm1,%xmm1
|
| - pxor %xmm2,%xmm2
|
| - movdqa %xmm0,(%esp)
|
| - pxor %xmm3,%xmm3
|
| - movdqa %xmm0,16(%esp)
|
| - pxor %xmm4,%xmm4
|
| - movdqa %xmm0,32(%esp)
|
| - pxor %xmm5,%xmm5
|
| - movdqa %xmm0,48(%esp)
|
| - pxor %xmm6,%xmm6
|
| - movdqa %xmm0,64(%esp)
|
| - pxor %xmm7,%xmm7
|
| - movdqa %xmm0,80(%esp)
|
| +L054xts_enc_ret:
|
| movl 116(%esp),%esp
|
| popl %edi
|
| popl %esi
|
| @@ -1403,12 +1352,12 @@ L_aesni_xts_decrypt_begin:
|
| movups 16(%edx),%xmm1
|
| leal 32(%edx),%edx
|
| xorps %xmm0,%xmm2
|
| -L059enc1_loop_11:
|
| +L057enc1_loop_11:
|
| .byte 102,15,56,220,209
|
| decl %ecx
|
| movups (%edx),%xmm1
|
| leal 16(%edx),%edx
|
| - jnz L059enc1_loop_11
|
| + jnz L057enc1_loop_11
|
| .byte 102,15,56,221,209
|
| movl 20(%esp),%esi
|
| movl 24(%esp),%edi
|
| @@ -1437,14 +1386,14 @@ L059enc1_loop_11:
|
| pcmpgtd %xmm1,%xmm0
|
| andl $-16,%eax
|
| subl $96,%eax
|
| - jc L060xts_dec_short
|
| + jc L058xts_dec_short
|
| shll $4,%ecx
|
| movl $16,%ebx
|
| subl %ecx,%ebx
|
| leal 32(%edx,%ecx,1),%edx
|
| - jmp L061xts_dec_loop6
|
| + jmp L059xts_dec_loop6
|
| .align 4,0x90
|
| -L061xts_dec_loop6:
|
| +L059xts_dec_loop6:
|
| pshufd $19,%xmm0,%xmm2
|
| pxor %xmm0,%xmm0
|
| movdqa %xmm1,(%esp)
|
| @@ -1533,23 +1482,23 @@ L061xts_dec_loop6:
|
| pcmpgtd %xmm1,%xmm0
|
| pxor %xmm2,%xmm1
|
| subl $96,%eax
|
| - jnc L061xts_dec_loop6
|
| + jnc L059xts_dec_loop6
|
| movl 240(%ebp),%ecx
|
| movl %ebp,%edx
|
| movl %ecx,%ebx
|
| -L060xts_dec_short:
|
| +L058xts_dec_short:
|
| addl $96,%eax
|
| - jz L062xts_dec_done6x
|
| + jz L060xts_dec_done6x
|
| movdqa %xmm1,%xmm5
|
| cmpl $32,%eax
|
| - jb L063xts_dec_one
|
| + jb L061xts_dec_one
|
| pshufd $19,%xmm0,%xmm2
|
| pxor %xmm0,%xmm0
|
| paddq %xmm1,%xmm1
|
| pand %xmm3,%xmm2
|
| pcmpgtd %xmm1,%xmm0
|
| pxor %xmm2,%xmm1
|
| - je L064xts_dec_two
|
| + je L062xts_dec_two
|
| pshufd $19,%xmm0,%xmm2
|
| pxor %xmm0,%xmm0
|
| movdqa %xmm1,%xmm6
|
| @@ -1558,7 +1507,7 @@ L060xts_dec_short:
|
| pcmpgtd %xmm1,%xmm0
|
| pxor %xmm2,%xmm1
|
| cmpl $64,%eax
|
| - jb L065xts_dec_three
|
| + jb L063xts_dec_three
|
| pshufd $19,%xmm0,%xmm2
|
| pxor %xmm0,%xmm0
|
| movdqa %xmm1,%xmm7
|
| @@ -1568,7 +1517,7 @@ L060xts_dec_short:
|
| pxor %xmm2,%xmm1
|
| movdqa %xmm5,(%esp)
|
| movdqa %xmm6,16(%esp)
|
| - je L066xts_dec_four
|
| + je L064xts_dec_four
|
| movdqa %xmm7,32(%esp)
|
| pshufd $19,%xmm0,%xmm7
|
| movdqa %xmm1,48(%esp)
|
| @@ -1600,9 +1549,9 @@ L060xts_dec_short:
|
| movups %xmm5,48(%edi)
|
| movups %xmm6,64(%edi)
|
| leal 80(%edi),%edi
|
| - jmp L067xts_dec_done
|
| + jmp L065xts_dec_done
|
| .align 4,0x90
|
| -L063xts_dec_one:
|
| +L061xts_dec_one:
|
| movups (%esi),%xmm2
|
| leal 16(%esi),%esi
|
| xorps %xmm5,%xmm2
|
| @@ -1610,20 +1559,20 @@ L063xts_dec_one:
|
| movups 16(%edx),%xmm1
|
| leal 32(%edx),%edx
|
| xorps %xmm0,%xmm2
|
| -L068dec1_loop_12:
|
| +L066dec1_loop_12:
|
| .byte 102,15,56,222,209
|
| decl %ecx
|
| movups (%edx),%xmm1
|
| leal 16(%edx),%edx
|
| - jnz L068dec1_loop_12
|
| + jnz L066dec1_loop_12
|
| .byte 102,15,56,223,209
|
| xorps %xmm5,%xmm2
|
| movups %xmm2,(%edi)
|
| leal 16(%edi),%edi
|
| movdqa %xmm5,%xmm1
|
| - jmp L067xts_dec_done
|
| + jmp L065xts_dec_done
|
| .align 4,0x90
|
| -L064xts_dec_two:
|
| +L062xts_dec_two:
|
| movaps %xmm1,%xmm6
|
| movups (%esi),%xmm2
|
| movups 16(%esi),%xmm3
|
| @@ -1637,9 +1586,9 @@ L064xts_dec_two:
|
| movups %xmm3,16(%edi)
|
| leal 32(%edi),%edi
|
| movdqa %xmm6,%xmm1
|
| - jmp L067xts_dec_done
|
| + jmp L065xts_dec_done
|
| .align 4,0x90
|
| -L065xts_dec_three:
|
| +L063xts_dec_three:
|
| movaps %xmm1,%xmm7
|
| movups (%esi),%xmm2
|
| movups 16(%esi),%xmm3
|
| @@ -1657,9 +1606,9 @@ L065xts_dec_three:
|
| movups %xmm4,32(%edi)
|
| leal 48(%edi),%edi
|
| movdqa %xmm7,%xmm1
|
| - jmp L067xts_dec_done
|
| + jmp L065xts_dec_done
|
| .align 4,0x90
|
| -L066xts_dec_four:
|
| +L064xts_dec_four:
|
| movaps %xmm1,%xmm6
|
| movups (%esi),%xmm2
|
| movups 16(%esi),%xmm3
|
| @@ -1681,20 +1630,20 @@ L066xts_dec_four:
|
| movups %xmm5,48(%edi)
|
| leal 64(%edi),%edi
|
| movdqa %xmm6,%xmm1
|
| - jmp L067xts_dec_done
|
| + jmp L065xts_dec_done
|
| .align 4,0x90
|
| -L062xts_dec_done6x:
|
| +L060xts_dec_done6x:
|
| movl 112(%esp),%eax
|
| andl $15,%eax
|
| - jz L069xts_dec_ret
|
| + jz L067xts_dec_ret
|
| movl %eax,112(%esp)
|
| - jmp L070xts_dec_only_one_more
|
| + jmp L068xts_dec_only_one_more
|
| .align 4,0x90
|
| -L067xts_dec_done:
|
| +L065xts_dec_done:
|
| movl 112(%esp),%eax
|
| pxor %xmm0,%xmm0
|
| andl $15,%eax
|
| - jz L069xts_dec_ret
|
| + jz L067xts_dec_ret
|
| pcmpgtd %xmm1,%xmm0
|
| movl %eax,112(%esp)
|
| pshufd $19,%xmm0,%xmm2
|
| @@ -1704,7 +1653,7 @@ L067xts_dec_done:
|
| pand %xmm3,%xmm2
|
| pcmpgtd %xmm1,%xmm0
|
| pxor %xmm2,%xmm1
|
| -L070xts_dec_only_one_more:
|
| +L068xts_dec_only_one_more:
|
| pshufd $19,%xmm0,%xmm5
|
| movdqa %xmm1,%xmm6
|
| paddq %xmm1,%xmm1
|
| @@ -1718,16 +1667,16 @@ L070xts_dec_only_one_more:
|
| movups 16(%edx),%xmm1
|
| leal 32(%edx),%edx
|
| xorps %xmm0,%xmm2
|
| -L071dec1_loop_13:
|
| +L069dec1_loop_13:
|
| .byte 102,15,56,222,209
|
| decl %ecx
|
| movups (%edx),%xmm1
|
| leal 16(%edx),%edx
|
| - jnz L071dec1_loop_13
|
| + jnz L069dec1_loop_13
|
| .byte 102,15,56,223,209
|
| xorps %xmm5,%xmm2
|
| movups %xmm2,(%edi)
|
| -L072xts_dec_steal:
|
| +L070xts_dec_steal:
|
| movzbl 16(%esi),%ecx
|
| movzbl (%edi),%edx
|
| leal 1(%esi),%esi
|
| @@ -1735,7 +1684,7 @@ L072xts_dec_steal:
|
| movb %dl,16(%edi)
|
| leal 1(%edi),%edi
|
| subl $1,%eax
|
| - jnz L072xts_dec_steal
|
| + jnz L070xts_dec_steal
|
| subl 112(%esp),%edi
|
| movl %ebp,%edx
|
| movl %ebx,%ecx
|
| @@ -1745,30 +1694,16 @@ L072xts_dec_steal:
|
| movups 16(%edx),%xmm1
|
| leal 32(%edx),%edx
|
| xorps %xmm0,%xmm2
|
| -L073dec1_loop_14:
|
| +L071dec1_loop_14:
|
| .byte 102,15,56,222,209
|
| decl %ecx
|
| movups (%edx),%xmm1
|
| leal 16(%edx),%edx
|
| - jnz L073dec1_loop_14
|
| + jnz L071dec1_loop_14
|
| .byte 102,15,56,223,209
|
| xorps %xmm6,%xmm2
|
| movups %xmm2,(%edi)
|
| -L069xts_dec_ret:
|
| - pxor %xmm0,%xmm0
|
| - pxor %xmm1,%xmm1
|
| - pxor %xmm2,%xmm2
|
| - movdqa %xmm0,(%esp)
|
| - pxor %xmm3,%xmm3
|
| - movdqa %xmm0,16(%esp)
|
| - pxor %xmm4,%xmm4
|
| - movdqa %xmm0,32(%esp)
|
| - pxor %xmm5,%xmm5
|
| - movdqa %xmm0,48(%esp)
|
| - pxor %xmm6,%xmm6
|
| - movdqa %xmm0,64(%esp)
|
| - pxor %xmm7,%xmm7
|
| - movdqa %xmm0,80(%esp)
|
| +L067xts_dec_ret:
|
| movl 116(%esp),%esp
|
| popl %edi
|
| popl %esi
|
| @@ -1793,7 +1728,7 @@ L_aesni_cbc_encrypt_begin:
|
| movl 32(%esp),%edx
|
| movl 36(%esp),%ebp
|
| testl %eax,%eax
|
| - jz L074cbc_abort
|
| + jz L072cbc_abort
|
| cmpl $0,40(%esp)
|
| xchgl %esp,%ebx
|
| movups (%ebp),%xmm7
|
| @@ -1801,14 +1736,14 @@ L_aesni_cbc_encrypt_begin:
|
| movl %edx,%ebp
|
| movl %ebx,16(%esp)
|
| movl %ecx,%ebx
|
| - je L075cbc_decrypt
|
| + je L073cbc_decrypt
|
| movaps %xmm7,%xmm2
|
| cmpl $16,%eax
|
| - jb L076cbc_enc_tail
|
| + jb L074cbc_enc_tail
|
| subl $16,%eax
|
| - jmp L077cbc_enc_loop
|
| + jmp L075cbc_enc_loop
|
| .align 4,0x90
|
| -L077cbc_enc_loop:
|
| +L075cbc_enc_loop:
|
| movups (%esi),%xmm7
|
| leal 16(%esi),%esi
|
| movups (%edx),%xmm0
|
| @@ -1816,25 +1751,24 @@ L077cbc_enc_loop:
|
| xorps %xmm0,%xmm7
|
| leal 32(%edx),%edx
|
| xorps %xmm7,%xmm2
|
| -L078enc1_loop_15:
|
| +L076enc1_loop_15:
|
| .byte 102,15,56,220,209
|
| decl %ecx
|
| movups (%edx),%xmm1
|
| leal 16(%edx),%edx
|
| - jnz L078enc1_loop_15
|
| + jnz L076enc1_loop_15
|
| .byte 102,15,56,221,209
|
| movl %ebx,%ecx
|
| movl %ebp,%edx
|
| movups %xmm2,(%edi)
|
| leal 16(%edi),%edi
|
| subl $16,%eax
|
| - jnc L077cbc_enc_loop
|
| + jnc L075cbc_enc_loop
|
| addl $16,%eax
|
| - jnz L076cbc_enc_tail
|
| + jnz L074cbc_enc_tail
|
| movaps %xmm2,%xmm7
|
| - pxor %xmm2,%xmm2
|
| - jmp L079cbc_ret
|
| -L076cbc_enc_tail:
|
| + jmp L077cbc_ret
|
| +L074cbc_enc_tail:
|
| movl %eax,%ecx
|
| .long 2767451785
|
| movl $16,%ecx
|
| @@ -1845,20 +1779,20 @@ L076cbc_enc_tail:
|
| movl %ebx,%ecx
|
| movl %edi,%esi
|
| movl %ebp,%edx
|
| - jmp L077cbc_enc_loop
|
| + jmp L075cbc_enc_loop
|
| .align 4,0x90
|
| -L075cbc_decrypt:
|
| +L073cbc_decrypt:
|
| cmpl $80,%eax
|
| - jbe L080cbc_dec_tail
|
| + jbe L078cbc_dec_tail
|
| movaps %xmm7,(%esp)
|
| subl $80,%eax
|
| - jmp L081cbc_dec_loop6_enter
|
| + jmp L079cbc_dec_loop6_enter
|
| .align 4,0x90
|
| -L082cbc_dec_loop6:
|
| +L080cbc_dec_loop6:
|
| movaps %xmm0,(%esp)
|
| movups %xmm7,(%edi)
|
| leal 16(%edi),%edi
|
| -L081cbc_dec_loop6_enter:
|
| +L079cbc_dec_loop6_enter:
|
| movdqu (%esi),%xmm2
|
| movdqu 16(%esi),%xmm3
|
| movdqu 32(%esi),%xmm4
|
| @@ -1888,28 +1822,28 @@ L081cbc_dec_loop6_enter:
|
| movups %xmm6,64(%edi)
|
| leal 80(%edi),%edi
|
| subl $96,%eax
|
| - ja L082cbc_dec_loop6
|
| + ja L080cbc_dec_loop6
|
| movaps %xmm7,%xmm2
|
| movaps %xmm0,%xmm7
|
| addl $80,%eax
|
| - jle L083cbc_dec_clear_tail_collected
|
| + jle L081cbc_dec_tail_collected
|
| movups %xmm2,(%edi)
|
| leal 16(%edi),%edi
|
| -L080cbc_dec_tail:
|
| +L078cbc_dec_tail:
|
| movups (%esi),%xmm2
|
| movaps %xmm2,%xmm6
|
| cmpl $16,%eax
|
| - jbe L084cbc_dec_one
|
| + jbe L082cbc_dec_one
|
| movups 16(%esi),%xmm3
|
| movaps %xmm3,%xmm5
|
| cmpl $32,%eax
|
| - jbe L085cbc_dec_two
|
| + jbe L083cbc_dec_two
|
| movups 32(%esi),%xmm4
|
| cmpl $48,%eax
|
| - jbe L086cbc_dec_three
|
| + jbe L084cbc_dec_three
|
| movups 48(%esi),%xmm5
|
| cmpl $64,%eax
|
| - jbe L087cbc_dec_four
|
| + jbe L085cbc_dec_four
|
| movups 64(%esi),%xmm6
|
| movaps %xmm7,(%esp)
|
| movups (%esi),%xmm2
|
| @@ -1927,62 +1861,55 @@ L080cbc_dec_tail:
|
| xorps %xmm0,%xmm6
|
| movups %xmm2,(%edi)
|
| movups %xmm3,16(%edi)
|
| - pxor %xmm3,%xmm3
|
| movups %xmm4,32(%edi)
|
| - pxor %xmm4,%xmm4
|
| movups %xmm5,48(%edi)
|
| - pxor %xmm5,%xmm5
|
| leal 64(%edi),%edi
|
| movaps %xmm6,%xmm2
|
| - pxor %xmm6,%xmm6
|
| subl $80,%eax
|
| - jmp L088cbc_dec_tail_collected
|
| + jmp L081cbc_dec_tail_collected
|
| .align 4,0x90
|
| -L084cbc_dec_one:
|
| +L082cbc_dec_one:
|
| movups (%edx),%xmm0
|
| movups 16(%edx),%xmm1
|
| leal 32(%edx),%edx
|
| xorps %xmm0,%xmm2
|
| -L089dec1_loop_16:
|
| +L086dec1_loop_16:
|
| .byte 102,15,56,222,209
|
| decl %ecx
|
| movups (%edx),%xmm1
|
| leal 16(%edx),%edx
|
| - jnz L089dec1_loop_16
|
| + jnz L086dec1_loop_16
|
| .byte 102,15,56,223,209
|
| xorps %xmm7,%xmm2
|
| movaps %xmm6,%xmm7
|
| subl $16,%eax
|
| - jmp L088cbc_dec_tail_collected
|
| + jmp L081cbc_dec_tail_collected
|
| .align 4,0x90
|
| -L085cbc_dec_two:
|
| +L083cbc_dec_two:
|
| call __aesni_decrypt2
|
| xorps %xmm7,%xmm2
|
| xorps %xmm6,%xmm3
|
| movups %xmm2,(%edi)
|
| movaps %xmm3,%xmm2
|
| - pxor %xmm3,%xmm3
|
| leal 16(%edi),%edi
|
| movaps %xmm5,%xmm7
|
| subl $32,%eax
|
| - jmp L088cbc_dec_tail_collected
|
| + jmp L081cbc_dec_tail_collected
|
| .align 4,0x90
|
| -L086cbc_dec_three:
|
| +L084cbc_dec_three:
|
| call __aesni_decrypt3
|
| xorps %xmm7,%xmm2
|
| xorps %xmm6,%xmm3
|
| xorps %xmm5,%xmm4
|
| movups %xmm2,(%edi)
|
| movaps %xmm4,%xmm2
|
| - pxor %xmm4,%xmm4
|
| movups %xmm3,16(%edi)
|
| - pxor %xmm3,%xmm3
|
| leal 32(%edi),%edi
|
| movups 32(%esi),%xmm7
|
| subl $48,%eax
|
| - jmp L088cbc_dec_tail_collected
|
| + jmp L081cbc_dec_tail_collected
|
| .align 4,0x90
|
| -L087cbc_dec_four:
|
| +L085cbc_dec_four:
|
| call __aesni_decrypt4
|
| movups 16(%esi),%xmm1
|
| movups 32(%esi),%xmm0
|
| @@ -1992,44 +1919,28 @@ L087cbc_dec_four:
|
| movups %xmm2,(%edi)
|
| xorps %xmm1,%xmm4
|
| movups %xmm3,16(%edi)
|
| - pxor %xmm3,%xmm3
|
| xorps %xmm0,%xmm5
|
| movups %xmm4,32(%edi)
|
| - pxor %xmm4,%xmm4
|
| leal 48(%edi),%edi
|
| movaps %xmm5,%xmm2
|
| - pxor %xmm5,%xmm5
|
| subl $64,%eax
|
| - jmp L088cbc_dec_tail_collected
|
| -.align 4,0x90
|
| -L083cbc_dec_clear_tail_collected:
|
| - pxor %xmm3,%xmm3
|
| - pxor %xmm4,%xmm4
|
| - pxor %xmm5,%xmm5
|
| - pxor %xmm6,%xmm6
|
| -L088cbc_dec_tail_collected:
|
| +L081cbc_dec_tail_collected:
|
| andl $15,%eax
|
| - jnz L090cbc_dec_tail_partial
|
| + jnz L087cbc_dec_tail_partial
|
| movups %xmm2,(%edi)
|
| - pxor %xmm0,%xmm0
|
| - jmp L079cbc_ret
|
| + jmp L077cbc_ret
|
| .align 4,0x90
|
| -L090cbc_dec_tail_partial:
|
| +L087cbc_dec_tail_partial:
|
| movaps %xmm2,(%esp)
|
| - pxor %xmm0,%xmm0
|
| movl $16,%ecx
|
| movl %esp,%esi
|
| subl %eax,%ecx
|
| .long 2767451785
|
| - movdqa %xmm2,(%esp)
|
| -L079cbc_ret:
|
| +L077cbc_ret:
|
| movl 16(%esp),%esp
|
| movl 36(%esp),%ebp
|
| - pxor %xmm2,%xmm2
|
| - pxor %xmm1,%xmm1
|
| movups %xmm7,(%ebp)
|
| - pxor %xmm7,%xmm7
|
| -L074cbc_abort:
|
| +L072cbc_abort:
|
| popl %edi
|
| popl %esi
|
| popl %ebx
|
| @@ -2038,62 +1949,52 @@ L074cbc_abort:
|
| .private_extern __aesni_set_encrypt_key
|
| .align 4
|
| __aesni_set_encrypt_key:
|
| - pushl %ebp
|
| - pushl %ebx
|
| testl %eax,%eax
|
| - jz L091bad_pointer
|
| + jz L088bad_pointer
|
| testl %edx,%edx
|
| - jz L091bad_pointer
|
| - call L092pic
|
| -L092pic:
|
| - popl %ebx
|
| - leal Lkey_const-L092pic(%ebx),%ebx
|
| - movl L_OPENSSL_ia32cap_P$non_lazy_ptr-Lkey_const(%ebx),%ebp
|
| + jz L088bad_pointer
|
| movups (%eax),%xmm0
|
| xorps %xmm4,%xmm4
|
| - movl 4(%ebp),%ebp
|
| leal 16(%edx),%edx
|
| - andl $268437504,%ebp
|
| cmpl $256,%ecx
|
| - je L09314rounds
|
| + je L08914rounds
|
| cmpl $192,%ecx
|
| - je L09412rounds
|
| + je L09012rounds
|
| cmpl $128,%ecx
|
| - jne L095bad_keybits
|
| + jne L091bad_keybits
|
| .align 4,0x90
|
| -L09610rounds:
|
| - cmpl $268435456,%ebp
|
| - je L09710rounds_alt
|
| +L09210rounds:
|
| movl $9,%ecx
|
| movups %xmm0,-16(%edx)
|
| .byte 102,15,58,223,200,1
|
| - call L098key_128_cold
|
| + call L093key_128_cold
|
| .byte 102,15,58,223,200,2
|
| - call L099key_128
|
| + call L094key_128
|
| .byte 102,15,58,223,200,4
|
| - call L099key_128
|
| + call L094key_128
|
| .byte 102,15,58,223,200,8
|
| - call L099key_128
|
| + call L094key_128
|
| .byte 102,15,58,223,200,16
|
| - call L099key_128
|
| + call L094key_128
|
| .byte 102,15,58,223,200,32
|
| - call L099key_128
|
| + call L094key_128
|
| .byte 102,15,58,223,200,64
|
| - call L099key_128
|
| + call L094key_128
|
| .byte 102,15,58,223,200,128
|
| - call L099key_128
|
| + call L094key_128
|
| .byte 102,15,58,223,200,27
|
| - call L099key_128
|
| + call L094key_128
|
| .byte 102,15,58,223,200,54
|
| - call L099key_128
|
| + call L094key_128
|
| movups %xmm0,(%edx)
|
| movl %ecx,80(%edx)
|
| - jmp L100good_key
|
| + xorl %eax,%eax
|
| + ret
|
| .align 4,0x90
|
| -L099key_128:
|
| +L094key_128:
|
| movups %xmm0,(%edx)
|
| leal 16(%edx),%edx
|
| -L098key_128_cold:
|
| +L093key_128_cold:
|
| shufps $16,%xmm0,%xmm4
|
| xorps %xmm4,%xmm0
|
| shufps $140,%xmm0,%xmm4
|
| @@ -2102,91 +2003,38 @@ L098key_128_cold:
|
| xorps %xmm1,%xmm0
|
| ret
|
| .align 4,0x90
|
| -L09710rounds_alt:
|
| - movdqa (%ebx),%xmm5
|
| - movl $8,%ecx
|
| - movdqa 32(%ebx),%xmm4
|
| - movdqa %xmm0,%xmm2
|
| - movdqu %xmm0,-16(%edx)
|
| -L101loop_key128:
|
| -.byte 102,15,56,0,197
|
| -.byte 102,15,56,221,196
|
| - pslld $1,%xmm4
|
| - leal 16(%edx),%edx
|
| - movdqa %xmm2,%xmm3
|
| - pslldq $4,%xmm2
|
| - pxor %xmm2,%xmm3
|
| - pslldq $4,%xmm2
|
| - pxor %xmm2,%xmm3
|
| - pslldq $4,%xmm2
|
| - pxor %xmm3,%xmm2
|
| - pxor %xmm2,%xmm0
|
| - movdqu %xmm0,-16(%edx)
|
| - movdqa %xmm0,%xmm2
|
| - decl %ecx
|
| - jnz L101loop_key128
|
| - movdqa 48(%ebx),%xmm4
|
| -.byte 102,15,56,0,197
|
| -.byte 102,15,56,221,196
|
| - pslld $1,%xmm4
|
| - movdqa %xmm2,%xmm3
|
| - pslldq $4,%xmm2
|
| - pxor %xmm2,%xmm3
|
| - pslldq $4,%xmm2
|
| - pxor %xmm2,%xmm3
|
| - pslldq $4,%xmm2
|
| - pxor %xmm3,%xmm2
|
| - pxor %xmm2,%xmm0
|
| - movdqu %xmm0,(%edx)
|
| - movdqa %xmm0,%xmm2
|
| -.byte 102,15,56,0,197
|
| -.byte 102,15,56,221,196
|
| - movdqa %xmm2,%xmm3
|
| - pslldq $4,%xmm2
|
| - pxor %xmm2,%xmm3
|
| - pslldq $4,%xmm2
|
| - pxor %xmm2,%xmm3
|
| - pslldq $4,%xmm2
|
| - pxor %xmm3,%xmm2
|
| - pxor %xmm2,%xmm0
|
| - movdqu %xmm0,16(%edx)
|
| - movl $9,%ecx
|
| - movl %ecx,96(%edx)
|
| - jmp L100good_key
|
| -.align 4,0x90
|
| -L09412rounds:
|
| +L09012rounds:
|
| movq 16(%eax),%xmm2
|
| - cmpl $268435456,%ebp
|
| - je L10212rounds_alt
|
| movl $11,%ecx
|
| movups %xmm0,-16(%edx)
|
| .byte 102,15,58,223,202,1
|
| - call L103key_192a_cold
|
| + call L095key_192a_cold
|
| .byte 102,15,58,223,202,2
|
| - call L104key_192b
|
| + call L096key_192b
|
| .byte 102,15,58,223,202,4
|
| - call L105key_192a
|
| + call L097key_192a
|
| .byte 102,15,58,223,202,8
|
| - call L104key_192b
|
| + call L096key_192b
|
| .byte 102,15,58,223,202,16
|
| - call L105key_192a
|
| + call L097key_192a
|
| .byte 102,15,58,223,202,32
|
| - call L104key_192b
|
| + call L096key_192b
|
| .byte 102,15,58,223,202,64
|
| - call L105key_192a
|
| + call L097key_192a
|
| .byte 102,15,58,223,202,128
|
| - call L104key_192b
|
| + call L096key_192b
|
| movups %xmm0,(%edx)
|
| movl %ecx,48(%edx)
|
| - jmp L100good_key
|
| + xorl %eax,%eax
|
| + ret
|
| .align 4,0x90
|
| -L105key_192a:
|
| +L097key_192a:
|
| movups %xmm0,(%edx)
|
| leal 16(%edx),%edx
|
| .align 4,0x90
|
| -L103key_192a_cold:
|
| +L095key_192a_cold:
|
| movaps %xmm2,%xmm5
|
| -L106key_192b_warm:
|
| +L098key_192b_warm:
|
| shufps $16,%xmm0,%xmm4
|
| movdqa %xmm2,%xmm3
|
| xorps %xmm4,%xmm0
|
| @@ -2200,90 +2048,56 @@ L106key_192b_warm:
|
| pxor %xmm3,%xmm2
|
| ret
|
| .align 4,0x90
|
| -L104key_192b:
|
| +L096key_192b:
|
| movaps %xmm0,%xmm3
|
| shufps $68,%xmm0,%xmm5
|
| movups %xmm5,(%edx)
|
| shufps $78,%xmm2,%xmm3
|
| movups %xmm3,16(%edx)
|
| leal 32(%edx),%edx
|
| - jmp L106key_192b_warm
|
| + jmp L098key_192b_warm
|
| .align 4,0x90
|
| -L10212rounds_alt:
|
| - movdqa 16(%ebx),%xmm5
|
| - movdqa 32(%ebx),%xmm4
|
| - movl $8,%ecx
|
| - movdqu %xmm0,-16(%edx)
|
| -L107loop_key192:
|
| - movq %xmm2,(%edx)
|
| - movdqa %xmm2,%xmm1
|
| -.byte 102,15,56,0,213
|
| -.byte 102,15,56,221,212
|
| - pslld $1,%xmm4
|
| - leal 24(%edx),%edx
|
| - movdqa %xmm0,%xmm3
|
| - pslldq $4,%xmm0
|
| - pxor %xmm0,%xmm3
|
| - pslldq $4,%xmm0
|
| - pxor %xmm0,%xmm3
|
| - pslldq $4,%xmm0
|
| - pxor %xmm3,%xmm0
|
| - pshufd $255,%xmm0,%xmm3
|
| - pxor %xmm1,%xmm3
|
| - pslldq $4,%xmm1
|
| - pxor %xmm1,%xmm3
|
| - pxor %xmm2,%xmm0
|
| - pxor %xmm3,%xmm2
|
| - movdqu %xmm0,-16(%edx)
|
| - decl %ecx
|
| - jnz L107loop_key192
|
| - movl $11,%ecx
|
| - movl %ecx,32(%edx)
|
| - jmp L100good_key
|
| -.align 4,0x90
|
| -L09314rounds:
|
| +L08914rounds:
|
| movups 16(%eax),%xmm2
|
| - leal 16(%edx),%edx
|
| - cmpl $268435456,%ebp
|
| - je L10814rounds_alt
|
| movl $13,%ecx
|
| + leal 16(%edx),%edx
|
| movups %xmm0,-32(%edx)
|
| movups %xmm2,-16(%edx)
|
| .byte 102,15,58,223,202,1
|
| - call L109key_256a_cold
|
| + call L099key_256a_cold
|
| .byte 102,15,58,223,200,1
|
| - call L110key_256b
|
| + call L100key_256b
|
| .byte 102,15,58,223,202,2
|
| - call L111key_256a
|
| + call L101key_256a
|
| .byte 102,15,58,223,200,2
|
| - call L110key_256b
|
| + call L100key_256b
|
| .byte 102,15,58,223,202,4
|
| - call L111key_256a
|
| + call L101key_256a
|
| .byte 102,15,58,223,200,4
|
| - call L110key_256b
|
| + call L100key_256b
|
| .byte 102,15,58,223,202,8
|
| - call L111key_256a
|
| + call L101key_256a
|
| .byte 102,15,58,223,200,8
|
| - call L110key_256b
|
| + call L100key_256b
|
| .byte 102,15,58,223,202,16
|
| - call L111key_256a
|
| + call L101key_256a
|
| .byte 102,15,58,223,200,16
|
| - call L110key_256b
|
| + call L100key_256b
|
| .byte 102,15,58,223,202,32
|
| - call L111key_256a
|
| + call L101key_256a
|
| .byte 102,15,58,223,200,32
|
| - call L110key_256b
|
| + call L100key_256b
|
| .byte 102,15,58,223,202,64
|
| - call L111key_256a
|
| + call L101key_256a
|
| movups %xmm0,(%edx)
|
| movl %ecx,16(%edx)
|
| xorl %eax,%eax
|
| - jmp L100good_key
|
| + ret
|
| .align 4,0x90
|
| -L111key_256a:
|
| +L101key_256a:
|
| movups %xmm2,(%edx)
|
| leal 16(%edx),%edx
|
| -L109key_256a_cold:
|
| +L099key_256a_cold:
|
| shufps $16,%xmm0,%xmm4
|
| xorps %xmm4,%xmm0
|
| shufps $140,%xmm0,%xmm4
|
| @@ -2292,7 +2106,7 @@ L109key_256a_cold:
|
| xorps %xmm1,%xmm0
|
| ret
|
| .align 4,0x90
|
| -L110key_256b:
|
| +L100key_256b:
|
| movups %xmm0,(%edx)
|
| leal 16(%edx),%edx
|
| shufps $16,%xmm2,%xmm4
|
| @@ -2302,70 +2116,13 @@ L110key_256b:
|
| shufps $170,%xmm1,%xmm1
|
| xorps %xmm1,%xmm2
|
| ret
|
| -.align 4,0x90
|
| -L10814rounds_alt:
|
| - movdqa (%ebx),%xmm5
|
| - movdqa 32(%ebx),%xmm4
|
| - movl $7,%ecx
|
| - movdqu %xmm0,-32(%edx)
|
| - movdqa %xmm2,%xmm1
|
| - movdqu %xmm2,-16(%edx)
|
| -L112loop_key256:
|
| -.byte 102,15,56,0,213
|
| -.byte 102,15,56,221,212
|
| - movdqa %xmm0,%xmm3
|
| - pslldq $4,%xmm0
|
| - pxor %xmm0,%xmm3
|
| - pslldq $4,%xmm0
|
| - pxor %xmm0,%xmm3
|
| - pslldq $4,%xmm0
|
| - pxor %xmm3,%xmm0
|
| - pslld $1,%xmm4
|
| - pxor %xmm2,%xmm0
|
| - movdqu %xmm0,(%edx)
|
| - decl %ecx
|
| - jz L113done_key256
|
| - pshufd $255,%xmm0,%xmm2
|
| - pxor %xmm3,%xmm3
|
| -.byte 102,15,56,221,211
|
| - movdqa %xmm1,%xmm3
|
| - pslldq $4,%xmm1
|
| - pxor %xmm1,%xmm3
|
| - pslldq $4,%xmm1
|
| - pxor %xmm1,%xmm3
|
| - pslldq $4,%xmm1
|
| - pxor %xmm3,%xmm1
|
| - pxor %xmm1,%xmm2
|
| - movdqu %xmm2,16(%edx)
|
| - leal 32(%edx),%edx
|
| - movdqa %xmm2,%xmm1
|
| - jmp L112loop_key256
|
| -L113done_key256:
|
| - movl $13,%ecx
|
| - movl %ecx,16(%edx)
|
| -L100good_key:
|
| - pxor %xmm0,%xmm0
|
| - pxor %xmm1,%xmm1
|
| - pxor %xmm2,%xmm2
|
| - pxor %xmm3,%xmm3
|
| - pxor %xmm4,%xmm4
|
| - pxor %xmm5,%xmm5
|
| - xorl %eax,%eax
|
| - popl %ebx
|
| - popl %ebp
|
| - ret
|
| .align 2,0x90
|
| -L091bad_pointer:
|
| +L088bad_pointer:
|
| movl $-1,%eax
|
| - popl %ebx
|
| - popl %ebp
|
| ret
|
| .align 2,0x90
|
| -L095bad_keybits:
|
| - pxor %xmm0,%xmm0
|
| +L091bad_keybits:
|
| movl $-2,%eax
|
| - popl %ebx
|
| - popl %ebp
|
| ret
|
| .globl _aesni_set_encrypt_key
|
| .private_extern _aesni_set_encrypt_key
|
| @@ -2389,7 +2146,7 @@ L_aesni_set_decrypt_key_begin:
|
| movl 12(%esp),%edx
|
| shll $4,%ecx
|
| testl %eax,%eax
|
| - jnz L114dec_key_ret
|
| + jnz L102dec_key_ret
|
| leal 16(%edx,%ecx,1),%eax
|
| movups (%edx),%xmm0
|
| movups (%eax),%xmm1
|
| @@ -2397,7 +2154,7 @@ L_aesni_set_decrypt_key_begin:
|
| movups %xmm1,(%edx)
|
| leal 16(%edx),%edx
|
| leal -16(%eax),%eax
|
| -L115dec_key_inverse:
|
| +L103dec_key_inverse:
|
| movups (%edx),%xmm0
|
| movups (%eax),%xmm1
|
| .byte 102,15,56,219,192
|
| @@ -2407,27 +2164,15 @@ L115dec_key_inverse:
|
| movups %xmm0,16(%eax)
|
| movups %xmm1,-16(%edx)
|
| cmpl %edx,%eax
|
| - ja L115dec_key_inverse
|
| + ja L103dec_key_inverse
|
| movups (%edx),%xmm0
|
| .byte 102,15,56,219,192
|
| movups %xmm0,(%edx)
|
| - pxor %xmm0,%xmm0
|
| - pxor %xmm1,%xmm1
|
| xorl %eax,%eax
|
| -L114dec_key_ret:
|
| +L102dec_key_ret:
|
| ret
|
| -.align 6,0x90
|
| -Lkey_const:
|
| -.long 202313229,202313229,202313229,202313229
|
| -.long 67569157,67569157,67569157,67569157
|
| -.long 1,1,1,1
|
| -.long 27,27,27,27
|
| .byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69
|
| .byte 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
|
| .byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
|
| .byte 115,108,46,111,114,103,62,0
|
| -.section __IMPORT,__pointers,non_lazy_symbol_pointers
|
| -L_OPENSSL_ia32cap_P$non_lazy_ptr:
|
| -.indirect_symbol _OPENSSL_ia32cap_P
|
| -.long 0
|
| #endif
|