| Index: third_party/boringssl/mac-x86/crypto/aes/aesni-x86.S
|
| diff --git a/third_party/boringssl/mac-x86/crypto/aes/aesni-x86.S b/third_party/boringssl/mac-x86/crypto/aes/aesni-x86.S
|
| index 9000478d20e1a132fcb8f1b96c9cef9d627b22c1..07719ba7ae8f6fc5b8e8ab41ab143ddb131ede4a 100644
|
| --- a/third_party/boringssl/mac-x86/crypto/aes/aesni-x86.S
|
| +++ b/third_party/boringssl/mac-x86/crypto/aes/aesni-x86.S
|
| @@ -22,7 +22,10 @@ L000enc1_loop_1:
|
| leal 16(%edx),%edx
|
| jnz L000enc1_loop_1
|
| .byte 102,15,56,221,209
|
| + pxor %xmm0,%xmm0
|
| + pxor %xmm1,%xmm1
|
| movups %xmm2,(%eax)
|
| + pxor %xmm2,%xmm2
|
| ret
|
| .globl _aesni_decrypt
|
| .private_extern _aesni_decrypt
|
| @@ -45,7 +48,10 @@ L001dec1_loop_2:
|
| leal 16(%edx),%edx
|
| jnz L001dec1_loop_2
|
| .byte 102,15,56,223,209
|
| + pxor %xmm0,%xmm0
|
| + pxor %xmm1,%xmm1
|
| movups %xmm2,(%eax)
|
| + pxor %xmm2,%xmm2
|
| ret
|
| .private_extern __aesni_encrypt2
|
| .align 4
|
| @@ -252,17 +258,15 @@ __aesni_encrypt6:
|
| negl %ecx
|
| .byte 102,15,56,220,225
|
| pxor %xmm0,%xmm7
|
| + movups (%edx,%ecx,1),%xmm0
|
| addl $16,%ecx
|
| -.byte 102,15,56,220,233
|
| -.byte 102,15,56,220,241
|
| -.byte 102,15,56,220,249
|
| - movups -16(%edx,%ecx,1),%xmm0
|
| - jmp L_aesni_encrypt6_enter
|
| + jmp L008_aesni_encrypt6_inner
|
| .align 4,0x90
|
| -L008enc6_loop:
|
| +L009enc6_loop:
|
| .byte 102,15,56,220,209
|
| .byte 102,15,56,220,217
|
| .byte 102,15,56,220,225
|
| +L008_aesni_encrypt6_inner:
|
| .byte 102,15,56,220,233
|
| .byte 102,15,56,220,241
|
| .byte 102,15,56,220,249
|
| @@ -276,7 +280,7 @@ L_aesni_encrypt6_enter:
|
| .byte 102,15,56,220,240
|
| .byte 102,15,56,220,248
|
| movups -16(%edx,%ecx,1),%xmm0
|
| - jnz L008enc6_loop
|
| + jnz L009enc6_loop
|
| .byte 102,15,56,220,209
|
| .byte 102,15,56,220,217
|
| .byte 102,15,56,220,225
|
| @@ -307,17 +311,15 @@ __aesni_decrypt6:
|
| negl %ecx
|
| .byte 102,15,56,222,225
|
| pxor %xmm0,%xmm7
|
| + movups (%edx,%ecx,1),%xmm0
|
| addl $16,%ecx
|
| -.byte 102,15,56,222,233
|
| -.byte 102,15,56,222,241
|
| -.byte 102,15,56,222,249
|
| - movups -16(%edx,%ecx,1),%xmm0
|
| - jmp L_aesni_decrypt6_enter
|
| + jmp L010_aesni_decrypt6_inner
|
| .align 4,0x90
|
| -L009dec6_loop:
|
| +L011dec6_loop:
|
| .byte 102,15,56,222,209
|
| .byte 102,15,56,222,217
|
| .byte 102,15,56,222,225
|
| +L010_aesni_decrypt6_inner:
|
| .byte 102,15,56,222,233
|
| .byte 102,15,56,222,241
|
| .byte 102,15,56,222,249
|
| @@ -331,7 +333,7 @@ L_aesni_decrypt6_enter:
|
| .byte 102,15,56,222,240
|
| .byte 102,15,56,222,248
|
| movups -16(%edx,%ecx,1),%xmm0
|
| - jnz L009dec6_loop
|
| + jnz L011dec6_loop
|
| .byte 102,15,56,222,209
|
| .byte 102,15,56,222,217
|
| .byte 102,15,56,222,225
|
| @@ -360,14 +362,14 @@ L_aesni_ecb_encrypt_begin:
|
| movl 32(%esp),%edx
|
| movl 36(%esp),%ebx
|
| andl $-16,%eax
|
| - jz L010ecb_ret
|
| + jz L012ecb_ret
|
| movl 240(%edx),%ecx
|
| testl %ebx,%ebx
|
| - jz L011ecb_decrypt
|
| + jz L013ecb_decrypt
|
| movl %edx,%ebp
|
| movl %ecx,%ebx
|
| cmpl $96,%eax
|
| - jb L012ecb_enc_tail
|
| + jb L014ecb_enc_tail
|
| movdqu (%esi),%xmm2
|
| movdqu 16(%esi),%xmm3
|
| movdqu 32(%esi),%xmm4
|
| @@ -376,9 +378,9 @@ L_aesni_ecb_encrypt_begin:
|
| movdqu 80(%esi),%xmm7
|
| leal 96(%esi),%esi
|
| subl $96,%eax
|
| - jmp L013ecb_enc_loop6_enter
|
| + jmp L015ecb_enc_loop6_enter
|
| .align 4,0x90
|
| -L014ecb_enc_loop6:
|
| +L016ecb_enc_loop6:
|
| movups %xmm2,(%edi)
|
| movdqu (%esi),%xmm2
|
| movups %xmm3,16(%edi)
|
| @@ -393,12 +395,12 @@ L014ecb_enc_loop6:
|
| leal 96(%edi),%edi
|
| movdqu 80(%esi),%xmm7
|
| leal 96(%esi),%esi
|
| -L013ecb_enc_loop6_enter:
|
| +L015ecb_enc_loop6_enter:
|
| call __aesni_encrypt6
|
| movl %ebp,%edx
|
| movl %ebx,%ecx
|
| subl $96,%eax
|
| - jnc L014ecb_enc_loop6
|
| + jnc L016ecb_enc_loop6
|
| movups %xmm2,(%edi)
|
| movups %xmm3,16(%edi)
|
| movups %xmm4,32(%edi)
|
| @@ -407,18 +409,18 @@ L013ecb_enc_loop6_enter:
|
| movups %xmm7,80(%edi)
|
| leal 96(%edi),%edi
|
| addl $96,%eax
|
| - jz L010ecb_ret
|
| -L012ecb_enc_tail:
|
| + jz L012ecb_ret
|
| +L014ecb_enc_tail:
|
| movups (%esi),%xmm2
|
| cmpl $32,%eax
|
| - jb L015ecb_enc_one
|
| + jb L017ecb_enc_one
|
| movups 16(%esi),%xmm3
|
| - je L016ecb_enc_two
|
| + je L018ecb_enc_two
|
| movups 32(%esi),%xmm4
|
| cmpl $64,%eax
|
| - jb L017ecb_enc_three
|
| + jb L019ecb_enc_three
|
| movups 48(%esi),%xmm5
|
| - je L018ecb_enc_four
|
| + je L020ecb_enc_four
|
| movups 64(%esi),%xmm6
|
| xorps %xmm7,%xmm7
|
| call __aesni_encrypt6
|
| @@ -427,49 +429,49 @@ L012ecb_enc_tail:
|
| movups %xmm4,32(%edi)
|
| movups %xmm5,48(%edi)
|
| movups %xmm6,64(%edi)
|
| - jmp L010ecb_ret
|
| + jmp L012ecb_ret
|
| .align 4,0x90
|
| -L015ecb_enc_one:
|
| +L017ecb_enc_one:
|
| movups (%edx),%xmm0
|
| movups 16(%edx),%xmm1
|
| leal 32(%edx),%edx
|
| xorps %xmm0,%xmm2
|
| -L019enc1_loop_3:
|
| +L021enc1_loop_3:
|
| .byte 102,15,56,220,209
|
| decl %ecx
|
| movups (%edx),%xmm1
|
| leal 16(%edx),%edx
|
| - jnz L019enc1_loop_3
|
| + jnz L021enc1_loop_3
|
| .byte 102,15,56,221,209
|
| movups %xmm2,(%edi)
|
| - jmp L010ecb_ret
|
| + jmp L012ecb_ret
|
| .align 4,0x90
|
| -L016ecb_enc_two:
|
| +L018ecb_enc_two:
|
| call __aesni_encrypt2
|
| movups %xmm2,(%edi)
|
| movups %xmm3,16(%edi)
|
| - jmp L010ecb_ret
|
| + jmp L012ecb_ret
|
| .align 4,0x90
|
| -L017ecb_enc_three:
|
| +L019ecb_enc_three:
|
| call __aesni_encrypt3
|
| movups %xmm2,(%edi)
|
| movups %xmm3,16(%edi)
|
| movups %xmm4,32(%edi)
|
| - jmp L010ecb_ret
|
| + jmp L012ecb_ret
|
| .align 4,0x90
|
| -L018ecb_enc_four:
|
| +L020ecb_enc_four:
|
| call __aesni_encrypt4
|
| movups %xmm2,(%edi)
|
| movups %xmm3,16(%edi)
|
| movups %xmm4,32(%edi)
|
| movups %xmm5,48(%edi)
|
| - jmp L010ecb_ret
|
| + jmp L012ecb_ret
|
| .align 4,0x90
|
| -L011ecb_decrypt:
|
| +L013ecb_decrypt:
|
| movl %edx,%ebp
|
| movl %ecx,%ebx
|
| cmpl $96,%eax
|
| - jb L020ecb_dec_tail
|
| + jb L022ecb_dec_tail
|
| movdqu (%esi),%xmm2
|
| movdqu 16(%esi),%xmm3
|
| movdqu 32(%esi),%xmm4
|
| @@ -478,9 +480,9 @@ L011ecb_decrypt:
|
| movdqu 80(%esi),%xmm7
|
| leal 96(%esi),%esi
|
| subl $96,%eax
|
| - jmp L021ecb_dec_loop6_enter
|
| + jmp L023ecb_dec_loop6_enter
|
| .align 4,0x90
|
| -L022ecb_dec_loop6:
|
| +L024ecb_dec_loop6:
|
| movups %xmm2,(%edi)
|
| movdqu (%esi),%xmm2
|
| movups %xmm3,16(%edi)
|
| @@ -495,12 +497,12 @@ L022ecb_dec_loop6:
|
| leal 96(%edi),%edi
|
| movdqu 80(%esi),%xmm7
|
| leal 96(%esi),%esi
|
| -L021ecb_dec_loop6_enter:
|
| +L023ecb_dec_loop6_enter:
|
| call __aesni_decrypt6
|
| movl %ebp,%edx
|
| movl %ebx,%ecx
|
| subl $96,%eax
|
| - jnc L022ecb_dec_loop6
|
| + jnc L024ecb_dec_loop6
|
| movups %xmm2,(%edi)
|
| movups %xmm3,16(%edi)
|
| movups %xmm4,32(%edi)
|
| @@ -509,18 +511,18 @@ L021ecb_dec_loop6_enter:
|
| movups %xmm7,80(%edi)
|
| leal 96(%edi),%edi
|
| addl $96,%eax
|
| - jz L010ecb_ret
|
| -L020ecb_dec_tail:
|
| + jz L012ecb_ret
|
| +L022ecb_dec_tail:
|
| movups (%esi),%xmm2
|
| cmpl $32,%eax
|
| - jb L023ecb_dec_one
|
| + jb L025ecb_dec_one
|
| movups 16(%esi),%xmm3
|
| - je L024ecb_dec_two
|
| + je L026ecb_dec_two
|
| movups 32(%esi),%xmm4
|
| cmpl $64,%eax
|
| - jb L025ecb_dec_three
|
| + jb L027ecb_dec_three
|
| movups 48(%esi),%xmm5
|
| - je L026ecb_dec_four
|
| + je L028ecb_dec_four
|
| movups 64(%esi),%xmm6
|
| xorps %xmm7,%xmm7
|
| call __aesni_decrypt6
|
| @@ -529,43 +531,51 @@ L020ecb_dec_tail:
|
| movups %xmm4,32(%edi)
|
| movups %xmm5,48(%edi)
|
| movups %xmm6,64(%edi)
|
| - jmp L010ecb_ret
|
| + jmp L012ecb_ret
|
| .align 4,0x90
|
| -L023ecb_dec_one:
|
| +L025ecb_dec_one:
|
| movups (%edx),%xmm0
|
| movups 16(%edx),%xmm1
|
| leal 32(%edx),%edx
|
| xorps %xmm0,%xmm2
|
| -L027dec1_loop_4:
|
| +L029dec1_loop_4:
|
| .byte 102,15,56,222,209
|
| decl %ecx
|
| movups (%edx),%xmm1
|
| leal 16(%edx),%edx
|
| - jnz L027dec1_loop_4
|
| + jnz L029dec1_loop_4
|
| .byte 102,15,56,223,209
|
| movups %xmm2,(%edi)
|
| - jmp L010ecb_ret
|
| + jmp L012ecb_ret
|
| .align 4,0x90
|
| -L024ecb_dec_two:
|
| +L026ecb_dec_two:
|
| call __aesni_decrypt2
|
| movups %xmm2,(%edi)
|
| movups %xmm3,16(%edi)
|
| - jmp L010ecb_ret
|
| + jmp L012ecb_ret
|
| .align 4,0x90
|
| -L025ecb_dec_three:
|
| +L027ecb_dec_three:
|
| call __aesni_decrypt3
|
| movups %xmm2,(%edi)
|
| movups %xmm3,16(%edi)
|
| movups %xmm4,32(%edi)
|
| - jmp L010ecb_ret
|
| + jmp L012ecb_ret
|
| .align 4,0x90
|
| -L026ecb_dec_four:
|
| +L028ecb_dec_four:
|
| call __aesni_decrypt4
|
| movups %xmm2,(%edi)
|
| movups %xmm3,16(%edi)
|
| movups %xmm4,32(%edi)
|
| movups %xmm5,48(%edi)
|
| -L010ecb_ret:
|
| +L012ecb_ret:
|
| + pxor %xmm0,%xmm0
|
| + pxor %xmm1,%xmm1
|
| + pxor %xmm2,%xmm2
|
| + pxor %xmm3,%xmm3
|
| + pxor %xmm4,%xmm4
|
| + pxor %xmm5,%xmm5
|
| + pxor %xmm6,%xmm6
|
| + pxor %xmm7,%xmm7
|
| popl %edi
|
| popl %esi
|
| popl %ebx
|
| @@ -611,7 +621,7 @@ L_aesni_ccm64_encrypt_blocks_begin:
|
| leal 32(%edx,%ecx,1),%edx
|
| subl %ecx,%ebx
|
| .byte 102,15,56,0,253
|
| -L028ccm64_enc_outer:
|
| +L030ccm64_enc_outer:
|
| movups (%ebp),%xmm0
|
| movl %ebx,%ecx
|
| movups (%esi),%xmm6
|
| @@ -620,7 +630,7 @@ L028ccm64_enc_outer:
|
| xorps %xmm6,%xmm0
|
| xorps %xmm0,%xmm3
|
| movups 32(%ebp),%xmm0
|
| -L029ccm64_enc2_loop:
|
| +L031ccm64_enc2_loop:
|
| .byte 102,15,56,220,209
|
| .byte 102,15,56,220,217
|
| movups (%edx,%ecx,1),%xmm1
|
| @@ -628,7 +638,7 @@ L029ccm64_enc2_loop:
|
| .byte 102,15,56,220,208
|
| .byte 102,15,56,220,216
|
| movups -16(%edx,%ecx,1),%xmm0
|
| - jnz L029ccm64_enc2_loop
|
| + jnz L031ccm64_enc2_loop
|
| .byte 102,15,56,220,209
|
| .byte 102,15,56,220,217
|
| paddq 16(%esp),%xmm7
|
| @@ -641,10 +651,18 @@ L029ccm64_enc2_loop:
|
| movups %xmm6,(%edi)
|
| .byte 102,15,56,0,213
|
| leal 16(%edi),%edi
|
| - jnz L028ccm64_enc_outer
|
| + jnz L030ccm64_enc_outer
|
| movl 48(%esp),%esp
|
| movl 40(%esp),%edi
|
| movups %xmm3,(%edi)
|
| + pxor %xmm0,%xmm0
|
| + pxor %xmm1,%xmm1
|
| + pxor %xmm2,%xmm2
|
| + pxor %xmm3,%xmm3
|
| + pxor %xmm4,%xmm4
|
| + pxor %xmm5,%xmm5
|
| + pxor %xmm6,%xmm6
|
| + pxor %xmm7,%xmm7
|
| popl %edi
|
| popl %esi
|
| popl %ebx
|
| @@ -691,12 +709,12 @@ L_aesni_ccm64_decrypt_blocks_begin:
|
| movups 16(%edx),%xmm1
|
| leal 32(%edx),%edx
|
| xorps %xmm0,%xmm2
|
| -L030enc1_loop_5:
|
| +L032enc1_loop_5:
|
| .byte 102,15,56,220,209
|
| decl %ecx
|
| movups (%edx),%xmm1
|
| leal 16(%edx),%edx
|
| - jnz L030enc1_loop_5
|
| + jnz L032enc1_loop_5
|
| .byte 102,15,56,221,209
|
| shll $4,%ebx
|
| movl $16,%ecx
|
| @@ -706,16 +724,16 @@ L030enc1_loop_5:
|
| subl %ebx,%ecx
|
| leal 32(%ebp,%ebx,1),%edx
|
| movl %ecx,%ebx
|
| - jmp L031ccm64_dec_outer
|
| + jmp L033ccm64_dec_outer
|
| .align 4,0x90
|
| -L031ccm64_dec_outer:
|
| +L033ccm64_dec_outer:
|
| xorps %xmm2,%xmm6
|
| movdqa %xmm7,%xmm2
|
| movups %xmm6,(%edi)
|
| leal 16(%edi),%edi
|
| .byte 102,15,56,0,213
|
| subl $1,%eax
|
| - jz L032ccm64_dec_break
|
| + jz L034ccm64_dec_break
|
| movups (%ebp),%xmm0
|
| movl %ebx,%ecx
|
| movups 16(%ebp),%xmm1
|
| @@ -723,7 +741,7 @@ L031ccm64_dec_outer:
|
| xorps %xmm0,%xmm2
|
| xorps %xmm6,%xmm3
|
| movups 32(%ebp),%xmm0
|
| -L033ccm64_dec2_loop:
|
| +L035ccm64_dec2_loop:
|
| .byte 102,15,56,220,209
|
| .byte 102,15,56,220,217
|
| movups (%edx,%ecx,1),%xmm1
|
| @@ -731,7 +749,7 @@ L033ccm64_dec2_loop:
|
| .byte 102,15,56,220,208
|
| .byte 102,15,56,220,216
|
| movups -16(%edx,%ecx,1),%xmm0
|
| - jnz L033ccm64_dec2_loop
|
| + jnz L035ccm64_dec2_loop
|
| movups (%esi),%xmm6
|
| paddq 16(%esp),%xmm7
|
| .byte 102,15,56,220,209
|
| @@ -739,9 +757,9 @@ L033ccm64_dec2_loop:
|
| .byte 102,15,56,221,208
|
| .byte 102,15,56,221,216
|
| leal 16(%esi),%esi
|
| - jmp L031ccm64_dec_outer
|
| + jmp L033ccm64_dec_outer
|
| .align 4,0x90
|
| -L032ccm64_dec_break:
|
| +L034ccm64_dec_break:
|
| movl 240(%ebp),%ecx
|
| movl %ebp,%edx
|
| movups (%edx),%xmm0
|
| @@ -749,16 +767,24 @@ L032ccm64_dec_break:
|
| xorps %xmm0,%xmm6
|
| leal 32(%edx),%edx
|
| xorps %xmm6,%xmm3
|
| -L034enc1_loop_6:
|
| +L036enc1_loop_6:
|
| .byte 102,15,56,220,217
|
| decl %ecx
|
| movups (%edx),%xmm1
|
| leal 16(%edx),%edx
|
| - jnz L034enc1_loop_6
|
| + jnz L036enc1_loop_6
|
| .byte 102,15,56,221,217
|
| movl 48(%esp),%esp
|
| movl 40(%esp),%edi
|
| movups %xmm3,(%edi)
|
| + pxor %xmm0,%xmm0
|
| + pxor %xmm1,%xmm1
|
| + pxor %xmm2,%xmm2
|
| + pxor %xmm3,%xmm3
|
| + pxor %xmm4,%xmm4
|
| + pxor %xmm5,%xmm5
|
| + pxor %xmm6,%xmm6
|
| + pxor %xmm7,%xmm7
|
| popl %edi
|
| popl %esi
|
| popl %ebx
|
| @@ -783,7 +809,7 @@ L_aesni_ctr32_encrypt_blocks_begin:
|
| andl $-16,%esp
|
| movl %ebp,80(%esp)
|
| cmpl $1,%eax
|
| - je L035ctr32_one_shortcut
|
| + je L037ctr32_one_shortcut
|
| movdqu (%ebx),%xmm7
|
| movl $202182159,(%esp)
|
| movl $134810123,4(%esp)
|
| @@ -821,7 +847,7 @@ L_aesni_ctr32_encrypt_blocks_begin:
|
| pshufd $192,%xmm0,%xmm2
|
| pshufd $128,%xmm0,%xmm3
|
| cmpl $6,%eax
|
| - jb L036ctr32_tail
|
| + jb L038ctr32_tail
|
| pxor %xmm6,%xmm7
|
| shll $4,%ecx
|
| movl $16,%ebx
|
| @@ -830,9 +856,9 @@ L_aesni_ctr32_encrypt_blocks_begin:
|
| subl %ecx,%ebx
|
| leal 32(%edx,%ecx,1),%edx
|
| subl $6,%eax
|
| - jmp L037ctr32_loop6
|
| + jmp L039ctr32_loop6
|
| .align 4,0x90
|
| -L037ctr32_loop6:
|
| +L039ctr32_loop6:
|
| pshufd $64,%xmm0,%xmm4
|
| movdqa 32(%esp),%xmm0
|
| pshufd $192,%xmm1,%xmm5
|
| @@ -886,27 +912,27 @@ L037ctr32_loop6:
|
| leal 96(%edi),%edi
|
| pshufd $128,%xmm0,%xmm3
|
| subl $6,%eax
|
| - jnc L037ctr32_loop6
|
| + jnc L039ctr32_loop6
|
| addl $6,%eax
|
| - jz L038ctr32_ret
|
| + jz L040ctr32_ret
|
| movdqu (%ebp),%xmm7
|
| movl %ebp,%edx
|
| pxor 32(%esp),%xmm7
|
| movl 240(%ebp),%ecx
|
| -L036ctr32_tail:
|
| +L038ctr32_tail:
|
| por %xmm7,%xmm2
|
| cmpl $2,%eax
|
| - jb L039ctr32_one
|
| + jb L041ctr32_one
|
| pshufd $64,%xmm0,%xmm4
|
| por %xmm7,%xmm3
|
| - je L040ctr32_two
|
| + je L042ctr32_two
|
| pshufd $192,%xmm1,%xmm5
|
| por %xmm7,%xmm4
|
| cmpl $4,%eax
|
| - jb L041ctr32_three
|
| + jb L043ctr32_three
|
| pshufd $128,%xmm1,%xmm6
|
| por %xmm7,%xmm5
|
| - je L042ctr32_four
|
| + je L044ctr32_four
|
| por %xmm7,%xmm6
|
| call __aesni_encrypt6
|
| movups (%esi),%xmm1
|
| @@ -924,29 +950,29 @@ L036ctr32_tail:
|
| movups %xmm4,32(%edi)
|
| movups %xmm5,48(%edi)
|
| movups %xmm6,64(%edi)
|
| - jmp L038ctr32_ret
|
| + jmp L040ctr32_ret
|
| .align 4,0x90
|
| -L035ctr32_one_shortcut:
|
| +L037ctr32_one_shortcut:
|
| movups (%ebx),%xmm2
|
| movl 240(%edx),%ecx
|
| -L039ctr32_one:
|
| +L041ctr32_one:
|
| movups (%edx),%xmm0
|
| movups 16(%edx),%xmm1
|
| leal 32(%edx),%edx
|
| xorps %xmm0,%xmm2
|
| -L043enc1_loop_7:
|
| +L045enc1_loop_7:
|
| .byte 102,15,56,220,209
|
| decl %ecx
|
| movups (%edx),%xmm1
|
| leal 16(%edx),%edx
|
| - jnz L043enc1_loop_7
|
| + jnz L045enc1_loop_7
|
| .byte 102,15,56,221,209
|
| movups (%esi),%xmm6
|
| xorps %xmm2,%xmm6
|
| movups %xmm6,(%edi)
|
| - jmp L038ctr32_ret
|
| + jmp L040ctr32_ret
|
| .align 4,0x90
|
| -L040ctr32_two:
|
| +L042ctr32_two:
|
| call __aesni_encrypt2
|
| movups (%esi),%xmm5
|
| movups 16(%esi),%xmm6
|
| @@ -954,9 +980,9 @@ L040ctr32_two:
|
| xorps %xmm6,%xmm3
|
| movups %xmm2,(%edi)
|
| movups %xmm3,16(%edi)
|
| - jmp L038ctr32_ret
|
| + jmp L040ctr32_ret
|
| .align 4,0x90
|
| -L041ctr32_three:
|
| +L043ctr32_three:
|
| call __aesni_encrypt3
|
| movups (%esi),%xmm5
|
| movups 16(%esi),%xmm6
|
| @@ -967,9 +993,9 @@ L041ctr32_three:
|
| xorps %xmm7,%xmm4
|
| movups %xmm3,16(%edi)
|
| movups %xmm4,32(%edi)
|
| - jmp L038ctr32_ret
|
| + jmp L040ctr32_ret
|
| .align 4,0x90
|
| -L042ctr32_four:
|
| +L044ctr32_four:
|
| call __aesni_encrypt4
|
| movups (%esi),%xmm6
|
| movups 16(%esi),%xmm7
|
| @@ -983,7 +1009,18 @@ L042ctr32_four:
|
| xorps %xmm0,%xmm5
|
| movups %xmm4,32(%edi)
|
| movups %xmm5,48(%edi)
|
| -L038ctr32_ret:
|
| +L040ctr32_ret:
|
| + pxor %xmm0,%xmm0
|
| + pxor %xmm1,%xmm1
|
| + pxor %xmm2,%xmm2
|
| + pxor %xmm3,%xmm3
|
| + pxor %xmm4,%xmm4
|
| + movdqa %xmm0,32(%esp)
|
| + pxor %xmm5,%xmm5
|
| + movdqa %xmm0,48(%esp)
|
| + pxor %xmm6,%xmm6
|
| + movdqa %xmm0,64(%esp)
|
| + pxor %xmm7,%xmm7
|
| movl 80(%esp),%esp
|
| popl %edi
|
| popl %esi
|
| @@ -1007,12 +1044,12 @@ L_aesni_xts_encrypt_begin:
|
| movups 16(%edx),%xmm1
|
| leal 32(%edx),%edx
|
| xorps %xmm0,%xmm2
|
| -L044enc1_loop_8:
|
| +L046enc1_loop_8:
|
| .byte 102,15,56,220,209
|
| decl %ecx
|
| movups (%edx),%xmm1
|
| leal 16(%edx),%edx
|
| - jnz L044enc1_loop_8
|
| + jnz L046enc1_loop_8
|
| .byte 102,15,56,221,209
|
| movl 20(%esp),%esi
|
| movl 24(%esp),%edi
|
| @@ -1036,14 +1073,14 @@ L044enc1_loop_8:
|
| movl %edx,%ebp
|
| movl %ecx,%ebx
|
| subl $96,%eax
|
| - jc L045xts_enc_short
|
| + jc L047xts_enc_short
|
| shll $4,%ecx
|
| movl $16,%ebx
|
| subl %ecx,%ebx
|
| leal 32(%edx,%ecx,1),%edx
|
| - jmp L046xts_enc_loop6
|
| + jmp L048xts_enc_loop6
|
| .align 4,0x90
|
| -L046xts_enc_loop6:
|
| +L048xts_enc_loop6:
|
| pshufd $19,%xmm0,%xmm2
|
| pxor %xmm0,%xmm0
|
| movdqa %xmm1,(%esp)
|
| @@ -1132,23 +1169,23 @@ L046xts_enc_loop6:
|
| pcmpgtd %xmm1,%xmm0
|
| pxor %xmm2,%xmm1
|
| subl $96,%eax
|
| - jnc L046xts_enc_loop6
|
| + jnc L048xts_enc_loop6
|
| movl 240(%ebp),%ecx
|
| movl %ebp,%edx
|
| movl %ecx,%ebx
|
| -L045xts_enc_short:
|
| +L047xts_enc_short:
|
| addl $96,%eax
|
| - jz L047xts_enc_done6x
|
| + jz L049xts_enc_done6x
|
| movdqa %xmm1,%xmm5
|
| cmpl $32,%eax
|
| - jb L048xts_enc_one
|
| + jb L050xts_enc_one
|
| pshufd $19,%xmm0,%xmm2
|
| pxor %xmm0,%xmm0
|
| paddq %xmm1,%xmm1
|
| pand %xmm3,%xmm2
|
| pcmpgtd %xmm1,%xmm0
|
| pxor %xmm2,%xmm1
|
| - je L049xts_enc_two
|
| + je L051xts_enc_two
|
| pshufd $19,%xmm0,%xmm2
|
| pxor %xmm0,%xmm0
|
| movdqa %xmm1,%xmm6
|
| @@ -1157,7 +1194,7 @@ L045xts_enc_short:
|
| pcmpgtd %xmm1,%xmm0
|
| pxor %xmm2,%xmm1
|
| cmpl $64,%eax
|
| - jb L050xts_enc_three
|
| + jb L052xts_enc_three
|
| pshufd $19,%xmm0,%xmm2
|
| pxor %xmm0,%xmm0
|
| movdqa %xmm1,%xmm7
|
| @@ -1167,7 +1204,7 @@ L045xts_enc_short:
|
| pxor %xmm2,%xmm1
|
| movdqa %xmm5,(%esp)
|
| movdqa %xmm6,16(%esp)
|
| - je L051xts_enc_four
|
| + je L053xts_enc_four
|
| movdqa %xmm7,32(%esp)
|
| pshufd $19,%xmm0,%xmm7
|
| movdqa %xmm1,48(%esp)
|
| @@ -1199,9 +1236,9 @@ L045xts_enc_short:
|
| movups %xmm5,48(%edi)
|
| movups %xmm6,64(%edi)
|
| leal 80(%edi),%edi
|
| - jmp L052xts_enc_done
|
| + jmp L054xts_enc_done
|
| .align 4,0x90
|
| -L048xts_enc_one:
|
| +L050xts_enc_one:
|
| movups (%esi),%xmm2
|
| leal 16(%esi),%esi
|
| xorps %xmm5,%xmm2
|
| @@ -1209,20 +1246,20 @@ L048xts_enc_one:
|
| movups 16(%edx),%xmm1
|
| leal 32(%edx),%edx
|
| xorps %xmm0,%xmm2
|
| -L053enc1_loop_9:
|
| +L055enc1_loop_9:
|
| .byte 102,15,56,220,209
|
| decl %ecx
|
| movups (%edx),%xmm1
|
| leal 16(%edx),%edx
|
| - jnz L053enc1_loop_9
|
| + jnz L055enc1_loop_9
|
| .byte 102,15,56,221,209
|
| xorps %xmm5,%xmm2
|
| movups %xmm2,(%edi)
|
| leal 16(%edi),%edi
|
| movdqa %xmm5,%xmm1
|
| - jmp L052xts_enc_done
|
| + jmp L054xts_enc_done
|
| .align 4,0x90
|
| -L049xts_enc_two:
|
| +L051xts_enc_two:
|
| movaps %xmm1,%xmm6
|
| movups (%esi),%xmm2
|
| movups 16(%esi),%xmm3
|
| @@ -1236,9 +1273,9 @@ L049xts_enc_two:
|
| movups %xmm3,16(%edi)
|
| leal 32(%edi),%edi
|
| movdqa %xmm6,%xmm1
|
| - jmp L052xts_enc_done
|
| + jmp L054xts_enc_done
|
| .align 4,0x90
|
| -L050xts_enc_three:
|
| +L052xts_enc_three:
|
| movaps %xmm1,%xmm7
|
| movups (%esi),%xmm2
|
| movups 16(%esi),%xmm3
|
| @@ -1256,9 +1293,9 @@ L050xts_enc_three:
|
| movups %xmm4,32(%edi)
|
| leal 48(%edi),%edi
|
| movdqa %xmm7,%xmm1
|
| - jmp L052xts_enc_done
|
| + jmp L054xts_enc_done
|
| .align 4,0x90
|
| -L051xts_enc_four:
|
| +L053xts_enc_four:
|
| movaps %xmm1,%xmm6
|
| movups (%esi),%xmm2
|
| movups 16(%esi),%xmm3
|
| @@ -1280,28 +1317,28 @@ L051xts_enc_four:
|
| movups %xmm5,48(%edi)
|
| leal 64(%edi),%edi
|
| movdqa %xmm6,%xmm1
|
| - jmp L052xts_enc_done
|
| + jmp L054xts_enc_done
|
| .align 4,0x90
|
| -L047xts_enc_done6x:
|
| +L049xts_enc_done6x:
|
| movl 112(%esp),%eax
|
| andl $15,%eax
|
| - jz L054xts_enc_ret
|
| + jz L056xts_enc_ret
|
| movdqa %xmm1,%xmm5
|
| movl %eax,112(%esp)
|
| - jmp L055xts_enc_steal
|
| + jmp L057xts_enc_steal
|
| .align 4,0x90
|
| -L052xts_enc_done:
|
| +L054xts_enc_done:
|
| movl 112(%esp),%eax
|
| pxor %xmm0,%xmm0
|
| andl $15,%eax
|
| - jz L054xts_enc_ret
|
| + jz L056xts_enc_ret
|
| pcmpgtd %xmm1,%xmm0
|
| movl %eax,112(%esp)
|
| pshufd $19,%xmm0,%xmm5
|
| paddq %xmm1,%xmm1
|
| pand 96(%esp),%xmm5
|
| pxor %xmm1,%xmm5
|
| -L055xts_enc_steal:
|
| +L057xts_enc_steal:
|
| movzbl (%esi),%ecx
|
| movzbl -16(%edi),%edx
|
| leal 1(%esi),%esi
|
| @@ -1309,7 +1346,7 @@ L055xts_enc_steal:
|
| movb %dl,(%edi)
|
| leal 1(%edi),%edi
|
| subl $1,%eax
|
| - jnz L055xts_enc_steal
|
| + jnz L057xts_enc_steal
|
| subl 112(%esp),%edi
|
| movl %ebp,%edx
|
| movl %ebx,%ecx
|
| @@ -1319,16 +1356,30 @@ L055xts_enc_steal:
|
| movups 16(%edx),%xmm1
|
| leal 32(%edx),%edx
|
| xorps %xmm0,%xmm2
|
| -L056enc1_loop_10:
|
| +L058enc1_loop_10:
|
| .byte 102,15,56,220,209
|
| decl %ecx
|
| movups (%edx),%xmm1
|
| leal 16(%edx),%edx
|
| - jnz L056enc1_loop_10
|
| + jnz L058enc1_loop_10
|
| .byte 102,15,56,221,209
|
| xorps %xmm5,%xmm2
|
| movups %xmm2,-16(%edi)
|
| -L054xts_enc_ret:
|
| +L056xts_enc_ret:
|
| + pxor %xmm0,%xmm0
|
| + pxor %xmm1,%xmm1
|
| + pxor %xmm2,%xmm2
|
| + movdqa %xmm0,(%esp)
|
| + pxor %xmm3,%xmm3
|
| + movdqa %xmm0,16(%esp)
|
| + pxor %xmm4,%xmm4
|
| + movdqa %xmm0,32(%esp)
|
| + pxor %xmm5,%xmm5
|
| + movdqa %xmm0,48(%esp)
|
| + pxor %xmm6,%xmm6
|
| + movdqa %xmm0,64(%esp)
|
| + pxor %xmm7,%xmm7
|
| + movdqa %xmm0,80(%esp)
|
| movl 116(%esp),%esp
|
| popl %edi
|
| popl %esi
|
| @@ -1352,12 +1403,12 @@ L_aesni_xts_decrypt_begin:
|
| movups 16(%edx),%xmm1
|
| leal 32(%edx),%edx
|
| xorps %xmm0,%xmm2
|
| -L057enc1_loop_11:
|
| +L059enc1_loop_11:
|
| .byte 102,15,56,220,209
|
| decl %ecx
|
| movups (%edx),%xmm1
|
| leal 16(%edx),%edx
|
| - jnz L057enc1_loop_11
|
| + jnz L059enc1_loop_11
|
| .byte 102,15,56,221,209
|
| movl 20(%esp),%esi
|
| movl 24(%esp),%edi
|
| @@ -1386,14 +1437,14 @@ L057enc1_loop_11:
|
| pcmpgtd %xmm1,%xmm0
|
| andl $-16,%eax
|
| subl $96,%eax
|
| - jc L058xts_dec_short
|
| + jc L060xts_dec_short
|
| shll $4,%ecx
|
| movl $16,%ebx
|
| subl %ecx,%ebx
|
| leal 32(%edx,%ecx,1),%edx
|
| - jmp L059xts_dec_loop6
|
| + jmp L061xts_dec_loop6
|
| .align 4,0x90
|
| -L059xts_dec_loop6:
|
| +L061xts_dec_loop6:
|
| pshufd $19,%xmm0,%xmm2
|
| pxor %xmm0,%xmm0
|
| movdqa %xmm1,(%esp)
|
| @@ -1482,23 +1533,23 @@ L059xts_dec_loop6:
|
| pcmpgtd %xmm1,%xmm0
|
| pxor %xmm2,%xmm1
|
| subl $96,%eax
|
| - jnc L059xts_dec_loop6
|
| + jnc L061xts_dec_loop6
|
| movl 240(%ebp),%ecx
|
| movl %ebp,%edx
|
| movl %ecx,%ebx
|
| -L058xts_dec_short:
|
| +L060xts_dec_short:
|
| addl $96,%eax
|
| - jz L060xts_dec_done6x
|
| + jz L062xts_dec_done6x
|
| movdqa %xmm1,%xmm5
|
| cmpl $32,%eax
|
| - jb L061xts_dec_one
|
| + jb L063xts_dec_one
|
| pshufd $19,%xmm0,%xmm2
|
| pxor %xmm0,%xmm0
|
| paddq %xmm1,%xmm1
|
| pand %xmm3,%xmm2
|
| pcmpgtd %xmm1,%xmm0
|
| pxor %xmm2,%xmm1
|
| - je L062xts_dec_two
|
| + je L064xts_dec_two
|
| pshufd $19,%xmm0,%xmm2
|
| pxor %xmm0,%xmm0
|
| movdqa %xmm1,%xmm6
|
| @@ -1507,7 +1558,7 @@ L058xts_dec_short:
|
| pcmpgtd %xmm1,%xmm0
|
| pxor %xmm2,%xmm1
|
| cmpl $64,%eax
|
| - jb L063xts_dec_three
|
| + jb L065xts_dec_three
|
| pshufd $19,%xmm0,%xmm2
|
| pxor %xmm0,%xmm0
|
| movdqa %xmm1,%xmm7
|
| @@ -1517,7 +1568,7 @@ L058xts_dec_short:
|
| pxor %xmm2,%xmm1
|
| movdqa %xmm5,(%esp)
|
| movdqa %xmm6,16(%esp)
|
| - je L064xts_dec_four
|
| + je L066xts_dec_four
|
| movdqa %xmm7,32(%esp)
|
| pshufd $19,%xmm0,%xmm7
|
| movdqa %xmm1,48(%esp)
|
| @@ -1549,9 +1600,9 @@ L058xts_dec_short:
|
| movups %xmm5,48(%edi)
|
| movups %xmm6,64(%edi)
|
| leal 80(%edi),%edi
|
| - jmp L065xts_dec_done
|
| + jmp L067xts_dec_done
|
| .align 4,0x90
|
| -L061xts_dec_one:
|
| +L063xts_dec_one:
|
| movups (%esi),%xmm2
|
| leal 16(%esi),%esi
|
| xorps %xmm5,%xmm2
|
| @@ -1559,20 +1610,20 @@ L061xts_dec_one:
|
| movups 16(%edx),%xmm1
|
| leal 32(%edx),%edx
|
| xorps %xmm0,%xmm2
|
| -L066dec1_loop_12:
|
| +L068dec1_loop_12:
|
| .byte 102,15,56,222,209
|
| decl %ecx
|
| movups (%edx),%xmm1
|
| leal 16(%edx),%edx
|
| - jnz L066dec1_loop_12
|
| + jnz L068dec1_loop_12
|
| .byte 102,15,56,223,209
|
| xorps %xmm5,%xmm2
|
| movups %xmm2,(%edi)
|
| leal 16(%edi),%edi
|
| movdqa %xmm5,%xmm1
|
| - jmp L065xts_dec_done
|
| + jmp L067xts_dec_done
|
| .align 4,0x90
|
| -L062xts_dec_two:
|
| +L064xts_dec_two:
|
| movaps %xmm1,%xmm6
|
| movups (%esi),%xmm2
|
| movups 16(%esi),%xmm3
|
| @@ -1586,9 +1637,9 @@ L062xts_dec_two:
|
| movups %xmm3,16(%edi)
|
| leal 32(%edi),%edi
|
| movdqa %xmm6,%xmm1
|
| - jmp L065xts_dec_done
|
| + jmp L067xts_dec_done
|
| .align 4,0x90
|
| -L063xts_dec_three:
|
| +L065xts_dec_three:
|
| movaps %xmm1,%xmm7
|
| movups (%esi),%xmm2
|
| movups 16(%esi),%xmm3
|
| @@ -1606,9 +1657,9 @@ L063xts_dec_three:
|
| movups %xmm4,32(%edi)
|
| leal 48(%edi),%edi
|
| movdqa %xmm7,%xmm1
|
| - jmp L065xts_dec_done
|
| + jmp L067xts_dec_done
|
| .align 4,0x90
|
| -L064xts_dec_four:
|
| +L066xts_dec_four:
|
| movaps %xmm1,%xmm6
|
| movups (%esi),%xmm2
|
| movups 16(%esi),%xmm3
|
| @@ -1630,20 +1681,20 @@ L064xts_dec_four:
|
| movups %xmm5,48(%edi)
|
| leal 64(%edi),%edi
|
| movdqa %xmm6,%xmm1
|
| - jmp L065xts_dec_done
|
| + jmp L067xts_dec_done
|
| .align 4,0x90
|
| -L060xts_dec_done6x:
|
| +L062xts_dec_done6x:
|
| movl 112(%esp),%eax
|
| andl $15,%eax
|
| - jz L067xts_dec_ret
|
| + jz L069xts_dec_ret
|
| movl %eax,112(%esp)
|
| - jmp L068xts_dec_only_one_more
|
| + jmp L070xts_dec_only_one_more
|
| .align 4,0x90
|
| -L065xts_dec_done:
|
| +L067xts_dec_done:
|
| movl 112(%esp),%eax
|
| pxor %xmm0,%xmm0
|
| andl $15,%eax
|
| - jz L067xts_dec_ret
|
| + jz L069xts_dec_ret
|
| pcmpgtd %xmm1,%xmm0
|
| movl %eax,112(%esp)
|
| pshufd $19,%xmm0,%xmm2
|
| @@ -1653,7 +1704,7 @@ L065xts_dec_done:
|
| pand %xmm3,%xmm2
|
| pcmpgtd %xmm1,%xmm0
|
| pxor %xmm2,%xmm1
|
| -L068xts_dec_only_one_more:
|
| +L070xts_dec_only_one_more:
|
| pshufd $19,%xmm0,%xmm5
|
| movdqa %xmm1,%xmm6
|
| paddq %xmm1,%xmm1
|
| @@ -1667,16 +1718,16 @@ L068xts_dec_only_one_more:
|
| movups 16(%edx),%xmm1
|
| leal 32(%edx),%edx
|
| xorps %xmm0,%xmm2
|
| -L069dec1_loop_13:
|
| +L071dec1_loop_13:
|
| .byte 102,15,56,222,209
|
| decl %ecx
|
| movups (%edx),%xmm1
|
| leal 16(%edx),%edx
|
| - jnz L069dec1_loop_13
|
| + jnz L071dec1_loop_13
|
| .byte 102,15,56,223,209
|
| xorps %xmm5,%xmm2
|
| movups %xmm2,(%edi)
|
| -L070xts_dec_steal:
|
| +L072xts_dec_steal:
|
| movzbl 16(%esi),%ecx
|
| movzbl (%edi),%edx
|
| leal 1(%esi),%esi
|
| @@ -1684,7 +1735,7 @@ L070xts_dec_steal:
|
| movb %dl,16(%edi)
|
| leal 1(%edi),%edi
|
| subl $1,%eax
|
| - jnz L070xts_dec_steal
|
| + jnz L072xts_dec_steal
|
| subl 112(%esp),%edi
|
| movl %ebp,%edx
|
| movl %ebx,%ecx
|
| @@ -1694,16 +1745,30 @@ L070xts_dec_steal:
|
| movups 16(%edx),%xmm1
|
| leal 32(%edx),%edx
|
| xorps %xmm0,%xmm2
|
| -L071dec1_loop_14:
|
| +L073dec1_loop_14:
|
| .byte 102,15,56,222,209
|
| decl %ecx
|
| movups (%edx),%xmm1
|
| leal 16(%edx),%edx
|
| - jnz L071dec1_loop_14
|
| + jnz L073dec1_loop_14
|
| .byte 102,15,56,223,209
|
| xorps %xmm6,%xmm2
|
| movups %xmm2,(%edi)
|
| -L067xts_dec_ret:
|
| +L069xts_dec_ret:
|
| + pxor %xmm0,%xmm0
|
| + pxor %xmm1,%xmm1
|
| + pxor %xmm2,%xmm2
|
| + movdqa %xmm0,(%esp)
|
| + pxor %xmm3,%xmm3
|
| + movdqa %xmm0,16(%esp)
|
| + pxor %xmm4,%xmm4
|
| + movdqa %xmm0,32(%esp)
|
| + pxor %xmm5,%xmm5
|
| + movdqa %xmm0,48(%esp)
|
| + pxor %xmm6,%xmm6
|
| + movdqa %xmm0,64(%esp)
|
| + pxor %xmm7,%xmm7
|
| + movdqa %xmm0,80(%esp)
|
| movl 116(%esp),%esp
|
| popl %edi
|
| popl %esi
|
| @@ -1728,7 +1793,7 @@ L_aesni_cbc_encrypt_begin:
|
| movl 32(%esp),%edx
|
| movl 36(%esp),%ebp
|
| testl %eax,%eax
|
| - jz L072cbc_abort
|
| + jz L074cbc_abort
|
| cmpl $0,40(%esp)
|
| xchgl %esp,%ebx
|
| movups (%ebp),%xmm7
|
| @@ -1736,14 +1801,14 @@ L_aesni_cbc_encrypt_begin:
|
| movl %edx,%ebp
|
| movl %ebx,16(%esp)
|
| movl %ecx,%ebx
|
| - je L073cbc_decrypt
|
| + je L075cbc_decrypt
|
| movaps %xmm7,%xmm2
|
| cmpl $16,%eax
|
| - jb L074cbc_enc_tail
|
| + jb L076cbc_enc_tail
|
| subl $16,%eax
|
| - jmp L075cbc_enc_loop
|
| + jmp L077cbc_enc_loop
|
| .align 4,0x90
|
| -L075cbc_enc_loop:
|
| +L077cbc_enc_loop:
|
| movups (%esi),%xmm7
|
| leal 16(%esi),%esi
|
| movups (%edx),%xmm0
|
| @@ -1751,24 +1816,25 @@ L075cbc_enc_loop:
|
| xorps %xmm0,%xmm7
|
| leal 32(%edx),%edx
|
| xorps %xmm7,%xmm2
|
| -L076enc1_loop_15:
|
| +L078enc1_loop_15:
|
| .byte 102,15,56,220,209
|
| decl %ecx
|
| movups (%edx),%xmm1
|
| leal 16(%edx),%edx
|
| - jnz L076enc1_loop_15
|
| + jnz L078enc1_loop_15
|
| .byte 102,15,56,221,209
|
| movl %ebx,%ecx
|
| movl %ebp,%edx
|
| movups %xmm2,(%edi)
|
| leal 16(%edi),%edi
|
| subl $16,%eax
|
| - jnc L075cbc_enc_loop
|
| + jnc L077cbc_enc_loop
|
| addl $16,%eax
|
| - jnz L074cbc_enc_tail
|
| + jnz L076cbc_enc_tail
|
| movaps %xmm2,%xmm7
|
| - jmp L077cbc_ret
|
| -L074cbc_enc_tail:
|
| + pxor %xmm2,%xmm2
|
| + jmp L079cbc_ret
|
| +L076cbc_enc_tail:
|
| movl %eax,%ecx
|
| .long 2767451785
|
| movl $16,%ecx
|
| @@ -1779,20 +1845,20 @@ L074cbc_enc_tail:
|
| movl %ebx,%ecx
|
| movl %edi,%esi
|
| movl %ebp,%edx
|
| - jmp L075cbc_enc_loop
|
| + jmp L077cbc_enc_loop
|
| .align 4,0x90
|
| -L073cbc_decrypt:
|
| +L075cbc_decrypt:
|
| cmpl $80,%eax
|
| - jbe L078cbc_dec_tail
|
| + jbe L080cbc_dec_tail
|
| movaps %xmm7,(%esp)
|
| subl $80,%eax
|
| - jmp L079cbc_dec_loop6_enter
|
| + jmp L081cbc_dec_loop6_enter
|
| .align 4,0x90
|
| -L080cbc_dec_loop6:
|
| +L082cbc_dec_loop6:
|
| movaps %xmm0,(%esp)
|
| movups %xmm7,(%edi)
|
| leal 16(%edi),%edi
|
| -L079cbc_dec_loop6_enter:
|
| +L081cbc_dec_loop6_enter:
|
| movdqu (%esi),%xmm2
|
| movdqu 16(%esi),%xmm3
|
| movdqu 32(%esi),%xmm4
|
| @@ -1822,28 +1888,28 @@ L079cbc_dec_loop6_enter:
|
| movups %xmm6,64(%edi)
|
| leal 80(%edi),%edi
|
| subl $96,%eax
|
| - ja L080cbc_dec_loop6
|
| + ja L082cbc_dec_loop6
|
| movaps %xmm7,%xmm2
|
| movaps %xmm0,%xmm7
|
| addl $80,%eax
|
| - jle L081cbc_dec_tail_collected
|
| + jle L083cbc_dec_clear_tail_collected
|
| movups %xmm2,(%edi)
|
| leal 16(%edi),%edi
|
| -L078cbc_dec_tail:
|
| +L080cbc_dec_tail:
|
| movups (%esi),%xmm2
|
| movaps %xmm2,%xmm6
|
| cmpl $16,%eax
|
| - jbe L082cbc_dec_one
|
| + jbe L084cbc_dec_one
|
| movups 16(%esi),%xmm3
|
| movaps %xmm3,%xmm5
|
| cmpl $32,%eax
|
| - jbe L083cbc_dec_two
|
| + jbe L085cbc_dec_two
|
| movups 32(%esi),%xmm4
|
| cmpl $48,%eax
|
| - jbe L084cbc_dec_three
|
| + jbe L086cbc_dec_three
|
| movups 48(%esi),%xmm5
|
| cmpl $64,%eax
|
| - jbe L085cbc_dec_four
|
| + jbe L087cbc_dec_four
|
| movups 64(%esi),%xmm6
|
| movaps %xmm7,(%esp)
|
| movups (%esi),%xmm2
|
| @@ -1861,55 +1927,62 @@ L078cbc_dec_tail:
|
| xorps %xmm0,%xmm6
|
| movups %xmm2,(%edi)
|
| movups %xmm3,16(%edi)
|
| + pxor %xmm3,%xmm3
|
| movups %xmm4,32(%edi)
|
| + pxor %xmm4,%xmm4
|
| movups %xmm5,48(%edi)
|
| + pxor %xmm5,%xmm5
|
| leal 64(%edi),%edi
|
| movaps %xmm6,%xmm2
|
| + pxor %xmm6,%xmm6
|
| subl $80,%eax
|
| - jmp L081cbc_dec_tail_collected
|
| + jmp L088cbc_dec_tail_collected
|
| .align 4,0x90
|
| -L082cbc_dec_one:
|
| +L084cbc_dec_one:
|
| movups (%edx),%xmm0
|
| movups 16(%edx),%xmm1
|
| leal 32(%edx),%edx
|
| xorps %xmm0,%xmm2
|
| -L086dec1_loop_16:
|
| +L089dec1_loop_16:
|
| .byte 102,15,56,222,209
|
| decl %ecx
|
| movups (%edx),%xmm1
|
| leal 16(%edx),%edx
|
| - jnz L086dec1_loop_16
|
| + jnz L089dec1_loop_16
|
| .byte 102,15,56,223,209
|
| xorps %xmm7,%xmm2
|
| movaps %xmm6,%xmm7
|
| subl $16,%eax
|
| - jmp L081cbc_dec_tail_collected
|
| + jmp L088cbc_dec_tail_collected
|
| .align 4,0x90
|
| -L083cbc_dec_two:
|
| +L085cbc_dec_two:
|
| call __aesni_decrypt2
|
| xorps %xmm7,%xmm2
|
| xorps %xmm6,%xmm3
|
| movups %xmm2,(%edi)
|
| movaps %xmm3,%xmm2
|
| + pxor %xmm3,%xmm3
|
| leal 16(%edi),%edi
|
| movaps %xmm5,%xmm7
|
| subl $32,%eax
|
| - jmp L081cbc_dec_tail_collected
|
| + jmp L088cbc_dec_tail_collected
|
| .align 4,0x90
|
| -L084cbc_dec_three:
|
| +L086cbc_dec_three:
|
| call __aesni_decrypt3
|
| xorps %xmm7,%xmm2
|
| xorps %xmm6,%xmm3
|
| xorps %xmm5,%xmm4
|
| movups %xmm2,(%edi)
|
| movaps %xmm4,%xmm2
|
| + pxor %xmm4,%xmm4
|
| movups %xmm3,16(%edi)
|
| + pxor %xmm3,%xmm3
|
| leal 32(%edi),%edi
|
| movups 32(%esi),%xmm7
|
| subl $48,%eax
|
| - jmp L081cbc_dec_tail_collected
|
| + jmp L088cbc_dec_tail_collected
|
| .align 4,0x90
|
| -L085cbc_dec_four:
|
| +L087cbc_dec_four:
|
| call __aesni_decrypt4
|
| movups 16(%esi),%xmm1
|
| movups 32(%esi),%xmm0
|
| @@ -1919,28 +1992,44 @@ L085cbc_dec_four:
|
| movups %xmm2,(%edi)
|
| xorps %xmm1,%xmm4
|
| movups %xmm3,16(%edi)
|
| + pxor %xmm3,%xmm3
|
| xorps %xmm0,%xmm5
|
| movups %xmm4,32(%edi)
|
| + pxor %xmm4,%xmm4
|
| leal 48(%edi),%edi
|
| movaps %xmm5,%xmm2
|
| + pxor %xmm5,%xmm5
|
| subl $64,%eax
|
| -L081cbc_dec_tail_collected:
|
| + jmp L088cbc_dec_tail_collected
|
| +.align 4,0x90
|
| +L083cbc_dec_clear_tail_collected:
|
| + pxor %xmm3,%xmm3
|
| + pxor %xmm4,%xmm4
|
| + pxor %xmm5,%xmm5
|
| + pxor %xmm6,%xmm6
|
| +L088cbc_dec_tail_collected:
|
| andl $15,%eax
|
| - jnz L087cbc_dec_tail_partial
|
| + jnz L090cbc_dec_tail_partial
|
| movups %xmm2,(%edi)
|
| - jmp L077cbc_ret
|
| + pxor %xmm0,%xmm0
|
| + jmp L079cbc_ret
|
| .align 4,0x90
|
| -L087cbc_dec_tail_partial:
|
| +L090cbc_dec_tail_partial:
|
| movaps %xmm2,(%esp)
|
| + pxor %xmm0,%xmm0
|
| movl $16,%ecx
|
| movl %esp,%esi
|
| subl %eax,%ecx
|
| .long 2767451785
|
| -L077cbc_ret:
|
| + movdqa %xmm2,(%esp)
|
| +L079cbc_ret:
|
| movl 16(%esp),%esp
|
| movl 36(%esp),%ebp
|
| + pxor %xmm2,%xmm2
|
| + pxor %xmm1,%xmm1
|
| movups %xmm7,(%ebp)
|
| -L072cbc_abort:
|
| + pxor %xmm7,%xmm7
|
| +L074cbc_abort:
|
| popl %edi
|
| popl %esi
|
| popl %ebx
|
| @@ -1949,52 +2038,62 @@ L072cbc_abort:
|
| .private_extern __aesni_set_encrypt_key
|
| .align 4
|
| __aesni_set_encrypt_key:
|
| + pushl %ebp
|
| + pushl %ebx
|
| testl %eax,%eax
|
| - jz L088bad_pointer
|
| + jz L091bad_pointer
|
| testl %edx,%edx
|
| - jz L088bad_pointer
|
| + jz L091bad_pointer
|
| + call L092pic
|
| +L092pic:
|
| + popl %ebx
|
| + leal Lkey_const-L092pic(%ebx),%ebx
|
| + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-Lkey_const(%ebx),%ebp
|
| movups (%eax),%xmm0
|
| xorps %xmm4,%xmm4
|
| + movl 4(%ebp),%ebp
|
| leal 16(%edx),%edx
|
| + andl $268437504,%ebp
|
| cmpl $256,%ecx
|
| - je L08914rounds
|
| + je L09314rounds
|
| cmpl $192,%ecx
|
| - je L09012rounds
|
| + je L09412rounds
|
| cmpl $128,%ecx
|
| - jne L091bad_keybits
|
| + jne L095bad_keybits
|
| .align 4,0x90
|
| -L09210rounds:
|
| +L09610rounds:
|
| + cmpl $268435456,%ebp
|
| + je L09710rounds_alt
|
| movl $9,%ecx
|
| movups %xmm0,-16(%edx)
|
| .byte 102,15,58,223,200,1
|
| - call L093key_128_cold
|
| + call L098key_128_cold
|
| .byte 102,15,58,223,200,2
|
| - call L094key_128
|
| + call L099key_128
|
| .byte 102,15,58,223,200,4
|
| - call L094key_128
|
| + call L099key_128
|
| .byte 102,15,58,223,200,8
|
| - call L094key_128
|
| + call L099key_128
|
| .byte 102,15,58,223,200,16
|
| - call L094key_128
|
| + call L099key_128
|
| .byte 102,15,58,223,200,32
|
| - call L094key_128
|
| + call L099key_128
|
| .byte 102,15,58,223,200,64
|
| - call L094key_128
|
| + call L099key_128
|
| .byte 102,15,58,223,200,128
|
| - call L094key_128
|
| + call L099key_128
|
| .byte 102,15,58,223,200,27
|
| - call L094key_128
|
| + call L099key_128
|
| .byte 102,15,58,223,200,54
|
| - call L094key_128
|
| + call L099key_128
|
| movups %xmm0,(%edx)
|
| movl %ecx,80(%edx)
|
| - xorl %eax,%eax
|
| - ret
|
| + jmp L100good_key
|
| .align 4,0x90
|
| -L094key_128:
|
| +L099key_128:
|
| movups %xmm0,(%edx)
|
| leal 16(%edx),%edx
|
| -L093key_128_cold:
|
| +L098key_128_cold:
|
| shufps $16,%xmm0,%xmm4
|
| xorps %xmm4,%xmm0
|
| shufps $140,%xmm0,%xmm4
|
| @@ -2003,38 +2102,91 @@ L093key_128_cold:
|
| xorps %xmm1,%xmm0
|
| ret
|
| .align 4,0x90
|
| -L09012rounds:
|
| +L09710rounds_alt:
|
| + movdqa (%ebx),%xmm5
|
| + movl $8,%ecx
|
| + movdqa 32(%ebx),%xmm4
|
| + movdqa %xmm0,%xmm2
|
| + movdqu %xmm0,-16(%edx)
|
| +L101loop_key128:
|
| +.byte 102,15,56,0,197
|
| +.byte 102,15,56,221,196
|
| + pslld $1,%xmm4
|
| + leal 16(%edx),%edx
|
| + movdqa %xmm2,%xmm3
|
| + pslldq $4,%xmm2
|
| + pxor %xmm2,%xmm3
|
| + pslldq $4,%xmm2
|
| + pxor %xmm2,%xmm3
|
| + pslldq $4,%xmm2
|
| + pxor %xmm3,%xmm2
|
| + pxor %xmm2,%xmm0
|
| + movdqu %xmm0,-16(%edx)
|
| + movdqa %xmm0,%xmm2
|
| + decl %ecx
|
| + jnz L101loop_key128
|
| + movdqa 48(%ebx),%xmm4
|
| +.byte 102,15,56,0,197
|
| +.byte 102,15,56,221,196
|
| + pslld $1,%xmm4
|
| + movdqa %xmm2,%xmm3
|
| + pslldq $4,%xmm2
|
| + pxor %xmm2,%xmm3
|
| + pslldq $4,%xmm2
|
| + pxor %xmm2,%xmm3
|
| + pslldq $4,%xmm2
|
| + pxor %xmm3,%xmm2
|
| + pxor %xmm2,%xmm0
|
| + movdqu %xmm0,(%edx)
|
| + movdqa %xmm0,%xmm2
|
| +.byte 102,15,56,0,197
|
| +.byte 102,15,56,221,196
|
| + movdqa %xmm2,%xmm3
|
| + pslldq $4,%xmm2
|
| + pxor %xmm2,%xmm3
|
| + pslldq $4,%xmm2
|
| + pxor %xmm2,%xmm3
|
| + pslldq $4,%xmm2
|
| + pxor %xmm3,%xmm2
|
| + pxor %xmm2,%xmm0
|
| + movdqu %xmm0,16(%edx)
|
| + movl $9,%ecx
|
| + movl %ecx,96(%edx)
|
| + jmp L100good_key
|
| +.align 4,0x90
|
| +L09412rounds:
|
| movq 16(%eax),%xmm2
|
| + cmpl $268435456,%ebp
|
| + je L10212rounds_alt
|
| movl $11,%ecx
|
| movups %xmm0,-16(%edx)
|
| .byte 102,15,58,223,202,1
|
| - call L095key_192a_cold
|
| + call L103key_192a_cold
|
| .byte 102,15,58,223,202,2
|
| - call L096key_192b
|
| + call L104key_192b
|
| .byte 102,15,58,223,202,4
|
| - call L097key_192a
|
| + call L105key_192a
|
| .byte 102,15,58,223,202,8
|
| - call L096key_192b
|
| + call L104key_192b
|
| .byte 102,15,58,223,202,16
|
| - call L097key_192a
|
| + call L105key_192a
|
| .byte 102,15,58,223,202,32
|
| - call L096key_192b
|
| + call L104key_192b
|
| .byte 102,15,58,223,202,64
|
| - call L097key_192a
|
| + call L105key_192a
|
| .byte 102,15,58,223,202,128
|
| - call L096key_192b
|
| + call L104key_192b
|
| movups %xmm0,(%edx)
|
| movl %ecx,48(%edx)
|
| - xorl %eax,%eax
|
| - ret
|
| + jmp L100good_key
|
| .align 4,0x90
|
| -L097key_192a:
|
| +L105key_192a:
|
| movups %xmm0,(%edx)
|
| leal 16(%edx),%edx
|
| .align 4,0x90
|
| -L095key_192a_cold:
|
| +L103key_192a_cold:
|
| movaps %xmm2,%xmm5
|
| -L098key_192b_warm:
|
| +L106key_192b_warm:
|
| shufps $16,%xmm0,%xmm4
|
| movdqa %xmm2,%xmm3
|
| xorps %xmm4,%xmm0
|
| @@ -2048,56 +2200,90 @@ L098key_192b_warm:
|
| pxor %xmm3,%xmm2
|
| ret
|
| .align 4,0x90
|
| -L096key_192b:
|
| +L104key_192b:
|
| movaps %xmm0,%xmm3
|
| shufps $68,%xmm0,%xmm5
|
| movups %xmm5,(%edx)
|
| shufps $78,%xmm2,%xmm3
|
| movups %xmm3,16(%edx)
|
| leal 32(%edx),%edx
|
| - jmp L098key_192b_warm
|
| + jmp L106key_192b_warm
|
| .align 4,0x90
|
| -L08914rounds:
|
| +L10212rounds_alt:
|
| + movdqa 16(%ebx),%xmm5
|
| + movdqa 32(%ebx),%xmm4
|
| + movl $8,%ecx
|
| + movdqu %xmm0,-16(%edx)
|
| +L107loop_key192:
|
| + movq %xmm2,(%edx)
|
| + movdqa %xmm2,%xmm1
|
| +.byte 102,15,56,0,213
|
| +.byte 102,15,56,221,212
|
| + pslld $1,%xmm4
|
| + leal 24(%edx),%edx
|
| + movdqa %xmm0,%xmm3
|
| + pslldq $4,%xmm0
|
| + pxor %xmm0,%xmm3
|
| + pslldq $4,%xmm0
|
| + pxor %xmm0,%xmm3
|
| + pslldq $4,%xmm0
|
| + pxor %xmm3,%xmm0
|
| + pshufd $255,%xmm0,%xmm3
|
| + pxor %xmm1,%xmm3
|
| + pslldq $4,%xmm1
|
| + pxor %xmm1,%xmm3
|
| + pxor %xmm2,%xmm0
|
| + pxor %xmm3,%xmm2
|
| + movdqu %xmm0,-16(%edx)
|
| + decl %ecx
|
| + jnz L107loop_key192
|
| + movl $11,%ecx
|
| + movl %ecx,32(%edx)
|
| + jmp L100good_key
|
| +.align 4,0x90
|
| +L09314rounds:
|
| movups 16(%eax),%xmm2
|
| - movl $13,%ecx
|
| leal 16(%edx),%edx
|
| + cmpl $268435456,%ebp
|
| + je L10814rounds_alt
|
| + movl $13,%ecx
|
| movups %xmm0,-32(%edx)
|
| movups %xmm2,-16(%edx)
|
| .byte 102,15,58,223,202,1
|
| - call L099key_256a_cold
|
| + call L109key_256a_cold
|
| .byte 102,15,58,223,200,1
|
| - call L100key_256b
|
| + call L110key_256b
|
| .byte 102,15,58,223,202,2
|
| - call L101key_256a
|
| + call L111key_256a
|
| .byte 102,15,58,223,200,2
|
| - call L100key_256b
|
| + call L110key_256b
|
| .byte 102,15,58,223,202,4
|
| - call L101key_256a
|
| + call L111key_256a
|
| .byte 102,15,58,223,200,4
|
| - call L100key_256b
|
| + call L110key_256b
|
| .byte 102,15,58,223,202,8
|
| - call L101key_256a
|
| + call L111key_256a
|
| .byte 102,15,58,223,200,8
|
| - call L100key_256b
|
| + call L110key_256b
|
| .byte 102,15,58,223,202,16
|
| - call L101key_256a
|
| + call L111key_256a
|
| .byte 102,15,58,223,200,16
|
| - call L100key_256b
|
| + call L110key_256b
|
| .byte 102,15,58,223,202,32
|
| - call L101key_256a
|
| + call L111key_256a
|
| .byte 102,15,58,223,200,32
|
| - call L100key_256b
|
| + call L110key_256b
|
| .byte 102,15,58,223,202,64
|
| - call L101key_256a
|
| + call L111key_256a
|
| movups %xmm0,(%edx)
|
| movl %ecx,16(%edx)
|
| xorl %eax,%eax
|
| - ret
|
| + jmp L100good_key
|
| .align 4,0x90
|
| -L101key_256a:
|
| +L111key_256a:
|
| movups %xmm2,(%edx)
|
| leal 16(%edx),%edx
|
| -L099key_256a_cold:
|
| +L109key_256a_cold:
|
| shufps $16,%xmm0,%xmm4
|
| xorps %xmm4,%xmm0
|
| shufps $140,%xmm0,%xmm4
|
| @@ -2106,7 +2292,7 @@ L099key_256a_cold:
|
| xorps %xmm1,%xmm0
|
| ret
|
| .align 4,0x90
|
| -L100key_256b:
|
| +L110key_256b:
|
| movups %xmm0,(%edx)
|
| leal 16(%edx),%edx
|
| shufps $16,%xmm2,%xmm4
|
| @@ -2116,13 +2302,70 @@ L100key_256b:
|
| shufps $170,%xmm1,%xmm1
|
| xorps %xmm1,%xmm2
|
| ret
|
| +.align 4,0x90
|
| +L10814rounds_alt:
|
| + movdqa (%ebx),%xmm5
|
| + movdqa 32(%ebx),%xmm4
|
| + movl $7,%ecx
|
| + movdqu %xmm0,-32(%edx)
|
| + movdqa %xmm2,%xmm1
|
| + movdqu %xmm2,-16(%edx)
|
| +L112loop_key256:
|
| +.byte 102,15,56,0,213
|
| +.byte 102,15,56,221,212
|
| + movdqa %xmm0,%xmm3
|
| + pslldq $4,%xmm0
|
| + pxor %xmm0,%xmm3
|
| + pslldq $4,%xmm0
|
| + pxor %xmm0,%xmm3
|
| + pslldq $4,%xmm0
|
| + pxor %xmm3,%xmm0
|
| + pslld $1,%xmm4
|
| + pxor %xmm2,%xmm0
|
| + movdqu %xmm0,(%edx)
|
| + decl %ecx
|
| + jz L113done_key256
|
| + pshufd $255,%xmm0,%xmm2
|
| + pxor %xmm3,%xmm3
|
| +.byte 102,15,56,221,211
|
| + movdqa %xmm1,%xmm3
|
| + pslldq $4,%xmm1
|
| + pxor %xmm1,%xmm3
|
| + pslldq $4,%xmm1
|
| + pxor %xmm1,%xmm3
|
| + pslldq $4,%xmm1
|
| + pxor %xmm3,%xmm1
|
| + pxor %xmm1,%xmm2
|
| + movdqu %xmm2,16(%edx)
|
| + leal 32(%edx),%edx
|
| + movdqa %xmm2,%xmm1
|
| + jmp L112loop_key256
|
| +L113done_key256:
|
| + movl $13,%ecx
|
| + movl %ecx,16(%edx)
|
| +L100good_key:
|
| + pxor %xmm0,%xmm0
|
| + pxor %xmm1,%xmm1
|
| + pxor %xmm2,%xmm2
|
| + pxor %xmm3,%xmm3
|
| + pxor %xmm4,%xmm4
|
| + pxor %xmm5,%xmm5
|
| + xorl %eax,%eax
|
| + popl %ebx
|
| + popl %ebp
|
| + ret
|
| .align 2,0x90
|
| -L088bad_pointer:
|
| +L091bad_pointer:
|
| movl $-1,%eax
|
| + popl %ebx
|
| + popl %ebp
|
| ret
|
| .align 2,0x90
|
| -L091bad_keybits:
|
| +L095bad_keybits:
|
| + pxor %xmm0,%xmm0
|
| movl $-2,%eax
|
| + popl %ebx
|
| + popl %ebp
|
| ret
|
| .globl _aesni_set_encrypt_key
|
| .private_extern _aesni_set_encrypt_key
|
| @@ -2146,7 +2389,7 @@ L_aesni_set_decrypt_key_begin:
|
| movl 12(%esp),%edx
|
| shll $4,%ecx
|
| testl %eax,%eax
|
| - jnz L102dec_key_ret
|
| + jnz L114dec_key_ret
|
| leal 16(%edx,%ecx,1),%eax
|
| movups (%edx),%xmm0
|
| movups (%eax),%xmm1
|
| @@ -2154,7 +2397,7 @@ L_aesni_set_decrypt_key_begin:
|
| movups %xmm1,(%edx)
|
| leal 16(%edx),%edx
|
| leal -16(%eax),%eax
|
| -L103dec_key_inverse:
|
| +L115dec_key_inverse:
|
| movups (%edx),%xmm0
|
| movups (%eax),%xmm1
|
| .byte 102,15,56,219,192
|
| @@ -2164,15 +2407,27 @@ L103dec_key_inverse:
|
| movups %xmm0,16(%eax)
|
| movups %xmm1,-16(%edx)
|
| cmpl %edx,%eax
|
| - ja L103dec_key_inverse
|
| + ja L115dec_key_inverse
|
| movups (%edx),%xmm0
|
| .byte 102,15,56,219,192
|
| movups %xmm0,(%edx)
|
| + pxor %xmm0,%xmm0
|
| + pxor %xmm1,%xmm1
|
| xorl %eax,%eax
|
| -L102dec_key_ret:
|
| +L114dec_key_ret:
|
| ret
|
| +.align 6,0x90
|
| +Lkey_const:
|
| +.long 202313229,202313229,202313229,202313229
|
| +.long 67569157,67569157,67569157,67569157
|
| +.long 1,1,1,1
|
| +.long 27,27,27,27
|
| .byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69
|
| .byte 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
|
| .byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
|
| .byte 115,108,46,111,114,103,62,0
|
| +.section __IMPORT,__pointers,non_lazy_symbol_pointers
|
| +L_OPENSSL_ia32cap_P$non_lazy_ptr:
|
| +.indirect_symbol _OPENSSL_ia32cap_P
|
| +.long 0
|
| #endif
|