Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(286)

Side by Side Diff: third_party/boringssl/linux-x86_64/crypto/aes/aesni-x86_64.S

Issue 1136743004: Roll src/third_party/boringssl/src 68de407:771a138 (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 5 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 #if defined(__x86_64__) 1 #if defined(__x86_64__)
2 .text 2 .text
3 .extern OPENSSL_ia32cap_P 3 .extern OPENSSL_ia32cap_P
4 .hidden OPENSSL_ia32cap_P 4 .hidden OPENSSL_ia32cap_P
5 .globl aesni_encrypt 5 .globl aesni_encrypt
6 .hidden aesni_encrypt 6 .hidden aesni_encrypt
7 .type aesni_encrypt,@function 7 .type aesni_encrypt,@function
8 .align 16 8 .align 16
9 aesni_encrypt: 9 aesni_encrypt:
10 movups (%rdi),%xmm2 10 movups (%rdi),%xmm2
11 movl 240(%rdx),%eax 11 movl 240(%rdx),%eax
12 movups (%rdx),%xmm0 12 movups (%rdx),%xmm0
13 movups 16(%rdx),%xmm1 13 movups 16(%rdx),%xmm1
14 leaq 32(%rdx),%rdx 14 leaq 32(%rdx),%rdx
15 xorps %xmm0,%xmm2 15 xorps %xmm0,%xmm2
16 .Loop_enc1_1: 16 .Loop_enc1_1:
17 .byte 102,15,56,220,209 17 .byte 102,15,56,220,209
18 decl %eax 18 decl %eax
19 movups (%rdx),%xmm1 19 movups (%rdx),%xmm1
20 leaq 16(%rdx),%rdx 20 leaq 16(%rdx),%rdx
21 jnz .Loop_enc1_1 21 jnz .Loop_enc1_1
22 .byte 102,15,56,221,209 22 .byte 102,15,56,221,209
23 pxor %xmm0,%xmm0
24 pxor %xmm1,%xmm1
23 movups %xmm2,(%rsi) 25 movups %xmm2,(%rsi)
26 pxor %xmm2,%xmm2
24 .byte 0xf3,0xc3 27 .byte 0xf3,0xc3
25 .size aesni_encrypt,.-aesni_encrypt 28 .size aesni_encrypt,.-aesni_encrypt
26 29
27 .globl aesni_decrypt 30 .globl aesni_decrypt
28 .hidden aesni_decrypt 31 .hidden aesni_decrypt
29 .type aesni_decrypt,@function 32 .type aesni_decrypt,@function
30 .align 16 33 .align 16
31 aesni_decrypt: 34 aesni_decrypt:
32 movups (%rdi),%xmm2 35 movups (%rdi),%xmm2
33 movl 240(%rdx),%eax 36 movl 240(%rdx),%eax
34 movups (%rdx),%xmm0 37 movups (%rdx),%xmm0
35 movups 16(%rdx),%xmm1 38 movups 16(%rdx),%xmm1
36 leaq 32(%rdx),%rdx 39 leaq 32(%rdx),%rdx
37 xorps %xmm0,%xmm2 40 xorps %xmm0,%xmm2
38 .Loop_dec1_2: 41 .Loop_dec1_2:
39 .byte 102,15,56,222,209 42 .byte 102,15,56,222,209
40 decl %eax 43 decl %eax
41 movups (%rdx),%xmm1 44 movups (%rdx),%xmm1
42 leaq 16(%rdx),%rdx 45 leaq 16(%rdx),%rdx
43 jnz .Loop_dec1_2 46 jnz .Loop_dec1_2
44 .byte 102,15,56,223,209 47 .byte 102,15,56,223,209
48 pxor %xmm0,%xmm0
49 pxor %xmm1,%xmm1
45 movups %xmm2,(%rsi) 50 movups %xmm2,(%rsi)
51 pxor %xmm2,%xmm2
46 .byte 0xf3,0xc3 52 .byte 0xf3,0xc3
47 .size aesni_decrypt, .-aesni_decrypt 53 .size aesni_decrypt, .-aesni_decrypt
48 .type _aesni_encrypt2,@function 54 .type _aesni_encrypt2,@function
49 .align 16 55 .align 16
50 _aesni_encrypt2: 56 _aesni_encrypt2:
51 movups (%rcx),%xmm0 57 movups (%rcx),%xmm0
52 shll $4,%eax 58 shll $4,%eax
53 movups 16(%rcx),%xmm1 59 movups 16(%rcx),%xmm1
54 xorps %xmm0,%xmm2 60 xorps %xmm0,%xmm2
55 xorps %xmm0,%xmm3 61 xorps %xmm0,%xmm3
(...skipping 205 matching lines...) Expand 10 before | Expand all | Expand 10 after
261 pxor %xmm0,%xmm3 267 pxor %xmm0,%xmm3
262 pxor %xmm0,%xmm4 268 pxor %xmm0,%xmm4
263 .byte 102,15,56,220,209 269 .byte 102,15,56,220,209
264 leaq 32(%rcx,%rax,1),%rcx 270 leaq 32(%rcx,%rax,1),%rcx
265 negq %rax 271 negq %rax
266 .byte 102,15,56,220,217 272 .byte 102,15,56,220,217
267 pxor %xmm0,%xmm5 273 pxor %xmm0,%xmm5
268 pxor %xmm0,%xmm6 274 pxor %xmm0,%xmm6
269 .byte 102,15,56,220,225 275 .byte 102,15,56,220,225
270 pxor %xmm0,%xmm7 276 pxor %xmm0,%xmm7
277 movups (%rcx,%rax,1),%xmm0
271 addq $16,%rax 278 addq $16,%rax
272 .byte 102,15,56,220,233
273 .byte 102,15,56,220,241
274 .byte 102,15,56,220,249
275 movups -16(%rcx,%rax,1),%xmm0
276 jmp .Lenc_loop6_enter 279 jmp .Lenc_loop6_enter
277 .align 16 280 .align 16
278 .Lenc_loop6: 281 .Lenc_loop6:
279 .byte 102,15,56,220,209 282 .byte 102,15,56,220,209
280 .byte 102,15,56,220,217 283 .byte 102,15,56,220,217
281 .byte 102,15,56,220,225 284 .byte 102,15,56,220,225
285 .Lenc_loop6_enter:
282 .byte 102,15,56,220,233 286 .byte 102,15,56,220,233
283 .byte 102,15,56,220,241 287 .byte 102,15,56,220,241
284 .byte 102,15,56,220,249 288 .byte 102,15,56,220,249
285 .Lenc_loop6_enter:
286 movups (%rcx,%rax,1),%xmm1 289 movups (%rcx,%rax,1),%xmm1
287 addq $32,%rax 290 addq $32,%rax
288 .byte 102,15,56,220,208 291 .byte 102,15,56,220,208
289 .byte 102,15,56,220,216 292 .byte 102,15,56,220,216
290 .byte 102,15,56,220,224 293 .byte 102,15,56,220,224
291 .byte 102,15,56,220,232 294 .byte 102,15,56,220,232
292 .byte 102,15,56,220,240 295 .byte 102,15,56,220,240
293 .byte 102,15,56,220,248 296 .byte 102,15,56,220,248
294 movups -16(%rcx,%rax,1),%xmm0 297 movups -16(%rcx,%rax,1),%xmm0
295 jnz .Lenc_loop6 298 jnz .Lenc_loop6
(...skipping 22 matching lines...) Expand all
318 pxor %xmm0,%xmm3 321 pxor %xmm0,%xmm3
319 pxor %xmm0,%xmm4 322 pxor %xmm0,%xmm4
320 .byte 102,15,56,222,209 323 .byte 102,15,56,222,209
321 leaq 32(%rcx,%rax,1),%rcx 324 leaq 32(%rcx,%rax,1),%rcx
322 negq %rax 325 negq %rax
323 .byte 102,15,56,222,217 326 .byte 102,15,56,222,217
324 pxor %xmm0,%xmm5 327 pxor %xmm0,%xmm5
325 pxor %xmm0,%xmm6 328 pxor %xmm0,%xmm6
326 .byte 102,15,56,222,225 329 .byte 102,15,56,222,225
327 pxor %xmm0,%xmm7 330 pxor %xmm0,%xmm7
331 movups (%rcx,%rax,1),%xmm0
328 addq $16,%rax 332 addq $16,%rax
329 .byte 102,15,56,222,233
330 .byte 102,15,56,222,241
331 .byte 102,15,56,222,249
332 movups -16(%rcx,%rax,1),%xmm0
333 jmp .Ldec_loop6_enter 333 jmp .Ldec_loop6_enter
334 .align 16 334 .align 16
335 .Ldec_loop6: 335 .Ldec_loop6:
336 .byte 102,15,56,222,209 336 .byte 102,15,56,222,209
337 .byte 102,15,56,222,217 337 .byte 102,15,56,222,217
338 .byte 102,15,56,222,225 338 .byte 102,15,56,222,225
339 .Ldec_loop6_enter:
339 .byte 102,15,56,222,233 340 .byte 102,15,56,222,233
340 .byte 102,15,56,222,241 341 .byte 102,15,56,222,241
341 .byte 102,15,56,222,249 342 .byte 102,15,56,222,249
342 .Ldec_loop6_enter:
343 movups (%rcx,%rax,1),%xmm1 343 movups (%rcx,%rax,1),%xmm1
344 addq $32,%rax 344 addq $32,%rax
345 .byte 102,15,56,222,208 345 .byte 102,15,56,222,208
346 .byte 102,15,56,222,216 346 .byte 102,15,56,222,216
347 .byte 102,15,56,222,224 347 .byte 102,15,56,222,224
348 .byte 102,15,56,222,232 348 .byte 102,15,56,222,232
349 .byte 102,15,56,222,240 349 .byte 102,15,56,222,240
350 .byte 102,15,56,222,248 350 .byte 102,15,56,222,248
351 movups -16(%rcx,%rax,1),%xmm0 351 movups -16(%rcx,%rax,1),%xmm0
352 jnz .Ldec_loop6 352 jnz .Ldec_loop6
(...skipping 19 matching lines...) Expand all
372 shll $4,%eax 372 shll $4,%eax
373 movups 16(%rcx),%xmm1 373 movups 16(%rcx),%xmm1
374 xorps %xmm0,%xmm2 374 xorps %xmm0,%xmm2
375 xorps %xmm0,%xmm3 375 xorps %xmm0,%xmm3
376 pxor %xmm0,%xmm4 376 pxor %xmm0,%xmm4
377 pxor %xmm0,%xmm5 377 pxor %xmm0,%xmm5
378 pxor %xmm0,%xmm6 378 pxor %xmm0,%xmm6
379 leaq 32(%rcx,%rax,1),%rcx 379 leaq 32(%rcx,%rax,1),%rcx
380 negq %rax 380 negq %rax
381 .byte 102,15,56,220,209 381 .byte 102,15,56,220,209
382 pxor %xmm0,%xmm7
383 pxor %xmm0,%xmm8
384 .byte 102,15,56,220,217
385 pxor %xmm0,%xmm9
386 movups (%rcx,%rax,1),%xmm0
382 addq $16,%rax 387 addq $16,%rax
383 » pxor» %xmm0,%xmm7 388 » jmp» .Lenc_loop8_inner
384 .byte» 102,15,56,220,217
385 » pxor» %xmm0,%xmm8
386 » pxor» %xmm0,%xmm9
387 .byte» 102,15,56,220,225
388 .byte» 102,15,56,220,233
389 .byte» 102,15,56,220,241
390 .byte» 102,15,56,220,249
391 .byte» 102,68,15,56,220,193
392 .byte» 102,68,15,56,220,201
393 » movups» -16(%rcx,%rax,1),%xmm0
394 » jmp» .Lenc_loop8_enter
395 .align 16 389 .align 16
396 .Lenc_loop8: 390 .Lenc_loop8:
397 .byte 102,15,56,220,209 391 .byte 102,15,56,220,209
398 .byte 102,15,56,220,217 392 .byte 102,15,56,220,217
393 .Lenc_loop8_inner:
399 .byte 102,15,56,220,225 394 .byte 102,15,56,220,225
400 .byte 102,15,56,220,233 395 .byte 102,15,56,220,233
401 .byte 102,15,56,220,241 396 .byte 102,15,56,220,241
402 .byte 102,15,56,220,249 397 .byte 102,15,56,220,249
403 .byte 102,68,15,56,220,193 398 .byte 102,68,15,56,220,193
404 .byte 102,68,15,56,220,201 399 .byte 102,68,15,56,220,201
405 .Lenc_loop8_enter: 400 .Lenc_loop8_enter:
406 movups (%rcx,%rax,1),%xmm1 401 movups (%rcx,%rax,1),%xmm1
407 addq $32,%rax 402 addq $32,%rax
408 .byte 102,15,56,220,208 403 .byte 102,15,56,220,208
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
441 shll $4,%eax 436 shll $4,%eax
442 movups 16(%rcx),%xmm1 437 movups 16(%rcx),%xmm1
443 xorps %xmm0,%xmm2 438 xorps %xmm0,%xmm2
444 xorps %xmm0,%xmm3 439 xorps %xmm0,%xmm3
445 pxor %xmm0,%xmm4 440 pxor %xmm0,%xmm4
446 pxor %xmm0,%xmm5 441 pxor %xmm0,%xmm5
447 pxor %xmm0,%xmm6 442 pxor %xmm0,%xmm6
448 leaq 32(%rcx,%rax,1),%rcx 443 leaq 32(%rcx,%rax,1),%rcx
449 negq %rax 444 negq %rax
450 .byte 102,15,56,222,209 445 .byte 102,15,56,222,209
446 pxor %xmm0,%xmm7
447 pxor %xmm0,%xmm8
448 .byte 102,15,56,222,217
449 pxor %xmm0,%xmm9
450 movups (%rcx,%rax,1),%xmm0
451 addq $16,%rax 451 addq $16,%rax
452 » pxor» %xmm0,%xmm7 452 » jmp» .Ldec_loop8_inner
453 .byte» 102,15,56,222,217
454 » pxor» %xmm0,%xmm8
455 » pxor» %xmm0,%xmm9
456 .byte» 102,15,56,222,225
457 .byte» 102,15,56,222,233
458 .byte» 102,15,56,222,241
459 .byte» 102,15,56,222,249
460 .byte» 102,68,15,56,222,193
461 .byte» 102,68,15,56,222,201
462 » movups» -16(%rcx,%rax,1),%xmm0
463 » jmp» .Ldec_loop8_enter
464 .align 16 453 .align 16
465 .Ldec_loop8: 454 .Ldec_loop8:
466 .byte 102,15,56,222,209 455 .byte 102,15,56,222,209
467 .byte 102,15,56,222,217 456 .byte 102,15,56,222,217
457 .Ldec_loop8_inner:
468 .byte 102,15,56,222,225 458 .byte 102,15,56,222,225
469 .byte 102,15,56,222,233 459 .byte 102,15,56,222,233
470 .byte 102,15,56,222,241 460 .byte 102,15,56,222,241
471 .byte 102,15,56,222,249 461 .byte 102,15,56,222,249
472 .byte 102,68,15,56,222,193 462 .byte 102,68,15,56,222,193
473 .byte 102,68,15,56,222,201 463 .byte 102,68,15,56,222,201
474 .Ldec_loop8_enter: 464 .Ldec_loop8_enter:
475 movups (%rcx,%rax,1),%xmm1 465 movups (%rcx,%rax,1),%xmm1
476 addq $32,%rax 466 addq $32,%rax
477 .byte 102,15,56,222,208 467 .byte 102,15,56,222,208
(...skipping 107 matching lines...) Expand 10 before | Expand all | Expand 10 after
585 cmpq $64,%rdx 575 cmpq $64,%rdx
586 jb .Lecb_enc_three 576 jb .Lecb_enc_three
587 movups 48(%rdi),%xmm5 577 movups 48(%rdi),%xmm5
588 je .Lecb_enc_four 578 je .Lecb_enc_four
589 movups 64(%rdi),%xmm6 579 movups 64(%rdi),%xmm6
590 cmpq $96,%rdx 580 cmpq $96,%rdx
591 jb .Lecb_enc_five 581 jb .Lecb_enc_five
592 movups 80(%rdi),%xmm7 582 movups 80(%rdi),%xmm7
593 je .Lecb_enc_six 583 je .Lecb_enc_six
594 movdqu 96(%rdi),%xmm8 584 movdqu 96(%rdi),%xmm8
585 xorps %xmm9,%xmm9
595 call _aesni_encrypt8 586 call _aesni_encrypt8
596 movups %xmm2,(%rsi) 587 movups %xmm2,(%rsi)
597 movups %xmm3,16(%rsi) 588 movups %xmm3,16(%rsi)
598 movups %xmm4,32(%rsi) 589 movups %xmm4,32(%rsi)
599 movups %xmm5,48(%rsi) 590 movups %xmm5,48(%rsi)
600 movups %xmm6,64(%rsi) 591 movups %xmm6,64(%rsi)
601 movups %xmm7,80(%rsi) 592 movups %xmm7,80(%rsi)
602 movups %xmm8,96(%rsi) 593 movups %xmm8,96(%rsi)
603 jmp .Lecb_ret 594 jmp .Lecb_ret
604 .align 16 595 .align 16
(...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after
698 leaq 128(%rdi),%rdi 689 leaq 128(%rdi),%rdi
699 .Lecb_dec_loop8_enter: 690 .Lecb_dec_loop8_enter:
700 691
701 call _aesni_decrypt8 692 call _aesni_decrypt8
702 693
703 movups (%r11),%xmm0 694 movups (%r11),%xmm0
704 subq $128,%rdx 695 subq $128,%rdx
705 jnc .Lecb_dec_loop8 696 jnc .Lecb_dec_loop8
706 697
707 movups %xmm2,(%rsi) 698 movups %xmm2,(%rsi)
699 pxor %xmm2,%xmm2
708 movq %r11,%rcx 700 movq %r11,%rcx
709 movups %xmm3,16(%rsi) 701 movups %xmm3,16(%rsi)
702 pxor %xmm3,%xmm3
710 movl %r10d,%eax 703 movl %r10d,%eax
711 movups %xmm4,32(%rsi) 704 movups %xmm4,32(%rsi)
705 pxor %xmm4,%xmm4
712 movups %xmm5,48(%rsi) 706 movups %xmm5,48(%rsi)
707 pxor %xmm5,%xmm5
713 movups %xmm6,64(%rsi) 708 movups %xmm6,64(%rsi)
709 pxor %xmm6,%xmm6
714 movups %xmm7,80(%rsi) 710 movups %xmm7,80(%rsi)
711 pxor %xmm7,%xmm7
715 movups %xmm8,96(%rsi) 712 movups %xmm8,96(%rsi)
713 pxor %xmm8,%xmm8
716 movups %xmm9,112(%rsi) 714 movups %xmm9,112(%rsi)
715 pxor %xmm9,%xmm9
717 leaq 128(%rsi),%rsi 716 leaq 128(%rsi),%rsi
718 addq $128,%rdx 717 addq $128,%rdx
719 jz .Lecb_ret 718 jz .Lecb_ret
720 719
721 .Lecb_dec_tail: 720 .Lecb_dec_tail:
722 movups (%rdi),%xmm2 721 movups (%rdi),%xmm2
723 cmpq $32,%rdx 722 cmpq $32,%rdx
724 jb .Lecb_dec_one 723 jb .Lecb_dec_one
725 movups 16(%rdi),%xmm3 724 movups 16(%rdi),%xmm3
726 je .Lecb_dec_two 725 je .Lecb_dec_two
727 movups 32(%rdi),%xmm4 726 movups 32(%rdi),%xmm4
728 cmpq $64,%rdx 727 cmpq $64,%rdx
729 jb .Lecb_dec_three 728 jb .Lecb_dec_three
730 movups 48(%rdi),%xmm5 729 movups 48(%rdi),%xmm5
731 je .Lecb_dec_four 730 je .Lecb_dec_four
732 movups 64(%rdi),%xmm6 731 movups 64(%rdi),%xmm6
733 cmpq $96,%rdx 732 cmpq $96,%rdx
734 jb .Lecb_dec_five 733 jb .Lecb_dec_five
735 movups 80(%rdi),%xmm7 734 movups 80(%rdi),%xmm7
736 je .Lecb_dec_six 735 je .Lecb_dec_six
737 movups 96(%rdi),%xmm8 736 movups 96(%rdi),%xmm8
738 movups (%rcx),%xmm0 737 movups (%rcx),%xmm0
738 xorps %xmm9,%xmm9
739 call _aesni_decrypt8 739 call _aesni_decrypt8
740 movups %xmm2,(%rsi) 740 movups %xmm2,(%rsi)
741 pxor %xmm2,%xmm2
741 movups %xmm3,16(%rsi) 742 movups %xmm3,16(%rsi)
743 pxor %xmm3,%xmm3
742 movups %xmm4,32(%rsi) 744 movups %xmm4,32(%rsi)
745 pxor %xmm4,%xmm4
743 movups %xmm5,48(%rsi) 746 movups %xmm5,48(%rsi)
747 pxor %xmm5,%xmm5
744 movups %xmm6,64(%rsi) 748 movups %xmm6,64(%rsi)
749 pxor %xmm6,%xmm6
745 movups %xmm7,80(%rsi) 750 movups %xmm7,80(%rsi)
751 pxor %xmm7,%xmm7
746 movups %xmm8,96(%rsi) 752 movups %xmm8,96(%rsi)
753 pxor %xmm8,%xmm8
754 pxor %xmm9,%xmm9
747 jmp .Lecb_ret 755 jmp .Lecb_ret
748 .align 16 756 .align 16
749 .Lecb_dec_one: 757 .Lecb_dec_one:
750 movups (%rcx),%xmm0 758 movups (%rcx),%xmm0
751 movups 16(%rcx),%xmm1 759 movups 16(%rcx),%xmm1
752 leaq 32(%rcx),%rcx 760 leaq 32(%rcx),%rcx
753 xorps %xmm0,%xmm2 761 xorps %xmm0,%xmm2
754 .Loop_dec1_4: 762 .Loop_dec1_4:
755 .byte 102,15,56,222,209 763 .byte 102,15,56,222,209
756 decl %eax 764 decl %eax
757 movups (%rcx),%xmm1 765 movups (%rcx),%xmm1
758 leaq 16(%rcx),%rcx 766 leaq 16(%rcx),%rcx
759 jnz .Loop_dec1_4 767 jnz .Loop_dec1_4
760 .byte 102,15,56,223,209 768 .byte 102,15,56,223,209
761 movups %xmm2,(%rsi) 769 movups %xmm2,(%rsi)
770 pxor %xmm2,%xmm2
762 jmp .Lecb_ret 771 jmp .Lecb_ret
763 .align 16 772 .align 16
764 .Lecb_dec_two: 773 .Lecb_dec_two:
765 call _aesni_decrypt2 774 call _aesni_decrypt2
766 movups %xmm2,(%rsi) 775 movups %xmm2,(%rsi)
776 pxor %xmm2,%xmm2
767 movups %xmm3,16(%rsi) 777 movups %xmm3,16(%rsi)
778 pxor %xmm3,%xmm3
768 jmp .Lecb_ret 779 jmp .Lecb_ret
769 .align 16 780 .align 16
770 .Lecb_dec_three: 781 .Lecb_dec_three:
771 call _aesni_decrypt3 782 call _aesni_decrypt3
772 movups %xmm2,(%rsi) 783 movups %xmm2,(%rsi)
784 pxor %xmm2,%xmm2
773 movups %xmm3,16(%rsi) 785 movups %xmm3,16(%rsi)
786 pxor %xmm3,%xmm3
774 movups %xmm4,32(%rsi) 787 movups %xmm4,32(%rsi)
788 pxor %xmm4,%xmm4
775 jmp .Lecb_ret 789 jmp .Lecb_ret
776 .align 16 790 .align 16
777 .Lecb_dec_four: 791 .Lecb_dec_four:
778 call _aesni_decrypt4 792 call _aesni_decrypt4
779 movups %xmm2,(%rsi) 793 movups %xmm2,(%rsi)
794 pxor %xmm2,%xmm2
780 movups %xmm3,16(%rsi) 795 movups %xmm3,16(%rsi)
796 pxor %xmm3,%xmm3
781 movups %xmm4,32(%rsi) 797 movups %xmm4,32(%rsi)
798 pxor %xmm4,%xmm4
782 movups %xmm5,48(%rsi) 799 movups %xmm5,48(%rsi)
800 pxor %xmm5,%xmm5
783 jmp .Lecb_ret 801 jmp .Lecb_ret
784 .align 16 802 .align 16
785 .Lecb_dec_five: 803 .Lecb_dec_five:
786 xorps %xmm7,%xmm7 804 xorps %xmm7,%xmm7
787 call _aesni_decrypt6 805 call _aesni_decrypt6
788 movups %xmm2,(%rsi) 806 movups %xmm2,(%rsi)
807 pxor %xmm2,%xmm2
789 movups %xmm3,16(%rsi) 808 movups %xmm3,16(%rsi)
809 pxor %xmm3,%xmm3
790 movups %xmm4,32(%rsi) 810 movups %xmm4,32(%rsi)
811 pxor %xmm4,%xmm4
791 movups %xmm5,48(%rsi) 812 movups %xmm5,48(%rsi)
813 pxor %xmm5,%xmm5
792 movups %xmm6,64(%rsi) 814 movups %xmm6,64(%rsi)
815 pxor %xmm6,%xmm6
816 pxor %xmm7,%xmm7
793 jmp .Lecb_ret 817 jmp .Lecb_ret
794 .align 16 818 .align 16
795 .Lecb_dec_six: 819 .Lecb_dec_six:
796 call _aesni_decrypt6 820 call _aesni_decrypt6
797 movups %xmm2,(%rsi) 821 movups %xmm2,(%rsi)
822 pxor %xmm2,%xmm2
798 movups %xmm3,16(%rsi) 823 movups %xmm3,16(%rsi)
824 pxor %xmm3,%xmm3
799 movups %xmm4,32(%rsi) 825 movups %xmm4,32(%rsi)
826 pxor %xmm4,%xmm4
800 movups %xmm5,48(%rsi) 827 movups %xmm5,48(%rsi)
828 pxor %xmm5,%xmm5
801 movups %xmm6,64(%rsi) 829 movups %xmm6,64(%rsi)
830 pxor %xmm6,%xmm6
802 movups %xmm7,80(%rsi) 831 movups %xmm7,80(%rsi)
832 pxor %xmm7,%xmm7
803 833
804 .Lecb_ret: 834 .Lecb_ret:
835 xorps %xmm0,%xmm0
836 pxor %xmm1,%xmm1
805 .byte 0xf3,0xc3 837 .byte 0xf3,0xc3
806 .size aesni_ecb_encrypt,.-aesni_ecb_encrypt 838 .size aesni_ecb_encrypt,.-aesni_ecb_encrypt
807 .globl aesni_ccm64_encrypt_blocks 839 .globl aesni_ccm64_encrypt_blocks
808 .hidden aesni_ccm64_encrypt_blocks 840 .hidden aesni_ccm64_encrypt_blocks
809 .type aesni_ccm64_encrypt_blocks,@function 841 .type aesni_ccm64_encrypt_blocks,@function
810 .align 16 842 .align 16
811 aesni_ccm64_encrypt_blocks: 843 aesni_ccm64_encrypt_blocks:
812 movl 240(%rcx),%eax 844 movl 240(%rcx),%eax
813 movdqu (%r8),%xmm6 845 movdqu (%r8),%xmm6
814 movdqa .Lincrement64(%rip),%xmm9 846 movdqa .Lincrement64(%rip),%xmm9
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
852 .byte 102,15,56,221,216 884 .byte 102,15,56,221,216
853 885
854 leaq 16(%rdi),%rdi 886 leaq 16(%rdi),%rdi
855 xorps %xmm2,%xmm8 887 xorps %xmm2,%xmm8
856 movdqa %xmm6,%xmm2 888 movdqa %xmm6,%xmm2
857 movups %xmm8,(%rsi) 889 movups %xmm8,(%rsi)
858 .byte 102,15,56,0,215 890 .byte 102,15,56,0,215
859 leaq 16(%rsi),%rsi 891 leaq 16(%rsi),%rsi
860 jnz .Lccm64_enc_outer 892 jnz .Lccm64_enc_outer
861 893
894 pxor %xmm0,%xmm0
895 pxor %xmm1,%xmm1
896 pxor %xmm2,%xmm2
862 movups %xmm3,(%r9) 897 movups %xmm3,(%r9)
898 pxor %xmm3,%xmm3
899 pxor %xmm8,%xmm8
900 pxor %xmm6,%xmm6
863 .byte 0xf3,0xc3 901 .byte 0xf3,0xc3
864 .size aesni_ccm64_encrypt_blocks,.-aesni_ccm64_encrypt_blocks 902 .size aesni_ccm64_encrypt_blocks,.-aesni_ccm64_encrypt_blocks
865 .globl aesni_ccm64_decrypt_blocks 903 .globl aesni_ccm64_decrypt_blocks
866 .hidden aesni_ccm64_decrypt_blocks 904 .hidden aesni_ccm64_decrypt_blocks
867 .type aesni_ccm64_decrypt_blocks,@function 905 .type aesni_ccm64_decrypt_blocks,@function
868 .align 16 906 .align 16
869 aesni_ccm64_decrypt_blocks: 907 aesni_ccm64_decrypt_blocks:
870 movl 240(%rcx),%eax 908 movl 240(%rcx),%eax
871 movups (%r8),%xmm6 909 movups (%r8),%xmm6
872 movdqu (%r9),%xmm3 910 movdqu (%r9),%xmm3
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after
944 xorps %xmm0,%xmm8 982 xorps %xmm0,%xmm8
945 leaq 32(%r11),%r11 983 leaq 32(%r11),%r11
946 xorps %xmm8,%xmm3 984 xorps %xmm8,%xmm3
947 .Loop_enc1_6: 985 .Loop_enc1_6:
948 .byte 102,15,56,220,217 986 .byte 102,15,56,220,217
949 decl %eax 987 decl %eax
950 movups (%r11),%xmm1 988 movups (%r11),%xmm1
951 leaq 16(%r11),%r11 989 leaq 16(%r11),%r11
952 jnz .Loop_enc1_6 990 jnz .Loop_enc1_6
953 .byte 102,15,56,221,217 991 .byte 102,15,56,221,217
992 pxor %xmm0,%xmm0
993 pxor %xmm1,%xmm1
994 pxor %xmm2,%xmm2
954 movups %xmm3,(%r9) 995 movups %xmm3,(%r9)
996 pxor %xmm3,%xmm3
997 pxor %xmm8,%xmm8
998 pxor %xmm6,%xmm6
955 .byte 0xf3,0xc3 999 .byte 0xf3,0xc3
956 .size aesni_ccm64_decrypt_blocks,.-aesni_ccm64_decrypt_blocks 1000 .size aesni_ccm64_decrypt_blocks,.-aesni_ccm64_decrypt_blocks
957 .globl aesni_ctr32_encrypt_blocks 1001 .globl aesni_ctr32_encrypt_blocks
958 .hidden aesni_ctr32_encrypt_blocks 1002 .hidden aesni_ctr32_encrypt_blocks
959 .type aesni_ctr32_encrypt_blocks,@function 1003 .type aesni_ctr32_encrypt_blocks,@function
960 .align 16 1004 .align 16
961 aesni_ctr32_encrypt_blocks: 1005 aesni_ctr32_encrypt_blocks:
1006 cmpq $1,%rdx
1007 jne .Lctr32_bulk
1008
1009
1010
1011 movups (%r8),%xmm2
1012 movups (%rdi),%xmm3
1013 movl 240(%rcx),%edx
1014 movups (%rcx),%xmm0
1015 movups 16(%rcx),%xmm1
1016 leaq 32(%rcx),%rcx
1017 xorps %xmm0,%xmm2
1018 .Loop_enc1_7:
1019 .byte 102,15,56,220,209
1020 decl %edx
1021 movups (%rcx),%xmm1
1022 leaq 16(%rcx),%rcx
1023 jnz .Loop_enc1_7
1024 .byte 102,15,56,221,209
1025 pxor %xmm0,%xmm0
1026 pxor %xmm1,%xmm1
1027 xorps %xmm3,%xmm2
1028 pxor %xmm3,%xmm3
1029 movups %xmm2,(%rsi)
1030 xorps %xmm2,%xmm2
1031 jmp .Lctr32_epilogue
1032
1033 .align 16
1034 .Lctr32_bulk:
962 leaq (%rsp),%rax 1035 leaq (%rsp),%rax
963 pushq %rbp 1036 pushq %rbp
964 subq $128,%rsp 1037 subq $128,%rsp
965 andq $-16,%rsp 1038 andq $-16,%rsp
966 leaq -8(%rax),%rbp 1039 leaq -8(%rax),%rbp
967 1040
968 » cmpq» $1,%rdx 1041
969 » je» .Lctr32_one_shortcut 1042
970 1043
971 movdqu (%r8),%xmm2 1044 movdqu (%r8),%xmm2
972 movdqu (%rcx),%xmm0 1045 movdqu (%rcx),%xmm0
973 movl 12(%r8),%r8d 1046 movl 12(%r8),%r8d
974 pxor %xmm0,%xmm2 1047 pxor %xmm0,%xmm2
975 movl 12(%rcx),%r11d 1048 movl 12(%rcx),%r11d
976 movdqa %xmm2,0(%rsp) 1049 movdqa %xmm2,0(%rsp)
977 bswapl %r8d 1050 bswapl %r8d
978 movdqa %xmm2,%xmm3 1051 movdqa %xmm2,%xmm3
979 movdqa %xmm2,%xmm4 1052 movdqa %xmm2,%xmm4
(...skipping 370 matching lines...) Expand 10 before | Expand all | Expand 10 after
1350 leaq 128(%rsi),%rsi 1423 leaq 128(%rsi),%rsi
1351 1424
1352 subq $8,%rdx 1425 subq $8,%rdx
1353 jnc .Lctr32_loop8 1426 jnc .Lctr32_loop8
1354 1427
1355 addq $8,%rdx 1428 addq $8,%rdx
1356 jz .Lctr32_done 1429 jz .Lctr32_done
1357 leaq -128(%rcx),%rcx 1430 leaq -128(%rcx),%rcx
1358 1431
1359 .Lctr32_tail: 1432 .Lctr32_tail:
1433
1434
1360 leaq 16(%rcx),%rcx 1435 leaq 16(%rcx),%rcx
1361 cmpq $4,%rdx 1436 cmpq $4,%rdx
1362 jb .Lctr32_loop3 1437 jb .Lctr32_loop3
1363 je .Lctr32_loop4 1438 je .Lctr32_loop4
1364 1439
1440
1365 shll $4,%eax 1441 shll $4,%eax
1366 movdqa 96(%rsp),%xmm8 1442 movdqa 96(%rsp),%xmm8
1367 pxor %xmm9,%xmm9 1443 pxor %xmm9,%xmm9
1368 1444
1369 movups 16(%rcx),%xmm0 1445 movups 16(%rcx),%xmm0
1370 .byte 102,15,56,220,209 1446 .byte 102,15,56,220,209
1371 .byte 102,15,56,220,217 1447 .byte 102,15,56,220,217
1372 leaq 32-16(%rcx,%rax,1),%rcx 1448 leaq 32-16(%rcx,%rax,1),%rcx
1373 negq %rax 1449 negq %rax
1374 .byte 102,15,56,220,225 1450 .byte 102,15,56,220,225
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after
1457 jb .Lctr32_done 1533 jb .Lctr32_done
1458 1534
1459 movups 16(%rdi),%xmm11 1535 movups 16(%rdi),%xmm11
1460 xorps %xmm11,%xmm3 1536 xorps %xmm11,%xmm3
1461 movups %xmm3,16(%rsi) 1537 movups %xmm3,16(%rsi)
1462 je .Lctr32_done 1538 je .Lctr32_done
1463 1539
1464 movups 32(%rdi),%xmm12 1540 movups 32(%rdi),%xmm12
1465 xorps %xmm12,%xmm4 1541 xorps %xmm12,%xmm4
1466 movups %xmm4,32(%rsi) 1542 movups %xmm4,32(%rsi)
1467 jmp .Lctr32_done
1468 1543
1469 .align 16
1470 .Lctr32_one_shortcut:
1471 movups (%r8),%xmm2
1472 movups (%rdi),%xmm10
1473 movl 240(%rcx),%eax
1474 movups (%rcx),%xmm0
1475 movups 16(%rcx),%xmm1
1476 leaq 32(%rcx),%rcx
1477 xorps %xmm0,%xmm2
1478 .Loop_enc1_7:
1479 .byte 102,15,56,220,209
1480 decl %eax
1481 movups (%rcx),%xmm1
1482 leaq 16(%rcx),%rcx
1483 jnz .Loop_enc1_7
1484 .byte 102,15,56,221,209
1485 xorps %xmm10,%xmm2
1486 movups %xmm2,(%rsi)
1487 jmp .Lctr32_done
1488
1489 .align 16
1490 .Lctr32_done: 1544 .Lctr32_done:
1545 xorps %xmm0,%xmm0
1546 xorl %r11d,%r11d
1547 pxor %xmm1,%xmm1
1548 pxor %xmm2,%xmm2
1549 pxor %xmm3,%xmm3
1550 pxor %xmm4,%xmm4
1551 pxor %xmm5,%xmm5
1552 pxor %xmm6,%xmm6
1553 pxor %xmm7,%xmm7
1554 movaps %xmm0,0(%rsp)
1555 pxor %xmm8,%xmm8
1556 movaps %xmm0,16(%rsp)
1557 pxor %xmm9,%xmm9
1558 movaps %xmm0,32(%rsp)
1559 pxor %xmm10,%xmm10
1560 movaps %xmm0,48(%rsp)
1561 pxor %xmm11,%xmm11
1562 movaps %xmm0,64(%rsp)
1563 pxor %xmm12,%xmm12
1564 movaps %xmm0,80(%rsp)
1565 pxor %xmm13,%xmm13
1566 movaps %xmm0,96(%rsp)
1567 pxor %xmm14,%xmm14
1568 movaps %xmm0,112(%rsp)
1569 pxor %xmm15,%xmm15
1491 leaq (%rbp),%rsp 1570 leaq (%rbp),%rsp
1492 popq %rbp 1571 popq %rbp
1493 .Lctr32_epilogue: 1572 .Lctr32_epilogue:
1494 .byte 0xf3,0xc3 1573 .byte 0xf3,0xc3
1495 .size aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks 1574 .size aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks
1496 .globl aesni_xts_encrypt 1575 .globl aesni_xts_encrypt
1497 .hidden aesni_xts_encrypt 1576 .hidden aesni_xts_encrypt
1498 .type aesni_xts_encrypt,@function 1577 .type aesni_xts_encrypt,@function
1499 .align 16 1578 .align 16
1500 aesni_xts_encrypt: 1579 aesni_xts_encrypt:
(...skipping 251 matching lines...) Expand 10 before | Expand all | Expand 10 after
1752 movups %xmm7,-16(%rsi) 1831 movups %xmm7,-16(%rsi)
1753 subq $96,%rdx 1832 subq $96,%rdx
1754 jnc .Lxts_enc_grandloop 1833 jnc .Lxts_enc_grandloop
1755 1834
1756 movl $16+96,%eax 1835 movl $16+96,%eax
1757 subl %r10d,%eax 1836 subl %r10d,%eax
1758 movq %r11,%rcx 1837 movq %r11,%rcx
1759 shrl $4,%eax 1838 shrl $4,%eax
1760 1839
1761 .Lxts_enc_short: 1840 .Lxts_enc_short:
1841
1762 movl %eax,%r10d 1842 movl %eax,%r10d
1763 pxor %xmm0,%xmm10 1843 pxor %xmm0,%xmm10
1764 addq $96,%rdx 1844 addq $96,%rdx
1765 jz .Lxts_enc_done 1845 jz .Lxts_enc_done
1766 1846
1767 pxor %xmm0,%xmm11 1847 pxor %xmm0,%xmm11
1768 cmpq $32,%rdx 1848 cmpq $32,%rdx
1769 jb .Lxts_enc_one 1849 jb .Lxts_enc_one
1770 pxor %xmm0,%xmm12 1850 pxor %xmm0,%xmm12
1771 je .Lxts_enc_two 1851 je .Lxts_enc_two
1772 1852
1773 pxor %xmm0,%xmm13 1853 pxor %xmm0,%xmm13
1774 cmpq $64,%rdx 1854 cmpq $64,%rdx
1775 jb .Lxts_enc_three 1855 jb .Lxts_enc_three
1776 pxor %xmm0,%xmm14 1856 pxor %xmm0,%xmm14
1777 je .Lxts_enc_four 1857 je .Lxts_enc_four
1778 1858
1779 movdqu (%rdi),%xmm2 1859 movdqu (%rdi),%xmm2
1780 movdqu 16(%rdi),%xmm3 1860 movdqu 16(%rdi),%xmm3
1781 movdqu 32(%rdi),%xmm4 1861 movdqu 32(%rdi),%xmm4
1782 pxor %xmm10,%xmm2 1862 pxor %xmm10,%xmm2
1783 movdqu 48(%rdi),%xmm5 1863 movdqu 48(%rdi),%xmm5
1784 pxor %xmm11,%xmm3 1864 pxor %xmm11,%xmm3
1785 movdqu 64(%rdi),%xmm6 1865 movdqu 64(%rdi),%xmm6
1786 leaq 80(%rdi),%rdi 1866 leaq 80(%rdi),%rdi
1787 pxor %xmm12,%xmm4 1867 pxor %xmm12,%xmm4
1788 pxor %xmm13,%xmm5 1868 pxor %xmm13,%xmm5
1789 pxor %xmm14,%xmm6 1869 pxor %xmm14,%xmm6
1870 pxor %xmm7,%xmm7
1790 1871
1791 call _aesni_encrypt6 1872 call _aesni_encrypt6
1792 1873
1793 xorps %xmm10,%xmm2 1874 xorps %xmm10,%xmm2
1794 movdqa %xmm15,%xmm10 1875 movdqa %xmm15,%xmm10
1795 xorps %xmm11,%xmm3 1876 xorps %xmm11,%xmm3
1796 xorps %xmm12,%xmm4 1877 xorps %xmm12,%xmm4
1797 movdqu %xmm2,(%rsi) 1878 movdqu %xmm2,(%rsi)
1798 xorps %xmm13,%xmm5 1879 xorps %xmm13,%xmm5
1799 movdqu %xmm3,16(%rsi) 1880 movdqu %xmm3,16(%rsi)
(...skipping 122 matching lines...) Expand 10 before | Expand all | Expand 10 after
1922 .byte 102,15,56,220,209 2003 .byte 102,15,56,220,209
1923 decl %eax 2004 decl %eax
1924 movups (%rcx),%xmm1 2005 movups (%rcx),%xmm1
1925 leaq 16(%rcx),%rcx 2006 leaq 16(%rcx),%rcx
1926 jnz .Loop_enc1_10 2007 jnz .Loop_enc1_10
1927 .byte 102,15,56,221,209 2008 .byte 102,15,56,221,209
1928 xorps %xmm10,%xmm2 2009 xorps %xmm10,%xmm2
1929 movups %xmm2,-16(%rsi) 2010 movups %xmm2,-16(%rsi)
1930 2011
1931 .Lxts_enc_ret: 2012 .Lxts_enc_ret:
2013 xorps %xmm0,%xmm0
2014 pxor %xmm1,%xmm1
2015 pxor %xmm2,%xmm2
2016 pxor %xmm3,%xmm3
2017 pxor %xmm4,%xmm4
2018 pxor %xmm5,%xmm5
2019 pxor %xmm6,%xmm6
2020 pxor %xmm7,%xmm7
2021 movaps %xmm0,0(%rsp)
2022 pxor %xmm8,%xmm8
2023 movaps %xmm0,16(%rsp)
2024 pxor %xmm9,%xmm9
2025 movaps %xmm0,32(%rsp)
2026 pxor %xmm10,%xmm10
2027 movaps %xmm0,48(%rsp)
2028 pxor %xmm11,%xmm11
2029 movaps %xmm0,64(%rsp)
2030 pxor %xmm12,%xmm12
2031 movaps %xmm0,80(%rsp)
2032 pxor %xmm13,%xmm13
2033 movaps %xmm0,96(%rsp)
2034 pxor %xmm14,%xmm14
2035 pxor %xmm15,%xmm15
1932 leaq (%rbp),%rsp 2036 leaq (%rbp),%rsp
1933 popq %rbp 2037 popq %rbp
1934 .Lxts_enc_epilogue: 2038 .Lxts_enc_epilogue:
1935 .byte 0xf3,0xc3 2039 .byte 0xf3,0xc3
1936 .size aesni_xts_encrypt,.-aesni_xts_encrypt 2040 .size aesni_xts_encrypt,.-aesni_xts_encrypt
1937 .globl aesni_xts_decrypt 2041 .globl aesni_xts_decrypt
1938 .hidden aesni_xts_decrypt 2042 .hidden aesni_xts_decrypt
1939 .type aesni_xts_decrypt,@function 2043 .type aesni_xts_decrypt,@function
1940 .align 16 2044 .align 16
1941 aesni_xts_decrypt: 2045 aesni_xts_decrypt:
(...skipping 257 matching lines...) Expand 10 before | Expand all | Expand 10 after
2199 movups %xmm7,-16(%rsi) 2303 movups %xmm7,-16(%rsi)
2200 subq $96,%rdx 2304 subq $96,%rdx
2201 jnc .Lxts_dec_grandloop 2305 jnc .Lxts_dec_grandloop
2202 2306
2203 movl $16+96,%eax 2307 movl $16+96,%eax
2204 subl %r10d,%eax 2308 subl %r10d,%eax
2205 movq %r11,%rcx 2309 movq %r11,%rcx
2206 shrl $4,%eax 2310 shrl $4,%eax
2207 2311
2208 .Lxts_dec_short: 2312 .Lxts_dec_short:
2313
2209 movl %eax,%r10d 2314 movl %eax,%r10d
2210 pxor %xmm0,%xmm10 2315 pxor %xmm0,%xmm10
2211 pxor %xmm0,%xmm11 2316 pxor %xmm0,%xmm11
2212 addq $96,%rdx 2317 addq $96,%rdx
2213 jz .Lxts_dec_done 2318 jz .Lxts_dec_done
2214 2319
2215 pxor %xmm0,%xmm12 2320 pxor %xmm0,%xmm12
2216 cmpq $32,%rdx 2321 cmpq $32,%rdx
2217 jb .Lxts_dec_one 2322 jb .Lxts_dec_one
2218 pxor %xmm0,%xmm13 2323 pxor %xmm0,%xmm13
(...skipping 182 matching lines...) Expand 10 before | Expand all | Expand 10 after
2401 .byte 102,15,56,222,209 2506 .byte 102,15,56,222,209
2402 decl %eax 2507 decl %eax
2403 movups (%rcx),%xmm1 2508 movups (%rcx),%xmm1
2404 leaq 16(%rcx),%rcx 2509 leaq 16(%rcx),%rcx
2405 jnz .Loop_dec1_14 2510 jnz .Loop_dec1_14
2406 .byte 102,15,56,223,209 2511 .byte 102,15,56,223,209
2407 xorps %xmm10,%xmm2 2512 xorps %xmm10,%xmm2
2408 movups %xmm2,(%rsi) 2513 movups %xmm2,(%rsi)
2409 2514
2410 .Lxts_dec_ret: 2515 .Lxts_dec_ret:
2516 xorps %xmm0,%xmm0
2517 pxor %xmm1,%xmm1
2518 pxor %xmm2,%xmm2
2519 pxor %xmm3,%xmm3
2520 pxor %xmm4,%xmm4
2521 pxor %xmm5,%xmm5
2522 pxor %xmm6,%xmm6
2523 pxor %xmm7,%xmm7
2524 movaps %xmm0,0(%rsp)
2525 pxor %xmm8,%xmm8
2526 movaps %xmm0,16(%rsp)
2527 pxor %xmm9,%xmm9
2528 movaps %xmm0,32(%rsp)
2529 pxor %xmm10,%xmm10
2530 movaps %xmm0,48(%rsp)
2531 pxor %xmm11,%xmm11
2532 movaps %xmm0,64(%rsp)
2533 pxor %xmm12,%xmm12
2534 movaps %xmm0,80(%rsp)
2535 pxor %xmm13,%xmm13
2536 movaps %xmm0,96(%rsp)
2537 pxor %xmm14,%xmm14
2538 pxor %xmm15,%xmm15
2411 leaq (%rbp),%rsp 2539 leaq (%rbp),%rsp
2412 popq %rbp 2540 popq %rbp
2413 .Lxts_dec_epilogue: 2541 .Lxts_dec_epilogue:
2414 .byte 0xf3,0xc3 2542 .byte 0xf3,0xc3
2415 .size aesni_xts_decrypt,.-aesni_xts_decrypt 2543 .size aesni_xts_decrypt,.-aesni_xts_decrypt
2416 .globl aesni_cbc_encrypt 2544 .globl aesni_cbc_encrypt
2417 .hidden aesni_cbc_encrypt 2545 .hidden aesni_cbc_encrypt
2418 .type aesni_cbc_encrypt,@function 2546 .type aesni_cbc_encrypt,@function
2419 .align 16 2547 .align 16
2420 aesni_cbc_encrypt: 2548 aesni_cbc_encrypt:
(...skipping 29 matching lines...) Expand all
2450 jnz .Loop_enc1_15 2578 jnz .Loop_enc1_15
2451 .byte 102,15,56,221,209 2579 .byte 102,15,56,221,209
2452 movl %r10d,%eax 2580 movl %r10d,%eax
2453 movq %r11,%rcx 2581 movq %r11,%rcx
2454 movups %xmm2,0(%rsi) 2582 movups %xmm2,0(%rsi)
2455 leaq 16(%rsi),%rsi 2583 leaq 16(%rsi),%rsi
2456 subq $16,%rdx 2584 subq $16,%rdx
2457 jnc .Lcbc_enc_loop 2585 jnc .Lcbc_enc_loop
2458 addq $16,%rdx 2586 addq $16,%rdx
2459 jnz .Lcbc_enc_tail 2587 jnz .Lcbc_enc_tail
2588 pxor %xmm0,%xmm0
2589 pxor %xmm1,%xmm1
2460 movups %xmm2,(%r8) 2590 movups %xmm2,(%r8)
2591 pxor %xmm2,%xmm2
2592 pxor %xmm3,%xmm3
2461 jmp .Lcbc_ret 2593 jmp .Lcbc_ret
2462 2594
2463 .Lcbc_enc_tail: 2595 .Lcbc_enc_tail:
2464 movq %rdx,%rcx 2596 movq %rdx,%rcx
2465 xchgq %rdi,%rsi 2597 xchgq %rdi,%rsi
2466 .long 0x9066A4F3 2598 .long 0x9066A4F3
2467 movl $16,%ecx 2599 movl $16,%ecx
2468 subq %rdx,%rcx 2600 subq %rdx,%rcx
2469 xorl %eax,%eax 2601 xorl %eax,%eax
2470 .long 0x9066AAF3 2602 .long 0x9066AAF3
2471 leaq -16(%rdi),%rdi 2603 leaq -16(%rdi),%rdi
2472 movl %r10d,%eax 2604 movl %r10d,%eax
2473 movq %rdi,%rsi 2605 movq %rdi,%rsi
2474 movq %r11,%rcx 2606 movq %r11,%rcx
2475 xorq %rdx,%rdx 2607 xorq %rdx,%rdx
2476 jmp .Lcbc_enc_loop 2608 jmp .Lcbc_enc_loop
2477 2609
2478 .align 16 2610 .align 16
2479 .Lcbc_decrypt: 2611 .Lcbc_decrypt:
2612 cmpq $16,%rdx
2613 jne .Lcbc_decrypt_bulk
2614
2615
2616
2617 movdqu (%rdi),%xmm2
2618 movdqu (%r8),%xmm3
2619 movdqa %xmm2,%xmm4
2620 movups (%rcx),%xmm0
2621 movups 16(%rcx),%xmm1
2622 leaq 32(%rcx),%rcx
2623 xorps %xmm0,%xmm2
2624 .Loop_dec1_16:
2625 .byte 102,15,56,222,209
2626 decl %r10d
2627 movups (%rcx),%xmm1
2628 leaq 16(%rcx),%rcx
2629 jnz .Loop_dec1_16
2630 .byte 102,15,56,223,209
2631 pxor %xmm0,%xmm0
2632 pxor %xmm1,%xmm1
2633 movdqu %xmm4,(%r8)
2634 xorps %xmm3,%xmm2
2635 pxor %xmm3,%xmm3
2636 movups %xmm2,(%rsi)
2637 pxor %xmm2,%xmm2
2638 jmp .Lcbc_ret
2639 .align 16
2640 .Lcbc_decrypt_bulk:
2480 leaq (%rsp),%rax 2641 leaq (%rsp),%rax
2481 pushq %rbp 2642 pushq %rbp
2482 subq $16,%rsp 2643 subq $16,%rsp
2483 andq $-16,%rsp 2644 andq $-16,%rsp
2484 leaq -8(%rax),%rbp 2645 leaq -8(%rax),%rbp
2485 movups (%r8),%xmm10 2646 movups (%r8),%xmm10
2486 movl %r10d,%eax 2647 movl %r10d,%eax
2487 cmpq $80,%rdx 2648 cmpq $80,%rdx
2488 jbe .Lcbc_dec_tail 2649 jbe .Lcbc_dec_tail
2489 2650
(...skipping 216 matching lines...) Expand 10 before | Expand all | Expand 10 after
2706 movdqa %xmm1,%xmm7 2867 movdqa %xmm1,%xmm7
2707 movups %xmm8,96(%rsi) 2868 movups %xmm8,96(%rsi)
2708 leaq 112(%rsi),%rsi 2869 leaq 112(%rsi),%rsi
2709 2870
2710 subq $128,%rdx 2871 subq $128,%rdx
2711 ja .Lcbc_dec_loop8 2872 ja .Lcbc_dec_loop8
2712 2873
2713 movaps %xmm9,%xmm2 2874 movaps %xmm9,%xmm2
2714 leaq -112(%rcx),%rcx 2875 leaq -112(%rcx),%rcx
2715 addq $112,%rdx 2876 addq $112,%rdx
2716 » jle» .Lcbc_dec_tail_collected 2877 » jle» .Lcbc_dec_clear_tail_collected
2717 movups %xmm9,(%rsi) 2878 movups %xmm9,(%rsi)
2718 leaq 16(%rsi),%rsi 2879 leaq 16(%rsi),%rsi
2719 cmpq $80,%rdx 2880 cmpq $80,%rdx
2720 jbe .Lcbc_dec_tail 2881 jbe .Lcbc_dec_tail
2721 2882
2722 movaps %xmm11,%xmm2 2883 movaps %xmm11,%xmm2
2723 .Lcbc_dec_six_or_seven: 2884 .Lcbc_dec_six_or_seven:
2724 cmpq $96,%rdx 2885 cmpq $96,%rdx
2725 ja .Lcbc_dec_seven 2886 ja .Lcbc_dec_seven
2726 2887
2727 movaps %xmm7,%xmm8 2888 movaps %xmm7,%xmm8
2728 call _aesni_decrypt6 2889 call _aesni_decrypt6
2729 pxor %xmm10,%xmm2 2890 pxor %xmm10,%xmm2
2730 movaps %xmm8,%xmm10 2891 movaps %xmm8,%xmm10
2731 pxor %xmm11,%xmm3 2892 pxor %xmm11,%xmm3
2732 movdqu %xmm2,(%rsi) 2893 movdqu %xmm2,(%rsi)
2733 pxor %xmm12,%xmm4 2894 pxor %xmm12,%xmm4
2734 movdqu %xmm3,16(%rsi) 2895 movdqu %xmm3,16(%rsi)
2896 pxor %xmm3,%xmm3
2735 pxor %xmm13,%xmm5 2897 pxor %xmm13,%xmm5
2736 movdqu %xmm4,32(%rsi) 2898 movdqu %xmm4,32(%rsi)
2899 pxor %xmm4,%xmm4
2737 pxor %xmm14,%xmm6 2900 pxor %xmm14,%xmm6
2738 movdqu %xmm5,48(%rsi) 2901 movdqu %xmm5,48(%rsi)
2902 pxor %xmm5,%xmm5
2739 pxor %xmm15,%xmm7 2903 pxor %xmm15,%xmm7
2740 movdqu %xmm6,64(%rsi) 2904 movdqu %xmm6,64(%rsi)
2905 pxor %xmm6,%xmm6
2741 leaq 80(%rsi),%rsi 2906 leaq 80(%rsi),%rsi
2742 movdqa %xmm7,%xmm2 2907 movdqa %xmm7,%xmm2
2908 pxor %xmm7,%xmm7
2743 jmp .Lcbc_dec_tail_collected 2909 jmp .Lcbc_dec_tail_collected
2744 2910
2745 .align 16 2911 .align 16
2746 .Lcbc_dec_seven: 2912 .Lcbc_dec_seven:
2747 movups 96(%rdi),%xmm8 2913 movups 96(%rdi),%xmm8
2748 xorps %xmm9,%xmm9 2914 xorps %xmm9,%xmm9
2749 call _aesni_decrypt8 2915 call _aesni_decrypt8
2750 movups 80(%rdi),%xmm9 2916 movups 80(%rdi),%xmm9
2751 pxor %xmm10,%xmm2 2917 pxor %xmm10,%xmm2
2752 movups 96(%rdi),%xmm10 2918 movups 96(%rdi),%xmm10
2753 pxor %xmm11,%xmm3 2919 pxor %xmm11,%xmm3
2754 movdqu %xmm2,(%rsi) 2920 movdqu %xmm2,(%rsi)
2755 pxor %xmm12,%xmm4 2921 pxor %xmm12,%xmm4
2756 movdqu %xmm3,16(%rsi) 2922 movdqu %xmm3,16(%rsi)
2923 pxor %xmm3,%xmm3
2757 pxor %xmm13,%xmm5 2924 pxor %xmm13,%xmm5
2758 movdqu %xmm4,32(%rsi) 2925 movdqu %xmm4,32(%rsi)
2926 pxor %xmm4,%xmm4
2759 pxor %xmm14,%xmm6 2927 pxor %xmm14,%xmm6
2760 movdqu %xmm5,48(%rsi) 2928 movdqu %xmm5,48(%rsi)
2929 pxor %xmm5,%xmm5
2761 pxor %xmm15,%xmm7 2930 pxor %xmm15,%xmm7
2762 movdqu %xmm6,64(%rsi) 2931 movdqu %xmm6,64(%rsi)
2932 pxor %xmm6,%xmm6
2763 pxor %xmm9,%xmm8 2933 pxor %xmm9,%xmm8
2764 movdqu %xmm7,80(%rsi) 2934 movdqu %xmm7,80(%rsi)
2935 pxor %xmm7,%xmm7
2765 leaq 96(%rsi),%rsi 2936 leaq 96(%rsi),%rsi
2766 movdqa %xmm8,%xmm2 2937 movdqa %xmm8,%xmm2
2938 pxor %xmm8,%xmm8
2939 pxor %xmm9,%xmm9
2767 jmp .Lcbc_dec_tail_collected 2940 jmp .Lcbc_dec_tail_collected
2768 2941
2769 .align 16 2942 .align 16
2770 .Lcbc_dec_loop6: 2943 .Lcbc_dec_loop6:
2771 movups %xmm7,(%rsi) 2944 movups %xmm7,(%rsi)
2772 leaq 16(%rsi),%rsi 2945 leaq 16(%rsi),%rsi
2773 movdqu 0(%rdi),%xmm2 2946 movdqu 0(%rdi),%xmm2
2774 movdqu 16(%rdi),%xmm3 2947 movdqu 16(%rdi),%xmm3
2775 movdqa %xmm2,%xmm11 2948 movdqa %xmm2,%xmm11
2776 movdqu 32(%rdi),%xmm4 2949 movdqu 32(%rdi),%xmm4
(...skipping 23 matching lines...) Expand all
2800 movdqu %xmm5,48(%rsi) 2973 movdqu %xmm5,48(%rsi)
2801 pxor %xmm15,%xmm7 2974 pxor %xmm15,%xmm7
2802 movl %r10d,%eax 2975 movl %r10d,%eax
2803 movdqu %xmm6,64(%rsi) 2976 movdqu %xmm6,64(%rsi)
2804 leaq 80(%rsi),%rsi 2977 leaq 80(%rsi),%rsi
2805 subq $96,%rdx 2978 subq $96,%rdx
2806 ja .Lcbc_dec_loop6 2979 ja .Lcbc_dec_loop6
2807 2980
2808 movdqa %xmm7,%xmm2 2981 movdqa %xmm7,%xmm2
2809 addq $80,%rdx 2982 addq $80,%rdx
2810 » jle» .Lcbc_dec_tail_collected 2983 » jle» .Lcbc_dec_clear_tail_collected
2811 movups %xmm7,(%rsi) 2984 movups %xmm7,(%rsi)
2812 leaq 16(%rsi),%rsi 2985 leaq 16(%rsi),%rsi
2813 2986
2814 .Lcbc_dec_tail: 2987 .Lcbc_dec_tail:
2815 movups (%rdi),%xmm2 2988 movups (%rdi),%xmm2
2816 subq $16,%rdx 2989 subq $16,%rdx
2817 jbe .Lcbc_dec_one 2990 jbe .Lcbc_dec_one
2818 2991
2819 movups 16(%rdi),%xmm3 2992 movups 16(%rdi),%xmm3
2820 movaps %xmm2,%xmm11 2993 movaps %xmm2,%xmm11
(...skipping 14 matching lines...) Expand all
2835 movaps %xmm5,%xmm14 3008 movaps %xmm5,%xmm14
2836 movaps %xmm6,%xmm15 3009 movaps %xmm6,%xmm15
2837 xorps %xmm7,%xmm7 3010 xorps %xmm7,%xmm7
2838 call _aesni_decrypt6 3011 call _aesni_decrypt6
2839 pxor %xmm10,%xmm2 3012 pxor %xmm10,%xmm2
2840 movaps %xmm15,%xmm10 3013 movaps %xmm15,%xmm10
2841 pxor %xmm11,%xmm3 3014 pxor %xmm11,%xmm3
2842 movdqu %xmm2,(%rsi) 3015 movdqu %xmm2,(%rsi)
2843 pxor %xmm12,%xmm4 3016 pxor %xmm12,%xmm4
2844 movdqu %xmm3,16(%rsi) 3017 movdqu %xmm3,16(%rsi)
3018 pxor %xmm3,%xmm3
2845 pxor %xmm13,%xmm5 3019 pxor %xmm13,%xmm5
2846 movdqu %xmm4,32(%rsi) 3020 movdqu %xmm4,32(%rsi)
3021 pxor %xmm4,%xmm4
2847 pxor %xmm14,%xmm6 3022 pxor %xmm14,%xmm6
2848 movdqu %xmm5,48(%rsi) 3023 movdqu %xmm5,48(%rsi)
3024 pxor %xmm5,%xmm5
2849 leaq 64(%rsi),%rsi 3025 leaq 64(%rsi),%rsi
2850 movdqa %xmm6,%xmm2 3026 movdqa %xmm6,%xmm2
3027 pxor %xmm6,%xmm6
3028 pxor %xmm7,%xmm7
2851 subq $16,%rdx 3029 subq $16,%rdx
2852 jmp .Lcbc_dec_tail_collected 3030 jmp .Lcbc_dec_tail_collected
2853 3031
2854 .align 16 3032 .align 16
2855 .Lcbc_dec_one: 3033 .Lcbc_dec_one:
2856 movaps %xmm2,%xmm11 3034 movaps %xmm2,%xmm11
2857 movups (%rcx),%xmm0 3035 movups (%rcx),%xmm0
2858 movups 16(%rcx),%xmm1 3036 movups 16(%rcx),%xmm1
2859 leaq 32(%rcx),%rcx 3037 leaq 32(%rcx),%rcx
2860 xorps %xmm0,%xmm2 3038 xorps %xmm0,%xmm2
2861 .Loop_dec1_16: 3039 .Loop_dec1_17:
2862 .byte 102,15,56,222,209 3040 .byte 102,15,56,222,209
2863 decl %eax 3041 decl %eax
2864 movups (%rcx),%xmm1 3042 movups (%rcx),%xmm1
2865 leaq 16(%rcx),%rcx 3043 leaq 16(%rcx),%rcx
2866 » jnz» .Loop_dec1_16 3044 » jnz» .Loop_dec1_17
2867 .byte 102,15,56,223,209 3045 .byte 102,15,56,223,209
2868 xorps %xmm10,%xmm2 3046 xorps %xmm10,%xmm2
2869 movaps %xmm11,%xmm10 3047 movaps %xmm11,%xmm10
2870 jmp .Lcbc_dec_tail_collected 3048 jmp .Lcbc_dec_tail_collected
2871 .align 16 3049 .align 16
2872 .Lcbc_dec_two: 3050 .Lcbc_dec_two:
2873 movaps %xmm3,%xmm12 3051 movaps %xmm3,%xmm12
2874 call _aesni_decrypt2 3052 call _aesni_decrypt2
2875 pxor %xmm10,%xmm2 3053 pxor %xmm10,%xmm2
2876 movaps %xmm12,%xmm10 3054 movaps %xmm12,%xmm10
2877 pxor %xmm11,%xmm3 3055 pxor %xmm11,%xmm3
2878 movdqu %xmm2,(%rsi) 3056 movdqu %xmm2,(%rsi)
2879 movdqa %xmm3,%xmm2 3057 movdqa %xmm3,%xmm2
3058 pxor %xmm3,%xmm3
2880 leaq 16(%rsi),%rsi 3059 leaq 16(%rsi),%rsi
2881 jmp .Lcbc_dec_tail_collected 3060 jmp .Lcbc_dec_tail_collected
2882 .align 16 3061 .align 16
2883 .Lcbc_dec_three: 3062 .Lcbc_dec_three:
2884 movaps %xmm4,%xmm13 3063 movaps %xmm4,%xmm13
2885 call _aesni_decrypt3 3064 call _aesni_decrypt3
2886 pxor %xmm10,%xmm2 3065 pxor %xmm10,%xmm2
2887 movaps %xmm13,%xmm10 3066 movaps %xmm13,%xmm10
2888 pxor %xmm11,%xmm3 3067 pxor %xmm11,%xmm3
2889 movdqu %xmm2,(%rsi) 3068 movdqu %xmm2,(%rsi)
2890 pxor %xmm12,%xmm4 3069 pxor %xmm12,%xmm4
2891 movdqu %xmm3,16(%rsi) 3070 movdqu %xmm3,16(%rsi)
3071 pxor %xmm3,%xmm3
2892 movdqa %xmm4,%xmm2 3072 movdqa %xmm4,%xmm2
3073 pxor %xmm4,%xmm4
2893 leaq 32(%rsi),%rsi 3074 leaq 32(%rsi),%rsi
2894 jmp .Lcbc_dec_tail_collected 3075 jmp .Lcbc_dec_tail_collected
2895 .align 16 3076 .align 16
2896 .Lcbc_dec_four: 3077 .Lcbc_dec_four:
2897 movaps %xmm5,%xmm14 3078 movaps %xmm5,%xmm14
2898 call _aesni_decrypt4 3079 call _aesni_decrypt4
2899 pxor %xmm10,%xmm2 3080 pxor %xmm10,%xmm2
2900 movaps %xmm14,%xmm10 3081 movaps %xmm14,%xmm10
2901 pxor %xmm11,%xmm3 3082 pxor %xmm11,%xmm3
2902 movdqu %xmm2,(%rsi) 3083 movdqu %xmm2,(%rsi)
2903 pxor %xmm12,%xmm4 3084 pxor %xmm12,%xmm4
2904 movdqu %xmm3,16(%rsi) 3085 movdqu %xmm3,16(%rsi)
3086 pxor %xmm3,%xmm3
2905 pxor %xmm13,%xmm5 3087 pxor %xmm13,%xmm5
2906 movdqu %xmm4,32(%rsi) 3088 movdqu %xmm4,32(%rsi)
3089 pxor %xmm4,%xmm4
2907 movdqa %xmm5,%xmm2 3090 movdqa %xmm5,%xmm2
3091 pxor %xmm5,%xmm5
2908 leaq 48(%rsi),%rsi 3092 leaq 48(%rsi),%rsi
2909 jmp .Lcbc_dec_tail_collected 3093 jmp .Lcbc_dec_tail_collected
2910 3094
2911 .align 16 3095 .align 16
3096 .Lcbc_dec_clear_tail_collected:
3097 pxor %xmm3,%xmm3
3098 pxor %xmm4,%xmm4
3099 pxor %xmm5,%xmm5
3100 pxor %xmm6,%xmm6
3101 pxor %xmm7,%xmm7
3102 pxor %xmm8,%xmm8
3103 pxor %xmm9,%xmm9
2912 .Lcbc_dec_tail_collected: 3104 .Lcbc_dec_tail_collected:
2913 movups %xmm10,(%r8) 3105 movups %xmm10,(%r8)
2914 andq $15,%rdx 3106 andq $15,%rdx
2915 jnz .Lcbc_dec_tail_partial 3107 jnz .Lcbc_dec_tail_partial
2916 movups %xmm2,(%rsi) 3108 movups %xmm2,(%rsi)
3109 pxor %xmm2,%xmm2
2917 jmp .Lcbc_dec_ret 3110 jmp .Lcbc_dec_ret
2918 .align 16 3111 .align 16
2919 .Lcbc_dec_tail_partial: 3112 .Lcbc_dec_tail_partial:
2920 movaps %xmm2,(%rsp) 3113 movaps %xmm2,(%rsp)
3114 pxor %xmm2,%xmm2
2921 movq $16,%rcx 3115 movq $16,%rcx
2922 movq %rsi,%rdi 3116 movq %rsi,%rdi
2923 subq %rdx,%rcx 3117 subq %rdx,%rcx
2924 leaq (%rsp),%rsi 3118 leaq (%rsp),%rsi
2925 .long 0x9066A4F3 3119 .long 0x9066A4F3
3120 movdqa %xmm2,(%rsp)
2926 3121
2927 .Lcbc_dec_ret: 3122 .Lcbc_dec_ret:
3123 xorps %xmm0,%xmm0
3124 pxor %xmm1,%xmm1
2928 leaq (%rbp),%rsp 3125 leaq (%rbp),%rsp
2929 popq %rbp 3126 popq %rbp
2930 .Lcbc_ret: 3127 .Lcbc_ret:
2931 .byte 0xf3,0xc3 3128 .byte 0xf3,0xc3
2932 .size aesni_cbc_encrypt,.-aesni_cbc_encrypt 3129 .size aesni_cbc_encrypt,.-aesni_cbc_encrypt
2933 .globl aesni_set_decrypt_key 3130 .globl aesni_set_decrypt_key
2934 .hidden aesni_set_decrypt_key 3131 .hidden aesni_set_decrypt_key
2935 .type aesni_set_decrypt_key,@function 3132 .type aesni_set_decrypt_key,@function
2936 .align 16 3133 .align 16
2937 aesni_set_decrypt_key: 3134 aesni_set_decrypt_key:
(...skipping 18 matching lines...) Expand all
2956 .byte 102,15,56,219,201 3153 .byte 102,15,56,219,201
2957 leaq 16(%rdx),%rdx 3154 leaq 16(%rdx),%rdx
2958 leaq -16(%rdi),%rdi 3155 leaq -16(%rdi),%rdi
2959 movups %xmm0,16(%rdi) 3156 movups %xmm0,16(%rdi)
2960 movups %xmm1,-16(%rdx) 3157 movups %xmm1,-16(%rdx)
2961 cmpq %rdx,%rdi 3158 cmpq %rdx,%rdi
2962 ja .Ldec_key_inverse 3159 ja .Ldec_key_inverse
2963 3160
2964 movups (%rdx),%xmm0 3161 movups (%rdx),%xmm0
2965 .byte 102,15,56,219,192 3162 .byte 102,15,56,219,192
3163 pxor %xmm1,%xmm1
2966 movups %xmm0,(%rdi) 3164 movups %xmm0,(%rdi)
3165 pxor %xmm0,%xmm0
2967 .Ldec_key_ret: 3166 .Ldec_key_ret:
2968 addq $8,%rsp 3167 addq $8,%rsp
2969 .byte 0xf3,0xc3 3168 .byte 0xf3,0xc3
2970 .LSEH_end_set_decrypt_key: 3169 .LSEH_end_set_decrypt_key:
2971 .size aesni_set_decrypt_key,.-aesni_set_decrypt_key 3170 .size aesni_set_decrypt_key,.-aesni_set_decrypt_key
2972 .globl aesni_set_encrypt_key 3171 .globl aesni_set_encrypt_key
2973 .hidden aesni_set_encrypt_key 3172 .hidden aesni_set_encrypt_key
2974 .type aesni_set_encrypt_key,@function 3173 .type aesni_set_encrypt_key,@function
2975 .align 16 3174 .align 16
2976 aesni_set_encrypt_key: 3175 aesni_set_encrypt_key:
2977 __aesni_set_encrypt_key: 3176 __aesni_set_encrypt_key:
2978 .byte 0x48,0x83,0xEC,0x08 3177 .byte 0x48,0x83,0xEC,0x08
2979 movq $-1,%rax 3178 movq $-1,%rax
2980 testq %rdi,%rdi 3179 testq %rdi,%rdi
2981 jz .Lenc_key_ret 3180 jz .Lenc_key_ret
2982 testq %rdx,%rdx 3181 testq %rdx,%rdx
2983 jz .Lenc_key_ret 3182 jz .Lenc_key_ret
2984 3183
3184 movl $268437504,%r10d
2985 movups (%rdi),%xmm0 3185 movups (%rdi),%xmm0
2986 xorps %xmm4,%xmm4 3186 xorps %xmm4,%xmm4
3187 andl OPENSSL_ia32cap_P+4(%rip),%r10d
2987 leaq 16(%rdx),%rax 3188 leaq 16(%rdx),%rax
2988 cmpl $256,%esi 3189 cmpl $256,%esi
2989 je .L14rounds 3190 je .L14rounds
2990 cmpl $192,%esi 3191 cmpl $192,%esi
2991 je .L12rounds 3192 je .L12rounds
2992 cmpl $128,%esi 3193 cmpl $128,%esi
2993 jne .Lbad_keybits 3194 jne .Lbad_keybits
2994 3195
2995 .L10rounds: 3196 .L10rounds:
2996 movl $9,%esi 3197 movl $9,%esi
3198 cmpl $268435456,%r10d
3199 je .L10rounds_alt
3200
2997 movups %xmm0,(%rdx) 3201 movups %xmm0,(%rdx)
2998 .byte 102,15,58,223,200,1 3202 .byte 102,15,58,223,200,1
2999 call .Lkey_expansion_128_cold 3203 call .Lkey_expansion_128_cold
3000 .byte 102,15,58,223,200,2 3204 .byte 102,15,58,223,200,2
3001 call .Lkey_expansion_128 3205 call .Lkey_expansion_128
3002 .byte 102,15,58,223,200,4 3206 .byte 102,15,58,223,200,4
3003 call .Lkey_expansion_128 3207 call .Lkey_expansion_128
3004 .byte 102,15,58,223,200,8 3208 .byte 102,15,58,223,200,8
3005 call .Lkey_expansion_128 3209 call .Lkey_expansion_128
3006 .byte 102,15,58,223,200,16 3210 .byte 102,15,58,223,200,16
3007 call .Lkey_expansion_128 3211 call .Lkey_expansion_128
3008 .byte 102,15,58,223,200,32 3212 .byte 102,15,58,223,200,32
3009 call .Lkey_expansion_128 3213 call .Lkey_expansion_128
3010 .byte 102,15,58,223,200,64 3214 .byte 102,15,58,223,200,64
3011 call .Lkey_expansion_128 3215 call .Lkey_expansion_128
3012 .byte 102,15,58,223,200,128 3216 .byte 102,15,58,223,200,128
3013 call .Lkey_expansion_128 3217 call .Lkey_expansion_128
3014 .byte 102,15,58,223,200,27 3218 .byte 102,15,58,223,200,27
3015 call .Lkey_expansion_128 3219 call .Lkey_expansion_128
3016 .byte 102,15,58,223,200,54 3220 .byte 102,15,58,223,200,54
3017 call .Lkey_expansion_128 3221 call .Lkey_expansion_128
3018 movups %xmm0,(%rax) 3222 movups %xmm0,(%rax)
3019 movl %esi,80(%rax) 3223 movl %esi,80(%rax)
3020 xorl %eax,%eax 3224 xorl %eax,%eax
3021 jmp .Lenc_key_ret 3225 jmp .Lenc_key_ret
3022 3226
3023 .align 16 3227 .align 16
3228 .L10rounds_alt:
3229 movdqa .Lkey_rotate(%rip),%xmm5
3230 movl $8,%r10d
3231 movdqa .Lkey_rcon1(%rip),%xmm4
3232 movdqa %xmm0,%xmm2
3233 movdqu %xmm0,(%rdx)
3234 jmp .Loop_key128
3235
3236 .align 16
3237 .Loop_key128:
3238 .byte 102,15,56,0,197
3239 .byte 102,15,56,221,196
3240 pslld $1,%xmm4
3241 leaq 16(%rax),%rax
3242
3243 movdqa %xmm2,%xmm3
3244 pslldq $4,%xmm2
3245 pxor %xmm2,%xmm3
3246 pslldq $4,%xmm2
3247 pxor %xmm2,%xmm3
3248 pslldq $4,%xmm2
3249 pxor %xmm3,%xmm2
3250
3251 pxor %xmm2,%xmm0
3252 movdqu %xmm0,-16(%rax)
3253 movdqa %xmm0,%xmm2
3254
3255 decl %r10d
3256 jnz .Loop_key128
3257
3258 movdqa .Lkey_rcon1b(%rip),%xmm4
3259
3260 .byte 102,15,56,0,197
3261 .byte 102,15,56,221,196
3262 pslld $1,%xmm4
3263
3264 movdqa %xmm2,%xmm3
3265 pslldq $4,%xmm2
3266 pxor %xmm2,%xmm3
3267 pslldq $4,%xmm2
3268 pxor %xmm2,%xmm3
3269 pslldq $4,%xmm2
3270 pxor %xmm3,%xmm2
3271
3272 pxor %xmm2,%xmm0
3273 movdqu %xmm0,(%rax)
3274
3275 movdqa %xmm0,%xmm2
3276 .byte 102,15,56,0,197
3277 .byte 102,15,56,221,196
3278
3279 movdqa %xmm2,%xmm3
3280 pslldq $4,%xmm2
3281 pxor %xmm2,%xmm3
3282 pslldq $4,%xmm2
3283 pxor %xmm2,%xmm3
3284 pslldq $4,%xmm2
3285 pxor %xmm3,%xmm2
3286
3287 pxor %xmm2,%xmm0
3288 movdqu %xmm0,16(%rax)
3289
3290 movl %esi,96(%rax)
3291 xorl %eax,%eax
3292 jmp .Lenc_key_ret
3293
3294 .align 16
3024 .L12rounds: 3295 .L12rounds:
3025 movq 16(%rdi),%xmm2 3296 movq 16(%rdi),%xmm2
3026 movl $11,%esi 3297 movl $11,%esi
3298 cmpl $268435456,%r10d
3299 je .L12rounds_alt
3300
3027 movups %xmm0,(%rdx) 3301 movups %xmm0,(%rdx)
3028 .byte 102,15,58,223,202,1 3302 .byte 102,15,58,223,202,1
3029 call .Lkey_expansion_192a_cold 3303 call .Lkey_expansion_192a_cold
3030 .byte 102,15,58,223,202,2 3304 .byte 102,15,58,223,202,2
3031 call .Lkey_expansion_192b 3305 call .Lkey_expansion_192b
3032 .byte 102,15,58,223,202,4 3306 .byte 102,15,58,223,202,4
3033 call .Lkey_expansion_192a 3307 call .Lkey_expansion_192a
3034 .byte 102,15,58,223,202,8 3308 .byte 102,15,58,223,202,8
3035 call .Lkey_expansion_192b 3309 call .Lkey_expansion_192b
3036 .byte 102,15,58,223,202,16 3310 .byte 102,15,58,223,202,16
3037 call .Lkey_expansion_192a 3311 call .Lkey_expansion_192a
3038 .byte 102,15,58,223,202,32 3312 .byte 102,15,58,223,202,32
3039 call .Lkey_expansion_192b 3313 call .Lkey_expansion_192b
3040 .byte 102,15,58,223,202,64 3314 .byte 102,15,58,223,202,64
3041 call .Lkey_expansion_192a 3315 call .Lkey_expansion_192a
3042 .byte 102,15,58,223,202,128 3316 .byte 102,15,58,223,202,128
3043 call .Lkey_expansion_192b 3317 call .Lkey_expansion_192b
3044 movups %xmm0,(%rax) 3318 movups %xmm0,(%rax)
3045 movl %esi,48(%rax) 3319 movl %esi,48(%rax)
3046 xorq %rax,%rax 3320 xorq %rax,%rax
3047 jmp .Lenc_key_ret 3321 jmp .Lenc_key_ret
3048 3322
3049 .align 16 3323 .align 16
3324 .L12rounds_alt:
3325 movdqa .Lkey_rotate192(%rip),%xmm5
3326 movdqa .Lkey_rcon1(%rip),%xmm4
3327 movl $8,%r10d
3328 movdqu %xmm0,(%rdx)
3329 jmp .Loop_key192
3330
3331 .align 16
3332 .Loop_key192:
3333 movq %xmm2,0(%rax)
3334 movdqa %xmm2,%xmm1
3335 .byte 102,15,56,0,213
3336 .byte 102,15,56,221,212
3337 pslld $1,%xmm4
3338 leaq 24(%rax),%rax
3339
3340 movdqa %xmm0,%xmm3
3341 pslldq $4,%xmm0
3342 pxor %xmm0,%xmm3
3343 pslldq $4,%xmm0
3344 pxor %xmm0,%xmm3
3345 pslldq $4,%xmm0
3346 pxor %xmm3,%xmm0
3347
3348 pshufd $255,%xmm0,%xmm3
3349 pxor %xmm1,%xmm3
3350 pslldq $4,%xmm1
3351 pxor %xmm1,%xmm3
3352
3353 pxor %xmm2,%xmm0
3354 pxor %xmm3,%xmm2
3355 movdqu %xmm0,-16(%rax)
3356
3357 decl %r10d
3358 jnz .Loop_key192
3359
3360 movl %esi,32(%rax)
3361 xorl %eax,%eax
3362 jmp .Lenc_key_ret
3363
3364 .align 16
3050 .L14rounds: 3365 .L14rounds:
3051 movups 16(%rdi),%xmm2 3366 movups 16(%rdi),%xmm2
3052 movl $13,%esi 3367 movl $13,%esi
3053 leaq 16(%rax),%rax 3368 leaq 16(%rax),%rax
3369 cmpl $268435456,%r10d
3370 je .L14rounds_alt
3371
3054 movups %xmm0,(%rdx) 3372 movups %xmm0,(%rdx)
3055 movups %xmm2,16(%rdx) 3373 movups %xmm2,16(%rdx)
3056 .byte 102,15,58,223,202,1 3374 .byte 102,15,58,223,202,1
3057 call .Lkey_expansion_256a_cold 3375 call .Lkey_expansion_256a_cold
3058 .byte 102,15,58,223,200,1 3376 .byte 102,15,58,223,200,1
3059 call .Lkey_expansion_256b 3377 call .Lkey_expansion_256b
3060 .byte 102,15,58,223,202,2 3378 .byte 102,15,58,223,202,2
3061 call .Lkey_expansion_256a 3379 call .Lkey_expansion_256a
3062 .byte 102,15,58,223,200,2 3380 .byte 102,15,58,223,200,2
3063 call .Lkey_expansion_256b 3381 call .Lkey_expansion_256b
(...skipping 14 matching lines...) Expand all
3078 .byte 102,15,58,223,200,32 3396 .byte 102,15,58,223,200,32
3079 call .Lkey_expansion_256b 3397 call .Lkey_expansion_256b
3080 .byte 102,15,58,223,202,64 3398 .byte 102,15,58,223,202,64
3081 call .Lkey_expansion_256a 3399 call .Lkey_expansion_256a
3082 movups %xmm0,(%rax) 3400 movups %xmm0,(%rax)
3083 movl %esi,16(%rax) 3401 movl %esi,16(%rax)
3084 xorq %rax,%rax 3402 xorq %rax,%rax
3085 jmp .Lenc_key_ret 3403 jmp .Lenc_key_ret
3086 3404
3087 .align 16 3405 .align 16
3406 .L14rounds_alt:
3407 movdqa .Lkey_rotate(%rip),%xmm5
3408 movdqa .Lkey_rcon1(%rip),%xmm4
3409 movl $7,%r10d
3410 movdqu %xmm0,0(%rdx)
3411 movdqa %xmm2,%xmm1
3412 movdqu %xmm2,16(%rdx)
3413 jmp .Loop_key256
3414
3415 .align 16
3416 .Loop_key256:
3417 .byte 102,15,56,0,213
3418 .byte 102,15,56,221,212
3419
3420 movdqa %xmm0,%xmm3
3421 pslldq $4,%xmm0
3422 pxor %xmm0,%xmm3
3423 pslldq $4,%xmm0
3424 pxor %xmm0,%xmm3
3425 pslldq $4,%xmm0
3426 pxor %xmm3,%xmm0
3427 pslld $1,%xmm4
3428
3429 pxor %xmm2,%xmm0
3430 movdqu %xmm0,(%rax)
3431
3432 decl %r10d
3433 jz .Ldone_key256
3434
3435 pshufd $255,%xmm0,%xmm2
3436 pxor %xmm3,%xmm3
3437 .byte 102,15,56,221,211
3438
3439 movdqa %xmm1,%xmm3
3440 pslldq $4,%xmm1
3441 pxor %xmm1,%xmm3
3442 pslldq $4,%xmm1
3443 pxor %xmm1,%xmm3
3444 pslldq $4,%xmm1
3445 pxor %xmm3,%xmm1
3446
3447 pxor %xmm1,%xmm2
3448 movdqu %xmm2,16(%rax)
3449 leaq 32(%rax),%rax
3450 movdqa %xmm2,%xmm1
3451
3452 jmp .Loop_key256
3453
3454 .Ldone_key256:
3455 movl %esi,16(%rax)
3456 xorl %eax,%eax
3457 jmp .Lenc_key_ret
3458
3459 .align 16
3088 .Lbad_keybits: 3460 .Lbad_keybits:
3089 movq $-2,%rax 3461 movq $-2,%rax
3090 .Lenc_key_ret: 3462 .Lenc_key_ret:
3463 pxor %xmm0,%xmm0
3464 pxor %xmm1,%xmm1
3465 pxor %xmm2,%xmm2
3466 pxor %xmm3,%xmm3
3467 pxor %xmm4,%xmm4
3468 pxor %xmm5,%xmm5
3091 addq $8,%rsp 3469 addq $8,%rsp
3092 .byte 0xf3,0xc3 3470 .byte 0xf3,0xc3
3093 .LSEH_end_set_encrypt_key: 3471 .LSEH_end_set_encrypt_key:
3094 3472
3095 .align 16 3473 .align 16
3096 .Lkey_expansion_128: 3474 .Lkey_expansion_128:
3097 movups %xmm0,(%rax) 3475 movups %xmm0,(%rax)
3098 leaq 16(%rax),%rax 3476 leaq 16(%rax),%rax
3099 .Lkey_expansion_128_cold: 3477 .Lkey_expansion_128_cold:
3100 shufps $16,%xmm0,%xmm4 3478 shufps $16,%xmm0,%xmm4
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after
3166 .Lbswap_mask: 3544 .Lbswap_mask:
3167 .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 3545 .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
3168 .Lincrement32: 3546 .Lincrement32:
3169 .long 6,6,6,0 3547 .long 6,6,6,0
3170 .Lincrement64: 3548 .Lincrement64:
3171 .long 1,0,0,0 3549 .long 1,0,0,0
3172 .Lxts_magic: 3550 .Lxts_magic:
3173 .long 0x87,0,1,0 3551 .long 0x87,0,1,0
3174 .Lincrement1: 3552 .Lincrement1:
3175 .byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 3553 .byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
3554 .Lkey_rotate:
3555 .long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
3556 .Lkey_rotate192:
3557 .long 0x04070605,0x04070605,0x04070605,0x04070605
3558 .Lkey_rcon1:
3559 .long 1,1,1,1
3560 .Lkey_rcon1b:
3561 .long 0x1b,0x1b,0x1b,0x1b
3176 3562
3177 .byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32 ,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101 ,110,115,115,108,46,111,114,103,62,0 3563 .byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32 ,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101 ,110,115,115,108,46,111,114,103,62,0
3178 .align 64 3564 .align 64
3179 #endif 3565 #endif
OLDNEW
« no previous file with comments | « third_party/boringssl/linux-x86/crypto/aes/aesni-x86.S ('k') | third_party/boringssl/mac-x86/crypto/aes/aesni-x86.S » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698