OLD | NEW |
1 #if defined(__x86_64__) | 1 #if defined(__x86_64__) |
2 .text | 2 .text |
3 | 3 |
4 .globl _aesni_encrypt | 4 .globl _aesni_encrypt |
5 .private_extern _aesni_encrypt | 5 .private_extern _aesni_encrypt |
6 | 6 |
7 .p2align 4 | 7 .p2align 4 |
8 _aesni_encrypt: | 8 _aesni_encrypt: |
9 movups (%rdi),%xmm2 | 9 movups (%rdi),%xmm2 |
10 movl 240(%rdx),%eax | 10 movl 240(%rdx),%eax |
11 movups (%rdx),%xmm0 | 11 movups (%rdx),%xmm0 |
12 movups 16(%rdx),%xmm1 | 12 movups 16(%rdx),%xmm1 |
13 leaq 32(%rdx),%rdx | 13 leaq 32(%rdx),%rdx |
14 xorps %xmm0,%xmm2 | 14 xorps %xmm0,%xmm2 |
15 L$oop_enc1_1: | 15 L$oop_enc1_1: |
16 .byte 102,15,56,220,209 | 16 .byte 102,15,56,220,209 |
17 decl %eax | 17 decl %eax |
18 movups (%rdx),%xmm1 | 18 movups (%rdx),%xmm1 |
19 leaq 16(%rdx),%rdx | 19 leaq 16(%rdx),%rdx |
20 jnz L$oop_enc1_1 | 20 jnz L$oop_enc1_1 |
21 .byte 102,15,56,221,209 | 21 .byte 102,15,56,221,209 |
| 22 pxor %xmm0,%xmm0 |
| 23 pxor %xmm1,%xmm1 |
22 movups %xmm2,(%rsi) | 24 movups %xmm2,(%rsi) |
| 25 pxor %xmm2,%xmm2 |
23 .byte 0xf3,0xc3 | 26 .byte 0xf3,0xc3 |
24 | 27 |
25 | 28 |
26 .globl _aesni_decrypt | 29 .globl _aesni_decrypt |
27 .private_extern _aesni_decrypt | 30 .private_extern _aesni_decrypt |
28 | 31 |
29 .p2align 4 | 32 .p2align 4 |
30 _aesni_decrypt: | 33 _aesni_decrypt: |
31 movups (%rdi),%xmm2 | 34 movups (%rdi),%xmm2 |
32 movl 240(%rdx),%eax | 35 movl 240(%rdx),%eax |
33 movups (%rdx),%xmm0 | 36 movups (%rdx),%xmm0 |
34 movups 16(%rdx),%xmm1 | 37 movups 16(%rdx),%xmm1 |
35 leaq 32(%rdx),%rdx | 38 leaq 32(%rdx),%rdx |
36 xorps %xmm0,%xmm2 | 39 xorps %xmm0,%xmm2 |
37 L$oop_dec1_2: | 40 L$oop_dec1_2: |
38 .byte 102,15,56,222,209 | 41 .byte 102,15,56,222,209 |
39 decl %eax | 42 decl %eax |
40 movups (%rdx),%xmm1 | 43 movups (%rdx),%xmm1 |
41 leaq 16(%rdx),%rdx | 44 leaq 16(%rdx),%rdx |
42 jnz L$oop_dec1_2 | 45 jnz L$oop_dec1_2 |
43 .byte 102,15,56,223,209 | 46 .byte 102,15,56,223,209 |
| 47 pxor %xmm0,%xmm0 |
| 48 pxor %xmm1,%xmm1 |
44 movups %xmm2,(%rsi) | 49 movups %xmm2,(%rsi) |
| 50 pxor %xmm2,%xmm2 |
45 .byte 0xf3,0xc3 | 51 .byte 0xf3,0xc3 |
46 | 52 |
47 | 53 |
48 .p2align 4 | 54 .p2align 4 |
49 _aesni_encrypt2: | 55 _aesni_encrypt2: |
50 movups (%rcx),%xmm0 | 56 movups (%rcx),%xmm0 |
51 shll $4,%eax | 57 shll $4,%eax |
52 movups 16(%rcx),%xmm1 | 58 movups 16(%rcx),%xmm1 |
53 xorps %xmm0,%xmm2 | 59 xorps %xmm0,%xmm2 |
54 xorps %xmm0,%xmm3 | 60 xorps %xmm0,%xmm3 |
(... 205 matching lines skipped ...) |
260 pxor %xmm0,%xmm3 | 266 pxor %xmm0,%xmm3 |
261 pxor %xmm0,%xmm4 | 267 pxor %xmm0,%xmm4 |
262 .byte 102,15,56,220,209 | 268 .byte 102,15,56,220,209 |
263 leaq 32(%rcx,%rax,1),%rcx | 269 leaq 32(%rcx,%rax,1),%rcx |
264 negq %rax | 270 negq %rax |
265 .byte 102,15,56,220,217 | 271 .byte 102,15,56,220,217 |
266 pxor %xmm0,%xmm5 | 272 pxor %xmm0,%xmm5 |
267 pxor %xmm0,%xmm6 | 273 pxor %xmm0,%xmm6 |
268 .byte 102,15,56,220,225 | 274 .byte 102,15,56,220,225 |
269 pxor %xmm0,%xmm7 | 275 pxor %xmm0,%xmm7 |
| 276 movups (%rcx,%rax,1),%xmm0 |
270 addq $16,%rax | 277 addq $16,%rax |
271 .byte 102,15,56,220,233 | |
272 .byte 102,15,56,220,241 | |
273 .byte 102,15,56,220,249 | |
274 movups -16(%rcx,%rax,1),%xmm0 | |
275 jmp L$enc_loop6_enter | 278 jmp L$enc_loop6_enter |
276 .p2align 4 | 279 .p2align 4 |
277 L$enc_loop6: | 280 L$enc_loop6: |
278 .byte 102,15,56,220,209 | 281 .byte 102,15,56,220,209 |
279 .byte 102,15,56,220,217 | 282 .byte 102,15,56,220,217 |
280 .byte 102,15,56,220,225 | 283 .byte 102,15,56,220,225 |
| 284 L$enc_loop6_enter: |
281 .byte 102,15,56,220,233 | 285 .byte 102,15,56,220,233 |
282 .byte 102,15,56,220,241 | 286 .byte 102,15,56,220,241 |
283 .byte 102,15,56,220,249 | 287 .byte 102,15,56,220,249 |
284 L$enc_loop6_enter: | |
285 movups (%rcx,%rax,1),%xmm1 | 288 movups (%rcx,%rax,1),%xmm1 |
286 addq $32,%rax | 289 addq $32,%rax |
287 .byte 102,15,56,220,208 | 290 .byte 102,15,56,220,208 |
288 .byte 102,15,56,220,216 | 291 .byte 102,15,56,220,216 |
289 .byte 102,15,56,220,224 | 292 .byte 102,15,56,220,224 |
290 .byte 102,15,56,220,232 | 293 .byte 102,15,56,220,232 |
291 .byte 102,15,56,220,240 | 294 .byte 102,15,56,220,240 |
292 .byte 102,15,56,220,248 | 295 .byte 102,15,56,220,248 |
293 movups -16(%rcx,%rax,1),%xmm0 | 296 movups -16(%rcx,%rax,1),%xmm0 |
294 jnz L$enc_loop6 | 297 jnz L$enc_loop6 |
(... 22 matching lines skipped ...) |
317 pxor %xmm0,%xmm3 | 320 pxor %xmm0,%xmm3 |
318 pxor %xmm0,%xmm4 | 321 pxor %xmm0,%xmm4 |
319 .byte 102,15,56,222,209 | 322 .byte 102,15,56,222,209 |
320 leaq 32(%rcx,%rax,1),%rcx | 323 leaq 32(%rcx,%rax,1),%rcx |
321 negq %rax | 324 negq %rax |
322 .byte 102,15,56,222,217 | 325 .byte 102,15,56,222,217 |
323 pxor %xmm0,%xmm5 | 326 pxor %xmm0,%xmm5 |
324 pxor %xmm0,%xmm6 | 327 pxor %xmm0,%xmm6 |
325 .byte 102,15,56,222,225 | 328 .byte 102,15,56,222,225 |
326 pxor %xmm0,%xmm7 | 329 pxor %xmm0,%xmm7 |
| 330 movups (%rcx,%rax,1),%xmm0 |
327 addq $16,%rax | 331 addq $16,%rax |
328 .byte 102,15,56,222,233 | |
329 .byte 102,15,56,222,241 | |
330 .byte 102,15,56,222,249 | |
331 movups -16(%rcx,%rax,1),%xmm0 | |
332 jmp L$dec_loop6_enter | 332 jmp L$dec_loop6_enter |
333 .p2align 4 | 333 .p2align 4 |
334 L$dec_loop6: | 334 L$dec_loop6: |
335 .byte 102,15,56,222,209 | 335 .byte 102,15,56,222,209 |
336 .byte 102,15,56,222,217 | 336 .byte 102,15,56,222,217 |
337 .byte 102,15,56,222,225 | 337 .byte 102,15,56,222,225 |
| 338 L$dec_loop6_enter: |
338 .byte 102,15,56,222,233 | 339 .byte 102,15,56,222,233 |
339 .byte 102,15,56,222,241 | 340 .byte 102,15,56,222,241 |
340 .byte 102,15,56,222,249 | 341 .byte 102,15,56,222,249 |
341 L$dec_loop6_enter: | |
342 movups (%rcx,%rax,1),%xmm1 | 342 movups (%rcx,%rax,1),%xmm1 |
343 addq $32,%rax | 343 addq $32,%rax |
344 .byte 102,15,56,222,208 | 344 .byte 102,15,56,222,208 |
345 .byte 102,15,56,222,216 | 345 .byte 102,15,56,222,216 |
346 .byte 102,15,56,222,224 | 346 .byte 102,15,56,222,224 |
347 .byte 102,15,56,222,232 | 347 .byte 102,15,56,222,232 |
348 .byte 102,15,56,222,240 | 348 .byte 102,15,56,222,240 |
349 .byte 102,15,56,222,248 | 349 .byte 102,15,56,222,248 |
350 movups -16(%rcx,%rax,1),%xmm0 | 350 movups -16(%rcx,%rax,1),%xmm0 |
351 jnz L$dec_loop6 | 351 jnz L$dec_loop6 |
(... 19 matching lines skipped ...) |
371 shll $4,%eax | 371 shll $4,%eax |
372 movups 16(%rcx),%xmm1 | 372 movups 16(%rcx),%xmm1 |
373 xorps %xmm0,%xmm2 | 373 xorps %xmm0,%xmm2 |
374 xorps %xmm0,%xmm3 | 374 xorps %xmm0,%xmm3 |
375 pxor %xmm0,%xmm4 | 375 pxor %xmm0,%xmm4 |
376 pxor %xmm0,%xmm5 | 376 pxor %xmm0,%xmm5 |
377 pxor %xmm0,%xmm6 | 377 pxor %xmm0,%xmm6 |
378 leaq 32(%rcx,%rax,1),%rcx | 378 leaq 32(%rcx,%rax,1),%rcx |
379 negq %rax | 379 negq %rax |
380 .byte 102,15,56,220,209 | 380 .byte 102,15,56,220,209 |
| 381 pxor %xmm0,%xmm7 |
| 382 pxor %xmm0,%xmm8 |
| 383 .byte 102,15,56,220,217 |
| 384 pxor %xmm0,%xmm9 |
| 385 movups (%rcx,%rax,1),%xmm0 |
381 addq $16,%rax | 386 addq $16,%rax |
382 pxor %xmm0,%xmm7 | 387 jmp L$enc_loop8_inner |
383 .byte 102,15,56,220,217 | |
384 pxor %xmm0,%xmm8 | |
385 pxor %xmm0,%xmm9 | |
386 .byte 102,15,56,220,225 | |
387 .byte 102,15,56,220,233 | |
388 .byte 102,15,56,220,241 | |
389 .byte 102,15,56,220,249 | |
390 .byte 102,68,15,56,220,193 | |
391 .byte 102,68,15,56,220,201 | |
392 movups -16(%rcx,%rax,1),%xmm0 | |
393 jmp L$enc_loop8_enter | |
394 .p2align 4 | 388 .p2align 4 |
395 L$enc_loop8: | 389 L$enc_loop8: |
396 .byte 102,15,56,220,209 | 390 .byte 102,15,56,220,209 |
397 .byte 102,15,56,220,217 | 391 .byte 102,15,56,220,217 |
| 392 L$enc_loop8_inner: |
398 .byte 102,15,56,220,225 | 393 .byte 102,15,56,220,225 |
399 .byte 102,15,56,220,233 | 394 .byte 102,15,56,220,233 |
400 .byte 102,15,56,220,241 | 395 .byte 102,15,56,220,241 |
401 .byte 102,15,56,220,249 | 396 .byte 102,15,56,220,249 |
402 .byte 102,68,15,56,220,193 | 397 .byte 102,68,15,56,220,193 |
403 .byte 102,68,15,56,220,201 | 398 .byte 102,68,15,56,220,201 |
404 L$enc_loop8_enter: | 399 L$enc_loop8_enter: |
405 movups (%rcx,%rax,1),%xmm1 | 400 movups (%rcx,%rax,1),%xmm1 |
406 addq $32,%rax | 401 addq $32,%rax |
407 .byte 102,15,56,220,208 | 402 .byte 102,15,56,220,208 |
(... 32 matching lines skipped ...) |
440 shll $4,%eax | 435 shll $4,%eax |
441 movups 16(%rcx),%xmm1 | 436 movups 16(%rcx),%xmm1 |
442 xorps %xmm0,%xmm2 | 437 xorps %xmm0,%xmm2 |
443 xorps %xmm0,%xmm3 | 438 xorps %xmm0,%xmm3 |
444 pxor %xmm0,%xmm4 | 439 pxor %xmm0,%xmm4 |
445 pxor %xmm0,%xmm5 | 440 pxor %xmm0,%xmm5 |
446 pxor %xmm0,%xmm6 | 441 pxor %xmm0,%xmm6 |
447 leaq 32(%rcx,%rax,1),%rcx | 442 leaq 32(%rcx,%rax,1),%rcx |
448 negq %rax | 443 negq %rax |
449 .byte 102,15,56,222,209 | 444 .byte 102,15,56,222,209 |
| 445 pxor %xmm0,%xmm7 |
| 446 pxor %xmm0,%xmm8 |
| 447 .byte 102,15,56,222,217 |
| 448 pxor %xmm0,%xmm9 |
| 449 movups (%rcx,%rax,1),%xmm0 |
450 addq $16,%rax | 450 addq $16,%rax |
451 pxor %xmm0,%xmm7 | 451 jmp L$dec_loop8_inner |
452 .byte 102,15,56,222,217 | |
453 pxor %xmm0,%xmm8 | |
454 pxor %xmm0,%xmm9 | |
455 .byte 102,15,56,222,225 | |
456 .byte 102,15,56,222,233 | |
457 .byte 102,15,56,222,241 | |
458 .byte 102,15,56,222,249 | |
459 .byte 102,68,15,56,222,193 | |
460 .byte 102,68,15,56,222,201 | |
461 movups -16(%rcx,%rax,1),%xmm0 | |
462 jmp L$dec_loop8_enter | |
463 .p2align 4 | 452 .p2align 4 |
464 L$dec_loop8: | 453 L$dec_loop8: |
465 .byte 102,15,56,222,209 | 454 .byte 102,15,56,222,209 |
466 .byte 102,15,56,222,217 | 455 .byte 102,15,56,222,217 |
| 456 L$dec_loop8_inner: |
467 .byte 102,15,56,222,225 | 457 .byte 102,15,56,222,225 |
468 .byte 102,15,56,222,233 | 458 .byte 102,15,56,222,233 |
469 .byte 102,15,56,222,241 | 459 .byte 102,15,56,222,241 |
470 .byte 102,15,56,222,249 | 460 .byte 102,15,56,222,249 |
471 .byte 102,68,15,56,222,193 | 461 .byte 102,68,15,56,222,193 |
472 .byte 102,68,15,56,222,201 | 462 .byte 102,68,15,56,222,201 |
473 L$dec_loop8_enter: | 463 L$dec_loop8_enter: |
474 movups (%rcx,%rax,1),%xmm1 | 464 movups (%rcx,%rax,1),%xmm1 |
475 addq $32,%rax | 465 addq $32,%rax |
476 .byte 102,15,56,222,208 | 466 .byte 102,15,56,222,208 |
(... 107 matching lines skipped ...) |
584 cmpq $64,%rdx | 574 cmpq $64,%rdx |
585 jb L$ecb_enc_three | 575 jb L$ecb_enc_three |
586 movups 48(%rdi),%xmm5 | 576 movups 48(%rdi),%xmm5 |
587 je L$ecb_enc_four | 577 je L$ecb_enc_four |
588 movups 64(%rdi),%xmm6 | 578 movups 64(%rdi),%xmm6 |
589 cmpq $96,%rdx | 579 cmpq $96,%rdx |
590 jb L$ecb_enc_five | 580 jb L$ecb_enc_five |
591 movups 80(%rdi),%xmm7 | 581 movups 80(%rdi),%xmm7 |
592 je L$ecb_enc_six | 582 je L$ecb_enc_six |
593 movdqu 96(%rdi),%xmm8 | 583 movdqu 96(%rdi),%xmm8 |
| 584 xorps %xmm9,%xmm9 |
594 call _aesni_encrypt8 | 585 call _aesni_encrypt8 |
595 movups %xmm2,(%rsi) | 586 movups %xmm2,(%rsi) |
596 movups %xmm3,16(%rsi) | 587 movups %xmm3,16(%rsi) |
597 movups %xmm4,32(%rsi) | 588 movups %xmm4,32(%rsi) |
598 movups %xmm5,48(%rsi) | 589 movups %xmm5,48(%rsi) |
599 movups %xmm6,64(%rsi) | 590 movups %xmm6,64(%rsi) |
600 movups %xmm7,80(%rsi) | 591 movups %xmm7,80(%rsi) |
601 movups %xmm8,96(%rsi) | 592 movups %xmm8,96(%rsi) |
602 jmp L$ecb_ret | 593 jmp L$ecb_ret |
603 .p2align 4 | 594 .p2align 4 |
(... 93 matching lines skipped ...) |
697 leaq 128(%rdi),%rdi | 688 leaq 128(%rdi),%rdi |
698 L$ecb_dec_loop8_enter: | 689 L$ecb_dec_loop8_enter: |
699 | 690 |
700 call _aesni_decrypt8 | 691 call _aesni_decrypt8 |
701 | 692 |
702 movups (%r11),%xmm0 | 693 movups (%r11),%xmm0 |
703 subq $128,%rdx | 694 subq $128,%rdx |
704 jnc L$ecb_dec_loop8 | 695 jnc L$ecb_dec_loop8 |
705 | 696 |
706 movups %xmm2,(%rsi) | 697 movups %xmm2,(%rsi) |
| 698 pxor %xmm2,%xmm2 |
707 movq %r11,%rcx | 699 movq %r11,%rcx |
708 movups %xmm3,16(%rsi) | 700 movups %xmm3,16(%rsi) |
| 701 pxor %xmm3,%xmm3 |
709 movl %r10d,%eax | 702 movl %r10d,%eax |
710 movups %xmm4,32(%rsi) | 703 movups %xmm4,32(%rsi) |
| 704 pxor %xmm4,%xmm4 |
711 movups %xmm5,48(%rsi) | 705 movups %xmm5,48(%rsi) |
| 706 pxor %xmm5,%xmm5 |
712 movups %xmm6,64(%rsi) | 707 movups %xmm6,64(%rsi) |
| 708 pxor %xmm6,%xmm6 |
713 movups %xmm7,80(%rsi) | 709 movups %xmm7,80(%rsi) |
| 710 pxor %xmm7,%xmm7 |
714 movups %xmm8,96(%rsi) | 711 movups %xmm8,96(%rsi) |
| 712 pxor %xmm8,%xmm8 |
715 movups %xmm9,112(%rsi) | 713 movups %xmm9,112(%rsi) |
| 714 pxor %xmm9,%xmm9 |
716 leaq 128(%rsi),%rsi | 715 leaq 128(%rsi),%rsi |
717 addq $128,%rdx | 716 addq $128,%rdx |
718 jz L$ecb_ret | 717 jz L$ecb_ret |
719 | 718 |
720 L$ecb_dec_tail: | 719 L$ecb_dec_tail: |
721 movups (%rdi),%xmm2 | 720 movups (%rdi),%xmm2 |
722 cmpq $32,%rdx | 721 cmpq $32,%rdx |
723 jb L$ecb_dec_one | 722 jb L$ecb_dec_one |
724 movups 16(%rdi),%xmm3 | 723 movups 16(%rdi),%xmm3 |
725 je L$ecb_dec_two | 724 je L$ecb_dec_two |
726 movups 32(%rdi),%xmm4 | 725 movups 32(%rdi),%xmm4 |
727 cmpq $64,%rdx | 726 cmpq $64,%rdx |
728 jb L$ecb_dec_three | 727 jb L$ecb_dec_three |
729 movups 48(%rdi),%xmm5 | 728 movups 48(%rdi),%xmm5 |
730 je L$ecb_dec_four | 729 je L$ecb_dec_four |
731 movups 64(%rdi),%xmm6 | 730 movups 64(%rdi),%xmm6 |
732 cmpq $96,%rdx | 731 cmpq $96,%rdx |
733 jb L$ecb_dec_five | 732 jb L$ecb_dec_five |
734 movups 80(%rdi),%xmm7 | 733 movups 80(%rdi),%xmm7 |
735 je L$ecb_dec_six | 734 je L$ecb_dec_six |
736 movups 96(%rdi),%xmm8 | 735 movups 96(%rdi),%xmm8 |
737 movups (%rcx),%xmm0 | 736 movups (%rcx),%xmm0 |
| 737 xorps %xmm9,%xmm9 |
738 call _aesni_decrypt8 | 738 call _aesni_decrypt8 |
739 movups %xmm2,(%rsi) | 739 movups %xmm2,(%rsi) |
| 740 pxor %xmm2,%xmm2 |
740 movups %xmm3,16(%rsi) | 741 movups %xmm3,16(%rsi) |
| 742 pxor %xmm3,%xmm3 |
741 movups %xmm4,32(%rsi) | 743 movups %xmm4,32(%rsi) |
| 744 pxor %xmm4,%xmm4 |
742 movups %xmm5,48(%rsi) | 745 movups %xmm5,48(%rsi) |
| 746 pxor %xmm5,%xmm5 |
743 movups %xmm6,64(%rsi) | 747 movups %xmm6,64(%rsi) |
| 748 pxor %xmm6,%xmm6 |
744 movups %xmm7,80(%rsi) | 749 movups %xmm7,80(%rsi) |
| 750 pxor %xmm7,%xmm7 |
745 movups %xmm8,96(%rsi) | 751 movups %xmm8,96(%rsi) |
| 752 pxor %xmm8,%xmm8 |
| 753 pxor %xmm9,%xmm9 |
746 jmp L$ecb_ret | 754 jmp L$ecb_ret |
747 .p2align 4 | 755 .p2align 4 |
748 L$ecb_dec_one: | 756 L$ecb_dec_one: |
749 movups (%rcx),%xmm0 | 757 movups (%rcx),%xmm0 |
750 movups 16(%rcx),%xmm1 | 758 movups 16(%rcx),%xmm1 |
751 leaq 32(%rcx),%rcx | 759 leaq 32(%rcx),%rcx |
752 xorps %xmm0,%xmm2 | 760 xorps %xmm0,%xmm2 |
753 L$oop_dec1_4: | 761 L$oop_dec1_4: |
754 .byte 102,15,56,222,209 | 762 .byte 102,15,56,222,209 |
755 decl %eax | 763 decl %eax |
756 movups (%rcx),%xmm1 | 764 movups (%rcx),%xmm1 |
757 leaq 16(%rcx),%rcx | 765 leaq 16(%rcx),%rcx |
758 jnz L$oop_dec1_4 | 766 jnz L$oop_dec1_4 |
759 .byte 102,15,56,223,209 | 767 .byte 102,15,56,223,209 |
760 movups %xmm2,(%rsi) | 768 movups %xmm2,(%rsi) |
| 769 pxor %xmm2,%xmm2 |
761 jmp L$ecb_ret | 770 jmp L$ecb_ret |
762 .p2align 4 | 771 .p2align 4 |
763 L$ecb_dec_two: | 772 L$ecb_dec_two: |
764 call _aesni_decrypt2 | 773 call _aesni_decrypt2 |
765 movups %xmm2,(%rsi) | 774 movups %xmm2,(%rsi) |
| 775 pxor %xmm2,%xmm2 |
766 movups %xmm3,16(%rsi) | 776 movups %xmm3,16(%rsi) |
| 777 pxor %xmm3,%xmm3 |
767 jmp L$ecb_ret | 778 jmp L$ecb_ret |
768 .p2align 4 | 779 .p2align 4 |
769 L$ecb_dec_three: | 780 L$ecb_dec_three: |
770 call _aesni_decrypt3 | 781 call _aesni_decrypt3 |
771 movups %xmm2,(%rsi) | 782 movups %xmm2,(%rsi) |
| 783 pxor %xmm2,%xmm2 |
772 movups %xmm3,16(%rsi) | 784 movups %xmm3,16(%rsi) |
| 785 pxor %xmm3,%xmm3 |
773 movups %xmm4,32(%rsi) | 786 movups %xmm4,32(%rsi) |
| 787 pxor %xmm4,%xmm4 |
774 jmp L$ecb_ret | 788 jmp L$ecb_ret |
775 .p2align 4 | 789 .p2align 4 |
776 L$ecb_dec_four: | 790 L$ecb_dec_four: |
777 call _aesni_decrypt4 | 791 call _aesni_decrypt4 |
778 movups %xmm2,(%rsi) | 792 movups %xmm2,(%rsi) |
| 793 pxor %xmm2,%xmm2 |
779 movups %xmm3,16(%rsi) | 794 movups %xmm3,16(%rsi) |
| 795 pxor %xmm3,%xmm3 |
780 movups %xmm4,32(%rsi) | 796 movups %xmm4,32(%rsi) |
| 797 pxor %xmm4,%xmm4 |
781 movups %xmm5,48(%rsi) | 798 movups %xmm5,48(%rsi) |
| 799 pxor %xmm5,%xmm5 |
782 jmp L$ecb_ret | 800 jmp L$ecb_ret |
783 .p2align 4 | 801 .p2align 4 |
784 L$ecb_dec_five: | 802 L$ecb_dec_five: |
785 xorps %xmm7,%xmm7 | 803 xorps %xmm7,%xmm7 |
786 call _aesni_decrypt6 | 804 call _aesni_decrypt6 |
787 movups %xmm2,(%rsi) | 805 movups %xmm2,(%rsi) |
| 806 pxor %xmm2,%xmm2 |
788 movups %xmm3,16(%rsi) | 807 movups %xmm3,16(%rsi) |
| 808 pxor %xmm3,%xmm3 |
789 movups %xmm4,32(%rsi) | 809 movups %xmm4,32(%rsi) |
| 810 pxor %xmm4,%xmm4 |
790 movups %xmm5,48(%rsi) | 811 movups %xmm5,48(%rsi) |
| 812 pxor %xmm5,%xmm5 |
791 movups %xmm6,64(%rsi) | 813 movups %xmm6,64(%rsi) |
| 814 pxor %xmm6,%xmm6 |
| 815 pxor %xmm7,%xmm7 |
792 jmp L$ecb_ret | 816 jmp L$ecb_ret |
793 .p2align 4 | 817 .p2align 4 |
794 L$ecb_dec_six: | 818 L$ecb_dec_six: |
795 call _aesni_decrypt6 | 819 call _aesni_decrypt6 |
796 movups %xmm2,(%rsi) | 820 movups %xmm2,(%rsi) |
| 821 pxor %xmm2,%xmm2 |
797 movups %xmm3,16(%rsi) | 822 movups %xmm3,16(%rsi) |
| 823 pxor %xmm3,%xmm3 |
798 movups %xmm4,32(%rsi) | 824 movups %xmm4,32(%rsi) |
| 825 pxor %xmm4,%xmm4 |
799 movups %xmm5,48(%rsi) | 826 movups %xmm5,48(%rsi) |
| 827 pxor %xmm5,%xmm5 |
800 movups %xmm6,64(%rsi) | 828 movups %xmm6,64(%rsi) |
| 829 pxor %xmm6,%xmm6 |
801 movups %xmm7,80(%rsi) | 830 movups %xmm7,80(%rsi) |
| 831 pxor %xmm7,%xmm7 |
802 | 832 |
803 L$ecb_ret: | 833 L$ecb_ret: |
| 834 xorps %xmm0,%xmm0 |
| 835 pxor %xmm1,%xmm1 |
804 .byte 0xf3,0xc3 | 836 .byte 0xf3,0xc3 |
805 | 837 |
806 .globl _aesni_ccm64_encrypt_blocks | 838 .globl _aesni_ccm64_encrypt_blocks |
807 .private_extern _aesni_ccm64_encrypt_blocks | 839 .private_extern _aesni_ccm64_encrypt_blocks |
808 | 840 |
809 .p2align 4 | 841 .p2align 4 |
810 _aesni_ccm64_encrypt_blocks: | 842 _aesni_ccm64_encrypt_blocks: |
811 movl 240(%rcx),%eax | 843 movl 240(%rcx),%eax |
812 movdqu (%r8),%xmm6 | 844 movdqu (%r8),%xmm6 |
813 movdqa L$increment64(%rip),%xmm9 | 845 movdqa L$increment64(%rip),%xmm9 |
(... 37 matching lines skipped ...) |
851 .byte 102,15,56,221,216 | 883 .byte 102,15,56,221,216 |
852 | 884 |
853 leaq 16(%rdi),%rdi | 885 leaq 16(%rdi),%rdi |
854 xorps %xmm2,%xmm8 | 886 xorps %xmm2,%xmm8 |
855 movdqa %xmm6,%xmm2 | 887 movdqa %xmm6,%xmm2 |
856 movups %xmm8,(%rsi) | 888 movups %xmm8,(%rsi) |
857 .byte 102,15,56,0,215 | 889 .byte 102,15,56,0,215 |
858 leaq 16(%rsi),%rsi | 890 leaq 16(%rsi),%rsi |
859 jnz L$ccm64_enc_outer | 891 jnz L$ccm64_enc_outer |
860 | 892 |
| 893 pxor %xmm0,%xmm0 |
| 894 pxor %xmm1,%xmm1 |
| 895 pxor %xmm2,%xmm2 |
861 movups %xmm3,(%r9) | 896 movups %xmm3,(%r9) |
| 897 pxor %xmm3,%xmm3 |
| 898 pxor %xmm8,%xmm8 |
| 899 pxor %xmm6,%xmm6 |
862 .byte 0xf3,0xc3 | 900 .byte 0xf3,0xc3 |
863 | 901 |
864 .globl _aesni_ccm64_decrypt_blocks | 902 .globl _aesni_ccm64_decrypt_blocks |
865 .private_extern _aesni_ccm64_decrypt_blocks | 903 .private_extern _aesni_ccm64_decrypt_blocks |
866 | 904 |
867 .p2align 4 | 905 .p2align 4 |
868 _aesni_ccm64_decrypt_blocks: | 906 _aesni_ccm64_decrypt_blocks: |
869 movl 240(%rcx),%eax | 907 movl 240(%rcx),%eax |
870 movups (%r8),%xmm6 | 908 movups (%r8),%xmm6 |
871 movdqu (%r9),%xmm3 | 909 movdqu (%r9),%xmm3 |
(... 71 matching lines skipped ...) |
943 xorps %xmm0,%xmm8 | 981 xorps %xmm0,%xmm8 |
944 leaq 32(%r11),%r11 | 982 leaq 32(%r11),%r11 |
945 xorps %xmm8,%xmm3 | 983 xorps %xmm8,%xmm3 |
946 L$oop_enc1_6: | 984 L$oop_enc1_6: |
947 .byte 102,15,56,220,217 | 985 .byte 102,15,56,220,217 |
948 decl %eax | 986 decl %eax |
949 movups (%r11),%xmm1 | 987 movups (%r11),%xmm1 |
950 leaq 16(%r11),%r11 | 988 leaq 16(%r11),%r11 |
951 jnz L$oop_enc1_6 | 989 jnz L$oop_enc1_6 |
952 .byte 102,15,56,221,217 | 990 .byte 102,15,56,221,217 |
| 991 pxor %xmm0,%xmm0 |
| 992 pxor %xmm1,%xmm1 |
| 993 pxor %xmm2,%xmm2 |
953 movups %xmm3,(%r9) | 994 movups %xmm3,(%r9) |
| 995 pxor %xmm3,%xmm3 |
| 996 pxor %xmm8,%xmm8 |
| 997 pxor %xmm6,%xmm6 |
954 .byte 0xf3,0xc3 | 998 .byte 0xf3,0xc3 |
955 | 999 |
956 .globl _aesni_ctr32_encrypt_blocks | 1000 .globl _aesni_ctr32_encrypt_blocks |
957 .private_extern _aesni_ctr32_encrypt_blocks | 1001 .private_extern _aesni_ctr32_encrypt_blocks |
958 | 1002 |
959 .p2align 4 | 1003 .p2align 4 |
960 _aesni_ctr32_encrypt_blocks: | 1004 _aesni_ctr32_encrypt_blocks: |
| 1005 cmpq $1,%rdx |
| 1006 jne L$ctr32_bulk |
| 1007 |
| 1008 |
| 1009 |
| 1010 movups (%r8),%xmm2 |
| 1011 movups (%rdi),%xmm3 |
| 1012 movl 240(%rcx),%edx |
| 1013 movups (%rcx),%xmm0 |
| 1014 movups 16(%rcx),%xmm1 |
| 1015 leaq 32(%rcx),%rcx |
| 1016 xorps %xmm0,%xmm2 |
| 1017 L$oop_enc1_7: |
| 1018 .byte 102,15,56,220,209 |
| 1019 decl %edx |
| 1020 movups (%rcx),%xmm1 |
| 1021 leaq 16(%rcx),%rcx |
| 1022 jnz L$oop_enc1_7 |
| 1023 .byte 102,15,56,221,209 |
| 1024 pxor %xmm0,%xmm0 |
| 1025 pxor %xmm1,%xmm1 |
| 1026 xorps %xmm3,%xmm2 |
| 1027 pxor %xmm3,%xmm3 |
| 1028 movups %xmm2,(%rsi) |
| 1029 xorps %xmm2,%xmm2 |
| 1030 jmp L$ctr32_epilogue |
| 1031 |
| 1032 .p2align 4 |
| 1033 L$ctr32_bulk: |
961 leaq (%rsp),%rax | 1034 leaq (%rsp),%rax |
962 pushq %rbp | 1035 pushq %rbp |
963 subq $128,%rsp | 1036 subq $128,%rsp |
964 andq $-16,%rsp | 1037 andq $-16,%rsp |
965 leaq -8(%rax),%rbp | 1038 leaq -8(%rax),%rbp |
966 | 1039 |
967 cmpq $1,%rdx | 1040 |
968 je L$ctr32_one_shortcut | 1041 |
969 | 1042 |
970 movdqu (%r8),%xmm2 | 1043 movdqu (%r8),%xmm2 |
971 movdqu (%rcx),%xmm0 | 1044 movdqu (%rcx),%xmm0 |
972 movl 12(%r8),%r8d | 1045 movl 12(%r8),%r8d |
973 pxor %xmm0,%xmm2 | 1046 pxor %xmm0,%xmm2 |
974 movl 12(%rcx),%r11d | 1047 movl 12(%rcx),%r11d |
975 movdqa %xmm2,0(%rsp) | 1048 movdqa %xmm2,0(%rsp) |
976 bswapl %r8d | 1049 bswapl %r8d |
977 movdqa %xmm2,%xmm3 | 1050 movdqa %xmm2,%xmm3 |
978 movdqa %xmm2,%xmm4 | 1051 movdqa %xmm2,%xmm4 |
(... 370 matching lines skipped ...) |
1349 leaq 128(%rsi),%rsi | 1422 leaq 128(%rsi),%rsi |
1350 | 1423 |
1351 subq $8,%rdx | 1424 subq $8,%rdx |
1352 jnc L$ctr32_loop8 | 1425 jnc L$ctr32_loop8 |
1353 | 1426 |
1354 addq $8,%rdx | 1427 addq $8,%rdx |
1355 jz L$ctr32_done | 1428 jz L$ctr32_done |
1356 leaq -128(%rcx),%rcx | 1429 leaq -128(%rcx),%rcx |
1357 | 1430 |
1358 L$ctr32_tail: | 1431 L$ctr32_tail: |
| 1432 |
| 1433 |
1359 leaq 16(%rcx),%rcx | 1434 leaq 16(%rcx),%rcx |
1360 cmpq $4,%rdx | 1435 cmpq $4,%rdx |
1361 jb L$ctr32_loop3 | 1436 jb L$ctr32_loop3 |
1362 je L$ctr32_loop4 | 1437 je L$ctr32_loop4 |
1363 | 1438 |
| 1439 |
1364 shll $4,%eax | 1440 shll $4,%eax |
1365 movdqa 96(%rsp),%xmm8 | 1441 movdqa 96(%rsp),%xmm8 |
1366 pxor %xmm9,%xmm9 | 1442 pxor %xmm9,%xmm9 |
1367 | 1443 |
1368 movups 16(%rcx),%xmm0 | 1444 movups 16(%rcx),%xmm0 |
1369 .byte 102,15,56,220,209 | 1445 .byte 102,15,56,220,209 |
1370 .byte 102,15,56,220,217 | 1446 .byte 102,15,56,220,217 |
1371 leaq 32-16(%rcx,%rax,1),%rcx | 1447 leaq 32-16(%rcx,%rax,1),%rcx |
1372 negq %rax | 1448 negq %rax |
1373 .byte 102,15,56,220,225 | 1449 .byte 102,15,56,220,225 |
(... 82 matching lines skipped ...) |
1456 jb L$ctr32_done | 1532 jb L$ctr32_done |
1457 | 1533 |
1458 movups 16(%rdi),%xmm11 | 1534 movups 16(%rdi),%xmm11 |
1459 xorps %xmm11,%xmm3 | 1535 xorps %xmm11,%xmm3 |
1460 movups %xmm3,16(%rsi) | 1536 movups %xmm3,16(%rsi) |
1461 je L$ctr32_done | 1537 je L$ctr32_done |
1462 | 1538 |
1463 movups 32(%rdi),%xmm12 | 1539 movups 32(%rdi),%xmm12 |
1464 xorps %xmm12,%xmm4 | 1540 xorps %xmm12,%xmm4 |
1465 movups %xmm4,32(%rsi) | 1541 movups %xmm4,32(%rsi) |
1466 jmp L$ctr32_done | |
1467 | 1542 |
1468 .p2align 4 | |
1469 L$ctr32_one_shortcut: | |
1470 movups (%r8),%xmm2 | |
1471 movups (%rdi),%xmm10 | |
1472 movl 240(%rcx),%eax | |
1473 movups (%rcx),%xmm0 | |
1474 movups 16(%rcx),%xmm1 | |
1475 leaq 32(%rcx),%rcx | |
1476 xorps %xmm0,%xmm2 | |
1477 L$oop_enc1_7: | |
1478 .byte 102,15,56,220,209 | |
1479 decl %eax | |
1480 movups (%rcx),%xmm1 | |
1481 leaq 16(%rcx),%rcx | |
1482 jnz L$oop_enc1_7 | |
1483 .byte 102,15,56,221,209 | |
1484 xorps %xmm10,%xmm2 | |
1485 movups %xmm2,(%rsi) | |
1486 jmp L$ctr32_done | |
1487 | |
1488 .p2align 4 | |
1489 L$ctr32_done: | 1543 L$ctr32_done: |
| 1544 xorps %xmm0,%xmm0 |
| 1545 xorl %r11d,%r11d |
| 1546 pxor %xmm1,%xmm1 |
| 1547 pxor %xmm2,%xmm2 |
| 1548 pxor %xmm3,%xmm3 |
| 1549 pxor %xmm4,%xmm4 |
| 1550 pxor %xmm5,%xmm5 |
| 1551 pxor %xmm6,%xmm6 |
| 1552 pxor %xmm7,%xmm7 |
| 1553 movaps %xmm0,0(%rsp) |
| 1554 pxor %xmm8,%xmm8 |
| 1555 movaps %xmm0,16(%rsp) |
| 1556 pxor %xmm9,%xmm9 |
| 1557 movaps %xmm0,32(%rsp) |
| 1558 pxor %xmm10,%xmm10 |
| 1559 movaps %xmm0,48(%rsp) |
| 1560 pxor %xmm11,%xmm11 |
| 1561 movaps %xmm0,64(%rsp) |
| 1562 pxor %xmm12,%xmm12 |
| 1563 movaps %xmm0,80(%rsp) |
| 1564 pxor %xmm13,%xmm13 |
| 1565 movaps %xmm0,96(%rsp) |
| 1566 pxor %xmm14,%xmm14 |
| 1567 movaps %xmm0,112(%rsp) |
| 1568 pxor %xmm15,%xmm15 |
1490 leaq (%rbp),%rsp | 1569 leaq (%rbp),%rsp |
1491 popq %rbp | 1570 popq %rbp |
1492 L$ctr32_epilogue: | 1571 L$ctr32_epilogue: |
1493 .byte 0xf3,0xc3 | 1572 .byte 0xf3,0xc3 |
1494 | 1573 |
1495 .globl _aesni_xts_encrypt | 1574 .globl _aesni_xts_encrypt |
1496 .private_extern _aesni_xts_encrypt | 1575 .private_extern _aesni_xts_encrypt |
1497 | 1576 |
1498 .p2align 4 | 1577 .p2align 4 |
1499 _aesni_xts_encrypt: | 1578 _aesni_xts_encrypt: |
(... 251 matching lines skipped ...) |
1751 movups %xmm7,-16(%rsi) | 1830 movups %xmm7,-16(%rsi) |
1752 subq $96,%rdx | 1831 subq $96,%rdx |
1753 jnc L$xts_enc_grandloop | 1832 jnc L$xts_enc_grandloop |
1754 | 1833 |
1755 movl $16+96,%eax | 1834 movl $16+96,%eax |
1756 subl %r10d,%eax | 1835 subl %r10d,%eax |
1757 movq %r11,%rcx | 1836 movq %r11,%rcx |
1758 shrl $4,%eax | 1837 shrl $4,%eax |
1759 | 1838 |
1760 L$xts_enc_short: | 1839 L$xts_enc_short: |
| 1840 |
1761 movl %eax,%r10d | 1841 movl %eax,%r10d |
1762 pxor %xmm0,%xmm10 | 1842 pxor %xmm0,%xmm10 |
1763 addq $96,%rdx | 1843 addq $96,%rdx |
1764 jz L$xts_enc_done | 1844 jz L$xts_enc_done |
1765 | 1845 |
1766 pxor %xmm0,%xmm11 | 1846 pxor %xmm0,%xmm11 |
1767 cmpq $32,%rdx | 1847 cmpq $32,%rdx |
1768 jb L$xts_enc_one | 1848 jb L$xts_enc_one |
1769 pxor %xmm0,%xmm12 | 1849 pxor %xmm0,%xmm12 |
1770 je L$xts_enc_two | 1850 je L$xts_enc_two |
1771 | 1851 |
1772 pxor %xmm0,%xmm13 | 1852 pxor %xmm0,%xmm13 |
1773 cmpq $64,%rdx | 1853 cmpq $64,%rdx |
1774 jb L$xts_enc_three | 1854 jb L$xts_enc_three |
1775 pxor %xmm0,%xmm14 | 1855 pxor %xmm0,%xmm14 |
1776 je L$xts_enc_four | 1856 je L$xts_enc_four |
1777 | 1857 |
1778 movdqu (%rdi),%xmm2 | 1858 movdqu (%rdi),%xmm2 |
1779 movdqu 16(%rdi),%xmm3 | 1859 movdqu 16(%rdi),%xmm3 |
1780 movdqu 32(%rdi),%xmm4 | 1860 movdqu 32(%rdi),%xmm4 |
1781 pxor %xmm10,%xmm2 | 1861 pxor %xmm10,%xmm2 |
1782 movdqu 48(%rdi),%xmm5 | 1862 movdqu 48(%rdi),%xmm5 |
1783 pxor %xmm11,%xmm3 | 1863 pxor %xmm11,%xmm3 |
1784 movdqu 64(%rdi),%xmm6 | 1864 movdqu 64(%rdi),%xmm6 |
1785 leaq 80(%rdi),%rdi | 1865 leaq 80(%rdi),%rdi |
1786 pxor %xmm12,%xmm4 | 1866 pxor %xmm12,%xmm4 |
1787 pxor %xmm13,%xmm5 | 1867 pxor %xmm13,%xmm5 |
1788 pxor %xmm14,%xmm6 | 1868 pxor %xmm14,%xmm6 |
| 1869 pxor %xmm7,%xmm7 |
1789 | 1870 |
1790 call _aesni_encrypt6 | 1871 call _aesni_encrypt6 |
1791 | 1872 |
1792 xorps %xmm10,%xmm2 | 1873 xorps %xmm10,%xmm2 |
1793 movdqa %xmm15,%xmm10 | 1874 movdqa %xmm15,%xmm10 |
1794 xorps %xmm11,%xmm3 | 1875 xorps %xmm11,%xmm3 |
1795 xorps %xmm12,%xmm4 | 1876 xorps %xmm12,%xmm4 |
1796 movdqu %xmm2,(%rsi) | 1877 movdqu %xmm2,(%rsi) |
1797 xorps %xmm13,%xmm5 | 1878 xorps %xmm13,%xmm5 |
1798 movdqu %xmm3,16(%rsi) | 1879 movdqu %xmm3,16(%rsi) |
(... 122 matching lines skipped ...) |
1921 .byte 102,15,56,220,209 | 2002 .byte 102,15,56,220,209 |
1922 decl %eax | 2003 decl %eax |
1923 movups (%rcx),%xmm1 | 2004 movups (%rcx),%xmm1 |
1924 leaq 16(%rcx),%rcx | 2005 leaq 16(%rcx),%rcx |
1925 jnz L$oop_enc1_10 | 2006 jnz L$oop_enc1_10 |
1926 .byte 102,15,56,221,209 | 2007 .byte 102,15,56,221,209 |
1927 xorps %xmm10,%xmm2 | 2008 xorps %xmm10,%xmm2 |
1928 movups %xmm2,-16(%rsi) | 2009 movups %xmm2,-16(%rsi) |
1929 | 2010 |
1930 L$xts_enc_ret: | 2011 L$xts_enc_ret: |
| 2012 xorps %xmm0,%xmm0 |
| 2013 pxor %xmm1,%xmm1 |
| 2014 pxor %xmm2,%xmm2 |
| 2015 pxor %xmm3,%xmm3 |
| 2016 pxor %xmm4,%xmm4 |
| 2017 pxor %xmm5,%xmm5 |
| 2018 pxor %xmm6,%xmm6 |
| 2019 pxor %xmm7,%xmm7 |
| 2020 movaps %xmm0,0(%rsp) |
| 2021 pxor %xmm8,%xmm8 |
| 2022 movaps %xmm0,16(%rsp) |
| 2023 pxor %xmm9,%xmm9 |
| 2024 movaps %xmm0,32(%rsp) |
| 2025 pxor %xmm10,%xmm10 |
| 2026 movaps %xmm0,48(%rsp) |
| 2027 pxor %xmm11,%xmm11 |
| 2028 movaps %xmm0,64(%rsp) |
| 2029 pxor %xmm12,%xmm12 |
| 2030 movaps %xmm0,80(%rsp) |
| 2031 pxor %xmm13,%xmm13 |
| 2032 movaps %xmm0,96(%rsp) |
| 2033 pxor %xmm14,%xmm14 |
| 2034 pxor %xmm15,%xmm15 |
1931 leaq (%rbp),%rsp | 2035 leaq (%rbp),%rsp |
1932 popq %rbp | 2036 popq %rbp |
1933 L$xts_enc_epilogue: | 2037 L$xts_enc_epilogue: |
1934 .byte 0xf3,0xc3 | 2038 .byte 0xf3,0xc3 |
1935 | 2039 |
1936 .globl _aesni_xts_decrypt | 2040 .globl _aesni_xts_decrypt |
1937 .private_extern _aesni_xts_decrypt | 2041 .private_extern _aesni_xts_decrypt |
1938 | 2042 |
1939 .p2align 4 | 2043 .p2align 4 |
1940 _aesni_xts_decrypt: | 2044 _aesni_xts_decrypt: |
(... 257 matching lines skipped ...) |
2198 movups %xmm7,-16(%rsi) | 2302 movups %xmm7,-16(%rsi) |
2199 subq $96,%rdx | 2303 subq $96,%rdx |
2200 jnc L$xts_dec_grandloop | 2304 jnc L$xts_dec_grandloop |
2201 | 2305 |
2202 movl $16+96,%eax | 2306 movl $16+96,%eax |
2203 subl %r10d,%eax | 2307 subl %r10d,%eax |
2204 movq %r11,%rcx | 2308 movq %r11,%rcx |
2205 shrl $4,%eax | 2309 shrl $4,%eax |
2206 | 2310 |
2207 L$xts_dec_short: | 2311 L$xts_dec_short: |
| 2312 |
2208 movl %eax,%r10d | 2313 movl %eax,%r10d |
2209 pxor %xmm0,%xmm10 | 2314 pxor %xmm0,%xmm10 |
2210 pxor %xmm0,%xmm11 | 2315 pxor %xmm0,%xmm11 |
2211 addq $96,%rdx | 2316 addq $96,%rdx |
2212 jz L$xts_dec_done | 2317 jz L$xts_dec_done |
2213 | 2318 |
2214 pxor %xmm0,%xmm12 | 2319 pxor %xmm0,%xmm12 |
2215 cmpq $32,%rdx | 2320 cmpq $32,%rdx |
2216 jb L$xts_dec_one | 2321 jb L$xts_dec_one |
2217 pxor %xmm0,%xmm13 | 2322 pxor %xmm0,%xmm13 |
(... 182 matching lines skipped ...) |
2400 .byte 102,15,56,222,209 | 2505 .byte 102,15,56,222,209 |
2401 decl %eax | 2506 decl %eax |
2402 movups (%rcx),%xmm1 | 2507 movups (%rcx),%xmm1 |
2403 leaq 16(%rcx),%rcx | 2508 leaq 16(%rcx),%rcx |
2404 jnz L$oop_dec1_14 | 2509 jnz L$oop_dec1_14 |
2405 .byte 102,15,56,223,209 | 2510 .byte 102,15,56,223,209 |
2406 xorps %xmm10,%xmm2 | 2511 xorps %xmm10,%xmm2 |
2407 movups %xmm2,(%rsi) | 2512 movups %xmm2,(%rsi) |
2408 | 2513 |
2409 L$xts_dec_ret: | 2514 L$xts_dec_ret: |
| 2515 xorps %xmm0,%xmm0 |
| 2516 pxor %xmm1,%xmm1 |
| 2517 pxor %xmm2,%xmm2 |
| 2518 pxor %xmm3,%xmm3 |
| 2519 pxor %xmm4,%xmm4 |
| 2520 pxor %xmm5,%xmm5 |
| 2521 pxor %xmm6,%xmm6 |
| 2522 pxor %xmm7,%xmm7 |
| 2523 movaps %xmm0,0(%rsp) |
| 2524 pxor %xmm8,%xmm8 |
| 2525 movaps %xmm0,16(%rsp) |
| 2526 pxor %xmm9,%xmm9 |
| 2527 movaps %xmm0,32(%rsp) |
| 2528 pxor %xmm10,%xmm10 |
| 2529 movaps %xmm0,48(%rsp) |
| 2530 pxor %xmm11,%xmm11 |
| 2531 movaps %xmm0,64(%rsp) |
| 2532 pxor %xmm12,%xmm12 |
| 2533 movaps %xmm0,80(%rsp) |
| 2534 pxor %xmm13,%xmm13 |
| 2535 movaps %xmm0,96(%rsp) |
| 2536 pxor %xmm14,%xmm14 |
| 2537 pxor %xmm15,%xmm15 |
2410 leaq (%rbp),%rsp | 2538 leaq (%rbp),%rsp |
2411 popq %rbp | 2539 popq %rbp |
2412 L$xts_dec_epilogue: | 2540 L$xts_dec_epilogue: |
2413 .byte 0xf3,0xc3 | 2541 .byte 0xf3,0xc3 |
2414 | 2542 |
2415 .globl _aesni_cbc_encrypt | 2543 .globl _aesni_cbc_encrypt |
2416 .private_extern _aesni_cbc_encrypt | 2544 .private_extern _aesni_cbc_encrypt |
2417 | 2545 |
2418 .p2align 4 | 2546 .p2align 4 |
2419 _aesni_cbc_encrypt: | 2547 _aesni_cbc_encrypt: |
(...skipping 29 matching lines...) Expand all Loading... |
2449 jnz L$oop_enc1_15 | 2577 jnz L$oop_enc1_15 |
2450 .byte 102,15,56,221,209 | 2578 .byte 102,15,56,221,209 |
2451 movl %r10d,%eax | 2579 movl %r10d,%eax |
2452 movq %r11,%rcx | 2580 movq %r11,%rcx |
2453 movups %xmm2,0(%rsi) | 2581 movups %xmm2,0(%rsi) |
2454 leaq 16(%rsi),%rsi | 2582 leaq 16(%rsi),%rsi |
2455 subq $16,%rdx | 2583 subq $16,%rdx |
2456 jnc L$cbc_enc_loop | 2584 jnc L$cbc_enc_loop |
2457 addq $16,%rdx | 2585 addq $16,%rdx |
2458 jnz L$cbc_enc_tail | 2586 jnz L$cbc_enc_tail |
| 2587 pxor %xmm0,%xmm0 |
| 2588 pxor %xmm1,%xmm1 |
2459 movups %xmm2,(%r8) | 2589 movups %xmm2,(%r8) |
| 2590 pxor %xmm2,%xmm2 |
| 2591 pxor %xmm3,%xmm3 |
2460 jmp L$cbc_ret | 2592 jmp L$cbc_ret |
2461 | 2593 |
2462 L$cbc_enc_tail: | 2594 L$cbc_enc_tail: |
2463 movq %rdx,%rcx | 2595 movq %rdx,%rcx |
2464 xchgq %rdi,%rsi | 2596 xchgq %rdi,%rsi |
2465 .long 0x9066A4F3 | 2597 .long 0x9066A4F3 |
2466 movl $16,%ecx | 2598 movl $16,%ecx |
2467 subq %rdx,%rcx | 2599 subq %rdx,%rcx |
2468 xorl %eax,%eax | 2600 xorl %eax,%eax |
2469 .long 0x9066AAF3 | 2601 .long 0x9066AAF3 |
2470 leaq -16(%rdi),%rdi | 2602 leaq -16(%rdi),%rdi |
2471 movl %r10d,%eax | 2603 movl %r10d,%eax |
2472 movq %rdi,%rsi | 2604 movq %rdi,%rsi |
2473 movq %r11,%rcx | 2605 movq %r11,%rcx |
2474 xorq %rdx,%rdx | 2606 xorq %rdx,%rdx |
2475 jmp L$cbc_enc_loop | 2607 jmp L$cbc_enc_loop |
2476 | 2608 |
2477 .p2align 4 | 2609 .p2align 4 |
2478 L$cbc_decrypt: | 2610 L$cbc_decrypt: |
| 2611 cmpq $16,%rdx |
| 2612 jne L$cbc_decrypt_bulk |
| 2613 |
| 2614 |
| 2615 |
| 2616 movdqu (%rdi),%xmm2 |
| 2617 movdqu (%r8),%xmm3 |
| 2618 movdqa %xmm2,%xmm4 |
| 2619 movups (%rcx),%xmm0 |
| 2620 movups 16(%rcx),%xmm1 |
| 2621 leaq 32(%rcx),%rcx |
| 2622 xorps %xmm0,%xmm2 |
| 2623 L$oop_dec1_16: |
| 2624 .byte 102,15,56,222,209 |
| 2625 decl %r10d |
| 2626 movups (%rcx),%xmm1 |
| 2627 leaq 16(%rcx),%rcx |
| 2628 jnz L$oop_dec1_16 |
| 2629 .byte 102,15,56,223,209 |
| 2630 pxor %xmm0,%xmm0 |
| 2631 pxor %xmm1,%xmm1 |
| 2632 movdqu %xmm4,(%r8) |
| 2633 xorps %xmm3,%xmm2 |
| 2634 pxor %xmm3,%xmm3 |
| 2635 movups %xmm2,(%rsi) |
| 2636 pxor %xmm2,%xmm2 |
| 2637 jmp L$cbc_ret |
| 2638 .p2align 4 |
| 2639 L$cbc_decrypt_bulk: |
2479 leaq (%rsp),%rax | 2640 leaq (%rsp),%rax |
2480 pushq %rbp | 2641 pushq %rbp |
2481 subq $16,%rsp | 2642 subq $16,%rsp |
2482 andq $-16,%rsp | 2643 andq $-16,%rsp |
2483 leaq -8(%rax),%rbp | 2644 leaq -8(%rax),%rbp |
2484 movups (%r8),%xmm10 | 2645 movups (%r8),%xmm10 |
2485 movl %r10d,%eax | 2646 movl %r10d,%eax |
2486 cmpq $80,%rdx | 2647 cmpq $80,%rdx |
2487 jbe L$cbc_dec_tail | 2648 jbe L$cbc_dec_tail |
2488 | 2649 |
(...skipping 216 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2705 movdqa %xmm1,%xmm7 | 2866 movdqa %xmm1,%xmm7 |
2706 movups %xmm8,96(%rsi) | 2867 movups %xmm8,96(%rsi) |
2707 leaq 112(%rsi),%rsi | 2868 leaq 112(%rsi),%rsi |
2708 | 2869 |
2709 subq $128,%rdx | 2870 subq $128,%rdx |
2710 ja L$cbc_dec_loop8 | 2871 ja L$cbc_dec_loop8 |
2711 | 2872 |
2712 movaps %xmm9,%xmm2 | 2873 movaps %xmm9,%xmm2 |
2713 leaq -112(%rcx),%rcx | 2874 leaq -112(%rcx),%rcx |
2714 addq $112,%rdx | 2875 addq $112,%rdx |
2715 » jle» L$cbc_dec_tail_collected | 2876 » jle» L$cbc_dec_clear_tail_collected |
2716 movups %xmm9,(%rsi) | 2877 movups %xmm9,(%rsi) |
2717 leaq 16(%rsi),%rsi | 2878 leaq 16(%rsi),%rsi |
2718 cmpq $80,%rdx | 2879 cmpq $80,%rdx |
2719 jbe L$cbc_dec_tail | 2880 jbe L$cbc_dec_tail |
2720 | 2881 |
2721 movaps %xmm11,%xmm2 | 2882 movaps %xmm11,%xmm2 |
2722 L$cbc_dec_six_or_seven: | 2883 L$cbc_dec_six_or_seven: |
2723 cmpq $96,%rdx | 2884 cmpq $96,%rdx |
2724 ja L$cbc_dec_seven | 2885 ja L$cbc_dec_seven |
2725 | 2886 |
2726 movaps %xmm7,%xmm8 | 2887 movaps %xmm7,%xmm8 |
2727 call _aesni_decrypt6 | 2888 call _aesni_decrypt6 |
2728 pxor %xmm10,%xmm2 | 2889 pxor %xmm10,%xmm2 |
2729 movaps %xmm8,%xmm10 | 2890 movaps %xmm8,%xmm10 |
2730 pxor %xmm11,%xmm3 | 2891 pxor %xmm11,%xmm3 |
2731 movdqu %xmm2,(%rsi) | 2892 movdqu %xmm2,(%rsi) |
2732 pxor %xmm12,%xmm4 | 2893 pxor %xmm12,%xmm4 |
2733 movdqu %xmm3,16(%rsi) | 2894 movdqu %xmm3,16(%rsi) |
| 2895 pxor %xmm3,%xmm3 |
2734 pxor %xmm13,%xmm5 | 2896 pxor %xmm13,%xmm5 |
2735 movdqu %xmm4,32(%rsi) | 2897 movdqu %xmm4,32(%rsi) |
| 2898 pxor %xmm4,%xmm4 |
2736 pxor %xmm14,%xmm6 | 2899 pxor %xmm14,%xmm6 |
2737 movdqu %xmm5,48(%rsi) | 2900 movdqu %xmm5,48(%rsi) |
| 2901 pxor %xmm5,%xmm5 |
2738 pxor %xmm15,%xmm7 | 2902 pxor %xmm15,%xmm7 |
2739 movdqu %xmm6,64(%rsi) | 2903 movdqu %xmm6,64(%rsi) |
| 2904 pxor %xmm6,%xmm6 |
2740 leaq 80(%rsi),%rsi | 2905 leaq 80(%rsi),%rsi |
2741 movdqa %xmm7,%xmm2 | 2906 movdqa %xmm7,%xmm2 |
| 2907 pxor %xmm7,%xmm7 |
2742 jmp L$cbc_dec_tail_collected | 2908 jmp L$cbc_dec_tail_collected |
2743 | 2909 |
2744 .p2align 4 | 2910 .p2align 4 |
2745 L$cbc_dec_seven: | 2911 L$cbc_dec_seven: |
2746 movups 96(%rdi),%xmm8 | 2912 movups 96(%rdi),%xmm8 |
2747 xorps %xmm9,%xmm9 | 2913 xorps %xmm9,%xmm9 |
2748 call _aesni_decrypt8 | 2914 call _aesni_decrypt8 |
2749 movups 80(%rdi),%xmm9 | 2915 movups 80(%rdi),%xmm9 |
2750 pxor %xmm10,%xmm2 | 2916 pxor %xmm10,%xmm2 |
2751 movups 96(%rdi),%xmm10 | 2917 movups 96(%rdi),%xmm10 |
2752 pxor %xmm11,%xmm3 | 2918 pxor %xmm11,%xmm3 |
2753 movdqu %xmm2,(%rsi) | 2919 movdqu %xmm2,(%rsi) |
2754 pxor %xmm12,%xmm4 | 2920 pxor %xmm12,%xmm4 |
2755 movdqu %xmm3,16(%rsi) | 2921 movdqu %xmm3,16(%rsi) |
| 2922 pxor %xmm3,%xmm3 |
2756 pxor %xmm13,%xmm5 | 2923 pxor %xmm13,%xmm5 |
2757 movdqu %xmm4,32(%rsi) | 2924 movdqu %xmm4,32(%rsi) |
| 2925 pxor %xmm4,%xmm4 |
2758 pxor %xmm14,%xmm6 | 2926 pxor %xmm14,%xmm6 |
2759 movdqu %xmm5,48(%rsi) | 2927 movdqu %xmm5,48(%rsi) |
| 2928 pxor %xmm5,%xmm5 |
2760 pxor %xmm15,%xmm7 | 2929 pxor %xmm15,%xmm7 |
2761 movdqu %xmm6,64(%rsi) | 2930 movdqu %xmm6,64(%rsi) |
| 2931 pxor %xmm6,%xmm6 |
2762 pxor %xmm9,%xmm8 | 2932 pxor %xmm9,%xmm8 |
2763 movdqu %xmm7,80(%rsi) | 2933 movdqu %xmm7,80(%rsi) |
| 2934 pxor %xmm7,%xmm7 |
2764 leaq 96(%rsi),%rsi | 2935 leaq 96(%rsi),%rsi |
2765 movdqa %xmm8,%xmm2 | 2936 movdqa %xmm8,%xmm2 |
| 2937 pxor %xmm8,%xmm8 |
| 2938 pxor %xmm9,%xmm9 |
2766 jmp L$cbc_dec_tail_collected | 2939 jmp L$cbc_dec_tail_collected |
2767 | 2940 |
2768 .p2align 4 | 2941 .p2align 4 |
2769 L$cbc_dec_loop6: | 2942 L$cbc_dec_loop6: |
2770 movups %xmm7,(%rsi) | 2943 movups %xmm7,(%rsi) |
2771 leaq 16(%rsi),%rsi | 2944 leaq 16(%rsi),%rsi |
2772 movdqu 0(%rdi),%xmm2 | 2945 movdqu 0(%rdi),%xmm2 |
2773 movdqu 16(%rdi),%xmm3 | 2946 movdqu 16(%rdi),%xmm3 |
2774 movdqa %xmm2,%xmm11 | 2947 movdqa %xmm2,%xmm11 |
2775 movdqu 32(%rdi),%xmm4 | 2948 movdqu 32(%rdi),%xmm4 |
(...skipping 23 matching lines...) Expand all Loading... |
2799 movdqu %xmm5,48(%rsi) | 2972 movdqu %xmm5,48(%rsi) |
2800 pxor %xmm15,%xmm7 | 2973 pxor %xmm15,%xmm7 |
2801 movl %r10d,%eax | 2974 movl %r10d,%eax |
2802 movdqu %xmm6,64(%rsi) | 2975 movdqu %xmm6,64(%rsi) |
2803 leaq 80(%rsi),%rsi | 2976 leaq 80(%rsi),%rsi |
2804 subq $96,%rdx | 2977 subq $96,%rdx |
2805 ja L$cbc_dec_loop6 | 2978 ja L$cbc_dec_loop6 |
2806 | 2979 |
2807 movdqa %xmm7,%xmm2 | 2980 movdqa %xmm7,%xmm2 |
2808 addq $80,%rdx | 2981 addq $80,%rdx |
2809 » jle» L$cbc_dec_tail_collected | 2982 » jle» L$cbc_dec_clear_tail_collected |
2810 movups %xmm7,(%rsi) | 2983 movups %xmm7,(%rsi) |
2811 leaq 16(%rsi),%rsi | 2984 leaq 16(%rsi),%rsi |
2812 | 2985 |
2813 L$cbc_dec_tail: | 2986 L$cbc_dec_tail: |
2814 movups (%rdi),%xmm2 | 2987 movups (%rdi),%xmm2 |
2815 subq $16,%rdx | 2988 subq $16,%rdx |
2816 jbe L$cbc_dec_one | 2989 jbe L$cbc_dec_one |
2817 | 2990 |
2818 movups 16(%rdi),%xmm3 | 2991 movups 16(%rdi),%xmm3 |
2819 movaps %xmm2,%xmm11 | 2992 movaps %xmm2,%xmm11 |
(...skipping 14 matching lines...) Expand all Loading... |
2834 movaps %xmm5,%xmm14 | 3007 movaps %xmm5,%xmm14 |
2835 movaps %xmm6,%xmm15 | 3008 movaps %xmm6,%xmm15 |
2836 xorps %xmm7,%xmm7 | 3009 xorps %xmm7,%xmm7 |
2837 call _aesni_decrypt6 | 3010 call _aesni_decrypt6 |
2838 pxor %xmm10,%xmm2 | 3011 pxor %xmm10,%xmm2 |
2839 movaps %xmm15,%xmm10 | 3012 movaps %xmm15,%xmm10 |
2840 pxor %xmm11,%xmm3 | 3013 pxor %xmm11,%xmm3 |
2841 movdqu %xmm2,(%rsi) | 3014 movdqu %xmm2,(%rsi) |
2842 pxor %xmm12,%xmm4 | 3015 pxor %xmm12,%xmm4 |
2843 movdqu %xmm3,16(%rsi) | 3016 movdqu %xmm3,16(%rsi) |
| 3017 pxor %xmm3,%xmm3 |
2844 pxor %xmm13,%xmm5 | 3018 pxor %xmm13,%xmm5 |
2845 movdqu %xmm4,32(%rsi) | 3019 movdqu %xmm4,32(%rsi) |
| 3020 pxor %xmm4,%xmm4 |
2846 pxor %xmm14,%xmm6 | 3021 pxor %xmm14,%xmm6 |
2847 movdqu %xmm5,48(%rsi) | 3022 movdqu %xmm5,48(%rsi) |
| 3023 pxor %xmm5,%xmm5 |
2848 leaq 64(%rsi),%rsi | 3024 leaq 64(%rsi),%rsi |
2849 movdqa %xmm6,%xmm2 | 3025 movdqa %xmm6,%xmm2 |
| 3026 pxor %xmm6,%xmm6 |
| 3027 pxor %xmm7,%xmm7 |
2850 subq $16,%rdx | 3028 subq $16,%rdx |
2851 jmp L$cbc_dec_tail_collected | 3029 jmp L$cbc_dec_tail_collected |
2852 | 3030 |
2853 .p2align 4 | 3031 .p2align 4 |
2854 L$cbc_dec_one: | 3032 L$cbc_dec_one: |
2855 movaps %xmm2,%xmm11 | 3033 movaps %xmm2,%xmm11 |
2856 movups (%rcx),%xmm0 | 3034 movups (%rcx),%xmm0 |
2857 movups 16(%rcx),%xmm1 | 3035 movups 16(%rcx),%xmm1 |
2858 leaq 32(%rcx),%rcx | 3036 leaq 32(%rcx),%rcx |
2859 xorps %xmm0,%xmm2 | 3037 xorps %xmm0,%xmm2 |
2860 L$oop_dec1_16: | 3038 L$oop_dec1_17: |
2861 .byte 102,15,56,222,209 | 3039 .byte 102,15,56,222,209 |
2862 decl %eax | 3040 decl %eax |
2863 movups (%rcx),%xmm1 | 3041 movups (%rcx),%xmm1 |
2864 leaq 16(%rcx),%rcx | 3042 leaq 16(%rcx),%rcx |
2865 » jnz» L$oop_dec1_16 | 3043 » jnz» L$oop_dec1_17 |
2866 .byte 102,15,56,223,209 | 3044 .byte 102,15,56,223,209 |
2867 xorps %xmm10,%xmm2 | 3045 xorps %xmm10,%xmm2 |
2868 movaps %xmm11,%xmm10 | 3046 movaps %xmm11,%xmm10 |
2869 jmp L$cbc_dec_tail_collected | 3047 jmp L$cbc_dec_tail_collected |
2870 .p2align 4 | 3048 .p2align 4 |
2871 L$cbc_dec_two: | 3049 L$cbc_dec_two: |
2872 movaps %xmm3,%xmm12 | 3050 movaps %xmm3,%xmm12 |
2873 call _aesni_decrypt2 | 3051 call _aesni_decrypt2 |
2874 pxor %xmm10,%xmm2 | 3052 pxor %xmm10,%xmm2 |
2875 movaps %xmm12,%xmm10 | 3053 movaps %xmm12,%xmm10 |
2876 pxor %xmm11,%xmm3 | 3054 pxor %xmm11,%xmm3 |
2877 movdqu %xmm2,(%rsi) | 3055 movdqu %xmm2,(%rsi) |
2878 movdqa %xmm3,%xmm2 | 3056 movdqa %xmm3,%xmm2 |
| 3057 pxor %xmm3,%xmm3 |
2879 leaq 16(%rsi),%rsi | 3058 leaq 16(%rsi),%rsi |
2880 jmp L$cbc_dec_tail_collected | 3059 jmp L$cbc_dec_tail_collected |
2881 .p2align 4 | 3060 .p2align 4 |
2882 L$cbc_dec_three: | 3061 L$cbc_dec_three: |
2883 movaps %xmm4,%xmm13 | 3062 movaps %xmm4,%xmm13 |
2884 call _aesni_decrypt3 | 3063 call _aesni_decrypt3 |
2885 pxor %xmm10,%xmm2 | 3064 pxor %xmm10,%xmm2 |
2886 movaps %xmm13,%xmm10 | 3065 movaps %xmm13,%xmm10 |
2887 pxor %xmm11,%xmm3 | 3066 pxor %xmm11,%xmm3 |
2888 movdqu %xmm2,(%rsi) | 3067 movdqu %xmm2,(%rsi) |
2889 pxor %xmm12,%xmm4 | 3068 pxor %xmm12,%xmm4 |
2890 movdqu %xmm3,16(%rsi) | 3069 movdqu %xmm3,16(%rsi) |
| 3070 pxor %xmm3,%xmm3 |
2891 movdqa %xmm4,%xmm2 | 3071 movdqa %xmm4,%xmm2 |
| 3072 pxor %xmm4,%xmm4 |
2892 leaq 32(%rsi),%rsi | 3073 leaq 32(%rsi),%rsi |
2893 jmp L$cbc_dec_tail_collected | 3074 jmp L$cbc_dec_tail_collected |
2894 .p2align 4 | 3075 .p2align 4 |
2895 L$cbc_dec_four: | 3076 L$cbc_dec_four: |
2896 movaps %xmm5,%xmm14 | 3077 movaps %xmm5,%xmm14 |
2897 call _aesni_decrypt4 | 3078 call _aesni_decrypt4 |
2898 pxor %xmm10,%xmm2 | 3079 pxor %xmm10,%xmm2 |
2899 movaps %xmm14,%xmm10 | 3080 movaps %xmm14,%xmm10 |
2900 pxor %xmm11,%xmm3 | 3081 pxor %xmm11,%xmm3 |
2901 movdqu %xmm2,(%rsi) | 3082 movdqu %xmm2,(%rsi) |
2902 pxor %xmm12,%xmm4 | 3083 pxor %xmm12,%xmm4 |
2903 movdqu %xmm3,16(%rsi) | 3084 movdqu %xmm3,16(%rsi) |
| 3085 pxor %xmm3,%xmm3 |
2904 pxor %xmm13,%xmm5 | 3086 pxor %xmm13,%xmm5 |
2905 movdqu %xmm4,32(%rsi) | 3087 movdqu %xmm4,32(%rsi) |
| 3088 pxor %xmm4,%xmm4 |
2906 movdqa %xmm5,%xmm2 | 3089 movdqa %xmm5,%xmm2 |
| 3090 pxor %xmm5,%xmm5 |
2907 leaq 48(%rsi),%rsi | 3091 leaq 48(%rsi),%rsi |
2908 jmp L$cbc_dec_tail_collected | 3092 jmp L$cbc_dec_tail_collected |
2909 | 3093 |
2910 .p2align 4 | 3094 .p2align 4 |
| 3095 L$cbc_dec_clear_tail_collected: |
| 3096 pxor %xmm3,%xmm3 |
| 3097 pxor %xmm4,%xmm4 |
| 3098 pxor %xmm5,%xmm5 |
| 3099 pxor %xmm6,%xmm6 |
| 3100 pxor %xmm7,%xmm7 |
| 3101 pxor %xmm8,%xmm8 |
| 3102 pxor %xmm9,%xmm9 |
2911 L$cbc_dec_tail_collected: | 3103 L$cbc_dec_tail_collected: |
2912 movups %xmm10,(%r8) | 3104 movups %xmm10,(%r8) |
2913 andq $15,%rdx | 3105 andq $15,%rdx |
2914 jnz L$cbc_dec_tail_partial | 3106 jnz L$cbc_dec_tail_partial |
2915 movups %xmm2,(%rsi) | 3107 movups %xmm2,(%rsi) |
| 3108 pxor %xmm2,%xmm2 |
2916 jmp L$cbc_dec_ret | 3109 jmp L$cbc_dec_ret |
2917 .p2align 4 | 3110 .p2align 4 |
2918 L$cbc_dec_tail_partial: | 3111 L$cbc_dec_tail_partial: |
2919 movaps %xmm2,(%rsp) | 3112 movaps %xmm2,(%rsp) |
| 3113 pxor %xmm2,%xmm2 |
2920 movq $16,%rcx | 3114 movq $16,%rcx |
2921 movq %rsi,%rdi | 3115 movq %rsi,%rdi |
2922 subq %rdx,%rcx | 3116 subq %rdx,%rcx |
2923 leaq (%rsp),%rsi | 3117 leaq (%rsp),%rsi |
2924 .long 0x9066A4F3 | 3118 .long 0x9066A4F3 |
| 3119 movdqa %xmm2,(%rsp) |
2925 | 3120 |
2926 L$cbc_dec_ret: | 3121 L$cbc_dec_ret: |
| 3122 xorps %xmm0,%xmm0 |
| 3123 pxor %xmm1,%xmm1 |
2927 leaq (%rbp),%rsp | 3124 leaq (%rbp),%rsp |
2928 popq %rbp | 3125 popq %rbp |
2929 L$cbc_ret: | 3126 L$cbc_ret: |
2930 .byte 0xf3,0xc3 | 3127 .byte 0xf3,0xc3 |
2931 | 3128 |
2932 .globl _aesni_set_decrypt_key | 3129 .globl _aesni_set_decrypt_key |
2933 .private_extern _aesni_set_decrypt_key | 3130 .private_extern _aesni_set_decrypt_key |
2934 | 3131 |
2935 .p2align 4 | 3132 .p2align 4 |
2936 _aesni_set_decrypt_key: | 3133 _aesni_set_decrypt_key: |
(...skipping 18 matching lines...) Expand all Loading... |
2955 .byte 102,15,56,219,201 | 3152 .byte 102,15,56,219,201 |
2956 leaq 16(%rdx),%rdx | 3153 leaq 16(%rdx),%rdx |
2957 leaq -16(%rdi),%rdi | 3154 leaq -16(%rdi),%rdi |
2958 movups %xmm0,16(%rdi) | 3155 movups %xmm0,16(%rdi) |
2959 movups %xmm1,-16(%rdx) | 3156 movups %xmm1,-16(%rdx) |
2960 cmpq %rdx,%rdi | 3157 cmpq %rdx,%rdi |
2961 ja L$dec_key_inverse | 3158 ja L$dec_key_inverse |
2962 | 3159 |
2963 movups (%rdx),%xmm0 | 3160 movups (%rdx),%xmm0 |
2964 .byte 102,15,56,219,192 | 3161 .byte 102,15,56,219,192 |
| 3162 pxor %xmm1,%xmm1 |
2965 movups %xmm0,(%rdi) | 3163 movups %xmm0,(%rdi) |
| 3164 pxor %xmm0,%xmm0 |
2966 L$dec_key_ret: | 3165 L$dec_key_ret: |
2967 addq $8,%rsp | 3166 addq $8,%rsp |
2968 .byte 0xf3,0xc3 | 3167 .byte 0xf3,0xc3 |
2969 L$SEH_end_set_decrypt_key: | 3168 L$SEH_end_set_decrypt_key: |
2970 | 3169 |
2971 .globl _aesni_set_encrypt_key | 3170 .globl _aesni_set_encrypt_key |
2972 .private_extern _aesni_set_encrypt_key | 3171 .private_extern _aesni_set_encrypt_key |
2973 | 3172 |
2974 .p2align 4 | 3173 .p2align 4 |
2975 _aesni_set_encrypt_key: | 3174 _aesni_set_encrypt_key: |
2976 __aesni_set_encrypt_key: | 3175 __aesni_set_encrypt_key: |
2977 .byte 0x48,0x83,0xEC,0x08 | 3176 .byte 0x48,0x83,0xEC,0x08 |
2978 movq $-1,%rax | 3177 movq $-1,%rax |
2979 testq %rdi,%rdi | 3178 testq %rdi,%rdi |
2980 jz L$enc_key_ret | 3179 jz L$enc_key_ret |
2981 testq %rdx,%rdx | 3180 testq %rdx,%rdx |
2982 jz L$enc_key_ret | 3181 jz L$enc_key_ret |
2983 | 3182 |
| 3183 movl $268437504,%r10d |
2984 movups (%rdi),%xmm0 | 3184 movups (%rdi),%xmm0 |
2985 xorps %xmm4,%xmm4 | 3185 xorps %xmm4,%xmm4 |
| 3186 andl _OPENSSL_ia32cap_P+4(%rip),%r10d |
2986 leaq 16(%rdx),%rax | 3187 leaq 16(%rdx),%rax |
2987 cmpl $256,%esi | 3188 cmpl $256,%esi |
2988 je L$14rounds | 3189 je L$14rounds |
2989 cmpl $192,%esi | 3190 cmpl $192,%esi |
2990 je L$12rounds | 3191 je L$12rounds |
2991 cmpl $128,%esi | 3192 cmpl $128,%esi |
2992 jne L$bad_keybits | 3193 jne L$bad_keybits |
2993 | 3194 |
2994 L$10rounds: | 3195 L$10rounds: |
2995 movl $9,%esi | 3196 movl $9,%esi |
| 3197 cmpl $268435456,%r10d |
| 3198 je L$10rounds_alt |
| 3199 |
2996 movups %xmm0,(%rdx) | 3200 movups %xmm0,(%rdx) |
2997 .byte 102,15,58,223,200,1 | 3201 .byte 102,15,58,223,200,1 |
2998 call L$key_expansion_128_cold | 3202 call L$key_expansion_128_cold |
2999 .byte 102,15,58,223,200,2 | 3203 .byte 102,15,58,223,200,2 |
3000 call L$key_expansion_128 | 3204 call L$key_expansion_128 |
3001 .byte 102,15,58,223,200,4 | 3205 .byte 102,15,58,223,200,4 |
3002 call L$key_expansion_128 | 3206 call L$key_expansion_128 |
3003 .byte 102,15,58,223,200,8 | 3207 .byte 102,15,58,223,200,8 |
3004 call L$key_expansion_128 | 3208 call L$key_expansion_128 |
3005 .byte 102,15,58,223,200,16 | 3209 .byte 102,15,58,223,200,16 |
3006 call L$key_expansion_128 | 3210 call L$key_expansion_128 |
3007 .byte 102,15,58,223,200,32 | 3211 .byte 102,15,58,223,200,32 |
3008 call L$key_expansion_128 | 3212 call L$key_expansion_128 |
3009 .byte 102,15,58,223,200,64 | 3213 .byte 102,15,58,223,200,64 |
3010 call L$key_expansion_128 | 3214 call L$key_expansion_128 |
3011 .byte 102,15,58,223,200,128 | 3215 .byte 102,15,58,223,200,128 |
3012 call L$key_expansion_128 | 3216 call L$key_expansion_128 |
3013 .byte 102,15,58,223,200,27 | 3217 .byte 102,15,58,223,200,27 |
3014 call L$key_expansion_128 | 3218 call L$key_expansion_128 |
3015 .byte 102,15,58,223,200,54 | 3219 .byte 102,15,58,223,200,54 |
3016 call L$key_expansion_128 | 3220 call L$key_expansion_128 |
3017 movups %xmm0,(%rax) | 3221 movups %xmm0,(%rax) |
3018 movl %esi,80(%rax) | 3222 movl %esi,80(%rax) |
3019 xorl %eax,%eax | 3223 xorl %eax,%eax |
3020 jmp L$enc_key_ret | 3224 jmp L$enc_key_ret |
3021 | 3225 |
3022 .p2align 4 | 3226 .p2align 4 |
| 3227 L$10rounds_alt: |
| 3228 movdqa L$key_rotate(%rip),%xmm5 |
| 3229 movl $8,%r10d |
| 3230 movdqa L$key_rcon1(%rip),%xmm4 |
| 3231 movdqa %xmm0,%xmm2 |
| 3232 movdqu %xmm0,(%rdx) |
| 3233 jmp L$oop_key128 |
| 3234 |
| 3235 .p2align 4 |
| 3236 L$oop_key128: |
| 3237 .byte 102,15,56,0,197 |
| 3238 .byte 102,15,56,221,196 |
| 3239 pslld $1,%xmm4 |
| 3240 leaq 16(%rax),%rax |
| 3241 |
| 3242 movdqa %xmm2,%xmm3 |
| 3243 pslldq $4,%xmm2 |
| 3244 pxor %xmm2,%xmm3 |
| 3245 pslldq $4,%xmm2 |
| 3246 pxor %xmm2,%xmm3 |
| 3247 pslldq $4,%xmm2 |
| 3248 pxor %xmm3,%xmm2 |
| 3249 |
| 3250 pxor %xmm2,%xmm0 |
| 3251 movdqu %xmm0,-16(%rax) |
| 3252 movdqa %xmm0,%xmm2 |
| 3253 |
| 3254 decl %r10d |
| 3255 jnz L$oop_key128 |
| 3256 |
| 3257 movdqa L$key_rcon1b(%rip),%xmm4 |
| 3258 |
| 3259 .byte 102,15,56,0,197 |
| 3260 .byte 102,15,56,221,196 |
| 3261 pslld $1,%xmm4 |
| 3262 |
| 3263 movdqa %xmm2,%xmm3 |
| 3264 pslldq $4,%xmm2 |
| 3265 pxor %xmm2,%xmm3 |
| 3266 pslldq $4,%xmm2 |
| 3267 pxor %xmm2,%xmm3 |
| 3268 pslldq $4,%xmm2 |
| 3269 pxor %xmm3,%xmm2 |
| 3270 |
| 3271 pxor %xmm2,%xmm0 |
| 3272 movdqu %xmm0,(%rax) |
| 3273 |
| 3274 movdqa %xmm0,%xmm2 |
| 3275 .byte 102,15,56,0,197 |
| 3276 .byte 102,15,56,221,196 |
| 3277 |
| 3278 movdqa %xmm2,%xmm3 |
| 3279 pslldq $4,%xmm2 |
| 3280 pxor %xmm2,%xmm3 |
| 3281 pslldq $4,%xmm2 |
| 3282 pxor %xmm2,%xmm3 |
| 3283 pslldq $4,%xmm2 |
| 3284 pxor %xmm3,%xmm2 |
| 3285 |
| 3286 pxor %xmm2,%xmm0 |
| 3287 movdqu %xmm0,16(%rax) |
| 3288 |
| 3289 movl %esi,96(%rax) |
| 3290 xorl %eax,%eax |
| 3291 jmp L$enc_key_ret |
| 3292 |
| 3293 .p2align 4 |
3023 L$12rounds: | 3294 L$12rounds: |
3024 movq 16(%rdi),%xmm2 | 3295 movq 16(%rdi),%xmm2 |
3025 movl $11,%esi | 3296 movl $11,%esi |
| 3297 cmpl $268435456,%r10d |
| 3298 je L$12rounds_alt |
| 3299 |
3026 movups %xmm0,(%rdx) | 3300 movups %xmm0,(%rdx) |
3027 .byte 102,15,58,223,202,1 | 3301 .byte 102,15,58,223,202,1 |
3028 call L$key_expansion_192a_cold | 3302 call L$key_expansion_192a_cold |
3029 .byte 102,15,58,223,202,2 | 3303 .byte 102,15,58,223,202,2 |
3030 call L$key_expansion_192b | 3304 call L$key_expansion_192b |
3031 .byte 102,15,58,223,202,4 | 3305 .byte 102,15,58,223,202,4 |
3032 call L$key_expansion_192a | 3306 call L$key_expansion_192a |
3033 .byte 102,15,58,223,202,8 | 3307 .byte 102,15,58,223,202,8 |
3034 call L$key_expansion_192b | 3308 call L$key_expansion_192b |
3035 .byte 102,15,58,223,202,16 | 3309 .byte 102,15,58,223,202,16 |
3036 call L$key_expansion_192a | 3310 call L$key_expansion_192a |
3037 .byte 102,15,58,223,202,32 | 3311 .byte 102,15,58,223,202,32 |
3038 call L$key_expansion_192b | 3312 call L$key_expansion_192b |
3039 .byte 102,15,58,223,202,64 | 3313 .byte 102,15,58,223,202,64 |
3040 call L$key_expansion_192a | 3314 call L$key_expansion_192a |
3041 .byte 102,15,58,223,202,128 | 3315 .byte 102,15,58,223,202,128 |
3042 call L$key_expansion_192b | 3316 call L$key_expansion_192b |
3043 movups %xmm0,(%rax) | 3317 movups %xmm0,(%rax) |
3044 movl %esi,48(%rax) | 3318 movl %esi,48(%rax) |
3045 xorq %rax,%rax | 3319 xorq %rax,%rax |
3046 jmp L$enc_key_ret | 3320 jmp L$enc_key_ret |
3047 | 3321 |
3048 .p2align 4 | 3322 .p2align 4 |
| 3323 L$12rounds_alt: |
| 3324 movdqa L$key_rotate192(%rip),%xmm5 |
| 3325 movdqa L$key_rcon1(%rip),%xmm4 |
| 3326 movl $8,%r10d |
| 3327 movdqu %xmm0,(%rdx) |
| 3328 jmp L$oop_key192 |
| 3329 |
| 3330 .p2align 4 |
| 3331 L$oop_key192: |
| 3332 movq %xmm2,0(%rax) |
| 3333 movdqa %xmm2,%xmm1 |
| 3334 .byte 102,15,56,0,213 |
| 3335 .byte 102,15,56,221,212 |
| 3336 pslld $1,%xmm4 |
| 3337 leaq 24(%rax),%rax |
| 3338 |
| 3339 movdqa %xmm0,%xmm3 |
| 3340 pslldq $4,%xmm0 |
| 3341 pxor %xmm0,%xmm3 |
| 3342 pslldq $4,%xmm0 |
| 3343 pxor %xmm0,%xmm3 |
| 3344 pslldq $4,%xmm0 |
| 3345 pxor %xmm3,%xmm0 |
| 3346 |
| 3347 pshufd $255,%xmm0,%xmm3 |
| 3348 pxor %xmm1,%xmm3 |
| 3349 pslldq $4,%xmm1 |
| 3350 pxor %xmm1,%xmm3 |
| 3351 |
| 3352 pxor %xmm2,%xmm0 |
| 3353 pxor %xmm3,%xmm2 |
| 3354 movdqu %xmm0,-16(%rax) |
| 3355 |
| 3356 decl %r10d |
| 3357 jnz L$oop_key192 |
| 3358 |
| 3359 movl %esi,32(%rax) |
| 3360 xorl %eax,%eax |
| 3361 jmp L$enc_key_ret |
| 3362 |
| 3363 .p2align 4 |
3049 L$14rounds: | 3364 L$14rounds: |
3050 movups 16(%rdi),%xmm2 | 3365 movups 16(%rdi),%xmm2 |
3051 movl $13,%esi | 3366 movl $13,%esi |
3052 leaq 16(%rax),%rax | 3367 leaq 16(%rax),%rax |
| 3368 cmpl $268435456,%r10d |
| 3369 je L$14rounds_alt |
| 3370 |
3053 movups %xmm0,(%rdx) | 3371 movups %xmm0,(%rdx) |
3054 movups %xmm2,16(%rdx) | 3372 movups %xmm2,16(%rdx) |
3055 .byte 102,15,58,223,202,1 | 3373 .byte 102,15,58,223,202,1 |
3056 call L$key_expansion_256a_cold | 3374 call L$key_expansion_256a_cold |
3057 .byte 102,15,58,223,200,1 | 3375 .byte 102,15,58,223,200,1 |
3058 call L$key_expansion_256b | 3376 call L$key_expansion_256b |
3059 .byte 102,15,58,223,202,2 | 3377 .byte 102,15,58,223,202,2 |
3060 call L$key_expansion_256a | 3378 call L$key_expansion_256a |
3061 .byte 102,15,58,223,200,2 | 3379 .byte 102,15,58,223,200,2 |
3062 call L$key_expansion_256b | 3380 call L$key_expansion_256b |
(...skipping 14 matching lines...) Expand all Loading... |
3077 .byte 102,15,58,223,200,32 | 3395 .byte 102,15,58,223,200,32 |
3078 call L$key_expansion_256b | 3396 call L$key_expansion_256b |
3079 .byte 102,15,58,223,202,64 | 3397 .byte 102,15,58,223,202,64 |
3080 call L$key_expansion_256a | 3398 call L$key_expansion_256a |
3081 movups %xmm0,(%rax) | 3399 movups %xmm0,(%rax) |
3082 movl %esi,16(%rax) | 3400 movl %esi,16(%rax) |
3083 xorq %rax,%rax | 3401 xorq %rax,%rax |
3084 jmp L$enc_key_ret | 3402 jmp L$enc_key_ret |
3085 | 3403 |
3086 .p2align 4 | 3404 .p2align 4 |
| 3405 L$14rounds_alt: |
| 3406 movdqa L$key_rotate(%rip),%xmm5 |
| 3407 movdqa L$key_rcon1(%rip),%xmm4 |
| 3408 movl $7,%r10d |
| 3409 movdqu %xmm0,0(%rdx) |
| 3410 movdqa %xmm2,%xmm1 |
| 3411 movdqu %xmm2,16(%rdx) |
| 3412 jmp L$oop_key256 |
| 3413 |
| 3414 .p2align 4 |
| 3415 L$oop_key256: |
| 3416 .byte 102,15,56,0,213 |
| 3417 .byte 102,15,56,221,212 |
| 3418 |
| 3419 movdqa %xmm0,%xmm3 |
| 3420 pslldq $4,%xmm0 |
| 3421 pxor %xmm0,%xmm3 |
| 3422 pslldq $4,%xmm0 |
| 3423 pxor %xmm0,%xmm3 |
| 3424 pslldq $4,%xmm0 |
| 3425 pxor %xmm3,%xmm0 |
| 3426 pslld $1,%xmm4 |
| 3427 |
| 3428 pxor %xmm2,%xmm0 |
| 3429 movdqu %xmm0,(%rax) |
| 3430 |
| 3431 decl %r10d |
| 3432 jz L$done_key256 |
| 3433 |
| 3434 pshufd $255,%xmm0,%xmm2 |
| 3435 pxor %xmm3,%xmm3 |
| 3436 .byte 102,15,56,221,211 |
| 3437 |
| 3438 movdqa %xmm1,%xmm3 |
| 3439 pslldq $4,%xmm1 |
| 3440 pxor %xmm1,%xmm3 |
| 3441 pslldq $4,%xmm1 |
| 3442 pxor %xmm1,%xmm3 |
| 3443 pslldq $4,%xmm1 |
| 3444 pxor %xmm3,%xmm1 |
| 3445 |
| 3446 pxor %xmm1,%xmm2 |
| 3447 movdqu %xmm2,16(%rax) |
| 3448 leaq 32(%rax),%rax |
| 3449 movdqa %xmm2,%xmm1 |
| 3450 |
| 3451 jmp L$oop_key256 |
| 3452 |
| 3453 L$done_key256: |
| 3454 movl %esi,16(%rax) |
| 3455 xorl %eax,%eax |
| 3456 jmp L$enc_key_ret |
| 3457 |
| 3458 .p2align 4 |
3087 L$bad_keybits: | 3459 L$bad_keybits: |
3088 movq $-2,%rax | 3460 movq $-2,%rax |
3089 L$enc_key_ret: | 3461 L$enc_key_ret: |
| 3462 pxor %xmm0,%xmm0 |
| 3463 pxor %xmm1,%xmm1 |
| 3464 pxor %xmm2,%xmm2 |
| 3465 pxor %xmm3,%xmm3 |
| 3466 pxor %xmm4,%xmm4 |
| 3467 pxor %xmm5,%xmm5 |
3090 addq $8,%rsp | 3468 addq $8,%rsp |
3091 .byte 0xf3,0xc3 | 3469 .byte 0xf3,0xc3 |
3092 L$SEH_end_set_encrypt_key: | 3470 L$SEH_end_set_encrypt_key: |
3093 | 3471 |
3094 .p2align 4 | 3472 .p2align 4 |
3095 L$key_expansion_128: | 3473 L$key_expansion_128: |
3096 movups %xmm0,(%rax) | 3474 movups %xmm0,(%rax) |
3097 leaq 16(%rax),%rax | 3475 leaq 16(%rax),%rax |
3098 L$key_expansion_128_cold: | 3476 L$key_expansion_128_cold: |
3099 shufps $16,%xmm0,%xmm4 | 3477 shufps $16,%xmm0,%xmm4 |
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3165 L$bswap_mask: | 3543 L$bswap_mask: |
3166 .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 | 3544 .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 |
3167 L$increment32: | 3545 L$increment32: |
3168 .long 6,6,6,0 | 3546 .long 6,6,6,0 |
3169 L$increment64: | 3547 L$increment64: |
3170 .long 1,0,0,0 | 3548 .long 1,0,0,0 |
3171 L$xts_magic: | 3549 L$xts_magic: |
3172 .long 0x87,0,1,0 | 3550 .long 0x87,0,1,0 |
3173 L$increment1: | 3551 L$increment1: |
3174 .byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 | 3552 .byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 |
| 3553 L$key_rotate: |
| 3554 .long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d |
| 3555 L$key_rotate192: |
| 3556 .long 0x04070605,0x04070605,0x04070605,0x04070605 |
| 3557 L$key_rcon1: |
| 3558 .long 1,1,1,1 |
| 3559 L$key_rcon1b: |
| 3560 .long 0x1b,0x1b,0x1b,0x1b |
3175 | 3561 |
3176 .byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32
,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101
,110,115,115,108,46,111,114,103,62,0 | 3562 .byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32
,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101
,110,115,115,108,46,111,114,103,62,0 |
3177 .p2align 6 | 3563 .p2align 6 |
3178 #endif | 3564 #endif |
OLD | NEW |