OLD | NEW |
1 #if defined(__x86_64__) | 1 #if defined(__x86_64__) |
2 .text | 2 .text |
3 | 3 |
4 | 4 |
5 .globl _sha1_block_data_order | 5 .globl _sha1_block_data_order |
6 .private_extern _sha1_block_data_order | 6 .private_extern _sha1_block_data_order |
7 | 7 |
8 .p2align 4 | 8 .p2align 4 |
9 _sha1_block_data_order: | 9 _sha1_block_data_order: |
10 movl _OPENSSL_ia32cap_P+0(%rip),%r9d | 10 movl _OPENSSL_ia32cap_P+0(%rip),%r9d |
11 movl _OPENSSL_ia32cap_P+4(%rip),%r8d | 11 movl _OPENSSL_ia32cap_P+4(%rip),%r8d |
12 movl _OPENSSL_ia32cap_P+8(%rip),%r10d | 12 movl _OPENSSL_ia32cap_P+8(%rip),%r10d |
13 testl $512,%r8d | 13 testl $512,%r8d |
14 jz L$ialu | 14 jz L$ialu |
| 15 andl $268435456,%r8d |
| 16 andl $1073741824,%r9d |
| 17 orl %r9d,%r8d |
| 18 cmpl $1342177280,%r8d |
| 19 je _avx_shortcut |
15 jmp _ssse3_shortcut | 20 jmp _ssse3_shortcut |
16 | 21 |
17 .p2align 4 | 22 .p2align 4 |
18 L$ialu: | 23 L$ialu: |
19 movq %rsp,%rax | 24 movq %rsp,%rax |
20 pushq %rbx | 25 pushq %rbx |
21 pushq %rbp | 26 pushq %rbp |
22 pushq %r12 | 27 pushq %r12 |
23 pushq %r13 | 28 pushq %r13 |
24 pushq %r14 | 29 pushq %r14 |
(...skipping 2375 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2400 leaq (%r14),%rsi | 2405 leaq (%r14),%rsi |
2401 movq -40(%rsi),%r14 | 2406 movq -40(%rsi),%r14 |
2402 movq -32(%rsi),%r13 | 2407 movq -32(%rsi),%r13 |
2403 movq -24(%rsi),%r12 | 2408 movq -24(%rsi),%r12 |
2404 movq -16(%rsi),%rbp | 2409 movq -16(%rsi),%rbp |
2405 movq -8(%rsi),%rbx | 2410 movq -8(%rsi),%rbx |
2406 leaq (%rsi),%rsp | 2411 leaq (%rsi),%rsp |
2407 L$epilogue_ssse3: | 2412 L$epilogue_ssse3: |
2408 .byte 0xf3,0xc3 | 2413 .byte 0xf3,0xc3 |
2409 | 2414 |
| 2415 |
| 2416 .p2align 4 |
| 2417 sha1_block_data_order_avx: |
| 2418 _avx_shortcut: |
| 2419 movq %rsp,%rax |
| 2420 pushq %rbx |
| 2421 pushq %rbp |
| 2422 pushq %r12 |
| 2423 pushq %r13 |
| 2424 pushq %r14 |
| 2425 leaq -64(%rsp),%rsp |
| 2426 vzeroupper |
| 2427 movq %rax,%r14 |
| 2428 andq $-64,%rsp |
| 2429 movq %rdi,%r8 |
| 2430 movq %rsi,%r9 |
| 2431 movq %rdx,%r10 |
| 2432 |
| 2433 shlq $6,%r10 |
| 2434 addq %r9,%r10 |
| 2435 leaq K_XX_XX+64(%rip),%r11 |
| 2436 |
| 2437 movl 0(%r8),%eax |
| 2438 movl 4(%r8),%ebx |
| 2439 movl 8(%r8),%ecx |
| 2440 movl 12(%r8),%edx |
| 2441 movl %ebx,%esi |
| 2442 movl 16(%r8),%ebp |
| 2443 movl %ecx,%edi |
| 2444 xorl %edx,%edi |
| 2445 andl %edi,%esi |
| 2446 |
| 2447 vmovdqa 64(%r11),%xmm6 |
| 2448 vmovdqa -64(%r11),%xmm11 |
| 2449 vmovdqu 0(%r9),%xmm0 |
| 2450 vmovdqu 16(%r9),%xmm1 |
| 2451 vmovdqu 32(%r9),%xmm2 |
| 2452 vmovdqu 48(%r9),%xmm3 |
| 2453 vpshufb %xmm6,%xmm0,%xmm0 |
| 2454 addq $64,%r9 |
| 2455 vpshufb %xmm6,%xmm1,%xmm1 |
| 2456 vpshufb %xmm6,%xmm2,%xmm2 |
| 2457 vpshufb %xmm6,%xmm3,%xmm3 |
| 2458 vpaddd %xmm11,%xmm0,%xmm4 |
| 2459 vpaddd %xmm11,%xmm1,%xmm5 |
| 2460 vpaddd %xmm11,%xmm2,%xmm6 |
| 2461 vmovdqa %xmm4,0(%rsp) |
| 2462 vmovdqa %xmm5,16(%rsp) |
| 2463 vmovdqa %xmm6,32(%rsp) |
| 2464 jmp L$oop_avx |
| 2465 .p2align 4 |
| 2466 L$oop_avx: |
| 2467 shrdl $2,%ebx,%ebx |
| 2468 xorl %edx,%esi |
| 2469 vpalignr $8,%xmm0,%xmm1,%xmm4 |
| 2470 movl %eax,%edi |
| 2471 addl 0(%rsp),%ebp |
| 2472 vpaddd %xmm3,%xmm11,%xmm9 |
| 2473 xorl %ecx,%ebx |
| 2474 shldl $5,%eax,%eax |
| 2475 vpsrldq $4,%xmm3,%xmm8 |
| 2476 addl %esi,%ebp |
| 2477 andl %ebx,%edi |
| 2478 vpxor %xmm0,%xmm4,%xmm4 |
| 2479 xorl %ecx,%ebx |
| 2480 addl %eax,%ebp |
| 2481 vpxor %xmm2,%xmm8,%xmm8 |
| 2482 shrdl $7,%eax,%eax |
| 2483 xorl %ecx,%edi |
| 2484 movl %ebp,%esi |
| 2485 addl 4(%rsp),%edx |
| 2486 vpxor %xmm8,%xmm4,%xmm4 |
| 2487 xorl %ebx,%eax |
| 2488 shldl $5,%ebp,%ebp |
| 2489 vmovdqa %xmm9,48(%rsp) |
| 2490 addl %edi,%edx |
| 2491 andl %eax,%esi |
| 2492 vpsrld $31,%xmm4,%xmm8 |
| 2493 xorl %ebx,%eax |
| 2494 addl %ebp,%edx |
| 2495 shrdl $7,%ebp,%ebp |
| 2496 xorl %ebx,%esi |
| 2497 vpslldq $12,%xmm4,%xmm10 |
| 2498 vpaddd %xmm4,%xmm4,%xmm4 |
| 2499 movl %edx,%edi |
| 2500 addl 8(%rsp),%ecx |
| 2501 xorl %eax,%ebp |
| 2502 shldl $5,%edx,%edx |
| 2503 vpsrld $30,%xmm10,%xmm9 |
| 2504 vpor %xmm8,%xmm4,%xmm4 |
| 2505 addl %esi,%ecx |
| 2506 andl %ebp,%edi |
| 2507 xorl %eax,%ebp |
| 2508 addl %edx,%ecx |
| 2509 vpslld $2,%xmm10,%xmm10 |
| 2510 vpxor %xmm9,%xmm4,%xmm4 |
| 2511 shrdl $7,%edx,%edx |
| 2512 xorl %eax,%edi |
| 2513 movl %ecx,%esi |
| 2514 addl 12(%rsp),%ebx |
| 2515 vpxor %xmm10,%xmm4,%xmm4 |
| 2516 xorl %ebp,%edx |
| 2517 shldl $5,%ecx,%ecx |
| 2518 addl %edi,%ebx |
| 2519 andl %edx,%esi |
| 2520 xorl %ebp,%edx |
| 2521 addl %ecx,%ebx |
| 2522 shrdl $7,%ecx,%ecx |
| 2523 xorl %ebp,%esi |
| 2524 vpalignr $8,%xmm1,%xmm2,%xmm5 |
| 2525 movl %ebx,%edi |
| 2526 addl 16(%rsp),%eax |
| 2527 vpaddd %xmm4,%xmm11,%xmm9 |
| 2528 xorl %edx,%ecx |
| 2529 shldl $5,%ebx,%ebx |
| 2530 vpsrldq $4,%xmm4,%xmm8 |
| 2531 addl %esi,%eax |
| 2532 andl %ecx,%edi |
| 2533 vpxor %xmm1,%xmm5,%xmm5 |
| 2534 xorl %edx,%ecx |
| 2535 addl %ebx,%eax |
| 2536 vpxor %xmm3,%xmm8,%xmm8 |
| 2537 shrdl $7,%ebx,%ebx |
| 2538 xorl %edx,%edi |
| 2539 movl %eax,%esi |
| 2540 addl 20(%rsp),%ebp |
| 2541 vpxor %xmm8,%xmm5,%xmm5 |
| 2542 xorl %ecx,%ebx |
| 2543 shldl $5,%eax,%eax |
| 2544 vmovdqa %xmm9,0(%rsp) |
| 2545 addl %edi,%ebp |
| 2546 andl %ebx,%esi |
| 2547 vpsrld $31,%xmm5,%xmm8 |
| 2548 xorl %ecx,%ebx |
| 2549 addl %eax,%ebp |
| 2550 shrdl $7,%eax,%eax |
| 2551 xorl %ecx,%esi |
| 2552 vpslldq $12,%xmm5,%xmm10 |
| 2553 vpaddd %xmm5,%xmm5,%xmm5 |
| 2554 movl %ebp,%edi |
| 2555 addl 24(%rsp),%edx |
| 2556 xorl %ebx,%eax |
| 2557 shldl $5,%ebp,%ebp |
| 2558 vpsrld $30,%xmm10,%xmm9 |
| 2559 vpor %xmm8,%xmm5,%xmm5 |
| 2560 addl %esi,%edx |
| 2561 andl %eax,%edi |
| 2562 xorl %ebx,%eax |
| 2563 addl %ebp,%edx |
| 2564 vpslld $2,%xmm10,%xmm10 |
| 2565 vpxor %xmm9,%xmm5,%xmm5 |
| 2566 shrdl $7,%ebp,%ebp |
| 2567 xorl %ebx,%edi |
| 2568 movl %edx,%esi |
| 2569 addl 28(%rsp),%ecx |
| 2570 vpxor %xmm10,%xmm5,%xmm5 |
| 2571 xorl %eax,%ebp |
| 2572 shldl $5,%edx,%edx |
| 2573 vmovdqa -32(%r11),%xmm11 |
| 2574 addl %edi,%ecx |
| 2575 andl %ebp,%esi |
| 2576 xorl %eax,%ebp |
| 2577 addl %edx,%ecx |
| 2578 shrdl $7,%edx,%edx |
| 2579 xorl %eax,%esi |
| 2580 vpalignr $8,%xmm2,%xmm3,%xmm6 |
| 2581 movl %ecx,%edi |
| 2582 addl 32(%rsp),%ebx |
| 2583 vpaddd %xmm5,%xmm11,%xmm9 |
| 2584 xorl %ebp,%edx |
| 2585 shldl $5,%ecx,%ecx |
| 2586 vpsrldq $4,%xmm5,%xmm8 |
| 2587 addl %esi,%ebx |
| 2588 andl %edx,%edi |
| 2589 vpxor %xmm2,%xmm6,%xmm6 |
| 2590 xorl %ebp,%edx |
| 2591 addl %ecx,%ebx |
| 2592 vpxor %xmm4,%xmm8,%xmm8 |
| 2593 shrdl $7,%ecx,%ecx |
| 2594 xorl %ebp,%edi |
| 2595 movl %ebx,%esi |
| 2596 addl 36(%rsp),%eax |
| 2597 vpxor %xmm8,%xmm6,%xmm6 |
| 2598 xorl %edx,%ecx |
| 2599 shldl $5,%ebx,%ebx |
| 2600 vmovdqa %xmm9,16(%rsp) |
| 2601 addl %edi,%eax |
| 2602 andl %ecx,%esi |
| 2603 vpsrld $31,%xmm6,%xmm8 |
| 2604 xorl %edx,%ecx |
| 2605 addl %ebx,%eax |
| 2606 shrdl $7,%ebx,%ebx |
| 2607 xorl %edx,%esi |
| 2608 vpslldq $12,%xmm6,%xmm10 |
| 2609 vpaddd %xmm6,%xmm6,%xmm6 |
| 2610 movl %eax,%edi |
| 2611 addl 40(%rsp),%ebp |
| 2612 xorl %ecx,%ebx |
| 2613 shldl $5,%eax,%eax |
| 2614 vpsrld $30,%xmm10,%xmm9 |
| 2615 vpor %xmm8,%xmm6,%xmm6 |
| 2616 addl %esi,%ebp |
| 2617 andl %ebx,%edi |
| 2618 xorl %ecx,%ebx |
| 2619 addl %eax,%ebp |
| 2620 vpslld $2,%xmm10,%xmm10 |
| 2621 vpxor %xmm9,%xmm6,%xmm6 |
| 2622 shrdl $7,%eax,%eax |
| 2623 xorl %ecx,%edi |
| 2624 movl %ebp,%esi |
| 2625 addl 44(%rsp),%edx |
| 2626 vpxor %xmm10,%xmm6,%xmm6 |
| 2627 xorl %ebx,%eax |
| 2628 shldl $5,%ebp,%ebp |
| 2629 addl %edi,%edx |
| 2630 andl %eax,%esi |
| 2631 xorl %ebx,%eax |
| 2632 addl %ebp,%edx |
| 2633 shrdl $7,%ebp,%ebp |
| 2634 xorl %ebx,%esi |
| 2635 vpalignr $8,%xmm3,%xmm4,%xmm7 |
| 2636 movl %edx,%edi |
| 2637 addl 48(%rsp),%ecx |
| 2638 vpaddd %xmm6,%xmm11,%xmm9 |
| 2639 xorl %eax,%ebp |
| 2640 shldl $5,%edx,%edx |
| 2641 vpsrldq $4,%xmm6,%xmm8 |
| 2642 addl %esi,%ecx |
| 2643 andl %ebp,%edi |
| 2644 vpxor %xmm3,%xmm7,%xmm7 |
| 2645 xorl %eax,%ebp |
| 2646 addl %edx,%ecx |
| 2647 vpxor %xmm5,%xmm8,%xmm8 |
| 2648 shrdl $7,%edx,%edx |
| 2649 xorl %eax,%edi |
| 2650 movl %ecx,%esi |
| 2651 addl 52(%rsp),%ebx |
| 2652 vpxor %xmm8,%xmm7,%xmm7 |
| 2653 xorl %ebp,%edx |
| 2654 shldl $5,%ecx,%ecx |
| 2655 vmovdqa %xmm9,32(%rsp) |
| 2656 addl %edi,%ebx |
| 2657 andl %edx,%esi |
| 2658 vpsrld $31,%xmm7,%xmm8 |
| 2659 xorl %ebp,%edx |
| 2660 addl %ecx,%ebx |
| 2661 shrdl $7,%ecx,%ecx |
| 2662 xorl %ebp,%esi |
| 2663 vpslldq $12,%xmm7,%xmm10 |
| 2664 vpaddd %xmm7,%xmm7,%xmm7 |
| 2665 movl %ebx,%edi |
| 2666 addl 56(%rsp),%eax |
| 2667 xorl %edx,%ecx |
| 2668 shldl $5,%ebx,%ebx |
| 2669 vpsrld $30,%xmm10,%xmm9 |
| 2670 vpor %xmm8,%xmm7,%xmm7 |
| 2671 addl %esi,%eax |
| 2672 andl %ecx,%edi |
| 2673 xorl %edx,%ecx |
| 2674 addl %ebx,%eax |
| 2675 vpslld $2,%xmm10,%xmm10 |
| 2676 vpxor %xmm9,%xmm7,%xmm7 |
| 2677 shrdl $7,%ebx,%ebx |
| 2678 xorl %edx,%edi |
| 2679 movl %eax,%esi |
| 2680 addl 60(%rsp),%ebp |
| 2681 vpxor %xmm10,%xmm7,%xmm7 |
| 2682 xorl %ecx,%ebx |
| 2683 shldl $5,%eax,%eax |
| 2684 addl %edi,%ebp |
| 2685 andl %ebx,%esi |
| 2686 xorl %ecx,%ebx |
| 2687 addl %eax,%ebp |
| 2688 vpalignr $8,%xmm6,%xmm7,%xmm8 |
| 2689 vpxor %xmm4,%xmm0,%xmm0 |
| 2690 shrdl $7,%eax,%eax |
| 2691 xorl %ecx,%esi |
| 2692 movl %ebp,%edi |
| 2693 addl 0(%rsp),%edx |
| 2694 vpxor %xmm1,%xmm0,%xmm0 |
| 2695 xorl %ebx,%eax |
| 2696 shldl $5,%ebp,%ebp |
| 2697 vpaddd %xmm7,%xmm11,%xmm9 |
| 2698 addl %esi,%edx |
| 2699 andl %eax,%edi |
| 2700 vpxor %xmm8,%xmm0,%xmm0 |
| 2701 xorl %ebx,%eax |
| 2702 addl %ebp,%edx |
| 2703 shrdl $7,%ebp,%ebp |
| 2704 xorl %ebx,%edi |
| 2705 vpsrld $30,%xmm0,%xmm8 |
| 2706 vmovdqa %xmm9,48(%rsp) |
| 2707 movl %edx,%esi |
| 2708 addl 4(%rsp),%ecx |
| 2709 xorl %eax,%ebp |
| 2710 shldl $5,%edx,%edx |
| 2711 vpslld $2,%xmm0,%xmm0 |
| 2712 addl %edi,%ecx |
| 2713 andl %ebp,%esi |
| 2714 xorl %eax,%ebp |
| 2715 addl %edx,%ecx |
| 2716 shrdl $7,%edx,%edx |
| 2717 xorl %eax,%esi |
| 2718 movl %ecx,%edi |
| 2719 addl 8(%rsp),%ebx |
| 2720 vpor %xmm8,%xmm0,%xmm0 |
| 2721 xorl %ebp,%edx |
| 2722 shldl $5,%ecx,%ecx |
| 2723 addl %esi,%ebx |
| 2724 andl %edx,%edi |
| 2725 xorl %ebp,%edx |
| 2726 addl %ecx,%ebx |
| 2727 addl 12(%rsp),%eax |
| 2728 xorl %ebp,%edi |
| 2729 movl %ebx,%esi |
| 2730 shldl $5,%ebx,%ebx |
| 2731 addl %edi,%eax |
| 2732 xorl %edx,%esi |
| 2733 shrdl $7,%ecx,%ecx |
| 2734 addl %ebx,%eax |
| 2735 vpalignr $8,%xmm7,%xmm0,%xmm8 |
| 2736 vpxor %xmm5,%xmm1,%xmm1 |
| 2737 addl 16(%rsp),%ebp |
| 2738 xorl %ecx,%esi |
| 2739 movl %eax,%edi |
| 2740 shldl $5,%eax,%eax |
| 2741 vpxor %xmm2,%xmm1,%xmm1 |
| 2742 addl %esi,%ebp |
| 2743 xorl %ecx,%edi |
| 2744 vpaddd %xmm0,%xmm11,%xmm9 |
| 2745 shrdl $7,%ebx,%ebx |
| 2746 addl %eax,%ebp |
| 2747 vpxor %xmm8,%xmm1,%xmm1 |
| 2748 addl 20(%rsp),%edx |
| 2749 xorl %ebx,%edi |
| 2750 movl %ebp,%esi |
| 2751 shldl $5,%ebp,%ebp |
| 2752 vpsrld $30,%xmm1,%xmm8 |
| 2753 vmovdqa %xmm9,0(%rsp) |
| 2754 addl %edi,%edx |
| 2755 xorl %ebx,%esi |
| 2756 shrdl $7,%eax,%eax |
| 2757 addl %ebp,%edx |
| 2758 vpslld $2,%xmm1,%xmm1 |
| 2759 addl 24(%rsp),%ecx |
| 2760 xorl %eax,%esi |
| 2761 movl %edx,%edi |
| 2762 shldl $5,%edx,%edx |
| 2763 addl %esi,%ecx |
| 2764 xorl %eax,%edi |
| 2765 shrdl $7,%ebp,%ebp |
| 2766 addl %edx,%ecx |
| 2767 vpor %xmm8,%xmm1,%xmm1 |
| 2768 addl 28(%rsp),%ebx |
| 2769 xorl %ebp,%edi |
| 2770 movl %ecx,%esi |
| 2771 shldl $5,%ecx,%ecx |
| 2772 addl %edi,%ebx |
| 2773 xorl %ebp,%esi |
| 2774 shrdl $7,%edx,%edx |
| 2775 addl %ecx,%ebx |
| 2776 vpalignr $8,%xmm0,%xmm1,%xmm8 |
| 2777 vpxor %xmm6,%xmm2,%xmm2 |
| 2778 addl 32(%rsp),%eax |
| 2779 xorl %edx,%esi |
| 2780 movl %ebx,%edi |
| 2781 shldl $5,%ebx,%ebx |
| 2782 vpxor %xmm3,%xmm2,%xmm2 |
| 2783 addl %esi,%eax |
| 2784 xorl %edx,%edi |
| 2785 vpaddd %xmm1,%xmm11,%xmm9 |
| 2786 vmovdqa 0(%r11),%xmm11 |
| 2787 shrdl $7,%ecx,%ecx |
| 2788 addl %ebx,%eax |
| 2789 vpxor %xmm8,%xmm2,%xmm2 |
| 2790 addl 36(%rsp),%ebp |
| 2791 xorl %ecx,%edi |
| 2792 movl %eax,%esi |
| 2793 shldl $5,%eax,%eax |
| 2794 vpsrld $30,%xmm2,%xmm8 |
| 2795 vmovdqa %xmm9,16(%rsp) |
| 2796 addl %edi,%ebp |
| 2797 xorl %ecx,%esi |
| 2798 shrdl $7,%ebx,%ebx |
| 2799 addl %eax,%ebp |
| 2800 vpslld $2,%xmm2,%xmm2 |
| 2801 addl 40(%rsp),%edx |
| 2802 xorl %ebx,%esi |
| 2803 movl %ebp,%edi |
| 2804 shldl $5,%ebp,%ebp |
| 2805 addl %esi,%edx |
| 2806 xorl %ebx,%edi |
| 2807 shrdl $7,%eax,%eax |
| 2808 addl %ebp,%edx |
| 2809 vpor %xmm8,%xmm2,%xmm2 |
| 2810 addl 44(%rsp),%ecx |
| 2811 xorl %eax,%edi |
| 2812 movl %edx,%esi |
| 2813 shldl $5,%edx,%edx |
| 2814 addl %edi,%ecx |
| 2815 xorl %eax,%esi |
| 2816 shrdl $7,%ebp,%ebp |
| 2817 addl %edx,%ecx |
| 2818 vpalignr $8,%xmm1,%xmm2,%xmm8 |
| 2819 vpxor %xmm7,%xmm3,%xmm3 |
| 2820 addl 48(%rsp),%ebx |
| 2821 xorl %ebp,%esi |
| 2822 movl %ecx,%edi |
| 2823 shldl $5,%ecx,%ecx |
| 2824 vpxor %xmm4,%xmm3,%xmm3 |
| 2825 addl %esi,%ebx |
| 2826 xorl %ebp,%edi |
| 2827 vpaddd %xmm2,%xmm11,%xmm9 |
| 2828 shrdl $7,%edx,%edx |
| 2829 addl %ecx,%ebx |
| 2830 vpxor %xmm8,%xmm3,%xmm3 |
| 2831 addl 52(%rsp),%eax |
| 2832 xorl %edx,%edi |
| 2833 movl %ebx,%esi |
| 2834 shldl $5,%ebx,%ebx |
| 2835 vpsrld $30,%xmm3,%xmm8 |
| 2836 vmovdqa %xmm9,32(%rsp) |
| 2837 addl %edi,%eax |
| 2838 xorl %edx,%esi |
| 2839 shrdl $7,%ecx,%ecx |
| 2840 addl %ebx,%eax |
| 2841 vpslld $2,%xmm3,%xmm3 |
| 2842 addl 56(%rsp),%ebp |
| 2843 xorl %ecx,%esi |
| 2844 movl %eax,%edi |
| 2845 shldl $5,%eax,%eax |
| 2846 addl %esi,%ebp |
| 2847 xorl %ecx,%edi |
| 2848 shrdl $7,%ebx,%ebx |
| 2849 addl %eax,%ebp |
| 2850 vpor %xmm8,%xmm3,%xmm3 |
| 2851 addl 60(%rsp),%edx |
| 2852 xorl %ebx,%edi |
| 2853 movl %ebp,%esi |
| 2854 shldl $5,%ebp,%ebp |
| 2855 addl %edi,%edx |
| 2856 xorl %ebx,%esi |
| 2857 shrdl $7,%eax,%eax |
| 2858 addl %ebp,%edx |
| 2859 vpalignr $8,%xmm2,%xmm3,%xmm8 |
| 2860 vpxor %xmm0,%xmm4,%xmm4 |
| 2861 addl 0(%rsp),%ecx |
| 2862 xorl %eax,%esi |
| 2863 movl %edx,%edi |
| 2864 shldl $5,%edx,%edx |
| 2865 vpxor %xmm5,%xmm4,%xmm4 |
| 2866 addl %esi,%ecx |
| 2867 xorl %eax,%edi |
| 2868 vpaddd %xmm3,%xmm11,%xmm9 |
| 2869 shrdl $7,%ebp,%ebp |
| 2870 addl %edx,%ecx |
| 2871 vpxor %xmm8,%xmm4,%xmm4 |
| 2872 addl 4(%rsp),%ebx |
| 2873 xorl %ebp,%edi |
| 2874 movl %ecx,%esi |
| 2875 shldl $5,%ecx,%ecx |
| 2876 vpsrld $30,%xmm4,%xmm8 |
| 2877 vmovdqa %xmm9,48(%rsp) |
| 2878 addl %edi,%ebx |
| 2879 xorl %ebp,%esi |
| 2880 shrdl $7,%edx,%edx |
| 2881 addl %ecx,%ebx |
| 2882 vpslld $2,%xmm4,%xmm4 |
| 2883 addl 8(%rsp),%eax |
| 2884 xorl %edx,%esi |
| 2885 movl %ebx,%edi |
| 2886 shldl $5,%ebx,%ebx |
| 2887 addl %esi,%eax |
| 2888 xorl %edx,%edi |
| 2889 shrdl $7,%ecx,%ecx |
| 2890 addl %ebx,%eax |
| 2891 vpor %xmm8,%xmm4,%xmm4 |
| 2892 addl 12(%rsp),%ebp |
| 2893 xorl %ecx,%edi |
| 2894 movl %eax,%esi |
| 2895 shldl $5,%eax,%eax |
| 2896 addl %edi,%ebp |
| 2897 xorl %ecx,%esi |
| 2898 shrdl $7,%ebx,%ebx |
| 2899 addl %eax,%ebp |
| 2900 vpalignr $8,%xmm3,%xmm4,%xmm8 |
| 2901 vpxor %xmm1,%xmm5,%xmm5 |
| 2902 addl 16(%rsp),%edx |
| 2903 xorl %ebx,%esi |
| 2904 movl %ebp,%edi |
| 2905 shldl $5,%ebp,%ebp |
| 2906 vpxor %xmm6,%xmm5,%xmm5 |
| 2907 addl %esi,%edx |
| 2908 xorl %ebx,%edi |
| 2909 vpaddd %xmm4,%xmm11,%xmm9 |
| 2910 shrdl $7,%eax,%eax |
| 2911 addl %ebp,%edx |
| 2912 vpxor %xmm8,%xmm5,%xmm5 |
| 2913 addl 20(%rsp),%ecx |
| 2914 xorl %eax,%edi |
| 2915 movl %edx,%esi |
| 2916 shldl $5,%edx,%edx |
| 2917 vpsrld $30,%xmm5,%xmm8 |
| 2918 vmovdqa %xmm9,0(%rsp) |
| 2919 addl %edi,%ecx |
| 2920 xorl %eax,%esi |
| 2921 shrdl $7,%ebp,%ebp |
| 2922 addl %edx,%ecx |
| 2923 vpslld $2,%xmm5,%xmm5 |
| 2924 addl 24(%rsp),%ebx |
| 2925 xorl %ebp,%esi |
| 2926 movl %ecx,%edi |
| 2927 shldl $5,%ecx,%ecx |
| 2928 addl %esi,%ebx |
| 2929 xorl %ebp,%edi |
| 2930 shrdl $7,%edx,%edx |
| 2931 addl %ecx,%ebx |
| 2932 vpor %xmm8,%xmm5,%xmm5 |
| 2933 addl 28(%rsp),%eax |
| 2934 shrdl $7,%ecx,%ecx |
| 2935 movl %ebx,%esi |
| 2936 xorl %edx,%edi |
| 2937 shldl $5,%ebx,%ebx |
| 2938 addl %edi,%eax |
| 2939 xorl %ecx,%esi |
| 2940 xorl %edx,%ecx |
| 2941 addl %ebx,%eax |
| 2942 vpalignr $8,%xmm4,%xmm5,%xmm8 |
| 2943 vpxor %xmm2,%xmm6,%xmm6 |
| 2944 addl 32(%rsp),%ebp |
| 2945 andl %ecx,%esi |
| 2946 xorl %edx,%ecx |
| 2947 shrdl $7,%ebx,%ebx |
| 2948 vpxor %xmm7,%xmm6,%xmm6 |
| 2949 movl %eax,%edi |
| 2950 xorl %ecx,%esi |
| 2951 vpaddd %xmm5,%xmm11,%xmm9 |
| 2952 shldl $5,%eax,%eax |
| 2953 addl %esi,%ebp |
| 2954 vpxor %xmm8,%xmm6,%xmm6 |
| 2955 xorl %ebx,%edi |
| 2956 xorl %ecx,%ebx |
| 2957 addl %eax,%ebp |
| 2958 addl 36(%rsp),%edx |
| 2959 vpsrld $30,%xmm6,%xmm8 |
| 2960 vmovdqa %xmm9,16(%rsp) |
| 2961 andl %ebx,%edi |
| 2962 xorl %ecx,%ebx |
| 2963 shrdl $7,%eax,%eax |
| 2964 movl %ebp,%esi |
| 2965 vpslld $2,%xmm6,%xmm6 |
| 2966 xorl %ebx,%edi |
| 2967 shldl $5,%ebp,%ebp |
| 2968 addl %edi,%edx |
| 2969 xorl %eax,%esi |
| 2970 xorl %ebx,%eax |
| 2971 addl %ebp,%edx |
| 2972 addl 40(%rsp),%ecx |
| 2973 andl %eax,%esi |
| 2974 vpor %xmm8,%xmm6,%xmm6 |
| 2975 xorl %ebx,%eax |
| 2976 shrdl $7,%ebp,%ebp |
| 2977 movl %edx,%edi |
| 2978 xorl %eax,%esi |
| 2979 shldl $5,%edx,%edx |
| 2980 addl %esi,%ecx |
| 2981 xorl %ebp,%edi |
| 2982 xorl %eax,%ebp |
| 2983 addl %edx,%ecx |
| 2984 addl 44(%rsp),%ebx |
| 2985 andl %ebp,%edi |
| 2986 xorl %eax,%ebp |
| 2987 shrdl $7,%edx,%edx |
| 2988 movl %ecx,%esi |
| 2989 xorl %ebp,%edi |
| 2990 shldl $5,%ecx,%ecx |
| 2991 addl %edi,%ebx |
| 2992 xorl %edx,%esi |
| 2993 xorl %ebp,%edx |
| 2994 addl %ecx,%ebx |
| 2995 vpalignr $8,%xmm5,%xmm6,%xmm8 |
| 2996 vpxor %xmm3,%xmm7,%xmm7 |
| 2997 addl 48(%rsp),%eax |
| 2998 andl %edx,%esi |
| 2999 xorl %ebp,%edx |
| 3000 shrdl $7,%ecx,%ecx |
| 3001 vpxor %xmm0,%xmm7,%xmm7 |
| 3002 movl %ebx,%edi |
| 3003 xorl %edx,%esi |
| 3004 vpaddd %xmm6,%xmm11,%xmm9 |
| 3005 vmovdqa 32(%r11),%xmm11 |
| 3006 shldl $5,%ebx,%ebx |
| 3007 addl %esi,%eax |
| 3008 vpxor %xmm8,%xmm7,%xmm7 |
| 3009 xorl %ecx,%edi |
| 3010 xorl %edx,%ecx |
| 3011 addl %ebx,%eax |
| 3012 addl 52(%rsp),%ebp |
| 3013 vpsrld $30,%xmm7,%xmm8 |
| 3014 vmovdqa %xmm9,32(%rsp) |
| 3015 andl %ecx,%edi |
| 3016 xorl %edx,%ecx |
| 3017 shrdl $7,%ebx,%ebx |
| 3018 movl %eax,%esi |
| 3019 vpslld $2,%xmm7,%xmm7 |
| 3020 xorl %ecx,%edi |
| 3021 shldl $5,%eax,%eax |
| 3022 addl %edi,%ebp |
| 3023 xorl %ebx,%esi |
| 3024 xorl %ecx,%ebx |
| 3025 addl %eax,%ebp |
| 3026 addl 56(%rsp),%edx |
| 3027 andl %ebx,%esi |
| 3028 vpor %xmm8,%xmm7,%xmm7 |
| 3029 xorl %ecx,%ebx |
| 3030 shrdl $7,%eax,%eax |
| 3031 movl %ebp,%edi |
| 3032 xorl %ebx,%esi |
| 3033 shldl $5,%ebp,%ebp |
| 3034 addl %esi,%edx |
| 3035 xorl %eax,%edi |
| 3036 xorl %ebx,%eax |
| 3037 addl %ebp,%edx |
| 3038 addl 60(%rsp),%ecx |
| 3039 andl %eax,%edi |
| 3040 xorl %ebx,%eax |
| 3041 shrdl $7,%ebp,%ebp |
| 3042 movl %edx,%esi |
| 3043 xorl %eax,%edi |
| 3044 shldl $5,%edx,%edx |
| 3045 addl %edi,%ecx |
| 3046 xorl %ebp,%esi |
| 3047 xorl %eax,%ebp |
| 3048 addl %edx,%ecx |
| 3049 vpalignr $8,%xmm6,%xmm7,%xmm8 |
| 3050 vpxor %xmm4,%xmm0,%xmm0 |
| 3051 addl 0(%rsp),%ebx |
| 3052 andl %ebp,%esi |
| 3053 xorl %eax,%ebp |
| 3054 shrdl $7,%edx,%edx |
| 3055 vpxor %xmm1,%xmm0,%xmm0 |
| 3056 movl %ecx,%edi |
| 3057 xorl %ebp,%esi |
| 3058 vpaddd %xmm7,%xmm11,%xmm9 |
| 3059 shldl $5,%ecx,%ecx |
| 3060 addl %esi,%ebx |
| 3061 vpxor %xmm8,%xmm0,%xmm0 |
| 3062 xorl %edx,%edi |
| 3063 xorl %ebp,%edx |
| 3064 addl %ecx,%ebx |
| 3065 addl 4(%rsp),%eax |
| 3066 vpsrld $30,%xmm0,%xmm8 |
| 3067 vmovdqa %xmm9,48(%rsp) |
| 3068 andl %edx,%edi |
| 3069 xorl %ebp,%edx |
| 3070 shrdl $7,%ecx,%ecx |
| 3071 movl %ebx,%esi |
| 3072 vpslld $2,%xmm0,%xmm0 |
| 3073 xorl %edx,%edi |
| 3074 shldl $5,%ebx,%ebx |
| 3075 addl %edi,%eax |
| 3076 xorl %ecx,%esi |
| 3077 xorl %edx,%ecx |
| 3078 addl %ebx,%eax |
| 3079 addl 8(%rsp),%ebp |
| 3080 andl %ecx,%esi |
| 3081 vpor %xmm8,%xmm0,%xmm0 |
| 3082 xorl %edx,%ecx |
| 3083 shrdl $7,%ebx,%ebx |
| 3084 movl %eax,%edi |
| 3085 xorl %ecx,%esi |
| 3086 shldl $5,%eax,%eax |
| 3087 addl %esi,%ebp |
| 3088 xorl %ebx,%edi |
| 3089 xorl %ecx,%ebx |
| 3090 addl %eax,%ebp |
| 3091 addl 12(%rsp),%edx |
| 3092 andl %ebx,%edi |
| 3093 xorl %ecx,%ebx |
| 3094 shrdl $7,%eax,%eax |
| 3095 movl %ebp,%esi |
| 3096 xorl %ebx,%edi |
| 3097 shldl $5,%ebp,%ebp |
| 3098 addl %edi,%edx |
| 3099 xorl %eax,%esi |
| 3100 xorl %ebx,%eax |
| 3101 addl %ebp,%edx |
| 3102 vpalignr $8,%xmm7,%xmm0,%xmm8 |
| 3103 vpxor %xmm5,%xmm1,%xmm1 |
| 3104 addl 16(%rsp),%ecx |
| 3105 andl %eax,%esi |
| 3106 xorl %ebx,%eax |
| 3107 shrdl $7,%ebp,%ebp |
| 3108 vpxor %xmm2,%xmm1,%xmm1 |
| 3109 movl %edx,%edi |
| 3110 xorl %eax,%esi |
| 3111 vpaddd %xmm0,%xmm11,%xmm9 |
| 3112 shldl $5,%edx,%edx |
| 3113 addl %esi,%ecx |
| 3114 vpxor %xmm8,%xmm1,%xmm1 |
| 3115 xorl %ebp,%edi |
| 3116 xorl %eax,%ebp |
| 3117 addl %edx,%ecx |
| 3118 addl 20(%rsp),%ebx |
| 3119 vpsrld $30,%xmm1,%xmm8 |
| 3120 vmovdqa %xmm9,0(%rsp) |
| 3121 andl %ebp,%edi |
| 3122 xorl %eax,%ebp |
| 3123 shrdl $7,%edx,%edx |
| 3124 movl %ecx,%esi |
| 3125 vpslld $2,%xmm1,%xmm1 |
| 3126 xorl %ebp,%edi |
| 3127 shldl $5,%ecx,%ecx |
| 3128 addl %edi,%ebx |
| 3129 xorl %edx,%esi |
| 3130 xorl %ebp,%edx |
| 3131 addl %ecx,%ebx |
| 3132 addl 24(%rsp),%eax |
| 3133 andl %edx,%esi |
| 3134 vpor %xmm8,%xmm1,%xmm1 |
| 3135 xorl %ebp,%edx |
| 3136 shrdl $7,%ecx,%ecx |
| 3137 movl %ebx,%edi |
| 3138 xorl %edx,%esi |
| 3139 shldl $5,%ebx,%ebx |
| 3140 addl %esi,%eax |
| 3141 xorl %ecx,%edi |
| 3142 xorl %edx,%ecx |
| 3143 addl %ebx,%eax |
| 3144 addl 28(%rsp),%ebp |
| 3145 andl %ecx,%edi |
| 3146 xorl %edx,%ecx |
| 3147 shrdl $7,%ebx,%ebx |
| 3148 movl %eax,%esi |
| 3149 xorl %ecx,%edi |
| 3150 shldl $5,%eax,%eax |
| 3151 addl %edi,%ebp |
| 3152 xorl %ebx,%esi |
| 3153 xorl %ecx,%ebx |
| 3154 addl %eax,%ebp |
| 3155 vpalignr $8,%xmm0,%xmm1,%xmm8 |
| 3156 vpxor %xmm6,%xmm2,%xmm2 |
| 3157 addl 32(%rsp),%edx |
| 3158 andl %ebx,%esi |
| 3159 xorl %ecx,%ebx |
| 3160 shrdl $7,%eax,%eax |
| 3161 vpxor %xmm3,%xmm2,%xmm2 |
| 3162 movl %ebp,%edi |
| 3163 xorl %ebx,%esi |
| 3164 vpaddd %xmm1,%xmm11,%xmm9 |
| 3165 shldl $5,%ebp,%ebp |
| 3166 addl %esi,%edx |
| 3167 vpxor %xmm8,%xmm2,%xmm2 |
| 3168 xorl %eax,%edi |
| 3169 xorl %ebx,%eax |
| 3170 addl %ebp,%edx |
| 3171 addl 36(%rsp),%ecx |
| 3172 vpsrld $30,%xmm2,%xmm8 |
| 3173 vmovdqa %xmm9,16(%rsp) |
| 3174 andl %eax,%edi |
| 3175 xorl %ebx,%eax |
| 3176 shrdl $7,%ebp,%ebp |
| 3177 movl %edx,%esi |
| 3178 vpslld $2,%xmm2,%xmm2 |
| 3179 xorl %eax,%edi |
| 3180 shldl $5,%edx,%edx |
| 3181 addl %edi,%ecx |
| 3182 xorl %ebp,%esi |
| 3183 xorl %eax,%ebp |
| 3184 addl %edx,%ecx |
| 3185 addl 40(%rsp),%ebx |
| 3186 andl %ebp,%esi |
| 3187 vpor %xmm8,%xmm2,%xmm2 |
| 3188 xorl %eax,%ebp |
| 3189 shrdl $7,%edx,%edx |
| 3190 movl %ecx,%edi |
| 3191 xorl %ebp,%esi |
| 3192 shldl $5,%ecx,%ecx |
| 3193 addl %esi,%ebx |
| 3194 xorl %edx,%edi |
| 3195 xorl %ebp,%edx |
| 3196 addl %ecx,%ebx |
| 3197 addl 44(%rsp),%eax |
| 3198 andl %edx,%edi |
| 3199 xorl %ebp,%edx |
| 3200 shrdl $7,%ecx,%ecx |
| 3201 movl %ebx,%esi |
| 3202 xorl %edx,%edi |
| 3203 shldl $5,%ebx,%ebx |
| 3204 addl %edi,%eax |
| 3205 xorl %edx,%esi |
| 3206 addl %ebx,%eax |
| 3207 vpalignr $8,%xmm1,%xmm2,%xmm8 |
| 3208 vpxor %xmm7,%xmm3,%xmm3 |
| 3209 addl 48(%rsp),%ebp |
| 3210 xorl %ecx,%esi |
| 3211 movl %eax,%edi |
| 3212 shldl $5,%eax,%eax |
| 3213 vpxor %xmm4,%xmm3,%xmm3 |
| 3214 addl %esi,%ebp |
| 3215 xorl %ecx,%edi |
| 3216 vpaddd %xmm2,%xmm11,%xmm9 |
| 3217 shrdl $7,%ebx,%ebx |
| 3218 addl %eax,%ebp |
| 3219 vpxor %xmm8,%xmm3,%xmm3 |
| 3220 addl 52(%rsp),%edx |
| 3221 xorl %ebx,%edi |
| 3222 movl %ebp,%esi |
| 3223 shldl $5,%ebp,%ebp |
| 3224 vpsrld $30,%xmm3,%xmm8 |
| 3225 vmovdqa %xmm9,32(%rsp) |
| 3226 addl %edi,%edx |
| 3227 xorl %ebx,%esi |
| 3228 shrdl $7,%eax,%eax |
| 3229 addl %ebp,%edx |
| 3230 vpslld $2,%xmm3,%xmm3 |
| 3231 addl 56(%rsp),%ecx |
| 3232 xorl %eax,%esi |
| 3233 movl %edx,%edi |
| 3234 shldl $5,%edx,%edx |
| 3235 addl %esi,%ecx |
| 3236 xorl %eax,%edi |
| 3237 shrdl $7,%ebp,%ebp |
| 3238 addl %edx,%ecx |
| 3239 vpor %xmm8,%xmm3,%xmm3 |
| 3240 addl 60(%rsp),%ebx |
| 3241 xorl %ebp,%edi |
| 3242 movl %ecx,%esi |
| 3243 shldl $5,%ecx,%ecx |
| 3244 addl %edi,%ebx |
| 3245 xorl %ebp,%esi |
| 3246 shrdl $7,%edx,%edx |
| 3247 addl %ecx,%ebx |
| 3248 addl 0(%rsp),%eax |
| 3249 vpaddd %xmm3,%xmm11,%xmm9 |
| 3250 xorl %edx,%esi |
| 3251 movl %ebx,%edi |
| 3252 shldl $5,%ebx,%ebx |
| 3253 addl %esi,%eax |
| 3254 vmovdqa %xmm9,48(%rsp) |
| 3255 xorl %edx,%edi |
| 3256 shrdl $7,%ecx,%ecx |
| 3257 addl %ebx,%eax |
| 3258 addl 4(%rsp),%ebp |
| 3259 xorl %ecx,%edi |
| 3260 movl %eax,%esi |
| 3261 shldl $5,%eax,%eax |
| 3262 addl %edi,%ebp |
| 3263 xorl %ecx,%esi |
| 3264 shrdl $7,%ebx,%ebx |
| 3265 addl %eax,%ebp |
| 3266 addl 8(%rsp),%edx |
| 3267 xorl %ebx,%esi |
| 3268 movl %ebp,%edi |
| 3269 shldl $5,%ebp,%ebp |
| 3270 addl %esi,%edx |
| 3271 xorl %ebx,%edi |
| 3272 shrdl $7,%eax,%eax |
| 3273 addl %ebp,%edx |
| 3274 addl 12(%rsp),%ecx |
| 3275 xorl %eax,%edi |
| 3276 movl %edx,%esi |
| 3277 shldl $5,%edx,%edx |
| 3278 addl %edi,%ecx |
| 3279 xorl %eax,%esi |
| 3280 shrdl $7,%ebp,%ebp |
| 3281 addl %edx,%ecx |
| 3282 cmpq %r10,%r9 |
| 3283 je L$done_avx |
| 3284 vmovdqa 64(%r11),%xmm6 |
| 3285 vmovdqa -64(%r11),%xmm11 |
| 3286 vmovdqu 0(%r9),%xmm0 |
| 3287 vmovdqu 16(%r9),%xmm1 |
| 3288 vmovdqu 32(%r9),%xmm2 |
| 3289 vmovdqu 48(%r9),%xmm3 |
| 3290 vpshufb %xmm6,%xmm0,%xmm0 |
| 3291 addq $64,%r9 |
| 3292 addl 16(%rsp),%ebx |
| 3293 xorl %ebp,%esi |
| 3294 vpshufb %xmm6,%xmm1,%xmm1 |
| 3295 movl %ecx,%edi |
| 3296 shldl $5,%ecx,%ecx |
| 3297 vpaddd %xmm11,%xmm0,%xmm4 |
| 3298 addl %esi,%ebx |
| 3299 xorl %ebp,%edi |
| 3300 shrdl $7,%edx,%edx |
| 3301 addl %ecx,%ebx |
| 3302 vmovdqa %xmm4,0(%rsp) |
| 3303 addl 20(%rsp),%eax |
| 3304 xorl %edx,%edi |
| 3305 movl %ebx,%esi |
| 3306 shldl $5,%ebx,%ebx |
| 3307 addl %edi,%eax |
| 3308 xorl %edx,%esi |
| 3309 shrdl $7,%ecx,%ecx |
| 3310 addl %ebx,%eax |
| 3311 addl 24(%rsp),%ebp |
| 3312 xorl %ecx,%esi |
| 3313 movl %eax,%edi |
| 3314 shldl $5,%eax,%eax |
| 3315 addl %esi,%ebp |
| 3316 xorl %ecx,%edi |
| 3317 shrdl $7,%ebx,%ebx |
| 3318 addl %eax,%ebp |
| 3319 addl 28(%rsp),%edx |
| 3320 xorl %ebx,%edi |
| 3321 movl %ebp,%esi |
| 3322 shldl $5,%ebp,%ebp |
| 3323 addl %edi,%edx |
| 3324 xorl %ebx,%esi |
| 3325 shrdl $7,%eax,%eax |
| 3326 addl %ebp,%edx |
| 3327 addl 32(%rsp),%ecx |
| 3328 xorl %eax,%esi |
| 3329 vpshufb %xmm6,%xmm2,%xmm2 |
| 3330 movl %edx,%edi |
| 3331 shldl $5,%edx,%edx |
| 3332 vpaddd %xmm11,%xmm1,%xmm5 |
| 3333 addl %esi,%ecx |
| 3334 xorl %eax,%edi |
| 3335 shrdl $7,%ebp,%ebp |
| 3336 addl %edx,%ecx |
| 3337 vmovdqa %xmm5,16(%rsp) |
| 3338 addl 36(%rsp),%ebx |
| 3339 xorl %ebp,%edi |
| 3340 movl %ecx,%esi |
| 3341 shldl $5,%ecx,%ecx |
| 3342 addl %edi,%ebx |
| 3343 xorl %ebp,%esi |
| 3344 shrdl $7,%edx,%edx |
| 3345 addl %ecx,%ebx |
| 3346 addl 40(%rsp),%eax |
| 3347 xorl %edx,%esi |
| 3348 movl %ebx,%edi |
| 3349 shldl $5,%ebx,%ebx |
| 3350 addl %esi,%eax |
| 3351 xorl %edx,%edi |
| 3352 shrdl $7,%ecx,%ecx |
| 3353 addl %ebx,%eax |
| 3354 addl 44(%rsp),%ebp |
| 3355 xorl %ecx,%edi |
| 3356 movl %eax,%esi |
| 3357 shldl $5,%eax,%eax |
| 3358 addl %edi,%ebp |
| 3359 xorl %ecx,%esi |
| 3360 shrdl $7,%ebx,%ebx |
| 3361 addl %eax,%ebp |
| 3362 addl 48(%rsp),%edx |
| 3363 xorl %ebx,%esi |
| 3364 vpshufb %xmm6,%xmm3,%xmm3 |
| 3365 movl %ebp,%edi |
| 3366 shldl $5,%ebp,%ebp |
| 3367 vpaddd %xmm11,%xmm2,%xmm6 |
| 3368 addl %esi,%edx |
| 3369 xorl %ebx,%edi |
| 3370 shrdl $7,%eax,%eax |
| 3371 addl %ebp,%edx |
| 3372 vmovdqa %xmm6,32(%rsp) |
| 3373 addl 52(%rsp),%ecx |
| 3374 xorl %eax,%edi |
| 3375 movl %edx,%esi |
| 3376 shldl $5,%edx,%edx |
| 3377 addl %edi,%ecx |
| 3378 xorl %eax,%esi |
| 3379 shrdl $7,%ebp,%ebp |
| 3380 addl %edx,%ecx |
| 3381 addl 56(%rsp),%ebx |
| 3382 xorl %ebp,%esi |
| 3383 movl %ecx,%edi |
| 3384 shldl $5,%ecx,%ecx |
| 3385 addl %esi,%ebx |
| 3386 xorl %ebp,%edi |
| 3387 shrdl $7,%edx,%edx |
| 3388 addl %ecx,%ebx |
| 3389 addl 60(%rsp),%eax |
| 3390 xorl %edx,%edi |
| 3391 movl %ebx,%esi |
| 3392 shldl $5,%ebx,%ebx |
| 3393 addl %edi,%eax |
| 3394 shrdl $7,%ecx,%ecx |
| 3395 addl %ebx,%eax |
| 3396 addl 0(%r8),%eax |
| 3397 addl 4(%r8),%esi |
| 3398 addl 8(%r8),%ecx |
| 3399 addl 12(%r8),%edx |
| 3400 movl %eax,0(%r8) |
| 3401 addl 16(%r8),%ebp |
| 3402 movl %esi,4(%r8) |
| 3403 movl %esi,%ebx |
| 3404 movl %ecx,8(%r8) |
| 3405 movl %ecx,%edi |
| 3406 movl %edx,12(%r8) |
| 3407 xorl %edx,%edi |
| 3408 movl %ebp,16(%r8) |
| 3409 andl %edi,%esi |
| 3410 jmp L$oop_avx |
| 3411 |
| 3412 .p2align 4 |
| 3413 L$done_avx: |
| 3414 addl 16(%rsp),%ebx |
| 3415 xorl %ebp,%esi |
| 3416 movl %ecx,%edi |
| 3417 shldl $5,%ecx,%ecx |
| 3418 addl %esi,%ebx |
| 3419 xorl %ebp,%edi |
| 3420 shrdl $7,%edx,%edx |
| 3421 addl %ecx,%ebx |
| 3422 addl 20(%rsp),%eax |
| 3423 xorl %edx,%edi |
| 3424 movl %ebx,%esi |
| 3425 shldl $5,%ebx,%ebx |
| 3426 addl %edi,%eax |
| 3427 xorl %edx,%esi |
| 3428 shrdl $7,%ecx,%ecx |
| 3429 addl %ebx,%eax |
| 3430 addl 24(%rsp),%ebp |
| 3431 xorl %ecx,%esi |
| 3432 movl %eax,%edi |
| 3433 shldl $5,%eax,%eax |
| 3434 addl %esi,%ebp |
| 3435 xorl %ecx,%edi |
| 3436 shrdl $7,%ebx,%ebx |
| 3437 addl %eax,%ebp |
| 3438 addl 28(%rsp),%edx |
| 3439 xorl %ebx,%edi |
| 3440 movl %ebp,%esi |
| 3441 shldl $5,%ebp,%ebp |
| 3442 addl %edi,%edx |
| 3443 xorl %ebx,%esi |
| 3444 shrdl $7,%eax,%eax |
| 3445 addl %ebp,%edx |
| 3446 addl 32(%rsp),%ecx |
| 3447 xorl %eax,%esi |
| 3448 movl %edx,%edi |
| 3449 shldl $5,%edx,%edx |
| 3450 addl %esi,%ecx |
| 3451 xorl %eax,%edi |
| 3452 shrdl $7,%ebp,%ebp |
| 3453 addl %edx,%ecx |
| 3454 addl 36(%rsp),%ebx |
| 3455 xorl %ebp,%edi |
| 3456 movl %ecx,%esi |
| 3457 shldl $5,%ecx,%ecx |
| 3458 addl %edi,%ebx |
| 3459 xorl %ebp,%esi |
| 3460 shrdl $7,%edx,%edx |
| 3461 addl %ecx,%ebx |
| 3462 addl 40(%rsp),%eax |
| 3463 xorl %edx,%esi |
| 3464 movl %ebx,%edi |
| 3465 shldl $5,%ebx,%ebx |
| 3466 addl %esi,%eax |
| 3467 xorl %edx,%edi |
| 3468 shrdl $7,%ecx,%ecx |
| 3469 addl %ebx,%eax |
| 3470 addl 44(%rsp),%ebp |
| 3471 xorl %ecx,%edi |
| 3472 movl %eax,%esi |
| 3473 shldl $5,%eax,%eax |
| 3474 addl %edi,%ebp |
| 3475 xorl %ecx,%esi |
| 3476 shrdl $7,%ebx,%ebx |
| 3477 addl %eax,%ebp |
| 3478 addl 48(%rsp),%edx |
| 3479 xorl %ebx,%esi |
| 3480 movl %ebp,%edi |
| 3481 shldl $5,%ebp,%ebp |
| 3482 addl %esi,%edx |
| 3483 xorl %ebx,%edi |
| 3484 shrdl $7,%eax,%eax |
| 3485 addl %ebp,%edx |
| 3486 addl 52(%rsp),%ecx |
| 3487 xorl %eax,%edi |
| 3488 movl %edx,%esi |
| 3489 shldl $5,%edx,%edx |
| 3490 addl %edi,%ecx |
| 3491 xorl %eax,%esi |
| 3492 shrdl $7,%ebp,%ebp |
| 3493 addl %edx,%ecx |
| 3494 addl 56(%rsp),%ebx |
| 3495 xorl %ebp,%esi |
| 3496 movl %ecx,%edi |
| 3497 shldl $5,%ecx,%ecx |
| 3498 addl %esi,%ebx |
| 3499 xorl %ebp,%edi |
| 3500 shrdl $7,%edx,%edx |
| 3501 addl %ecx,%ebx |
| 3502 addl 60(%rsp),%eax |
| 3503 xorl %edx,%edi |
| 3504 movl %ebx,%esi |
| 3505 shldl $5,%ebx,%ebx |
| 3506 addl %edi,%eax |
| 3507 shrdl $7,%ecx,%ecx |
| 3508 addl %ebx,%eax |
| 3509 vzeroupper |
| 3510 |
| 3511 addl 0(%r8),%eax |
| 3512 addl 4(%r8),%esi |
| 3513 addl 8(%r8),%ecx |
| 3514 movl %eax,0(%r8) |
| 3515 addl 12(%r8),%edx |
| 3516 movl %esi,4(%r8) |
| 3517 addl 16(%r8),%ebp |
| 3518 movl %ecx,8(%r8) |
| 3519 movl %edx,12(%r8) |
| 3520 movl %ebp,16(%r8) |
| 3521 leaq (%r14),%rsi |
| 3522 movq -40(%rsi),%r14 |
| 3523 movq -32(%rsi),%r13 |
| 3524 movq -24(%rsi),%r12 |
| 3525 movq -16(%rsi),%rbp |
| 3526 movq -8(%rsi),%rbx |
| 3527 leaq (%rsi),%rsp |
| 3528 L$epilogue_avx: |
| 3529 .byte 0xf3,0xc3 |
| 3530 |
2410 .p2align 6 | 3531 .p2align 6 |
2411 K_XX_XX: | 3532 K_XX_XX: |
2412 .long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 | 3533 .long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 |
2413 .long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 | 3534 .long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 |
2414 .long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 | 3535 .long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 |
2415 .long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 | 3536 .long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 |
2416 .long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc | 3537 .long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc |
2417 .long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc | 3538 .long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc |
2418 .long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 | 3539 .long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 |
2419 .long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 | 3540 .long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 |
2420 .long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f | 3541 .long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f |
2421 .long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f | 3542 .long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f |
2422 .byte 0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0 | 3543 .byte 0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0 |
2423 .byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,3
2,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,12
1,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 | 3544 .byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,3
2,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,12
1,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 |
2424 .p2align 6 | 3545 .p2align 6 |
2425 #endif | 3546 #endif |
OLD | NEW |