OLD | NEW |
1 #if defined(__x86_64__) | 1 #if defined(__x86_64__) |
2 .text | 2 .text |
3 | 3 |
4 | 4 |
5 .globl _sha1_block_data_order | 5 .globl _sha1_block_data_order |
6 .private_extern _sha1_block_data_order | 6 .private_extern _sha1_block_data_order |
7 | 7 |
8 .p2align 4 | 8 .p2align 4 |
9 _sha1_block_data_order: | 9 _sha1_block_data_order: |
10 movl _OPENSSL_ia32cap_P+0(%rip),%r9d | 10 movl _OPENSSL_ia32cap_P+0(%rip),%r9d |
11 movl _OPENSSL_ia32cap_P+4(%rip),%r8d | 11 movl _OPENSSL_ia32cap_P+4(%rip),%r8d |
12 movl _OPENSSL_ia32cap_P+8(%rip),%r10d | 12 movl _OPENSSL_ia32cap_P+8(%rip),%r10d |
13 testl $512,%r8d | 13 testl $512,%r8d |
14 jz L$ialu | 14 jz L$ialu |
15 andl $268435456,%r8d | |
16 andl $1073741824,%r9d | |
17 orl %r9d,%r8d | |
18 cmpl $1342177280,%r8d | |
19 je _avx_shortcut | |
20 jmp _ssse3_shortcut | 15 jmp _ssse3_shortcut |
21 | 16 |
22 .p2align 4 | 17 .p2align 4 |
23 L$ialu: | 18 L$ialu: |
24 movq %rsp,%rax | 19 movq %rsp,%rax |
25 pushq %rbx | 20 pushq %rbx |
26 pushq %rbp | 21 pushq %rbp |
27 pushq %r12 | 22 pushq %r12 |
28 pushq %r13 | 23 pushq %r13 |
29 pushq %r14 | 24 pushq %r14 |
(...skipping 2375 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2405 leaq (%r14),%rsi | 2400 leaq (%r14),%rsi |
2406 movq -40(%rsi),%r14 | 2401 movq -40(%rsi),%r14 |
2407 movq -32(%rsi),%r13 | 2402 movq -32(%rsi),%r13 |
2408 movq -24(%rsi),%r12 | 2403 movq -24(%rsi),%r12 |
2409 movq -16(%rsi),%rbp | 2404 movq -16(%rsi),%rbp |
2410 movq -8(%rsi),%rbx | 2405 movq -8(%rsi),%rbx |
2411 leaq (%rsi),%rsp | 2406 leaq (%rsi),%rsp |
2412 L$epilogue_ssse3: | 2407 L$epilogue_ssse3: |
2413 .byte 0xf3,0xc3 | 2408 .byte 0xf3,0xc3 |
2414 | 2409 |
2415 | |
2416 .p2align 4 | |
2417 sha1_block_data_order_avx: | |
2418 _avx_shortcut: | |
2419 movq %rsp,%rax | |
2420 pushq %rbx | |
2421 pushq %rbp | |
2422 pushq %r12 | |
2423 pushq %r13 | |
2424 pushq %r14 | |
2425 leaq -64(%rsp),%rsp | |
2426 vzeroupper | |
2427 movq %rax,%r14 | |
2428 andq $-64,%rsp | |
2429 movq %rdi,%r8 | |
2430 movq %rsi,%r9 | |
2431 movq %rdx,%r10 | |
2432 | |
2433 shlq $6,%r10 | |
2434 addq %r9,%r10 | |
2435 leaq K_XX_XX+64(%rip),%r11 | |
2436 | |
2437 movl 0(%r8),%eax | |
2438 movl 4(%r8),%ebx | |
2439 movl 8(%r8),%ecx | |
2440 movl 12(%r8),%edx | |
2441 movl %ebx,%esi | |
2442 movl 16(%r8),%ebp | |
2443 movl %ecx,%edi | |
2444 xorl %edx,%edi | |
2445 andl %edi,%esi | |
2446 | |
2447 vmovdqa 64(%r11),%xmm6 | |
2448 vmovdqa -64(%r11),%xmm11 | |
2449 vmovdqu 0(%r9),%xmm0 | |
2450 vmovdqu 16(%r9),%xmm1 | |
2451 vmovdqu 32(%r9),%xmm2 | |
2452 vmovdqu 48(%r9),%xmm3 | |
2453 vpshufb %xmm6,%xmm0,%xmm0 | |
2454 addq $64,%r9 | |
2455 vpshufb %xmm6,%xmm1,%xmm1 | |
2456 vpshufb %xmm6,%xmm2,%xmm2 | |
2457 vpshufb %xmm6,%xmm3,%xmm3 | |
2458 vpaddd %xmm11,%xmm0,%xmm4 | |
2459 vpaddd %xmm11,%xmm1,%xmm5 | |
2460 vpaddd %xmm11,%xmm2,%xmm6 | |
2461 vmovdqa %xmm4,0(%rsp) | |
2462 vmovdqa %xmm5,16(%rsp) | |
2463 vmovdqa %xmm6,32(%rsp) | |
2464 jmp L$oop_avx | |
2465 .p2align 4 | |
2466 L$oop_avx: | |
2467 shrdl $2,%ebx,%ebx | |
2468 xorl %edx,%esi | |
2469 vpalignr $8,%xmm0,%xmm1,%xmm4 | |
2470 movl %eax,%edi | |
2471 addl 0(%rsp),%ebp | |
2472 vpaddd %xmm3,%xmm11,%xmm9 | |
2473 xorl %ecx,%ebx | |
2474 shldl $5,%eax,%eax | |
2475 vpsrldq $4,%xmm3,%xmm8 | |
2476 addl %esi,%ebp | |
2477 andl %ebx,%edi | |
2478 vpxor %xmm0,%xmm4,%xmm4 | |
2479 xorl %ecx,%ebx | |
2480 addl %eax,%ebp | |
2481 vpxor %xmm2,%xmm8,%xmm8 | |
2482 shrdl $7,%eax,%eax | |
2483 xorl %ecx,%edi | |
2484 movl %ebp,%esi | |
2485 addl 4(%rsp),%edx | |
2486 vpxor %xmm8,%xmm4,%xmm4 | |
2487 xorl %ebx,%eax | |
2488 shldl $5,%ebp,%ebp | |
2489 vmovdqa %xmm9,48(%rsp) | |
2490 addl %edi,%edx | |
2491 andl %eax,%esi | |
2492 vpsrld $31,%xmm4,%xmm8 | |
2493 xorl %ebx,%eax | |
2494 addl %ebp,%edx | |
2495 shrdl $7,%ebp,%ebp | |
2496 xorl %ebx,%esi | |
2497 vpslldq $12,%xmm4,%xmm10 | |
2498 vpaddd %xmm4,%xmm4,%xmm4 | |
2499 movl %edx,%edi | |
2500 addl 8(%rsp),%ecx | |
2501 xorl %eax,%ebp | |
2502 shldl $5,%edx,%edx | |
2503 vpsrld $30,%xmm10,%xmm9 | |
2504 vpor %xmm8,%xmm4,%xmm4 | |
2505 addl %esi,%ecx | |
2506 andl %ebp,%edi | |
2507 xorl %eax,%ebp | |
2508 addl %edx,%ecx | |
2509 vpslld $2,%xmm10,%xmm10 | |
2510 vpxor %xmm9,%xmm4,%xmm4 | |
2511 shrdl $7,%edx,%edx | |
2512 xorl %eax,%edi | |
2513 movl %ecx,%esi | |
2514 addl 12(%rsp),%ebx | |
2515 vpxor %xmm10,%xmm4,%xmm4 | |
2516 xorl %ebp,%edx | |
2517 shldl $5,%ecx,%ecx | |
2518 addl %edi,%ebx | |
2519 andl %edx,%esi | |
2520 xorl %ebp,%edx | |
2521 addl %ecx,%ebx | |
2522 shrdl $7,%ecx,%ecx | |
2523 xorl %ebp,%esi | |
2524 vpalignr $8,%xmm1,%xmm2,%xmm5 | |
2525 movl %ebx,%edi | |
2526 addl 16(%rsp),%eax | |
2527 vpaddd %xmm4,%xmm11,%xmm9 | |
2528 xorl %edx,%ecx | |
2529 shldl $5,%ebx,%ebx | |
2530 vpsrldq $4,%xmm4,%xmm8 | |
2531 addl %esi,%eax | |
2532 andl %ecx,%edi | |
2533 vpxor %xmm1,%xmm5,%xmm5 | |
2534 xorl %edx,%ecx | |
2535 addl %ebx,%eax | |
2536 vpxor %xmm3,%xmm8,%xmm8 | |
2537 shrdl $7,%ebx,%ebx | |
2538 xorl %edx,%edi | |
2539 movl %eax,%esi | |
2540 addl 20(%rsp),%ebp | |
2541 vpxor %xmm8,%xmm5,%xmm5 | |
2542 xorl %ecx,%ebx | |
2543 shldl $5,%eax,%eax | |
2544 vmovdqa %xmm9,0(%rsp) | |
2545 addl %edi,%ebp | |
2546 andl %ebx,%esi | |
2547 vpsrld $31,%xmm5,%xmm8 | |
2548 xorl %ecx,%ebx | |
2549 addl %eax,%ebp | |
2550 shrdl $7,%eax,%eax | |
2551 xorl %ecx,%esi | |
2552 vpslldq $12,%xmm5,%xmm10 | |
2553 vpaddd %xmm5,%xmm5,%xmm5 | |
2554 movl %ebp,%edi | |
2555 addl 24(%rsp),%edx | |
2556 xorl %ebx,%eax | |
2557 shldl $5,%ebp,%ebp | |
2558 vpsrld $30,%xmm10,%xmm9 | |
2559 vpor %xmm8,%xmm5,%xmm5 | |
2560 addl %esi,%edx | |
2561 andl %eax,%edi | |
2562 xorl %ebx,%eax | |
2563 addl %ebp,%edx | |
2564 vpslld $2,%xmm10,%xmm10 | |
2565 vpxor %xmm9,%xmm5,%xmm5 | |
2566 shrdl $7,%ebp,%ebp | |
2567 xorl %ebx,%edi | |
2568 movl %edx,%esi | |
2569 addl 28(%rsp),%ecx | |
2570 vpxor %xmm10,%xmm5,%xmm5 | |
2571 xorl %eax,%ebp | |
2572 shldl $5,%edx,%edx | |
2573 vmovdqa -32(%r11),%xmm11 | |
2574 addl %edi,%ecx | |
2575 andl %ebp,%esi | |
2576 xorl %eax,%ebp | |
2577 addl %edx,%ecx | |
2578 shrdl $7,%edx,%edx | |
2579 xorl %eax,%esi | |
2580 vpalignr $8,%xmm2,%xmm3,%xmm6 | |
2581 movl %ecx,%edi | |
2582 addl 32(%rsp),%ebx | |
2583 vpaddd %xmm5,%xmm11,%xmm9 | |
2584 xorl %ebp,%edx | |
2585 shldl $5,%ecx,%ecx | |
2586 vpsrldq $4,%xmm5,%xmm8 | |
2587 addl %esi,%ebx | |
2588 andl %edx,%edi | |
2589 vpxor %xmm2,%xmm6,%xmm6 | |
2590 xorl %ebp,%edx | |
2591 addl %ecx,%ebx | |
2592 vpxor %xmm4,%xmm8,%xmm8 | |
2593 shrdl $7,%ecx,%ecx | |
2594 xorl %ebp,%edi | |
2595 movl %ebx,%esi | |
2596 addl 36(%rsp),%eax | |
2597 vpxor %xmm8,%xmm6,%xmm6 | |
2598 xorl %edx,%ecx | |
2599 shldl $5,%ebx,%ebx | |
2600 vmovdqa %xmm9,16(%rsp) | |
2601 addl %edi,%eax | |
2602 andl %ecx,%esi | |
2603 vpsrld $31,%xmm6,%xmm8 | |
2604 xorl %edx,%ecx | |
2605 addl %ebx,%eax | |
2606 shrdl $7,%ebx,%ebx | |
2607 xorl %edx,%esi | |
2608 vpslldq $12,%xmm6,%xmm10 | |
2609 vpaddd %xmm6,%xmm6,%xmm6 | |
2610 movl %eax,%edi | |
2611 addl 40(%rsp),%ebp | |
2612 xorl %ecx,%ebx | |
2613 shldl $5,%eax,%eax | |
2614 vpsrld $30,%xmm10,%xmm9 | |
2615 vpor %xmm8,%xmm6,%xmm6 | |
2616 addl %esi,%ebp | |
2617 andl %ebx,%edi | |
2618 xorl %ecx,%ebx | |
2619 addl %eax,%ebp | |
2620 vpslld $2,%xmm10,%xmm10 | |
2621 vpxor %xmm9,%xmm6,%xmm6 | |
2622 shrdl $7,%eax,%eax | |
2623 xorl %ecx,%edi | |
2624 movl %ebp,%esi | |
2625 addl 44(%rsp),%edx | |
2626 vpxor %xmm10,%xmm6,%xmm6 | |
2627 xorl %ebx,%eax | |
2628 shldl $5,%ebp,%ebp | |
2629 addl %edi,%edx | |
2630 andl %eax,%esi | |
2631 xorl %ebx,%eax | |
2632 addl %ebp,%edx | |
2633 shrdl $7,%ebp,%ebp | |
2634 xorl %ebx,%esi | |
2635 vpalignr $8,%xmm3,%xmm4,%xmm7 | |
2636 movl %edx,%edi | |
2637 addl 48(%rsp),%ecx | |
2638 vpaddd %xmm6,%xmm11,%xmm9 | |
2639 xorl %eax,%ebp | |
2640 shldl $5,%edx,%edx | |
2641 vpsrldq $4,%xmm6,%xmm8 | |
2642 addl %esi,%ecx | |
2643 andl %ebp,%edi | |
2644 vpxor %xmm3,%xmm7,%xmm7 | |
2645 xorl %eax,%ebp | |
2646 addl %edx,%ecx | |
2647 vpxor %xmm5,%xmm8,%xmm8 | |
2648 shrdl $7,%edx,%edx | |
2649 xorl %eax,%edi | |
2650 movl %ecx,%esi | |
2651 addl 52(%rsp),%ebx | |
2652 vpxor %xmm8,%xmm7,%xmm7 | |
2653 xorl %ebp,%edx | |
2654 shldl $5,%ecx,%ecx | |
2655 vmovdqa %xmm9,32(%rsp) | |
2656 addl %edi,%ebx | |
2657 andl %edx,%esi | |
2658 vpsrld $31,%xmm7,%xmm8 | |
2659 xorl %ebp,%edx | |
2660 addl %ecx,%ebx | |
2661 shrdl $7,%ecx,%ecx | |
2662 xorl %ebp,%esi | |
2663 vpslldq $12,%xmm7,%xmm10 | |
2664 vpaddd %xmm7,%xmm7,%xmm7 | |
2665 movl %ebx,%edi | |
2666 addl 56(%rsp),%eax | |
2667 xorl %edx,%ecx | |
2668 shldl $5,%ebx,%ebx | |
2669 vpsrld $30,%xmm10,%xmm9 | |
2670 vpor %xmm8,%xmm7,%xmm7 | |
2671 addl %esi,%eax | |
2672 andl %ecx,%edi | |
2673 xorl %edx,%ecx | |
2674 addl %ebx,%eax | |
2675 vpslld $2,%xmm10,%xmm10 | |
2676 vpxor %xmm9,%xmm7,%xmm7 | |
2677 shrdl $7,%ebx,%ebx | |
2678 xorl %edx,%edi | |
2679 movl %eax,%esi | |
2680 addl 60(%rsp),%ebp | |
2681 vpxor %xmm10,%xmm7,%xmm7 | |
2682 xorl %ecx,%ebx | |
2683 shldl $5,%eax,%eax | |
2684 addl %edi,%ebp | |
2685 andl %ebx,%esi | |
2686 xorl %ecx,%ebx | |
2687 addl %eax,%ebp | |
2688 vpalignr $8,%xmm6,%xmm7,%xmm8 | |
2689 vpxor %xmm4,%xmm0,%xmm0 | |
2690 shrdl $7,%eax,%eax | |
2691 xorl %ecx,%esi | |
2692 movl %ebp,%edi | |
2693 addl 0(%rsp),%edx | |
2694 vpxor %xmm1,%xmm0,%xmm0 | |
2695 xorl %ebx,%eax | |
2696 shldl $5,%ebp,%ebp | |
2697 vpaddd %xmm7,%xmm11,%xmm9 | |
2698 addl %esi,%edx | |
2699 andl %eax,%edi | |
2700 vpxor %xmm8,%xmm0,%xmm0 | |
2701 xorl %ebx,%eax | |
2702 addl %ebp,%edx | |
2703 shrdl $7,%ebp,%ebp | |
2704 xorl %ebx,%edi | |
2705 vpsrld $30,%xmm0,%xmm8 | |
2706 vmovdqa %xmm9,48(%rsp) | |
2707 movl %edx,%esi | |
2708 addl 4(%rsp),%ecx | |
2709 xorl %eax,%ebp | |
2710 shldl $5,%edx,%edx | |
2711 vpslld $2,%xmm0,%xmm0 | |
2712 addl %edi,%ecx | |
2713 andl %ebp,%esi | |
2714 xorl %eax,%ebp | |
2715 addl %edx,%ecx | |
2716 shrdl $7,%edx,%edx | |
2717 xorl %eax,%esi | |
2718 movl %ecx,%edi | |
2719 addl 8(%rsp),%ebx | |
2720 vpor %xmm8,%xmm0,%xmm0 | |
2721 xorl %ebp,%edx | |
2722 shldl $5,%ecx,%ecx | |
2723 addl %esi,%ebx | |
2724 andl %edx,%edi | |
2725 xorl %ebp,%edx | |
2726 addl %ecx,%ebx | |
2727 addl 12(%rsp),%eax | |
2728 xorl %ebp,%edi | |
2729 movl %ebx,%esi | |
2730 shldl $5,%ebx,%ebx | |
2731 addl %edi,%eax | |
2732 xorl %edx,%esi | |
2733 shrdl $7,%ecx,%ecx | |
2734 addl %ebx,%eax | |
2735 vpalignr $8,%xmm7,%xmm0,%xmm8 | |
2736 vpxor %xmm5,%xmm1,%xmm1 | |
2737 addl 16(%rsp),%ebp | |
2738 xorl %ecx,%esi | |
2739 movl %eax,%edi | |
2740 shldl $5,%eax,%eax | |
2741 vpxor %xmm2,%xmm1,%xmm1 | |
2742 addl %esi,%ebp | |
2743 xorl %ecx,%edi | |
2744 vpaddd %xmm0,%xmm11,%xmm9 | |
2745 shrdl $7,%ebx,%ebx | |
2746 addl %eax,%ebp | |
2747 vpxor %xmm8,%xmm1,%xmm1 | |
2748 addl 20(%rsp),%edx | |
2749 xorl %ebx,%edi | |
2750 movl %ebp,%esi | |
2751 shldl $5,%ebp,%ebp | |
2752 vpsrld $30,%xmm1,%xmm8 | |
2753 vmovdqa %xmm9,0(%rsp) | |
2754 addl %edi,%edx | |
2755 xorl %ebx,%esi | |
2756 shrdl $7,%eax,%eax | |
2757 addl %ebp,%edx | |
2758 vpslld $2,%xmm1,%xmm1 | |
2759 addl 24(%rsp),%ecx | |
2760 xorl %eax,%esi | |
2761 movl %edx,%edi | |
2762 shldl $5,%edx,%edx | |
2763 addl %esi,%ecx | |
2764 xorl %eax,%edi | |
2765 shrdl $7,%ebp,%ebp | |
2766 addl %edx,%ecx | |
2767 vpor %xmm8,%xmm1,%xmm1 | |
2768 addl 28(%rsp),%ebx | |
2769 xorl %ebp,%edi | |
2770 movl %ecx,%esi | |
2771 shldl $5,%ecx,%ecx | |
2772 addl %edi,%ebx | |
2773 xorl %ebp,%esi | |
2774 shrdl $7,%edx,%edx | |
2775 addl %ecx,%ebx | |
2776 vpalignr $8,%xmm0,%xmm1,%xmm8 | |
2777 vpxor %xmm6,%xmm2,%xmm2 | |
2778 addl 32(%rsp),%eax | |
2779 xorl %edx,%esi | |
2780 movl %ebx,%edi | |
2781 shldl $5,%ebx,%ebx | |
2782 vpxor %xmm3,%xmm2,%xmm2 | |
2783 addl %esi,%eax | |
2784 xorl %edx,%edi | |
2785 vpaddd %xmm1,%xmm11,%xmm9 | |
2786 vmovdqa 0(%r11),%xmm11 | |
2787 shrdl $7,%ecx,%ecx | |
2788 addl %ebx,%eax | |
2789 vpxor %xmm8,%xmm2,%xmm2 | |
2790 addl 36(%rsp),%ebp | |
2791 xorl %ecx,%edi | |
2792 movl %eax,%esi | |
2793 shldl $5,%eax,%eax | |
2794 vpsrld $30,%xmm2,%xmm8 | |
2795 vmovdqa %xmm9,16(%rsp) | |
2796 addl %edi,%ebp | |
2797 xorl %ecx,%esi | |
2798 shrdl $7,%ebx,%ebx | |
2799 addl %eax,%ebp | |
2800 vpslld $2,%xmm2,%xmm2 | |
2801 addl 40(%rsp),%edx | |
2802 xorl %ebx,%esi | |
2803 movl %ebp,%edi | |
2804 shldl $5,%ebp,%ebp | |
2805 addl %esi,%edx | |
2806 xorl %ebx,%edi | |
2807 shrdl $7,%eax,%eax | |
2808 addl %ebp,%edx | |
2809 vpor %xmm8,%xmm2,%xmm2 | |
2810 addl 44(%rsp),%ecx | |
2811 xorl %eax,%edi | |
2812 movl %edx,%esi | |
2813 shldl $5,%edx,%edx | |
2814 addl %edi,%ecx | |
2815 xorl %eax,%esi | |
2816 shrdl $7,%ebp,%ebp | |
2817 addl %edx,%ecx | |
2818 vpalignr $8,%xmm1,%xmm2,%xmm8 | |
2819 vpxor %xmm7,%xmm3,%xmm3 | |
2820 addl 48(%rsp),%ebx | |
2821 xorl %ebp,%esi | |
2822 movl %ecx,%edi | |
2823 shldl $5,%ecx,%ecx | |
2824 vpxor %xmm4,%xmm3,%xmm3 | |
2825 addl %esi,%ebx | |
2826 xorl %ebp,%edi | |
2827 vpaddd %xmm2,%xmm11,%xmm9 | |
2828 shrdl $7,%edx,%edx | |
2829 addl %ecx,%ebx | |
2830 vpxor %xmm8,%xmm3,%xmm3 | |
2831 addl 52(%rsp),%eax | |
2832 xorl %edx,%edi | |
2833 movl %ebx,%esi | |
2834 shldl $5,%ebx,%ebx | |
2835 vpsrld $30,%xmm3,%xmm8 | |
2836 vmovdqa %xmm9,32(%rsp) | |
2837 addl %edi,%eax | |
2838 xorl %edx,%esi | |
2839 shrdl $7,%ecx,%ecx | |
2840 addl %ebx,%eax | |
2841 vpslld $2,%xmm3,%xmm3 | |
2842 addl 56(%rsp),%ebp | |
2843 xorl %ecx,%esi | |
2844 movl %eax,%edi | |
2845 shldl $5,%eax,%eax | |
2846 addl %esi,%ebp | |
2847 xorl %ecx,%edi | |
2848 shrdl $7,%ebx,%ebx | |
2849 addl %eax,%ebp | |
2850 vpor %xmm8,%xmm3,%xmm3 | |
2851 addl 60(%rsp),%edx | |
2852 xorl %ebx,%edi | |
2853 movl %ebp,%esi | |
2854 shldl $5,%ebp,%ebp | |
2855 addl %edi,%edx | |
2856 xorl %ebx,%esi | |
2857 shrdl $7,%eax,%eax | |
2858 addl %ebp,%edx | |
2859 vpalignr $8,%xmm2,%xmm3,%xmm8 | |
2860 vpxor %xmm0,%xmm4,%xmm4 | |
2861 addl 0(%rsp),%ecx | |
2862 xorl %eax,%esi | |
2863 movl %edx,%edi | |
2864 shldl $5,%edx,%edx | |
2865 vpxor %xmm5,%xmm4,%xmm4 | |
2866 addl %esi,%ecx | |
2867 xorl %eax,%edi | |
2868 vpaddd %xmm3,%xmm11,%xmm9 | |
2869 shrdl $7,%ebp,%ebp | |
2870 addl %edx,%ecx | |
2871 vpxor %xmm8,%xmm4,%xmm4 | |
2872 addl 4(%rsp),%ebx | |
2873 xorl %ebp,%edi | |
2874 movl %ecx,%esi | |
2875 shldl $5,%ecx,%ecx | |
2876 vpsrld $30,%xmm4,%xmm8 | |
2877 vmovdqa %xmm9,48(%rsp) | |
2878 addl %edi,%ebx | |
2879 xorl %ebp,%esi | |
2880 shrdl $7,%edx,%edx | |
2881 addl %ecx,%ebx | |
2882 vpslld $2,%xmm4,%xmm4 | |
2883 addl 8(%rsp),%eax | |
2884 xorl %edx,%esi | |
2885 movl %ebx,%edi | |
2886 shldl $5,%ebx,%ebx | |
2887 addl %esi,%eax | |
2888 xorl %edx,%edi | |
2889 shrdl $7,%ecx,%ecx | |
2890 addl %ebx,%eax | |
2891 vpor %xmm8,%xmm4,%xmm4 | |
2892 addl 12(%rsp),%ebp | |
2893 xorl %ecx,%edi | |
2894 movl %eax,%esi | |
2895 shldl $5,%eax,%eax | |
2896 addl %edi,%ebp | |
2897 xorl %ecx,%esi | |
2898 shrdl $7,%ebx,%ebx | |
2899 addl %eax,%ebp | |
2900 vpalignr $8,%xmm3,%xmm4,%xmm8 | |
2901 vpxor %xmm1,%xmm5,%xmm5 | |
2902 addl 16(%rsp),%edx | |
2903 xorl %ebx,%esi | |
2904 movl %ebp,%edi | |
2905 shldl $5,%ebp,%ebp | |
2906 vpxor %xmm6,%xmm5,%xmm5 | |
2907 addl %esi,%edx | |
2908 xorl %ebx,%edi | |
2909 vpaddd %xmm4,%xmm11,%xmm9 | |
2910 shrdl $7,%eax,%eax | |
2911 addl %ebp,%edx | |
2912 vpxor %xmm8,%xmm5,%xmm5 | |
2913 addl 20(%rsp),%ecx | |
2914 xorl %eax,%edi | |
2915 movl %edx,%esi | |
2916 shldl $5,%edx,%edx | |
2917 vpsrld $30,%xmm5,%xmm8 | |
2918 vmovdqa %xmm9,0(%rsp) | |
2919 addl %edi,%ecx | |
2920 xorl %eax,%esi | |
2921 shrdl $7,%ebp,%ebp | |
2922 addl %edx,%ecx | |
2923 vpslld $2,%xmm5,%xmm5 | |
2924 addl 24(%rsp),%ebx | |
2925 xorl %ebp,%esi | |
2926 movl %ecx,%edi | |
2927 shldl $5,%ecx,%ecx | |
2928 addl %esi,%ebx | |
2929 xorl %ebp,%edi | |
2930 shrdl $7,%edx,%edx | |
2931 addl %ecx,%ebx | |
2932 vpor %xmm8,%xmm5,%xmm5 | |
2933 addl 28(%rsp),%eax | |
2934 shrdl $7,%ecx,%ecx | |
2935 movl %ebx,%esi | |
2936 xorl %edx,%edi | |
2937 shldl $5,%ebx,%ebx | |
2938 addl %edi,%eax | |
2939 xorl %ecx,%esi | |
2940 xorl %edx,%ecx | |
2941 addl %ebx,%eax | |
2942 vpalignr $8,%xmm4,%xmm5,%xmm8 | |
2943 vpxor %xmm2,%xmm6,%xmm6 | |
2944 addl 32(%rsp),%ebp | |
2945 andl %ecx,%esi | |
2946 xorl %edx,%ecx | |
2947 shrdl $7,%ebx,%ebx | |
2948 vpxor %xmm7,%xmm6,%xmm6 | |
2949 movl %eax,%edi | |
2950 xorl %ecx,%esi | |
2951 vpaddd %xmm5,%xmm11,%xmm9 | |
2952 shldl $5,%eax,%eax | |
2953 addl %esi,%ebp | |
2954 vpxor %xmm8,%xmm6,%xmm6 | |
2955 xorl %ebx,%edi | |
2956 xorl %ecx,%ebx | |
2957 addl %eax,%ebp | |
2958 addl 36(%rsp),%edx | |
2959 vpsrld $30,%xmm6,%xmm8 | |
2960 vmovdqa %xmm9,16(%rsp) | |
2961 andl %ebx,%edi | |
2962 xorl %ecx,%ebx | |
2963 shrdl $7,%eax,%eax | |
2964 movl %ebp,%esi | |
2965 vpslld $2,%xmm6,%xmm6 | |
2966 xorl %ebx,%edi | |
2967 shldl $5,%ebp,%ebp | |
2968 addl %edi,%edx | |
2969 xorl %eax,%esi | |
2970 xorl %ebx,%eax | |
2971 addl %ebp,%edx | |
2972 addl 40(%rsp),%ecx | |
2973 andl %eax,%esi | |
2974 vpor %xmm8,%xmm6,%xmm6 | |
2975 xorl %ebx,%eax | |
2976 shrdl $7,%ebp,%ebp | |
2977 movl %edx,%edi | |
2978 xorl %eax,%esi | |
2979 shldl $5,%edx,%edx | |
2980 addl %esi,%ecx | |
2981 xorl %ebp,%edi | |
2982 xorl %eax,%ebp | |
2983 addl %edx,%ecx | |
2984 addl 44(%rsp),%ebx | |
2985 andl %ebp,%edi | |
2986 xorl %eax,%ebp | |
2987 shrdl $7,%edx,%edx | |
2988 movl %ecx,%esi | |
2989 xorl %ebp,%edi | |
2990 shldl $5,%ecx,%ecx | |
2991 addl %edi,%ebx | |
2992 xorl %edx,%esi | |
2993 xorl %ebp,%edx | |
2994 addl %ecx,%ebx | |
2995 vpalignr $8,%xmm5,%xmm6,%xmm8 | |
2996 vpxor %xmm3,%xmm7,%xmm7 | |
2997 addl 48(%rsp),%eax | |
2998 andl %edx,%esi | |
2999 xorl %ebp,%edx | |
3000 shrdl $7,%ecx,%ecx | |
3001 vpxor %xmm0,%xmm7,%xmm7 | |
3002 movl %ebx,%edi | |
3003 xorl %edx,%esi | |
3004 vpaddd %xmm6,%xmm11,%xmm9 | |
3005 vmovdqa 32(%r11),%xmm11 | |
3006 shldl $5,%ebx,%ebx | |
3007 addl %esi,%eax | |
3008 vpxor %xmm8,%xmm7,%xmm7 | |
3009 xorl %ecx,%edi | |
3010 xorl %edx,%ecx | |
3011 addl %ebx,%eax | |
3012 addl 52(%rsp),%ebp | |
3013 vpsrld $30,%xmm7,%xmm8 | |
3014 vmovdqa %xmm9,32(%rsp) | |
3015 andl %ecx,%edi | |
3016 xorl %edx,%ecx | |
3017 shrdl $7,%ebx,%ebx | |
3018 movl %eax,%esi | |
3019 vpslld $2,%xmm7,%xmm7 | |
3020 xorl %ecx,%edi | |
3021 shldl $5,%eax,%eax | |
3022 addl %edi,%ebp | |
3023 xorl %ebx,%esi | |
3024 xorl %ecx,%ebx | |
3025 addl %eax,%ebp | |
3026 addl 56(%rsp),%edx | |
3027 andl %ebx,%esi | |
3028 vpor %xmm8,%xmm7,%xmm7 | |
3029 xorl %ecx,%ebx | |
3030 shrdl $7,%eax,%eax | |
3031 movl %ebp,%edi | |
3032 xorl %ebx,%esi | |
3033 shldl $5,%ebp,%ebp | |
3034 addl %esi,%edx | |
3035 xorl %eax,%edi | |
3036 xorl %ebx,%eax | |
3037 addl %ebp,%edx | |
3038 addl 60(%rsp),%ecx | |
3039 andl %eax,%edi | |
3040 xorl %ebx,%eax | |
3041 shrdl $7,%ebp,%ebp | |
3042 movl %edx,%esi | |
3043 xorl %eax,%edi | |
3044 shldl $5,%edx,%edx | |
3045 addl %edi,%ecx | |
3046 xorl %ebp,%esi | |
3047 xorl %eax,%ebp | |
3048 addl %edx,%ecx | |
3049 vpalignr $8,%xmm6,%xmm7,%xmm8 | |
3050 vpxor %xmm4,%xmm0,%xmm0 | |
3051 addl 0(%rsp),%ebx | |
3052 andl %ebp,%esi | |
3053 xorl %eax,%ebp | |
3054 shrdl $7,%edx,%edx | |
3055 vpxor %xmm1,%xmm0,%xmm0 | |
3056 movl %ecx,%edi | |
3057 xorl %ebp,%esi | |
3058 vpaddd %xmm7,%xmm11,%xmm9 | |
3059 shldl $5,%ecx,%ecx | |
3060 addl %esi,%ebx | |
3061 vpxor %xmm8,%xmm0,%xmm0 | |
3062 xorl %edx,%edi | |
3063 xorl %ebp,%edx | |
3064 addl %ecx,%ebx | |
3065 addl 4(%rsp),%eax | |
3066 vpsrld $30,%xmm0,%xmm8 | |
3067 vmovdqa %xmm9,48(%rsp) | |
3068 andl %edx,%edi | |
3069 xorl %ebp,%edx | |
3070 shrdl $7,%ecx,%ecx | |
3071 movl %ebx,%esi | |
3072 vpslld $2,%xmm0,%xmm0 | |
3073 xorl %edx,%edi | |
3074 shldl $5,%ebx,%ebx | |
3075 addl %edi,%eax | |
3076 xorl %ecx,%esi | |
3077 xorl %edx,%ecx | |
3078 addl %ebx,%eax | |
3079 addl 8(%rsp),%ebp | |
3080 andl %ecx,%esi | |
3081 vpor %xmm8,%xmm0,%xmm0 | |
3082 xorl %edx,%ecx | |
3083 shrdl $7,%ebx,%ebx | |
3084 movl %eax,%edi | |
3085 xorl %ecx,%esi | |
3086 shldl $5,%eax,%eax | |
3087 addl %esi,%ebp | |
3088 xorl %ebx,%edi | |
3089 xorl %ecx,%ebx | |
3090 addl %eax,%ebp | |
3091 addl 12(%rsp),%edx | |
3092 andl %ebx,%edi | |
3093 xorl %ecx,%ebx | |
3094 shrdl $7,%eax,%eax | |
3095 movl %ebp,%esi | |
3096 xorl %ebx,%edi | |
3097 shldl $5,%ebp,%ebp | |
3098 addl %edi,%edx | |
3099 xorl %eax,%esi | |
3100 xorl %ebx,%eax | |
3101 addl %ebp,%edx | |
3102 vpalignr $8,%xmm7,%xmm0,%xmm8 | |
3103 vpxor %xmm5,%xmm1,%xmm1 | |
3104 addl 16(%rsp),%ecx | |
3105 andl %eax,%esi | |
3106 xorl %ebx,%eax | |
3107 shrdl $7,%ebp,%ebp | |
3108 vpxor %xmm2,%xmm1,%xmm1 | |
3109 movl %edx,%edi | |
3110 xorl %eax,%esi | |
3111 vpaddd %xmm0,%xmm11,%xmm9 | |
3112 shldl $5,%edx,%edx | |
3113 addl %esi,%ecx | |
3114 vpxor %xmm8,%xmm1,%xmm1 | |
3115 xorl %ebp,%edi | |
3116 xorl %eax,%ebp | |
3117 addl %edx,%ecx | |
3118 addl 20(%rsp),%ebx | |
3119 vpsrld $30,%xmm1,%xmm8 | |
3120 vmovdqa %xmm9,0(%rsp) | |
3121 andl %ebp,%edi | |
3122 xorl %eax,%ebp | |
3123 shrdl $7,%edx,%edx | |
3124 movl %ecx,%esi | |
3125 vpslld $2,%xmm1,%xmm1 | |
3126 xorl %ebp,%edi | |
3127 shldl $5,%ecx,%ecx | |
3128 addl %edi,%ebx | |
3129 xorl %edx,%esi | |
3130 xorl %ebp,%edx | |
3131 addl %ecx,%ebx | |
3132 addl 24(%rsp),%eax | |
3133 andl %edx,%esi | |
3134 vpor %xmm8,%xmm1,%xmm1 | |
3135 xorl %ebp,%edx | |
3136 shrdl $7,%ecx,%ecx | |
3137 movl %ebx,%edi | |
3138 xorl %edx,%esi | |
3139 shldl $5,%ebx,%ebx | |
3140 addl %esi,%eax | |
3141 xorl %ecx,%edi | |
3142 xorl %edx,%ecx | |
3143 addl %ebx,%eax | |
3144 addl 28(%rsp),%ebp | |
3145 andl %ecx,%edi | |
3146 xorl %edx,%ecx | |
3147 shrdl $7,%ebx,%ebx | |
3148 movl %eax,%esi | |
3149 xorl %ecx,%edi | |
3150 shldl $5,%eax,%eax | |
3151 addl %edi,%ebp | |
3152 xorl %ebx,%esi | |
3153 xorl %ecx,%ebx | |
3154 addl %eax,%ebp | |
3155 vpalignr $8,%xmm0,%xmm1,%xmm8 | |
3156 vpxor %xmm6,%xmm2,%xmm2 | |
3157 addl 32(%rsp),%edx | |
3158 andl %ebx,%esi | |
3159 xorl %ecx,%ebx | |
3160 shrdl $7,%eax,%eax | |
3161 vpxor %xmm3,%xmm2,%xmm2 | |
3162 movl %ebp,%edi | |
3163 xorl %ebx,%esi | |
3164 vpaddd %xmm1,%xmm11,%xmm9 | |
3165 shldl $5,%ebp,%ebp | |
3166 addl %esi,%edx | |
3167 vpxor %xmm8,%xmm2,%xmm2 | |
3168 xorl %eax,%edi | |
3169 xorl %ebx,%eax | |
3170 addl %ebp,%edx | |
3171 addl 36(%rsp),%ecx | |
3172 vpsrld $30,%xmm2,%xmm8 | |
3173 vmovdqa %xmm9,16(%rsp) | |
3174 andl %eax,%edi | |
3175 xorl %ebx,%eax | |
3176 shrdl $7,%ebp,%ebp | |
3177 movl %edx,%esi | |
3178 vpslld $2,%xmm2,%xmm2 | |
3179 xorl %eax,%edi | |
3180 shldl $5,%edx,%edx | |
3181 addl %edi,%ecx | |
3182 xorl %ebp,%esi | |
3183 xorl %eax,%ebp | |
3184 addl %edx,%ecx | |
3185 addl 40(%rsp),%ebx | |
3186 andl %ebp,%esi | |
3187 vpor %xmm8,%xmm2,%xmm2 | |
3188 xorl %eax,%ebp | |
3189 shrdl $7,%edx,%edx | |
3190 movl %ecx,%edi | |
3191 xorl %ebp,%esi | |
3192 shldl $5,%ecx,%ecx | |
3193 addl %esi,%ebx | |
3194 xorl %edx,%edi | |
3195 xorl %ebp,%edx | |
3196 addl %ecx,%ebx | |
3197 addl 44(%rsp),%eax | |
3198 andl %edx,%edi | |
3199 xorl %ebp,%edx | |
3200 shrdl $7,%ecx,%ecx | |
3201 movl %ebx,%esi | |
3202 xorl %edx,%edi | |
3203 shldl $5,%ebx,%ebx | |
3204 addl %edi,%eax | |
3205 xorl %edx,%esi | |
3206 addl %ebx,%eax | |
3207 vpalignr $8,%xmm1,%xmm2,%xmm8 | |
3208 vpxor %xmm7,%xmm3,%xmm3 | |
3209 addl 48(%rsp),%ebp | |
3210 xorl %ecx,%esi | |
3211 movl %eax,%edi | |
3212 shldl $5,%eax,%eax | |
3213 vpxor %xmm4,%xmm3,%xmm3 | |
3214 addl %esi,%ebp | |
3215 xorl %ecx,%edi | |
3216 vpaddd %xmm2,%xmm11,%xmm9 | |
3217 shrdl $7,%ebx,%ebx | |
3218 addl %eax,%ebp | |
3219 vpxor %xmm8,%xmm3,%xmm3 | |
3220 addl 52(%rsp),%edx | |
3221 xorl %ebx,%edi | |
3222 movl %ebp,%esi | |
3223 shldl $5,%ebp,%ebp | |
3224 vpsrld $30,%xmm3,%xmm8 | |
3225 vmovdqa %xmm9,32(%rsp) | |
3226 addl %edi,%edx | |
3227 xorl %ebx,%esi | |
3228 shrdl $7,%eax,%eax | |
3229 addl %ebp,%edx | |
3230 vpslld $2,%xmm3,%xmm3 | |
3231 addl 56(%rsp),%ecx | |
3232 xorl %eax,%esi | |
3233 movl %edx,%edi | |
3234 shldl $5,%edx,%edx | |
3235 addl %esi,%ecx | |
3236 xorl %eax,%edi | |
3237 shrdl $7,%ebp,%ebp | |
3238 addl %edx,%ecx | |
3239 vpor %xmm8,%xmm3,%xmm3 | |
3240 addl 60(%rsp),%ebx | |
3241 xorl %ebp,%edi | |
3242 movl %ecx,%esi | |
3243 shldl $5,%ecx,%ecx | |
3244 addl %edi,%ebx | |
3245 xorl %ebp,%esi | |
3246 shrdl $7,%edx,%edx | |
3247 addl %ecx,%ebx | |
3248 addl 0(%rsp),%eax | |
3249 vpaddd %xmm3,%xmm11,%xmm9 | |
3250 xorl %edx,%esi | |
3251 movl %ebx,%edi | |
3252 shldl $5,%ebx,%ebx | |
3253 addl %esi,%eax | |
3254 vmovdqa %xmm9,48(%rsp) | |
3255 xorl %edx,%edi | |
3256 shrdl $7,%ecx,%ecx | |
3257 addl %ebx,%eax | |
3258 addl 4(%rsp),%ebp | |
3259 xorl %ecx,%edi | |
3260 movl %eax,%esi | |
3261 shldl $5,%eax,%eax | |
3262 addl %edi,%ebp | |
3263 xorl %ecx,%esi | |
3264 shrdl $7,%ebx,%ebx | |
3265 addl %eax,%ebp | |
3266 addl 8(%rsp),%edx | |
3267 xorl %ebx,%esi | |
3268 movl %ebp,%edi | |
3269 shldl $5,%ebp,%ebp | |
3270 addl %esi,%edx | |
3271 xorl %ebx,%edi | |
3272 shrdl $7,%eax,%eax | |
3273 addl %ebp,%edx | |
3274 addl 12(%rsp),%ecx | |
3275 xorl %eax,%edi | |
3276 movl %edx,%esi | |
3277 shldl $5,%edx,%edx | |
3278 addl %edi,%ecx | |
3279 xorl %eax,%esi | |
3280 shrdl $7,%ebp,%ebp | |
3281 addl %edx,%ecx | |
3282 cmpq %r10,%r9 | |
3283 je L$done_avx | |
3284 vmovdqa 64(%r11),%xmm6 | |
3285 vmovdqa -64(%r11),%xmm11 | |
3286 vmovdqu 0(%r9),%xmm0 | |
3287 vmovdqu 16(%r9),%xmm1 | |
3288 vmovdqu 32(%r9),%xmm2 | |
3289 vmovdqu 48(%r9),%xmm3 | |
3290 vpshufb %xmm6,%xmm0,%xmm0 | |
3291 addq $64,%r9 | |
3292 addl 16(%rsp),%ebx | |
3293 xorl %ebp,%esi | |
3294 vpshufb %xmm6,%xmm1,%xmm1 | |
3295 movl %ecx,%edi | |
3296 shldl $5,%ecx,%ecx | |
3297 vpaddd %xmm11,%xmm0,%xmm4 | |
3298 addl %esi,%ebx | |
3299 xorl %ebp,%edi | |
3300 shrdl $7,%edx,%edx | |
3301 addl %ecx,%ebx | |
3302 vmovdqa %xmm4,0(%rsp) | |
3303 addl 20(%rsp),%eax | |
3304 xorl %edx,%edi | |
3305 movl %ebx,%esi | |
3306 shldl $5,%ebx,%ebx | |
3307 addl %edi,%eax | |
3308 xorl %edx,%esi | |
3309 shrdl $7,%ecx,%ecx | |
3310 addl %ebx,%eax | |
3311 addl 24(%rsp),%ebp | |
3312 xorl %ecx,%esi | |
3313 movl %eax,%edi | |
3314 shldl $5,%eax,%eax | |
3315 addl %esi,%ebp | |
3316 xorl %ecx,%edi | |
3317 shrdl $7,%ebx,%ebx | |
3318 addl %eax,%ebp | |
3319 addl 28(%rsp),%edx | |
3320 xorl %ebx,%edi | |
3321 movl %ebp,%esi | |
3322 shldl $5,%ebp,%ebp | |
3323 addl %edi,%edx | |
3324 xorl %ebx,%esi | |
3325 shrdl $7,%eax,%eax | |
3326 addl %ebp,%edx | |
3327 addl 32(%rsp),%ecx | |
3328 xorl %eax,%esi | |
3329 vpshufb %xmm6,%xmm2,%xmm2 | |
3330 movl %edx,%edi | |
3331 shldl $5,%edx,%edx | |
3332 vpaddd %xmm11,%xmm1,%xmm5 | |
3333 addl %esi,%ecx | |
3334 xorl %eax,%edi | |
3335 shrdl $7,%ebp,%ebp | |
3336 addl %edx,%ecx | |
3337 vmovdqa %xmm5,16(%rsp) | |
3338 addl 36(%rsp),%ebx | |
3339 xorl %ebp,%edi | |
3340 movl %ecx,%esi | |
3341 shldl $5,%ecx,%ecx | |
3342 addl %edi,%ebx | |
3343 xorl %ebp,%esi | |
3344 shrdl $7,%edx,%edx | |
3345 addl %ecx,%ebx | |
3346 addl 40(%rsp),%eax | |
3347 xorl %edx,%esi | |
3348 movl %ebx,%edi | |
3349 shldl $5,%ebx,%ebx | |
3350 addl %esi,%eax | |
3351 xorl %edx,%edi | |
3352 shrdl $7,%ecx,%ecx | |
3353 addl %ebx,%eax | |
3354 addl 44(%rsp),%ebp | |
3355 xorl %ecx,%edi | |
3356 movl %eax,%esi | |
3357 shldl $5,%eax,%eax | |
3358 addl %edi,%ebp | |
3359 xorl %ecx,%esi | |
3360 shrdl $7,%ebx,%ebx | |
3361 addl %eax,%ebp | |
3362 addl 48(%rsp),%edx | |
3363 xorl %ebx,%esi | |
3364 vpshufb %xmm6,%xmm3,%xmm3 | |
3365 movl %ebp,%edi | |
3366 shldl $5,%ebp,%ebp | |
3367 vpaddd %xmm11,%xmm2,%xmm6 | |
3368 addl %esi,%edx | |
3369 xorl %ebx,%edi | |
3370 shrdl $7,%eax,%eax | |
3371 addl %ebp,%edx | |
3372 vmovdqa %xmm6,32(%rsp) | |
3373 addl 52(%rsp),%ecx | |
3374 xorl %eax,%edi | |
3375 movl %edx,%esi | |
3376 shldl $5,%edx,%edx | |
3377 addl %edi,%ecx | |
3378 xorl %eax,%esi | |
3379 shrdl $7,%ebp,%ebp | |
3380 addl %edx,%ecx | |
3381 addl 56(%rsp),%ebx | |
3382 xorl %ebp,%esi | |
3383 movl %ecx,%edi | |
3384 shldl $5,%ecx,%ecx | |
3385 addl %esi,%ebx | |
3386 xorl %ebp,%edi | |
3387 shrdl $7,%edx,%edx | |
3388 addl %ecx,%ebx | |
3389 addl 60(%rsp),%eax | |
3390 xorl %edx,%edi | |
3391 movl %ebx,%esi | |
3392 shldl $5,%ebx,%ebx | |
3393 addl %edi,%eax | |
3394 shrdl $7,%ecx,%ecx | |
3395 addl %ebx,%eax | |
3396 addl 0(%r8),%eax | |
3397 addl 4(%r8),%esi | |
3398 addl 8(%r8),%ecx | |
3399 addl 12(%r8),%edx | |
3400 movl %eax,0(%r8) | |
3401 addl 16(%r8),%ebp | |
3402 movl %esi,4(%r8) | |
3403 movl %esi,%ebx | |
3404 movl %ecx,8(%r8) | |
3405 movl %ecx,%edi | |
3406 movl %edx,12(%r8) | |
3407 xorl %edx,%edi | |
3408 movl %ebp,16(%r8) | |
3409 andl %edi,%esi | |
3410 jmp L$oop_avx | |
3411 | |
3412 .p2align 4 | |
3413 L$done_avx: | |
3414 addl 16(%rsp),%ebx | |
3415 xorl %ebp,%esi | |
3416 movl %ecx,%edi | |
3417 shldl $5,%ecx,%ecx | |
3418 addl %esi,%ebx | |
3419 xorl %ebp,%edi | |
3420 shrdl $7,%edx,%edx | |
3421 addl %ecx,%ebx | |
3422 addl 20(%rsp),%eax | |
3423 xorl %edx,%edi | |
3424 movl %ebx,%esi | |
3425 shldl $5,%ebx,%ebx | |
3426 addl %edi,%eax | |
3427 xorl %edx,%esi | |
3428 shrdl $7,%ecx,%ecx | |
3429 addl %ebx,%eax | |
3430 addl 24(%rsp),%ebp | |
3431 xorl %ecx,%esi | |
3432 movl %eax,%edi | |
3433 shldl $5,%eax,%eax | |
3434 addl %esi,%ebp | |
3435 xorl %ecx,%edi | |
3436 shrdl $7,%ebx,%ebx | |
3437 addl %eax,%ebp | |
3438 addl 28(%rsp),%edx | |
3439 xorl %ebx,%edi | |
3440 movl %ebp,%esi | |
3441 shldl $5,%ebp,%ebp | |
3442 addl %edi,%edx | |
3443 xorl %ebx,%esi | |
3444 shrdl $7,%eax,%eax | |
3445 addl %ebp,%edx | |
3446 addl 32(%rsp),%ecx | |
3447 xorl %eax,%esi | |
3448 movl %edx,%edi | |
3449 shldl $5,%edx,%edx | |
3450 addl %esi,%ecx | |
3451 xorl %eax,%edi | |
3452 shrdl $7,%ebp,%ebp | |
3453 addl %edx,%ecx | |
3454 addl 36(%rsp),%ebx | |
3455 xorl %ebp,%edi | |
3456 movl %ecx,%esi | |
3457 shldl $5,%ecx,%ecx | |
3458 addl %edi,%ebx | |
3459 xorl %ebp,%esi | |
3460 shrdl $7,%edx,%edx | |
3461 addl %ecx,%ebx | |
3462 addl 40(%rsp),%eax | |
3463 xorl %edx,%esi | |
3464 movl %ebx,%edi | |
3465 shldl $5,%ebx,%ebx | |
3466 addl %esi,%eax | |
3467 xorl %edx,%edi | |
3468 shrdl $7,%ecx,%ecx | |
3469 addl %ebx,%eax | |
3470 addl 44(%rsp),%ebp | |
3471 xorl %ecx,%edi | |
3472 movl %eax,%esi | |
3473 shldl $5,%eax,%eax | |
3474 addl %edi,%ebp | |
3475 xorl %ecx,%esi | |
3476 shrdl $7,%ebx,%ebx | |
3477 addl %eax,%ebp | |
3478 addl 48(%rsp),%edx | |
3479 xorl %ebx,%esi | |
3480 movl %ebp,%edi | |
3481 shldl $5,%ebp,%ebp | |
3482 addl %esi,%edx | |
3483 xorl %ebx,%edi | |
3484 shrdl $7,%eax,%eax | |
3485 addl %ebp,%edx | |
3486 addl 52(%rsp),%ecx | |
3487 xorl %eax,%edi | |
3488 movl %edx,%esi | |
3489 shldl $5,%edx,%edx | |
3490 addl %edi,%ecx | |
3491 xorl %eax,%esi | |
3492 shrdl $7,%ebp,%ebp | |
3493 addl %edx,%ecx | |
3494 addl 56(%rsp),%ebx | |
3495 xorl %ebp,%esi | |
3496 movl %ecx,%edi | |
3497 shldl $5,%ecx,%ecx | |
3498 addl %esi,%ebx | |
3499 xorl %ebp,%edi | |
3500 shrdl $7,%edx,%edx | |
3501 addl %ecx,%ebx | |
3502 addl 60(%rsp),%eax | |
3503 xorl %edx,%edi | |
3504 movl %ebx,%esi | |
3505 shldl $5,%ebx,%ebx | |
3506 addl %edi,%eax | |
3507 shrdl $7,%ecx,%ecx | |
3508 addl %ebx,%eax | |
3509 vzeroupper | |
3510 | |
3511 addl 0(%r8),%eax | |
3512 addl 4(%r8),%esi | |
3513 addl 8(%r8),%ecx | |
3514 movl %eax,0(%r8) | |
3515 addl 12(%r8),%edx | |
3516 movl %esi,4(%r8) | |
3517 addl 16(%r8),%ebp | |
3518 movl %ecx,8(%r8) | |
3519 movl %edx,12(%r8) | |
3520 movl %ebp,16(%r8) | |
3521 leaq (%r14),%rsi | |
3522 movq -40(%rsi),%r14 | |
3523 movq -32(%rsi),%r13 | |
3524 movq -24(%rsi),%r12 | |
3525 movq -16(%rsi),%rbp | |
3526 movq -8(%rsi),%rbx | |
3527 leaq (%rsi),%rsp | |
3528 L$epilogue_avx: | |
3529 .byte 0xf3,0xc3 | |
3530 | |
3531 .p2align 6 | 2410 .p2align 6 |
3532 K_XX_XX: | 2411 K_XX_XX: |
3533 .long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 | 2412 .long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 |
3534 .long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 | 2413 .long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 |
3535 .long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 | 2414 .long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 |
3536 .long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 | 2415 .long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 |
3537 .long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc | 2416 .long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc |
3538 .long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc | 2417 .long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc |
3539 .long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 | 2418 .long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 |
3540 .long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 | 2419 .long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 |
3541 .long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f | 2420 .long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f |
3542 .long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f | 2421 .long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f |
3543 .byte 0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0 | 2422 .byte 0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0 |
3544 .byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,3
2,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,12
1,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 | 2423 .byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,3
2,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,12
1,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 |
3545 .p2align 6 | 2424 .p2align 6 |
3546 #endif | 2425 #endif |
OLD | NEW |