OLD | NEW |
1 #if defined(__x86_64__) | 1 #if defined(__x86_64__) |
2 .text | 2 .text |
3 .extern OPENSSL_ia32cap_P | 3 .extern OPENSSL_ia32cap_P |
4 .hidden OPENSSL_ia32cap_P | 4 .hidden OPENSSL_ia32cap_P |
5 | 5 |
/*
 * sha1_block_data_order: exported (hidden-visibility) entry point.
 * The rows below only choose an implementation; they read three 32-bit
 * words of OPENSSL_ia32cap_P and branch on them:
 *   r9d  = word at +0, r8d = word at +4, r10d = word at +8
 *   (r10d is loaded but not examined in the rows visible here).
 * Rows whose NEW column is empty exist only in the OLD revision.
 */
6 .globl sha1_block_data_order | 6 .globl sha1_block_data_order |
7 .hidden sha1_block_data_order | 7 .hidden sha1_block_data_order |
8 .type sha1_block_data_order,@function | 8 .type sha1_block_data_order,@function |
9 .align 16 | 9 .align 16 |
10 sha1_block_data_order: | 10 sha1_block_data_order: |
11 movl OPENSSL_ia32cap_P+0(%rip),%r9d | 11 movl OPENSSL_ia32cap_P+0(%rip),%r9d |
12 movl OPENSSL_ia32cap_P+4(%rip),%r8d | 12 movl OPENSSL_ia32cap_P+4(%rip),%r8d |
13 movl OPENSSL_ia32cap_P+8(%rip),%r10d | 13 movl OPENSSL_ia32cap_P+8(%rip),%r10d |
/*
 * 512 = 1<<9: if bit 9 of the +4 word is clear, take the .Lialu path.
 * NOTE(review): presumably the SSSE3 feature bit, given the
 * _ssse3_shortcut target below - confirm against the capability
 * vector documentation.
 */
14 testl $512,%r8d | 14 testl $512,%r8d |
15 jz .Lialu | 15 jz .Lialu |
/*
 * OLD revision only: keep bit 28 (268435456) of the +4 word and bit 30
 * (1073741824) of the +0 word, OR them together, and jump to
 * _avx_shortcut only when both are set (1342177280 = 0x50000000).
 * NOTE(review): presumably "AVX supported" and "Intel CPU" - confirm.
 * The NEW revision drops this test and always continues to
 * _ssse3_shortcut when the bit-9 test above passes.
 */
16 andl $268435456,%r8d | |
17 andl $1073741824,%r9d | |
18 orl %r9d,%r8d | |
19 cmpl $1342177280,%r8d | |
20 je _avx_shortcut | |
21 jmp _ssse3_shortcut | 16 jmp _ssse3_shortcut |
22 | 17 |
23 .align 16 | 18 .align 16 |
24 .Lialu: | 19 .Lialu: |
25 movq %rsp,%rax | 20 movq %rsp,%rax |
26 pushq %rbx | 21 pushq %rbx |
27 pushq %rbp | 22 pushq %rbp |
28 pushq %r12 | 23 pushq %r12 |
29 pushq %r13 | 24 pushq %r13 |
30 pushq %r14 | 25 pushq %r14 |
(...skipping 2375 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2406 leaq (%r14),%rsi | 2401 leaq (%r14),%rsi |
2407 movq -40(%rsi),%r14 | 2402 movq -40(%rsi),%r14 |
2408 movq -32(%rsi),%r13 | 2403 movq -32(%rsi),%r13 |
2409 movq -24(%rsi),%r12 | 2404 movq -24(%rsi),%r12 |
2410 movq -16(%rsi),%rbp | 2405 movq -16(%rsi),%rbp |
2411 movq -8(%rsi),%rbx | 2406 movq -8(%rsi),%rbx |
2412 leaq (%rsi),%rsp | 2407 leaq (%rsi),%rsp |
2413 .Lepilogue_ssse3: | 2408 .Lepilogue_ssse3: |
2414 .byte 0xf3,0xc3 | 2409 .byte 0xf3,0xc3 |
2415 .size sha1_block_data_order_ssse3,.-sha1_block_data_order_ssse3 | 2410 .size sha1_block_data_order_ssse3,.-sha1_block_data_order_ssse3 |
/*
 * sha1_block_data_order_avx / _avx_shortcut (present in the OLD column
 * only; every row has an empty NEW column, i.e. the change removes it).
 *
 * Inputs as used below:
 *   rdi -> five consecutive 32-bit state words (read at 0..16(%r8))
 *   rsi -> input data, consumed 64 bytes at a time
 *   rdx =  number of 64-byte blocks (shifted left by 6 to form a length)
 *
 * Register roles established here:
 *   r8  = state pointer          r9  = current input pointer
 *   r10 = end-of-input pointer   r11 = K_XX_XX+64
 *   r14 = stack pointer value on entry (used to restore saved registers)
 *   eax,ebx,ecx,edx,ebp = the five working state words
 *   rsp = 64-byte-aligned scratch area of at least 64 bytes
 */
2416 .type sha1_block_data_order_avx,@function | |
2417 .align 16 | |
2418 sha1_block_data_order_avx: | |
2419 _avx_shortcut: | |
/* Remember the entry rsp, then save rbx, rbp, r12, r13, r14. */
2420 movq %rsp,%rax | |
2421 pushq %rbx | |
2422 pushq %rbp | |
2423 pushq %r12 | |
2424 pushq %r13 | |
2425 pushq %r14 | |
/* Reserve 64 bytes and round rsp down to a 64-byte boundary so the */
/* aligned vmovdqa stores to 0/16/32/48(%rsp) are valid.            */
2426 leaq -64(%rsp),%rsp | |
2427 vzeroupper | |
2428 movq %rax,%r14 | |
2429 andq $-64,%rsp | |
2430 movq %rdi,%r8 | |
2431 movq %rsi,%r9 | |
2432 movq %rdx,%r10 | |
2433 | |
/* r10 = r9 + 64*blocks: address one past the last input block. */
2434 shlq $6,%r10 | |
2435 addq %r9,%r10 | |
2436 leaq K_XX_XX+64(%rip),%r11 | |
2437 | |
/* Load the five state words; esi/edi are pre-computed scratch values */
/* (esi = ebx & (ecx ^ edx)) consumed by the first round of the loop. */
2438 movl 0(%r8),%eax | |
2439 movl 4(%r8),%ebx | |
2440 movl 8(%r8),%ecx | |
2441 movl 12(%r8),%edx | |
2442 movl %ebx,%esi | |
2443 movl 16(%r8),%ebp | |
2444 movl %ecx,%edi | |
2445 xorl %edx,%edi | |
2446 andl %edi,%esi | |
2447 | |
/* xmm6  = 16 bytes at K_XX_XX+128 (the 0x00010203,... shuffle mask)  */
/* xmm11 = 16 bytes at K_XX_XX+0   (the 0x5a827999 row)               */
2448 vmovdqa 64(%r11),%xmm6 | |
2449 vmovdqa -64(%r11),%xmm11 | |
/* Load the first 64-byte block (unaligned loads), reorder its bytes  */
/* with the mask, and advance the input pointer.                      */
2450 vmovdqu 0(%r9),%xmm0 | |
2451 vmovdqu 16(%r9),%xmm1 | |
2452 vmovdqu 32(%r9),%xmm2 | |
2453 vmovdqu 48(%r9),%xmm3 | |
2454 vpshufb %xmm6,%xmm0,%xmm0 | |
2455 addq $64,%r9 | |
2456 vpshufb %xmm6,%xmm1,%xmm1 | |
2457 vpshufb %xmm6,%xmm2,%xmm2 | |
2458 vpshufb %xmm6,%xmm3,%xmm3 | |
/* Pre-add the constant to the first twelve words and park the sums   */
/* in stack slots 0, 16 and 32; slot 48 is not written here.          */
2459 vpaddd %xmm11,%xmm0,%xmm4 | |
2460 vpaddd %xmm11,%xmm1,%xmm5 | |
2461 vpaddd %xmm11,%xmm2,%xmm6 | |
2462 vmovdqa %xmm4,0(%rsp) | |
2463 vmovdqa %xmm5,16(%rsp) | |
2464 vmovdqa %xmm6,32(%rsp) | |
2465 jmp .Loop_avx | |
/*
 * .Loop_avx: one trip through this label-to-jmp span consumes one
 * 64-byte input block (OLD column only; removed by the change).
 *
 * Structure visible below:
 *  - Scalar rounds: each round adds one 32-bit word from the stack
 *    scratch area (addl N(%rsp), N = 0,4,...,60) into one of
 *    eax/ebx/ecx/edx/ebp. The 0..60 sequence is walked five times,
 *    i.e. 80 such additions per block.
 *  - shldl $5,r,r and shrdl $2/$7,r,r use the same register for both
 *    operands, so they act as 32-bit rotates.
 *  - Vector code interleaved between the scalar rounds derives the
 *    next groups of four words in xmm0-xmm7, adds the constant held
 *    in xmm11 (vpaddd ...,%xmm11,%xmm9) and stores the sum back into
 *    a 16-byte stack slot for a later scalar round to pick up.
 *  - On entry r8 = state pointer, r9 = next input, r10 = end of input,
 *    r11 = K_XX_XX+64 (all set up before the first jump here).
 */
2466 .align 16 | |
2467 .Loop_avx: | |
2468 shrdl $2,%ebx,%ebx | |
2469 xorl %edx,%esi | |
2470 vpalignr $8,%xmm0,%xmm1,%xmm4 | |
2471 movl %eax,%edi | |
2472 addl 0(%rsp),%ebp | |
2473 vpaddd %xmm3,%xmm11,%xmm9 | |
2474 xorl %ecx,%ebx | |
2475 shldl $5,%eax,%eax | |
2476 vpsrldq $4,%xmm3,%xmm8 | |
2477 addl %esi,%ebp | |
2478 andl %ebx,%edi | |
2479 vpxor %xmm0,%xmm4,%xmm4 | |
2480 xorl %ecx,%ebx | |
2481 addl %eax,%ebp | |
2482 vpxor %xmm2,%xmm8,%xmm8 | |
2483 shrdl $7,%eax,%eax | |
2484 xorl %ecx,%edi | |
2485 movl %ebp,%esi | |
2486 addl 4(%rsp),%edx | |
2487 vpxor %xmm8,%xmm4,%xmm4 | |
2488 xorl %ebx,%eax | |
2489 shldl $5,%ebp,%ebp | |
2490 vmovdqa %xmm9,48(%rsp) | |
2491 addl %edi,%edx | |
2492 andl %eax,%esi | |
2493 vpsrld $31,%xmm4,%xmm8 | |
2494 xorl %ebx,%eax | |
2495 addl %ebp,%edx | |
2496 shrdl $7,%ebp,%ebp | |
2497 xorl %ebx,%esi | |
2498 vpslldq $12,%xmm4,%xmm10 | |
2499 vpaddd %xmm4,%xmm4,%xmm4 | |
2500 movl %edx,%edi | |
2501 addl 8(%rsp),%ecx | |
2502 xorl %eax,%ebp | |
2503 shldl $5,%edx,%edx | |
2504 vpsrld $30,%xmm10,%xmm9 | |
2505 vpor %xmm8,%xmm4,%xmm4 | |
2506 addl %esi,%ecx | |
2507 andl %ebp,%edi | |
2508 xorl %eax,%ebp | |
2509 addl %edx,%ecx | |
2510 vpslld $2,%xmm10,%xmm10 | |
2511 vpxor %xmm9,%xmm4,%xmm4 | |
2512 shrdl $7,%edx,%edx | |
2513 xorl %eax,%edi | |
2514 movl %ecx,%esi | |
2515 addl 12(%rsp),%ebx | |
2516 vpxor %xmm10,%xmm4,%xmm4 | |
2517 xorl %ebp,%edx | |
2518 shldl $5,%ecx,%ecx | |
2519 addl %edi,%ebx | |
2520 andl %edx,%esi | |
2521 xorl %ebp,%edx | |
2522 addl %ecx,%ebx | |
2523 shrdl $7,%ecx,%ecx | |
2524 xorl %ebp,%esi | |
2525 vpalignr $8,%xmm1,%xmm2,%xmm5 | |
2526 movl %ebx,%edi | |
2527 addl 16(%rsp),%eax | |
2528 vpaddd %xmm4,%xmm11,%xmm9 | |
2529 xorl %edx,%ecx | |
2530 shldl $5,%ebx,%ebx | |
2531 vpsrldq $4,%xmm4,%xmm8 | |
2532 addl %esi,%eax | |
2533 andl %ecx,%edi | |
2534 vpxor %xmm1,%xmm5,%xmm5 | |
2535 xorl %edx,%ecx | |
2536 addl %ebx,%eax | |
2537 vpxor %xmm3,%xmm8,%xmm8 | |
2538 shrdl $7,%ebx,%ebx | |
2539 xorl %edx,%edi | |
2540 movl %eax,%esi | |
2541 addl 20(%rsp),%ebp | |
2542 vpxor %xmm8,%xmm5,%xmm5 | |
2543 xorl %ecx,%ebx | |
2544 shldl $5,%eax,%eax | |
2545 vmovdqa %xmm9,0(%rsp) | |
2546 addl %edi,%ebp | |
2547 andl %ebx,%esi | |
2548 vpsrld $31,%xmm5,%xmm8 | |
2549 xorl %ecx,%ebx | |
2550 addl %eax,%ebp | |
2551 shrdl $7,%eax,%eax | |
2552 xorl %ecx,%esi | |
2553 vpslldq $12,%xmm5,%xmm10 | |
2554 vpaddd %xmm5,%xmm5,%xmm5 | |
2555 movl %ebp,%edi | |
2556 addl 24(%rsp),%edx | |
2557 xorl %ebx,%eax | |
2558 shldl $5,%ebp,%ebp | |
2559 vpsrld $30,%xmm10,%xmm9 | |
2560 vpor %xmm8,%xmm5,%xmm5 | |
2561 addl %esi,%edx | |
2562 andl %eax,%edi | |
2563 xorl %ebx,%eax | |
2564 addl %ebp,%edx | |
2565 vpslld $2,%xmm10,%xmm10 | |
2566 vpxor %xmm9,%xmm5,%xmm5 | |
2567 shrdl $7,%ebp,%ebp | |
2568 xorl %ebx,%edi | |
2569 movl %edx,%esi | |
2570 addl 28(%rsp),%ecx | |
2571 vpxor %xmm10,%xmm5,%xmm5 | |
2572 xorl %eax,%ebp | |
2573 shldl $5,%edx,%edx | |
/* Switch xmm11 to the row at K_XX_XX+32 (0x6ed9eba1); words pre-added */
/* from here on use this constant.                                     */
2574 vmovdqa -32(%r11),%xmm11 | |
2575 addl %edi,%ecx | |
2576 andl %ebp,%esi | |
2577 xorl %eax,%ebp | |
2578 addl %edx,%ecx | |
2579 shrdl $7,%edx,%edx | |
2580 xorl %eax,%esi | |
2581 vpalignr $8,%xmm2,%xmm3,%xmm6 | |
2582 movl %ecx,%edi | |
2583 addl 32(%rsp),%ebx | |
2584 vpaddd %xmm5,%xmm11,%xmm9 | |
2585 xorl %ebp,%edx | |
2586 shldl $5,%ecx,%ecx | |
2587 vpsrldq $4,%xmm5,%xmm8 | |
2588 addl %esi,%ebx | |
2589 andl %edx,%edi | |
2590 vpxor %xmm2,%xmm6,%xmm6 | |
2591 xorl %ebp,%edx | |
2592 addl %ecx,%ebx | |
2593 vpxor %xmm4,%xmm8,%xmm8 | |
2594 shrdl $7,%ecx,%ecx | |
2595 xorl %ebp,%edi | |
2596 movl %ebx,%esi | |
2597 addl 36(%rsp),%eax | |
2598 vpxor %xmm8,%xmm6,%xmm6 | |
2599 xorl %edx,%ecx | |
2600 shldl $5,%ebx,%ebx | |
2601 vmovdqa %xmm9,16(%rsp) | |
2602 addl %edi,%eax | |
2603 andl %ecx,%esi | |
2604 vpsrld $31,%xmm6,%xmm8 | |
2605 xorl %edx,%ecx | |
2606 addl %ebx,%eax | |
2607 shrdl $7,%ebx,%ebx | |
2608 xorl %edx,%esi | |
2609 vpslldq $12,%xmm6,%xmm10 | |
2610 vpaddd %xmm6,%xmm6,%xmm6 | |
2611 movl %eax,%edi | |
2612 addl 40(%rsp),%ebp | |
2613 xorl %ecx,%ebx | |
2614 shldl $5,%eax,%eax | |
2615 vpsrld $30,%xmm10,%xmm9 | |
2616 vpor %xmm8,%xmm6,%xmm6 | |
2617 addl %esi,%ebp | |
2618 andl %ebx,%edi | |
2619 xorl %ecx,%ebx | |
2620 addl %eax,%ebp | |
2621 vpslld $2,%xmm10,%xmm10 | |
2622 vpxor %xmm9,%xmm6,%xmm6 | |
2623 shrdl $7,%eax,%eax | |
2624 xorl %ecx,%edi | |
2625 movl %ebp,%esi | |
2626 addl 44(%rsp),%edx | |
2627 vpxor %xmm10,%xmm6,%xmm6 | |
2628 xorl %ebx,%eax | |
2629 shldl $5,%ebp,%ebp | |
2630 addl %edi,%edx | |
2631 andl %eax,%esi | |
2632 xorl %ebx,%eax | |
2633 addl %ebp,%edx | |
2634 shrdl $7,%ebp,%ebp | |
2635 xorl %ebx,%esi | |
2636 vpalignr $8,%xmm3,%xmm4,%xmm7 | |
2637 movl %edx,%edi | |
2638 addl 48(%rsp),%ecx | |
2639 vpaddd %xmm6,%xmm11,%xmm9 | |
2640 xorl %eax,%ebp | |
2641 shldl $5,%edx,%edx | |
2642 vpsrldq $4,%xmm6,%xmm8 | |
2643 addl %esi,%ecx | |
2644 andl %ebp,%edi | |
2645 vpxor %xmm3,%xmm7,%xmm7 | |
2646 xorl %eax,%ebp | |
2647 addl %edx,%ecx | |
2648 vpxor %xmm5,%xmm8,%xmm8 | |
2649 shrdl $7,%edx,%edx | |
2650 xorl %eax,%edi | |
2651 movl %ecx,%esi | |
2652 addl 52(%rsp),%ebx | |
2653 vpxor %xmm8,%xmm7,%xmm7 | |
2654 xorl %ebp,%edx | |
2655 shldl $5,%ecx,%ecx | |
2656 vmovdqa %xmm9,32(%rsp) | |
2657 addl %edi,%ebx | |
2658 andl %edx,%esi | |
2659 vpsrld $31,%xmm7,%xmm8 | |
2660 xorl %ebp,%edx | |
2661 addl %ecx,%ebx | |
2662 shrdl $7,%ecx,%ecx | |
2663 xorl %ebp,%esi | |
2664 vpslldq $12,%xmm7,%xmm10 | |
2665 vpaddd %xmm7,%xmm7,%xmm7 | |
2666 movl %ebx,%edi | |
2667 addl 56(%rsp),%eax | |
2668 xorl %edx,%ecx | |
2669 shldl $5,%ebx,%ebx | |
2670 vpsrld $30,%xmm10,%xmm9 | |
2671 vpor %xmm8,%xmm7,%xmm7 | |
2672 addl %esi,%eax | |
2673 andl %ecx,%edi | |
2674 xorl %edx,%ecx | |
2675 addl %ebx,%eax | |
2676 vpslld $2,%xmm10,%xmm10 | |
2677 vpxor %xmm9,%xmm7,%xmm7 | |
2678 shrdl $7,%ebx,%ebx | |
2679 xorl %edx,%edi | |
2680 movl %eax,%esi | |
2681 addl 60(%rsp),%ebp | |
2682 vpxor %xmm10,%xmm7,%xmm7 | |
2683 xorl %ecx,%ebx | |
2684 shldl $5,%eax,%eax | |
2685 addl %edi,%ebp | |
2686 andl %ebx,%esi | |
2687 xorl %ecx,%ebx | |
2688 addl %eax,%ebp | |
/* From here each new four-word group is formed by XOR-ing three      */
/* earlier groups with a vpalignr-combined pair, then rotating every  */
/* lane left by 2 (vpsrld $30 / vpslld $2 / vpor).                    */
2689 vpalignr $8,%xmm6,%xmm7,%xmm8 | |
2690 vpxor %xmm4,%xmm0,%xmm0 | |
2691 shrdl $7,%eax,%eax | |
2692 xorl %ecx,%esi | |
2693 movl %ebp,%edi | |
2694 addl 0(%rsp),%edx | |
2695 vpxor %xmm1,%xmm0,%xmm0 | |
2696 xorl %ebx,%eax | |
2697 shldl $5,%ebp,%ebp | |
2698 vpaddd %xmm7,%xmm11,%xmm9 | |
2699 addl %esi,%edx | |
2700 andl %eax,%edi | |
2701 vpxor %xmm8,%xmm0,%xmm0 | |
2702 xorl %ebx,%eax | |
2703 addl %ebp,%edx | |
2704 shrdl $7,%ebp,%ebp | |
2705 xorl %ebx,%edi | |
2706 vpsrld $30,%xmm0,%xmm8 | |
2707 vmovdqa %xmm9,48(%rsp) | |
2708 movl %edx,%esi | |
2709 addl 4(%rsp),%ecx | |
2710 xorl %eax,%ebp | |
2711 shldl $5,%edx,%edx | |
2712 vpslld $2,%xmm0,%xmm0 | |
2713 addl %edi,%ecx | |
2714 andl %ebp,%esi | |
2715 xorl %eax,%ebp | |
2716 addl %edx,%ecx | |
2717 shrdl $7,%edx,%edx | |
2718 xorl %eax,%esi | |
2719 movl %ecx,%edi | |
2720 addl 8(%rsp),%ebx | |
2721 vpor %xmm8,%xmm0,%xmm0 | |
2722 xorl %ebp,%edx | |
2723 shldl $5,%ecx,%ecx | |
2724 addl %esi,%ebx | |
2725 andl %edx,%edi | |
2726 xorl %ebp,%edx | |
2727 addl %ecx,%ebx | |
2728 addl 12(%rsp),%eax | |
2729 xorl %ebp,%edi | |
2730 movl %ebx,%esi | |
2731 shldl $5,%ebx,%ebx | |
2732 addl %edi,%eax | |
2733 xorl %edx,%esi | |
2734 shrdl $7,%ecx,%ecx | |
2735 addl %ebx,%eax | |
2736 vpalignr $8,%xmm7,%xmm0,%xmm8 | |
2737 vpxor %xmm5,%xmm1,%xmm1 | |
2738 addl 16(%rsp),%ebp | |
2739 xorl %ecx,%esi | |
2740 movl %eax,%edi | |
2741 shldl $5,%eax,%eax | |
2742 vpxor %xmm2,%xmm1,%xmm1 | |
2743 addl %esi,%ebp | |
2744 xorl %ecx,%edi | |
2745 vpaddd %xmm0,%xmm11,%xmm9 | |
2746 shrdl $7,%ebx,%ebx | |
2747 addl %eax,%ebp | |
2748 vpxor %xmm8,%xmm1,%xmm1 | |
2749 addl 20(%rsp),%edx | |
2750 xorl %ebx,%edi | |
2751 movl %ebp,%esi | |
2752 shldl $5,%ebp,%ebp | |
2753 vpsrld $30,%xmm1,%xmm8 | |
2754 vmovdqa %xmm9,0(%rsp) | |
2755 addl %edi,%edx | |
2756 xorl %ebx,%esi | |
2757 shrdl $7,%eax,%eax | |
2758 addl %ebp,%edx | |
2759 vpslld $2,%xmm1,%xmm1 | |
2760 addl 24(%rsp),%ecx | |
2761 xorl %eax,%esi | |
2762 movl %edx,%edi | |
2763 shldl $5,%edx,%edx | |
2764 addl %esi,%ecx | |
2765 xorl %eax,%edi | |
2766 shrdl $7,%ebp,%ebp | |
2767 addl %edx,%ecx | |
2768 vpor %xmm8,%xmm1,%xmm1 | |
2769 addl 28(%rsp),%ebx | |
2770 xorl %ebp,%edi | |
2771 movl %ecx,%esi | |
2772 shldl $5,%ecx,%ecx | |
2773 addl %edi,%ebx | |
2774 xorl %ebp,%esi | |
2775 shrdl $7,%edx,%edx | |
2776 addl %ecx,%ebx | |
2777 vpalignr $8,%xmm0,%xmm1,%xmm8 | |
2778 vpxor %xmm6,%xmm2,%xmm2 | |
2779 addl 32(%rsp),%eax | |
2780 xorl %edx,%esi | |
2781 movl %ebx,%edi | |
2782 shldl $5,%ebx,%ebx | |
2783 vpxor %xmm3,%xmm2,%xmm2 | |
2784 addl %esi,%eax | |
2785 xorl %edx,%edi | |
2786 vpaddd %xmm1,%xmm11,%xmm9 | |
/* Switch xmm11 to the row at K_XX_XX+64 (0x8f1bbcdc). The vpaddd just */
/* above still used the previous constant.                             */
2787 vmovdqa 0(%r11),%xmm11 | |
2788 shrdl $7,%ecx,%ecx | |
2789 addl %ebx,%eax | |
2790 vpxor %xmm8,%xmm2,%xmm2 | |
2791 addl 36(%rsp),%ebp | |
2792 xorl %ecx,%edi | |
2793 movl %eax,%esi | |
2794 shldl $5,%eax,%eax | |
2795 vpsrld $30,%xmm2,%xmm8 | |
2796 vmovdqa %xmm9,16(%rsp) | |
2797 addl %edi,%ebp | |
2798 xorl %ecx,%esi | |
2799 shrdl $7,%ebx,%ebx | |
2800 addl %eax,%ebp | |
2801 vpslld $2,%xmm2,%xmm2 | |
2802 addl 40(%rsp),%edx | |
2803 xorl %ebx,%esi | |
2804 movl %ebp,%edi | |
2805 shldl $5,%ebp,%ebp | |
2806 addl %esi,%edx | |
2807 xorl %ebx,%edi | |
2808 shrdl $7,%eax,%eax | |
2809 addl %ebp,%edx | |
2810 vpor %xmm8,%xmm2,%xmm2 | |
2811 addl 44(%rsp),%ecx | |
2812 xorl %eax,%edi | |
2813 movl %edx,%esi | |
2814 shldl $5,%edx,%edx | |
2815 addl %edi,%ecx | |
2816 xorl %eax,%esi | |
2817 shrdl $7,%ebp,%ebp | |
2818 addl %edx,%ecx | |
2819 vpalignr $8,%xmm1,%xmm2,%xmm8 | |
2820 vpxor %xmm7,%xmm3,%xmm3 | |
2821 addl 48(%rsp),%ebx | |
2822 xorl %ebp,%esi | |
2823 movl %ecx,%edi | |
2824 shldl $5,%ecx,%ecx | |
2825 vpxor %xmm4,%xmm3,%xmm3 | |
2826 addl %esi,%ebx | |
2827 xorl %ebp,%edi | |
2828 vpaddd %xmm2,%xmm11,%xmm9 | |
2829 shrdl $7,%edx,%edx | |
2830 addl %ecx,%ebx | |
2831 vpxor %xmm8,%xmm3,%xmm3 | |
2832 addl 52(%rsp),%eax | |
2833 xorl %edx,%edi | |
2834 movl %ebx,%esi | |
2835 shldl $5,%ebx,%ebx | |
2836 vpsrld $30,%xmm3,%xmm8 | |
2837 vmovdqa %xmm9,32(%rsp) | |
2838 addl %edi,%eax | |
2839 xorl %edx,%esi | |
2840 shrdl $7,%ecx,%ecx | |
2841 addl %ebx,%eax | |
2842 vpslld $2,%xmm3,%xmm3 | |
2843 addl 56(%rsp),%ebp | |
2844 xorl %ecx,%esi | |
2845 movl %eax,%edi | |
2846 shldl $5,%eax,%eax | |
2847 addl %esi,%ebp | |
2848 xorl %ecx,%edi | |
2849 shrdl $7,%ebx,%ebx | |
2850 addl %eax,%ebp | |
2851 vpor %xmm8,%xmm3,%xmm3 | |
2852 addl 60(%rsp),%edx | |
2853 xorl %ebx,%edi | |
2854 movl %ebp,%esi | |
2855 shldl $5,%ebp,%ebp | |
2856 addl %edi,%edx | |
2857 xorl %ebx,%esi | |
2858 shrdl $7,%eax,%eax | |
2859 addl %ebp,%edx | |
2860 vpalignr $8,%xmm2,%xmm3,%xmm8 | |
2861 vpxor %xmm0,%xmm4,%xmm4 | |
2862 addl 0(%rsp),%ecx | |
2863 xorl %eax,%esi | |
2864 movl %edx,%edi | |
2865 shldl $5,%edx,%edx | |
2866 vpxor %xmm5,%xmm4,%xmm4 | |
2867 addl %esi,%ecx | |
2868 xorl %eax,%edi | |
2869 vpaddd %xmm3,%xmm11,%xmm9 | |
2870 shrdl $7,%ebp,%ebp | |
2871 addl %edx,%ecx | |
2872 vpxor %xmm8,%xmm4,%xmm4 | |
2873 addl 4(%rsp),%ebx | |
2874 xorl %ebp,%edi | |
2875 movl %ecx,%esi | |
2876 shldl $5,%ecx,%ecx | |
2877 vpsrld $30,%xmm4,%xmm8 | |
2878 vmovdqa %xmm9,48(%rsp) | |
2879 addl %edi,%ebx | |
2880 xorl %ebp,%esi | |
2881 shrdl $7,%edx,%edx | |
2882 addl %ecx,%ebx | |
2883 vpslld $2,%xmm4,%xmm4 | |
2884 addl 8(%rsp),%eax | |
2885 xorl %edx,%esi | |
2886 movl %ebx,%edi | |
2887 shldl $5,%ebx,%ebx | |
2888 addl %esi,%eax | |
2889 xorl %edx,%edi | |
2890 shrdl $7,%ecx,%ecx | |
2891 addl %ebx,%eax | |
2892 vpor %xmm8,%xmm4,%xmm4 | |
2893 addl 12(%rsp),%ebp | |
2894 xorl %ecx,%edi | |
2895 movl %eax,%esi | |
2896 shldl $5,%eax,%eax | |
2897 addl %edi,%ebp | |
2898 xorl %ecx,%esi | |
2899 shrdl $7,%ebx,%ebx | |
2900 addl %eax,%ebp | |
2901 vpalignr $8,%xmm3,%xmm4,%xmm8 | |
2902 vpxor %xmm1,%xmm5,%xmm5 | |
2903 addl 16(%rsp),%edx | |
2904 xorl %ebx,%esi | |
2905 movl %ebp,%edi | |
2906 shldl $5,%ebp,%ebp | |
2907 vpxor %xmm6,%xmm5,%xmm5 | |
2908 addl %esi,%edx | |
2909 xorl %ebx,%edi | |
2910 vpaddd %xmm4,%xmm11,%xmm9 | |
2911 shrdl $7,%eax,%eax | |
2912 addl %ebp,%edx | |
2913 vpxor %xmm8,%xmm5,%xmm5 | |
2914 addl 20(%rsp),%ecx | |
2915 xorl %eax,%edi | |
2916 movl %edx,%esi | |
2917 shldl $5,%edx,%edx | |
2918 vpsrld $30,%xmm5,%xmm8 | |
2919 vmovdqa %xmm9,0(%rsp) | |
2920 addl %edi,%ecx | |
2921 xorl %eax,%esi | |
2922 shrdl $7,%ebp,%ebp | |
2923 addl %edx,%ecx | |
2924 vpslld $2,%xmm5,%xmm5 | |
2925 addl 24(%rsp),%ebx | |
2926 xorl %ebp,%esi | |
2927 movl %ecx,%edi | |
2928 shldl $5,%ecx,%ecx | |
2929 addl %esi,%ebx | |
2930 xorl %ebp,%edi | |
2931 shrdl $7,%edx,%edx | |
2932 addl %ecx,%ebx | |
2933 vpor %xmm8,%xmm5,%xmm5 | |
2934 addl 28(%rsp),%eax | |
2935 shrdl $7,%ecx,%ecx | |
2936 movl %ebx,%esi | |
2937 xorl %edx,%edi | |
2938 shldl $5,%ebx,%ebx | |
2939 addl %edi,%eax | |
2940 xorl %ecx,%esi | |
2941 xorl %edx,%ecx | |
2942 addl %ebx,%eax | |
2943 vpalignr $8,%xmm4,%xmm5,%xmm8 | |
2944 vpxor %xmm2,%xmm6,%xmm6 | |
2945 addl 32(%rsp),%ebp | |
2946 andl %ecx,%esi | |
2947 xorl %edx,%ecx | |
2948 shrdl $7,%ebx,%ebx | |
2949 vpxor %xmm7,%xmm6,%xmm6 | |
2950 movl %eax,%edi | |
2951 xorl %ecx,%esi | |
2952 vpaddd %xmm5,%xmm11,%xmm9 | |
2953 shldl $5,%eax,%eax | |
2954 addl %esi,%ebp | |
2955 vpxor %xmm8,%xmm6,%xmm6 | |
2956 xorl %ebx,%edi | |
2957 xorl %ecx,%ebx | |
2958 addl %eax,%ebp | |
2959 addl 36(%rsp),%edx | |
2960 vpsrld $30,%xmm6,%xmm8 | |
2961 vmovdqa %xmm9,16(%rsp) | |
2962 andl %ebx,%edi | |
2963 xorl %ecx,%ebx | |
2964 shrdl $7,%eax,%eax | |
2965 movl %ebp,%esi | |
2966 vpslld $2,%xmm6,%xmm6 | |
2967 xorl %ebx,%edi | |
2968 shldl $5,%ebp,%ebp | |
2969 addl %edi,%edx | |
2970 xorl %eax,%esi | |
2971 xorl %ebx,%eax | |
2972 addl %ebp,%edx | |
2973 addl 40(%rsp),%ecx | |
2974 andl %eax,%esi | |
2975 vpor %xmm8,%xmm6,%xmm6 | |
2976 xorl %ebx,%eax | |
2977 shrdl $7,%ebp,%ebp | |
2978 movl %edx,%edi | |
2979 xorl %eax,%esi | |
2980 shldl $5,%edx,%edx | |
2981 addl %esi,%ecx | |
2982 xorl %ebp,%edi | |
2983 xorl %eax,%ebp | |
2984 addl %edx,%ecx | |
2985 addl 44(%rsp),%ebx | |
2986 andl %ebp,%edi | |
2987 xorl %eax,%ebp | |
2988 shrdl $7,%edx,%edx | |
2989 movl %ecx,%esi | |
2990 xorl %ebp,%edi | |
2991 shldl $5,%ecx,%ecx | |
2992 addl %edi,%ebx | |
2993 xorl %edx,%esi | |
2994 xorl %ebp,%edx | |
2995 addl %ecx,%ebx | |
2996 vpalignr $8,%xmm5,%xmm6,%xmm8 | |
2997 vpxor %xmm3,%xmm7,%xmm7 | |
2998 addl 48(%rsp),%eax | |
2999 andl %edx,%esi | |
3000 xorl %ebp,%edx | |
3001 shrdl $7,%ecx,%ecx | |
3002 vpxor %xmm0,%xmm7,%xmm7 | |
3003 movl %ebx,%edi | |
3004 xorl %edx,%esi | |
3005 vpaddd %xmm6,%xmm11,%xmm9 | |
/* Switch xmm11 to the row at K_XX_XX+96 (0xca62c1d6). The vpaddd just */
/* above still used the previous constant.                             */
3006 vmovdqa 32(%r11),%xmm11 | |
3007 shldl $5,%ebx,%ebx | |
3008 addl %esi,%eax | |
3009 vpxor %xmm8,%xmm7,%xmm7 | |
3010 xorl %ecx,%edi | |
3011 xorl %edx,%ecx | |
3012 addl %ebx,%eax | |
3013 addl 52(%rsp),%ebp | |
3014 vpsrld $30,%xmm7,%xmm8 | |
3015 vmovdqa %xmm9,32(%rsp) | |
3016 andl %ecx,%edi | |
3017 xorl %edx,%ecx | |
3018 shrdl $7,%ebx,%ebx | |
3019 movl %eax,%esi | |
3020 vpslld $2,%xmm7,%xmm7 | |
3021 xorl %ecx,%edi | |
3022 shldl $5,%eax,%eax | |
3023 addl %edi,%ebp | |
3024 xorl %ebx,%esi | |
3025 xorl %ecx,%ebx | |
3026 addl %eax,%ebp | |
3027 addl 56(%rsp),%edx | |
3028 andl %ebx,%esi | |
3029 vpor %xmm8,%xmm7,%xmm7 | |
3030 xorl %ecx,%ebx | |
3031 shrdl $7,%eax,%eax | |
3032 movl %ebp,%edi | |
3033 xorl %ebx,%esi | |
3034 shldl $5,%ebp,%ebp | |
3035 addl %esi,%edx | |
3036 xorl %eax,%edi | |
3037 xorl %ebx,%eax | |
3038 addl %ebp,%edx | |
3039 addl 60(%rsp),%ecx | |
3040 andl %eax,%edi | |
3041 xorl %ebx,%eax | |
3042 shrdl $7,%ebp,%ebp | |
3043 movl %edx,%esi | |
3044 xorl %eax,%edi | |
3045 shldl $5,%edx,%edx | |
3046 addl %edi,%ecx | |
3047 xorl %ebp,%esi | |
3048 xorl %eax,%ebp | |
3049 addl %edx,%ecx | |
3050 vpalignr $8,%xmm6,%xmm7,%xmm8 | |
3051 vpxor %xmm4,%xmm0,%xmm0 | |
3052 addl 0(%rsp),%ebx | |
3053 andl %ebp,%esi | |
3054 xorl %eax,%ebp | |
3055 shrdl $7,%edx,%edx | |
3056 vpxor %xmm1,%xmm0,%xmm0 | |
3057 movl %ecx,%edi | |
3058 xorl %ebp,%esi | |
3059 vpaddd %xmm7,%xmm11,%xmm9 | |
3060 shldl $5,%ecx,%ecx | |
3061 addl %esi,%ebx | |
3062 vpxor %xmm8,%xmm0,%xmm0 | |
3063 xorl %edx,%edi | |
3064 xorl %ebp,%edx | |
3065 addl %ecx,%ebx | |
3066 addl 4(%rsp),%eax | |
3067 vpsrld $30,%xmm0,%xmm8 | |
3068 vmovdqa %xmm9,48(%rsp) | |
3069 andl %edx,%edi | |
3070 xorl %ebp,%edx | |
3071 shrdl $7,%ecx,%ecx | |
3072 movl %ebx,%esi | |
3073 vpslld $2,%xmm0,%xmm0 | |
3074 xorl %edx,%edi | |
3075 shldl $5,%ebx,%ebx | |
3076 addl %edi,%eax | |
3077 xorl %ecx,%esi | |
3078 xorl %edx,%ecx | |
3079 addl %ebx,%eax | |
3080 addl 8(%rsp),%ebp | |
3081 andl %ecx,%esi | |
3082 vpor %xmm8,%xmm0,%xmm0 | |
3083 xorl %edx,%ecx | |
3084 shrdl $7,%ebx,%ebx | |
3085 movl %eax,%edi | |
3086 xorl %ecx,%esi | |
3087 shldl $5,%eax,%eax | |
3088 addl %esi,%ebp | |
3089 xorl %ebx,%edi | |
3090 xorl %ecx,%ebx | |
3091 addl %eax,%ebp | |
3092 addl 12(%rsp),%edx | |
3093 andl %ebx,%edi | |
3094 xorl %ecx,%ebx | |
3095 shrdl $7,%eax,%eax | |
3096 movl %ebp,%esi | |
3097 xorl %ebx,%edi | |
3098 shldl $5,%ebp,%ebp | |
3099 addl %edi,%edx | |
3100 xorl %eax,%esi | |
3101 xorl %ebx,%eax | |
3102 addl %ebp,%edx | |
3103 vpalignr $8,%xmm7,%xmm0,%xmm8 | |
3104 vpxor %xmm5,%xmm1,%xmm1 | |
3105 addl 16(%rsp),%ecx | |
3106 andl %eax,%esi | |
3107 xorl %ebx,%eax | |
3108 shrdl $7,%ebp,%ebp | |
3109 vpxor %xmm2,%xmm1,%xmm1 | |
3110 movl %edx,%edi | |
3111 xorl %eax,%esi | |
3112 vpaddd %xmm0,%xmm11,%xmm9 | |
3113 shldl $5,%edx,%edx | |
3114 addl %esi,%ecx | |
3115 vpxor %xmm8,%xmm1,%xmm1 | |
3116 xorl %ebp,%edi | |
3117 xorl %eax,%ebp | |
3118 addl %edx,%ecx | |
3119 addl 20(%rsp),%ebx | |
3120 vpsrld $30,%xmm1,%xmm8 | |
3121 vmovdqa %xmm9,0(%rsp) | |
3122 andl %ebp,%edi | |
3123 xorl %eax,%ebp | |
3124 shrdl $7,%edx,%edx | |
3125 movl %ecx,%esi | |
3126 vpslld $2,%xmm1,%xmm1 | |
3127 xorl %ebp,%edi | |
3128 shldl $5,%ecx,%ecx | |
3129 addl %edi,%ebx | |
3130 xorl %edx,%esi | |
3131 xorl %ebp,%edx | |
3132 addl %ecx,%ebx | |
3133 addl 24(%rsp),%eax | |
3134 andl %edx,%esi | |
3135 vpor %xmm8,%xmm1,%xmm1 | |
3136 xorl %ebp,%edx | |
3137 shrdl $7,%ecx,%ecx | |
3138 movl %ebx,%edi | |
3139 xorl %edx,%esi | |
3140 shldl $5,%ebx,%ebx | |
3141 addl %esi,%eax | |
3142 xorl %ecx,%edi | |
3143 xorl %edx,%ecx | |
3144 addl %ebx,%eax | |
3145 addl 28(%rsp),%ebp | |
3146 andl %ecx,%edi | |
3147 xorl %edx,%ecx | |
3148 shrdl $7,%ebx,%ebx | |
3149 movl %eax,%esi | |
3150 xorl %ecx,%edi | |
3151 shldl $5,%eax,%eax | |
3152 addl %edi,%ebp | |
3153 xorl %ebx,%esi | |
3154 xorl %ecx,%ebx | |
3155 addl %eax,%ebp | |
3156 vpalignr $8,%xmm0,%xmm1,%xmm8 | |
3157 vpxor %xmm6,%xmm2,%xmm2 | |
3158 addl 32(%rsp),%edx | |
3159 andl %ebx,%esi | |
3160 xorl %ecx,%ebx | |
3161 shrdl $7,%eax,%eax | |
3162 vpxor %xmm3,%xmm2,%xmm2 | |
3163 movl %ebp,%edi | |
3164 xorl %ebx,%esi | |
3165 vpaddd %xmm1,%xmm11,%xmm9 | |
3166 shldl $5,%ebp,%ebp | |
3167 addl %esi,%edx | |
3168 vpxor %xmm8,%xmm2,%xmm2 | |
3169 xorl %eax,%edi | |
3170 xorl %ebx,%eax | |
3171 addl %ebp,%edx | |
3172 addl 36(%rsp),%ecx | |
3173 vpsrld $30,%xmm2,%xmm8 | |
3174 vmovdqa %xmm9,16(%rsp) | |
3175 andl %eax,%edi | |
3176 xorl %ebx,%eax | |
3177 shrdl $7,%ebp,%ebp | |
3178 movl %edx,%esi | |
3179 vpslld $2,%xmm2,%xmm2 | |
3180 xorl %eax,%edi | |
3181 shldl $5,%edx,%edx | |
3182 addl %edi,%ecx | |
3183 xorl %ebp,%esi | |
3184 xorl %eax,%ebp | |
3185 addl %edx,%ecx | |
3186 addl 40(%rsp),%ebx | |
3187 andl %ebp,%esi | |
3188 vpor %xmm8,%xmm2,%xmm2 | |
3189 xorl %eax,%ebp | |
3190 shrdl $7,%edx,%edx | |
3191 movl %ecx,%edi | |
3192 xorl %ebp,%esi | |
3193 shldl $5,%ecx,%ecx | |
3194 addl %esi,%ebx | |
3195 xorl %edx,%edi | |
3196 xorl %ebp,%edx | |
3197 addl %ecx,%ebx | |
3198 addl 44(%rsp),%eax | |
3199 andl %edx,%edi | |
3200 xorl %ebp,%edx | |
3201 shrdl $7,%ecx,%ecx | |
3202 movl %ebx,%esi | |
3203 xorl %edx,%edi | |
3204 shldl $5,%ebx,%ebx | |
3205 addl %edi,%eax | |
3206 xorl %edx,%esi | |
3207 addl %ebx,%eax | |
3208 vpalignr $8,%xmm1,%xmm2,%xmm8 | |
3209 vpxor %xmm7,%xmm3,%xmm3 | |
3210 addl 48(%rsp),%ebp | |
3211 xorl %ecx,%esi | |
3212 movl %eax,%edi | |
3213 shldl $5,%eax,%eax | |
3214 vpxor %xmm4,%xmm3,%xmm3 | |
3215 addl %esi,%ebp | |
3216 xorl %ecx,%edi | |
3217 vpaddd %xmm2,%xmm11,%xmm9 | |
3218 shrdl $7,%ebx,%ebx | |
3219 addl %eax,%ebp | |
3220 vpxor %xmm8,%xmm3,%xmm3 | |
3221 addl 52(%rsp),%edx | |
3222 xorl %ebx,%edi | |
3223 movl %ebp,%esi | |
3224 shldl $5,%ebp,%ebp | |
3225 vpsrld $30,%xmm3,%xmm8 | |
3226 vmovdqa %xmm9,32(%rsp) | |
3227 addl %edi,%edx | |
3228 xorl %ebx,%esi | |
3229 shrdl $7,%eax,%eax | |
3230 addl %ebp,%edx | |
3231 vpslld $2,%xmm3,%xmm3 | |
3232 addl 56(%rsp),%ecx | |
3233 xorl %eax,%esi | |
3234 movl %edx,%edi | |
3235 shldl $5,%edx,%edx | |
3236 addl %esi,%ecx | |
3237 xorl %eax,%edi | |
3238 shrdl $7,%ebp,%ebp | |
3239 addl %edx,%ecx | |
3240 vpor %xmm8,%xmm3,%xmm3 | |
3241 addl 60(%rsp),%ebx | |
3242 xorl %ebp,%edi | |
3243 movl %ecx,%esi | |
3244 shldl $5,%ecx,%ecx | |
3245 addl %edi,%ebx | |
3246 xorl %ebp,%esi | |
3247 shrdl $7,%edx,%edx | |
3248 addl %ecx,%ebx | |
3249 addl 0(%rsp),%eax | |
3250 vpaddd %xmm3,%xmm11,%xmm9 | |
3251 xorl %edx,%esi | |
3252 movl %ebx,%edi | |
3253 shldl $5,%ebx,%ebx | |
3254 addl %esi,%eax | |
3255 vmovdqa %xmm9,48(%rsp) | |
3256 xorl %edx,%edi | |
3257 shrdl $7,%ecx,%ecx | |
3258 addl %ebx,%eax | |
3259 addl 4(%rsp),%ebp | |
3260 xorl %ecx,%edi | |
3261 movl %eax,%esi | |
3262 shldl $5,%eax,%eax | |
3263 addl %edi,%ebp | |
3264 xorl %ecx,%esi | |
3265 shrdl $7,%ebx,%ebx | |
3266 addl %eax,%ebp | |
3267 addl 8(%rsp),%edx | |
3268 xorl %ebx,%esi | |
3269 movl %ebp,%edi | |
3270 shldl $5,%ebp,%ebp | |
3271 addl %esi,%edx | |
3272 xorl %ebx,%edi | |
3273 shrdl $7,%eax,%eax | |
3274 addl %ebp,%edx | |
3275 addl 12(%rsp),%ecx | |
3276 xorl %eax,%edi | |
3277 movl %edx,%esi | |
3278 shldl $5,%edx,%edx | |
3279 addl %edi,%ecx | |
3280 xorl %eax,%esi | |
3281 shrdl $7,%ebp,%ebp | |
3282 addl %edx,%ecx | |
/* Input exhausted (next-input pointer r9 equals end pointer r10)?    */
/* Then finish this block at .Ldone_avx without loading more data.    */
3283 cmpq %r10,%r9 | |
3284 je .Ldone_avx | |
/* More input: reload the shuffle mask (K_XX_XX+128) and the first    */
/* constant row (K_XX_XX+0), fetch the next 64-byte block and start   */
/* preparing its first twelve pre-added words while the remaining     */
/* rounds of the current block (stack offsets 16..60) complete.       */
3285 vmovdqa 64(%r11),%xmm6 | |
3286 vmovdqa -64(%r11),%xmm11 | |
3287 vmovdqu 0(%r9),%xmm0 | |
3288 vmovdqu 16(%r9),%xmm1 | |
3289 vmovdqu 32(%r9),%xmm2 | |
3290 vmovdqu 48(%r9),%xmm3 | |
3291 vpshufb %xmm6,%xmm0,%xmm0 | |
3292 addq $64,%r9 | |
3293 addl 16(%rsp),%ebx | |
3294 xorl %ebp,%esi | |
3295 vpshufb %xmm6,%xmm1,%xmm1 | |
3296 movl %ecx,%edi | |
3297 shldl $5,%ecx,%ecx | |
3298 vpaddd %xmm11,%xmm0,%xmm4 | |
3299 addl %esi,%ebx | |
3300 xorl %ebp,%edi | |
3301 shrdl $7,%edx,%edx | |
3302 addl %ecx,%ebx | |
3303 vmovdqa %xmm4,0(%rsp) | |
3304 addl 20(%rsp),%eax | |
3305 xorl %edx,%edi | |
3306 movl %ebx,%esi | |
3307 shldl $5,%ebx,%ebx | |
3308 addl %edi,%eax | |
3309 xorl %edx,%esi | |
3310 shrdl $7,%ecx,%ecx | |
3311 addl %ebx,%eax | |
3312 addl 24(%rsp),%ebp | |
3313 xorl %ecx,%esi | |
3314 movl %eax,%edi | |
3315 shldl $5,%eax,%eax | |
3316 addl %esi,%ebp | |
3317 xorl %ecx,%edi | |
3318 shrdl $7,%ebx,%ebx | |
3319 addl %eax,%ebp | |
3320 addl 28(%rsp),%edx | |
3321 xorl %ebx,%edi | |
3322 movl %ebp,%esi | |
3323 shldl $5,%ebp,%ebp | |
3324 addl %edi,%edx | |
3325 xorl %ebx,%esi | |
3326 shrdl $7,%eax,%eax | |
3327 addl %ebp,%edx | |
3328 addl 32(%rsp),%ecx | |
3329 xorl %eax,%esi | |
3330 vpshufb %xmm6,%xmm2,%xmm2 | |
3331 movl %edx,%edi | |
3332 shldl $5,%edx,%edx | |
3333 vpaddd %xmm11,%xmm1,%xmm5 | |
3334 addl %esi,%ecx | |
3335 xorl %eax,%edi | |
3336 shrdl $7,%ebp,%ebp | |
3337 addl %edx,%ecx | |
3338 vmovdqa %xmm5,16(%rsp) | |
3339 addl 36(%rsp),%ebx | |
3340 xorl %ebp,%edi | |
3341 movl %ecx,%esi | |
3342 shldl $5,%ecx,%ecx | |
3343 addl %edi,%ebx | |
3344 xorl %ebp,%esi | |
3345 shrdl $7,%edx,%edx | |
3346 addl %ecx,%ebx | |
3347 addl 40(%rsp),%eax | |
3348 xorl %edx,%esi | |
3349 movl %ebx,%edi | |
3350 shldl $5,%ebx,%ebx | |
3351 addl %esi,%eax | |
3352 xorl %edx,%edi | |
3353 shrdl $7,%ecx,%ecx | |
3354 addl %ebx,%eax | |
3355 addl 44(%rsp),%ebp | |
3356 xorl %ecx,%edi | |
3357 movl %eax,%esi | |
3358 shldl $5,%eax,%eax | |
3359 addl %edi,%ebp | |
3360 xorl %ecx,%esi | |
3361 shrdl $7,%ebx,%ebx | |
3362 addl %eax,%ebp | |
3363 addl 48(%rsp),%edx | |
3364 xorl %ebx,%esi | |
3365 vpshufb %xmm6,%xmm3,%xmm3 | |
3366 movl %ebp,%edi | |
3367 shldl $5,%ebp,%ebp | |
3368 vpaddd %xmm11,%xmm2,%xmm6 | |
3369 addl %esi,%edx | |
3370 xorl %ebx,%edi | |
3371 shrdl $7,%eax,%eax | |
3372 addl %ebp,%edx | |
3373 vmovdqa %xmm6,32(%rsp) | |
3374 addl 52(%rsp),%ecx | |
3375 xorl %eax,%edi | |
3376 movl %edx,%esi | |
3377 shldl $5,%edx,%edx | |
3378 addl %edi,%ecx | |
3379 xorl %eax,%esi | |
3380 shrdl $7,%ebp,%ebp | |
3381 addl %edx,%ecx | |
3382 addl 56(%rsp),%ebx | |
3383 xorl %ebp,%esi | |
3384 movl %ecx,%edi | |
3385 shldl $5,%ecx,%ecx | |
3386 addl %esi,%ebx | |
3387 xorl %ebp,%edi | |
3388 shrdl $7,%edx,%edx | |
3389 addl %ecx,%ebx | |
3390 addl 60(%rsp),%eax | |
3391 xorl %edx,%edi | |
3392 movl %ebx,%esi | |
3393 shldl $5,%ebx,%ebx | |
3394 addl %edi,%eax | |
3395 shrdl $7,%ecx,%ecx | |
3396 addl %ebx,%eax | |
/* Fold the block result into the state at r8: the five words are in  */
/* eax, esi, ecx, edx, ebp. Store them back, copy esi into ebx, and   */
/* rebuild esi = ebx & (ecx ^ edx) (with edi = ecx ^ edx) exactly as  */
/* the entry code did, so the next pass starts from the same layout.  */
3397 addl 0(%r8),%eax | |
3398 addl 4(%r8),%esi | |
3399 addl 8(%r8),%ecx | |
3400 addl 12(%r8),%edx | |
3401 movl %eax,0(%r8) | |
3402 addl 16(%r8),%ebp | |
3403 movl %esi,4(%r8) | |
3404 movl %esi,%ebx | |
3405 movl %ecx,8(%r8) | |
3406 movl %ecx,%edi | |
3407 movl %edx,12(%r8) | |
3408 xorl %edx,%edi | |
3409 movl %ebp,16(%r8) | |
3410 andl %edi,%esi | |
3411 jmp .Loop_avx | |
3412 | |
/*
 * .Ldone_avx: reached from the loop when the input pointer equals the
 * end pointer (OLD column only; removed by the change). Runs the
 * remaining scalar rounds of the last block (stack offsets 16..60)
 * with no vector work interleaved, adds the result into the state at
 * r8, restores the saved registers and returns.
 */
3413 .align 16 | |
3414 .Ldone_avx: | |
3415 addl 16(%rsp),%ebx | |
3416 xorl %ebp,%esi | |
3417 movl %ecx,%edi | |
3418 shldl $5,%ecx,%ecx | |
3419 addl %esi,%ebx | |
3420 xorl %ebp,%edi | |
3421 shrdl $7,%edx,%edx | |
3422 addl %ecx,%ebx | |
3423 addl 20(%rsp),%eax | |
3424 xorl %edx,%edi | |
3425 movl %ebx,%esi | |
3426 shldl $5,%ebx,%ebx | |
3427 addl %edi,%eax | |
3428 xorl %edx,%esi | |
3429 shrdl $7,%ecx,%ecx | |
3430 addl %ebx,%eax | |
3431 addl 24(%rsp),%ebp | |
3432 xorl %ecx,%esi | |
3433 movl %eax,%edi | |
3434 shldl $5,%eax,%eax | |
3435 addl %esi,%ebp | |
3436 xorl %ecx,%edi | |
3437 shrdl $7,%ebx,%ebx | |
3438 addl %eax,%ebp | |
3439 addl 28(%rsp),%edx | |
3440 xorl %ebx,%edi | |
3441 movl %ebp,%esi | |
3442 shldl $5,%ebp,%ebp | |
3443 addl %edi,%edx | |
3444 xorl %ebx,%esi | |
3445 shrdl $7,%eax,%eax | |
3446 addl %ebp,%edx | |
3447 addl 32(%rsp),%ecx | |
3448 xorl %eax,%esi | |
3449 movl %edx,%edi | |
3450 shldl $5,%edx,%edx | |
3451 addl %esi,%ecx | |
3452 xorl %eax,%edi | |
3453 shrdl $7,%ebp,%ebp | |
3454 addl %edx,%ecx | |
3455 addl 36(%rsp),%ebx | |
3456 xorl %ebp,%edi | |
3457 movl %ecx,%esi | |
3458 shldl $5,%ecx,%ecx | |
3459 addl %edi,%ebx | |
3460 xorl %ebp,%esi | |
3461 shrdl $7,%edx,%edx | |
3462 addl %ecx,%ebx | |
3463 addl 40(%rsp),%eax | |
3464 xorl %edx,%esi | |
3465 movl %ebx,%edi | |
3466 shldl $5,%ebx,%ebx | |
3467 addl %esi,%eax | |
3468 xorl %edx,%edi | |
3469 shrdl $7,%ecx,%ecx | |
3470 addl %ebx,%eax | |
3471 addl 44(%rsp),%ebp | |
3472 xorl %ecx,%edi | |
3473 movl %eax,%esi | |
3474 shldl $5,%eax,%eax | |
3475 addl %edi,%ebp | |
3476 xorl %ecx,%esi | |
3477 shrdl $7,%ebx,%ebx | |
3478 addl %eax,%ebp | |
3479 addl 48(%rsp),%edx | |
3480 xorl %ebx,%esi | |
3481 movl %ebp,%edi | |
3482 shldl $5,%ebp,%ebp | |
3483 addl %esi,%edx | |
3484 xorl %ebx,%edi | |
3485 shrdl $7,%eax,%eax | |
3486 addl %ebp,%edx | |
3487 addl 52(%rsp),%ecx | |
3488 xorl %eax,%edi | |
3489 movl %edx,%esi | |
3490 shldl $5,%edx,%edx | |
3491 addl %edi,%ecx | |
3492 xorl %eax,%esi | |
3493 shrdl $7,%ebp,%ebp | |
3494 addl %edx,%ecx | |
3495 addl 56(%rsp),%ebx | |
3496 xorl %ebp,%esi | |
3497 movl %ecx,%edi | |
3498 shldl $5,%ecx,%ecx | |
3499 addl %esi,%ebx | |
3500 xorl %ebp,%edi | |
3501 shrdl $7,%edx,%edx | |
3502 addl %ecx,%ebx | |
3503 addl 60(%rsp),%eax | |
3504 xorl %edx,%edi | |
3505 movl %ebx,%esi | |
3506 shldl $5,%ebx,%ebx | |
3507 addl %edi,%eax | |
3508 shrdl $7,%ecx,%ecx | |
3509 addl %ebx,%eax | |
/* Clear the upper halves of the vector registers before returning to */
/* code that may use legacy SSE encodings.                            */
3510 vzeroupper | |
3511 | |
/* Add the working words (eax, esi, ecx, edx, ebp) into the five      */
/* state words at r8 and store the sums back.                         */
3512 addl 0(%r8),%eax | |
3513 addl 4(%r8),%esi | |
3514 addl 8(%r8),%ecx | |
3515 movl %eax,0(%r8) | |
3516 addl 12(%r8),%edx | |
3517 movl %esi,4(%r8) | |
3518 addl 16(%r8),%ebp | |
3519 movl %ecx,8(%r8) | |
3520 movl %edx,12(%r8) | |
3521 movl %ebp,16(%r8) | |
/* r14 holds the rsp value from function entry; the five registers    */
/* pushed in the prologue sit just below it (rbx at -8 ... r14 at     */
/* -40). Reload them and restore rsp to its entry value.              */
3522 leaq (%r14),%rsi | |
3523 movq -40(%rsi),%r14 | |
3524 movq -32(%rsi),%r13 | |
3525 movq -24(%rsi),%r12 | |
3526 movq -16(%rsi),%rbp | |
3527 movq -8(%rsi),%rbx | |
3528 leaq (%rsi),%rsp | |
3529 .Lepilogue_avx: | |
/* 0xf3,0xc3 is the two-byte encoding of "rep ret". */
3530 .byte 0xf3,0xc3 | |
3531 .size sha1_block_data_order_avx,.-sha1_block_data_order_avx | |
/*
 * K_XX_XX: 64-byte-aligned constant table (unchanged by the diff; both
 * columns carry it). Rows are 16 bytes each, so byte offsets are:
 *   +0,  +16   0x5a827999 x4   (SHA-1 round constant, rounds  0-19)
 *   +32, +48   0x6ed9eba1 x4   (rounds 20-39)
 *   +64, +80   0x8f1bbcdc x4   (rounds 40-59)
 *   +96, +112  0xca62c1d6 x4   (rounds 60-79)
 *   +128,+144  vpshufb mask that reverses the bytes inside each 32-bit
 *              word (input is loaded through it by the AVX code in the
 *              OLD column, via 64(%r11) with r11 = K_XX_XX+64)
 *   +160       descending byte indices 0xf..0x0; not referenced by the
 *              rows visible in this view
 */
3532 .align 64 | 2411 .align 64 |
3533 K_XX_XX: | 2412 K_XX_XX: |
3534 .long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 | 2413 .long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 |
3535 .long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 | 2414 .long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 |
3536 .long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 | 2415 .long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 |
3537 .long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 | 2416 .long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 |
3538 .long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc | 2417 .long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc |
3539 .long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc | 2418 .long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc |
3540 .long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 | 2419 .long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 |
3541 .long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 | 2420 .long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 |
3542 .long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f | 2421 .long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f |
3543 .long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f | 2422 .long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f |
3544 .byte 0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0 | 2423 .byte 0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0 |
/*
 * NUL-terminated ASCII banner. NOTE(review): the row below appears to
 * be wrapped by the diff viewer, splitting two values across line
 * breaks ("3" + "2" = 32, "12" + "1" = 121). Read that way the bytes
 * spell:
 *   "SHA1 block transform for x86_64, CRYPTOGAMS by <appro@openssl.org>"
 * Confirm against the real source before copying these bytes out.
 */
3545 .byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,3
2,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,12
1,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 | 2424 .byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,3
2,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,12
1,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 |
3546 .align 64 | 2425 .align 64 |
3547 #endif | 2426 #endif |
OLD | NEW |