| OLD | NEW |
| 1 ; | 1 ; |
| 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
| 3 ; | 3 ; |
| 4 ; Use of this source code is governed by a BSD-style license | 4 ; Use of this source code is governed by a BSD-style license |
| 5 ; that can be found in the LICENSE file in the root of the source | 5 ; that can be found in the LICENSE file in the root of the source |
| 6 ; tree. An additional intellectual property rights grant can be found | 6 ; tree. An additional intellectual property rights grant can be found |
| 7 ; in the file PATENTS. All contributing project authors may | 7 ; in the file PATENTS. All contributing project authors may |
| 8 ; be found in the AUTHORS file in the root of the source tree. | 8 ; be found in the AUTHORS file in the root of the source tree. |
| 9 ; | 9 ; |
| 10 | 10 |
| (...skipping 825 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 836 | 836 |
| 837 ; begin epilog | 837 ; begin epilog |
| 838 add rsp, 16 | 838 add rsp, 16 |
| 839 pop rdi | 839 pop rdi |
| 840 pop rsi | 840 pop rsi |
| 841 RESTORE_GOT | 841 RESTORE_GOT |
| 842 UNSHADOW_ARGS | 842 UNSHADOW_ARGS |
| 843 pop rbp | 843 pop rbp |
| 844 ret | 844 ret |
| 845 | 845 |
| 846 ;unsigned int vp8_get16x16pred_error_mmx | |
| 847 ;( | |
| 848 ; unsigned char *src_ptr, | |
| 849 ; int src_stride, | |
| 850 ; unsigned char *ref_ptr, | |
| 851 ; int ref_stride | |
| 852 ;) | |
| 853 global sym(vp8_get16x16pred_error_mmx) | |
| 854 sym(vp8_get16x16pred_error_mmx): | |
| 855 push rbp | |
| 856 mov rbp, rsp | |
| 857 SHADOW_ARGS_TO_STACK 4 | |
| 858 GET_GOT rbx | |
| 859 push rsi | |
| 860 push rdi | |
| 861 sub rsp, 16 | |
| 862 ; end prolog | |
| 863 | |
| 864 mov rsi, arg(0) ;DWORD PTR [src_ptr] | |
| 865 mov rdi, arg(2) ;DWORD PTR [ref_ptr] | |
| 866 | |
| 867 movsxd rax, DWORD PTR arg(1) ;[src_stride] | |
| 868 movsxd rdx, DWORD PTR arg(3) ;[ref_stride] | |
| 869 | |
| 870 pxor mm0, mm0 ; clear xmm0 for unpack | |
| 871 pxor mm7, mm7 ; clear xmm7 for accumulating diffs | |
| 872 | |
| 873 pxor mm6, mm6 ; clear xmm6 for accumulating sse | |
| 874 mov rcx, 16 | |
| 875 | |
| 876 var16loop: | |
| 877 | |
| 878 movq mm1, [rsi] | |
| 879 movq mm2, [rdi] | |
| 880 | |
| 881 movq mm3, mm1 | |
| 882 movq mm4, mm2 | |
| 883 | |
| 884 punpcklbw mm1, mm0 | |
| 885 punpckhbw mm3, mm0 | |
| 886 | |
| 887 punpcklbw mm2, mm0 | |
| 888 punpckhbw mm4, mm0 | |
| 889 | |
| 890 psubw mm1, mm2 | |
| 891 psubw mm3, mm4 | |
| 892 | |
| 893 paddw mm7, mm1 | |
| 894 pmaddwd mm1, mm1 | |
| 895 | |
| 896 paddw mm7, mm3 | |
| 897 pmaddwd mm3, mm3 | |
| 898 | |
| 899 paddd mm6, mm1 | |
| 900 paddd mm6, mm3 | |
| 901 | |
| 902 | |
| 903 movq mm1, [rsi+8] | |
| 904 movq mm2, [rdi+8] | |
| 905 | |
| 906 movq mm3, mm1 | |
| 907 movq mm4, mm2 | |
| 908 | |
| 909 punpcklbw mm1, mm0 | |
| 910 punpckhbw mm3, mm0 | |
| 911 | |
| 912 punpcklbw mm2, mm0 | |
| 913 punpckhbw mm4, mm0 | |
| 914 | |
| 915 psubw mm1, mm2 | |
| 916 psubw mm3, mm4 | |
| 917 | |
| 918 paddw mm7, mm1 | |
| 919 pmaddwd mm1, mm1 | |
| 920 | |
| 921 paddw mm7, mm3 | |
| 922 pmaddwd mm3, mm3 | |
| 923 | |
| 924 paddd mm6, mm1 | |
| 925 paddd mm6, mm3 | |
| 926 | |
| 927 add rsi, rax | |
| 928 add rdi, rdx | |
| 929 | |
| 930 sub rcx, 1 | |
| 931 jnz var16loop | |
| 932 | |
| 933 | |
| 934 movq mm1, mm6 | |
| 935 pxor mm6, mm6 | |
| 936 | |
| 937 pxor mm5, mm5 | |
| 938 punpcklwd mm6, mm7 | |
| 939 | |
| 940 punpckhwd mm5, mm7 | |
| 941 psrad mm5, 16 | |
| 942 | |
| 943 psrad mm6, 16 | |
| 944 paddd mm6, mm5 | |
| 945 | |
| 946 movq mm2, mm1 | |
| 947 psrlq mm1, 32 | |
| 948 | |
| 949 paddd mm2, mm1 | |
| 950 movq mm7, mm6 | |
| 951 | |
| 952 psrlq mm6, 32 | |
| 953 paddd mm6, mm7 | |
| 954 | |
| 955 movd DWORD PTR [rsp], mm6 ;Sum | |
| 956 movd DWORD PTR [rsp+4], mm2 ;SSE | |
| 957 | |
| 958 ; return (SSE-((Sum*Sum)>>8)); | |
| 959 movsxd rdx, dword ptr [rsp] | |
| 960 imul rdx, rdx | |
| 961 sar rdx, 8 | |
| 962 movsxd rax, dword ptr [rsp + 4] | |
| 963 sub rax, rdx | |
| 964 | |
| 965 | |
| 966 ; begin epilog | |
| 967 add rsp, 16 | |
| 968 pop rdi | |
| 969 pop rsi | |
| 970 RESTORE_GOT | |
| 971 UNSHADOW_ARGS | |
| 972 pop rbp | |
| 973 ret | |
| 974 | |
| 975 | |
| 976 | 846 |
| 977 SECTION_RODATA | 847 SECTION_RODATA |
| 978 ;short mmx_bi_rd[4] = { 64, 64, 64, 64}; | 848 ;short mmx_bi_rd[4] = { 64, 64, 64, 64}; |
| 979 align 16 | 849 align 16 |
| 980 mmx_bi_rd: | 850 mmx_bi_rd: |
| 981 times 4 dw 64 | 851 times 4 dw 64 |
| OLD | NEW |