| Index: source/libvpx/vp9/encoder/x86/vp9_highbd_subpel_variance.asm
|
| diff --git a/source/libvpx/vp9/encoder/x86/vp9_highbd_subpel_variance.asm b/source/libvpx/vp9/encoder/x86/vp9_highbd_subpel_variance.asm
|
| index aebe63b74ff27ef9c9b61a1e0b9428b49233fab4..987729f962c35730a7068f2ee3e0fddb5f071869 100644
|
| --- a/source/libvpx/vp9/encoder/x86/vp9_highbd_subpel_variance.asm
|
| +++ b/source/libvpx/vp9/encoder/x86/vp9_highbd_subpel_variance.asm
|
| @@ -199,6 +199,9 @@ SECTION .text
|
| %if %1 < 16
|
| sar h, 1
|
| %endif
|
| +%if %2 == 1 ; avg
|
| + shl sec_str, 1 ; double sec_str once up front so the loops can step secq with a plain `add secq, sec_str` instead of a scaled lea
|
| +%endif
|
|
|
| ; FIXME(rbultje) replace by jumptable?
|
| test x_offsetd, x_offsetd
|
| @@ -223,7 +226,7 @@ SECTION .text
|
| lea srcq, [srcq + src_strideq*2]
|
| lea dstq, [dstq + dst_strideq*2]
|
| %if %2 == 1 ; avg
|
| - lea secq, [secq + sec_str*2]
|
| + add secq, sec_str ; sec_str was doubled by the earlier `shl sec_str, 1`, so this equals the removed sec_str*2 step
|
| %endif
|
| %else ; %1 < 16
|
| movu m0, [srcq]
|
| @@ -232,14 +235,15 @@ SECTION .text
|
| mova m3, [dstq + dst_strideq*2]
|
| %if %2 == 1 ; avg
|
| pavgw m0, [secq]
|
| - pavgw m2, [secq + sec_str*2]
|
| + add secq, sec_str ; advance by one pre-doubled stride instead of addressing [secq + sec_str*2]
|
| + pavgw m2, [secq] ; secq now points where the removed operand [secq + sec_str*2] did
|
| %endif
|
| SUM_SSE m0, m1, m2, m3, m6, m7
|
|
|
| lea srcq, [srcq + src_strideq*4]
|
| lea dstq, [dstq + dst_strideq*4]
|
| %if %2 == 1 ; avg
|
| - lea secq, [secq + sec_str*4]
|
| + add secq, sec_str ; second pre-doubled step; with the add above it totals the removed sec_str*4 advance
|
| %endif
|
| %endif
|
| dec h
|
| @@ -270,7 +274,7 @@ SECTION .text
|
| lea srcq, [srcq + src_strideq*2]
|
| lea dstq, [dstq + dst_strideq*2]
|
| %if %2 == 1 ; avg
|
| - lea secq, [secq + sec_str*2]
|
| + add secq, sec_str ; sec_str already holds the doubled value (see `shl sec_str, 1`), matching the removed sec_str*2 lea
|
| %endif
|
| %else ; %1 < 16
|
| movu m0, [srcq]
|
| @@ -282,14 +286,15 @@ SECTION .text
|
| pavgw m1, m5
|
| %if %2 == 1 ; avg
|
| pavgw m0, [secq]
|
| - pavgw m1, [secq+sec_str*2]
|
| + add secq, sec_str ; step secq by the pre-doubled stride; replaces the scaled [secq+sec_str*2] operand
|
| + pavgw m1, [secq] ; reads the same address the removed [secq+sec_str*2] operand did
|
| %endif
|
| SUM_SSE m0, m2, m1, m3, m6, m7
|
|
|
| lea srcq, [srcq + src_strideq*4]
|
| lea dstq, [dstq + dst_strideq*4]
|
| %if %2 == 1 ; avg
|
| - lea secq, [secq + sec_str*4]
|
| + add secq, sec_str ; one more pre-doubled step; the two adds together equal the removed sec_str*4
|
| %endif
|
| %endif
|
| dec h
|
| @@ -358,7 +363,7 @@ SECTION .text
|
| lea srcq, [srcq + src_strideq*2]
|
| lea dstq, [dstq + dst_strideq*2]
|
| %if %2 == 1 ; avg
|
| - lea secq, [secq + sec_str*2]
|
| + add secq, sec_str ; same distance as the removed sec_str*2 lea, because sec_str was doubled by `shl sec_str, 1`
|
| %endif
|
| %else ; %1 < 16
|
| movu m0, [srcq]
|
| @@ -379,14 +384,15 @@ SECTION .text
|
| psrlw m0, 4
|
| %if %2 == 1 ; avg
|
| pavgw m0, [secq]
|
| - pavgw m1, [secq+sec_str*2]
|
| + add secq, sec_str ; advance by one pre-doubled stride instead of addressing [secq+sec_str*2]
|
| + pavgw m1, [secq] ; secq now points where the removed operand [secq+sec_str*2] did
|
| %endif
|
| SUM_SSE m0, m2, m1, m3, m6, m7
|
|
|
| lea srcq, [srcq + src_strideq*4]
|
| lea dstq, [dstq + dst_strideq*4]
|
| %if %2 == 1 ; avg
|
| - lea secq, [secq + sec_str*4]
|
| + add secq, sec_str ; second pre-doubled step; with the add above it totals the removed sec_str*4 advance
|
| %endif
|
| %endif
|
| dec h
|
| @@ -423,7 +429,7 @@ SECTION .text
|
| lea srcq, [srcq + src_strideq*2]
|
| lea dstq, [dstq + dst_strideq*2]
|
| %if %2 == 1 ; avg
|
| - lea secq, [secq + sec_str*2]
|
| + add secq, sec_str ; sec_str was doubled by the earlier `shl sec_str, 1`, so this equals the removed sec_str*2 step
|
| %endif
|
| %else ; %1 < 16
|
| movu m0, [srcq]
|
| @@ -436,14 +442,15 @@ SECTION .text
|
| pavgw m1, m5
|
| %if %2 == 1 ; avg
|
| pavgw m0, [secq]
|
| - pavgw m1, [secq+sec_str*2]
|
| + add secq, sec_str ; step secq by the pre-doubled stride; replaces the scaled [secq+sec_str*2] operand
|
| + pavgw m1, [secq] ; reads the same address the removed [secq+sec_str*2] operand did
|
| %endif
|
| SUM_SSE m0, m2, m1, m3, m6, m7
|
|
|
| lea srcq, [srcq + src_strideq*4]
|
| lea dstq, [dstq + dst_strideq*4]
|
| %if %2 == 1 ; avg
|
| - lea secq, [secq + sec_str*4]
|
| + add secq, sec_str ; one more pre-doubled step; the two adds together equal the removed sec_str*4
|
| %endif
|
| %endif
|
| dec h
|
| @@ -485,7 +492,7 @@ SECTION .text
|
| lea srcq, [srcq + src_strideq*2]
|
| lea dstq, [dstq + dst_strideq*2]
|
| %if %2 == 1 ; avg
|
| - lea secq, [secq + sec_str*2]
|
| + add secq, sec_str ; sec_str already holds the doubled value (see `shl sec_str, 1`), matching the removed sec_str*2 lea
|
| %endif
|
| %else ; %1 < 16
|
| movu m0, [srcq]
|
| @@ -505,7 +512,8 @@ SECTION .text
|
| mova m5, [dstq + dst_strideq*2]
|
| %if %2 == 1 ; avg
|
| pavgw m0, [secq]
|
| - pavgw m2, [secq+sec_str*2]
|
| + add secq, sec_str ; advance by one pre-doubled stride instead of addressing [secq+sec_str*2]
|
| + pavgw m2, [secq] ; secq now points where the removed operand [secq+sec_str*2] did
|
| %endif
|
| SUM_SSE m0, m4, m2, m5, m6, m7
|
| mova m0, m3
|
| @@ -513,7 +521,7 @@ SECTION .text
|
| lea srcq, [srcq + src_strideq*4]
|
| lea dstq, [dstq + dst_strideq*4]
|
| %if %2 == 1 ; avg
|
| - lea secq, [secq + sec_str*4]
|
| + add secq, sec_str ; secq was already stepped once before the second pavgw; this second pre-doubled step completes the removed sec_str*4 advance
|
| %endif
|
| %endif
|
| dec h
|
| @@ -590,7 +598,7 @@ SECTION .text
|
| lea srcq, [srcq + src_strideq*2]
|
| lea dstq, [dstq + dst_strideq*2]
|
| %if %2 == 1 ; avg
|
| - lea secq, [secq + sec_str*2]
|
| + add secq, sec_str ; same distance as the removed sec_str*2 lea, because sec_str was doubled by `shl sec_str, 1`
|
| %endif
|
| %else ; %1 < 16
|
| movu m0, [srcq]
|
| @@ -620,7 +628,8 @@ SECTION .text
|
| mova m3, [dstq+dst_strideq*2]
|
| %if %2 == 1 ; avg
|
| pavgw m0, [secq]
|
| - pavgw m4, [secq+sec_str*2]
|
| + add secq, sec_str ; step secq by the pre-doubled stride; replaces the scaled [secq+sec_str*2] operand
|
| + pavgw m4, [secq] ; reads the same address the removed [secq+sec_str*2] operand did
|
| %endif
|
| SUM_SSE m0, m2, m4, m3, m6, m7
|
| mova m0, m5
|
| @@ -628,7 +637,7 @@ SECTION .text
|
| lea srcq, [srcq + src_strideq*4]
|
| lea dstq, [dstq + dst_strideq*4]
|
| %if %2 == 1 ; avg
|
| - lea secq, [secq + sec_str*4]
|
| + add secq, sec_str ; secq was already stepped once before the second pavgw; this second pre-doubled step completes the removed sec_str*4 advance
|
| %endif
|
| %endif
|
| dec h
|
| @@ -698,7 +707,7 @@ SECTION .text
|
| lea srcq, [srcq+src_strideq*2]
|
| lea dstq, [dstq+dst_strideq*2]
|
| %if %2 == 1 ; avg
|
| - lea secq, [secq + sec_str*2]
|
| + add secq, sec_str ; sec_str was doubled by the earlier `shl sec_str, 1`, so this equals the removed sec_str*2 step
|
| %endif
|
| %else ; %1 < 16
|
| movu m0, [srcq]
|
| @@ -719,14 +728,15 @@ SECTION .text
|
| psrlw m0, 4
|
| %if %2 == 1 ; avg
|
| pavgw m0, [secq]
|
| - pavgw m1, [secq+sec_str*2]
|
| + add secq, sec_str ; advance by one pre-doubled stride instead of addressing [secq+sec_str*2]
|
| + pavgw m1, [secq] ; secq now points where the removed operand [secq+sec_str*2] did
|
| %endif
|
| SUM_SSE m0, m4, m1, m5, m6, m7
|
|
|
| lea srcq, [srcq+src_strideq*4]
|
| lea dstq, [dstq+dst_strideq*4]
|
| %if %2 == 1 ; avg
|
| - lea secq, [secq + sec_str*4]
|
| + add secq, sec_str ; second pre-doubled step; with the add above it totals the removed sec_str*4 advance
|
| %endif
|
| %endif
|
| dec h
|
| @@ -815,7 +825,7 @@ SECTION .text
|
| lea srcq, [srcq+src_strideq*2]
|
| lea dstq, [dstq+dst_strideq*2]
|
| %if %2 == 1 ; avg
|
| - lea secq, [secq + sec_str*2]
|
| + add secq, sec_str ; sec_str already holds the doubled value (see `shl sec_str, 1`), matching the removed sec_str*2 lea
|
| %endif
|
| %else ; %1 < 16
|
| movu m0, [srcq]
|
| @@ -847,7 +857,8 @@ SECTION .text
|
| pavgw m2, m3
|
| %if %2 == 1 ; avg
|
| pavgw m0, [secq]
|
| - pavgw m2, [secq+sec_str*2]
|
| + add secq, sec_str ; step secq by the pre-doubled stride; replaces the scaled [secq+sec_str*2] operand
|
| + pavgw m2, [secq] ; reads the same address the removed [secq+sec_str*2] operand did
|
| %endif
|
| SUM_SSE m0, m4, m2, m5, m6, m7
|
| mova m0, m3
|
| @@ -855,7 +866,7 @@ SECTION .text
|
| lea srcq, [srcq+src_strideq*4]
|
| lea dstq, [dstq+dst_strideq*4]
|
| %if %2 == 1 ; avg
|
| - lea secq, [secq + sec_str*4]
|
| + add secq, sec_str ; secq was already stepped once before the second pavgw; this second pre-doubled step completes the removed sec_str*4 advance
|
| %endif
|
| %endif
|
| dec h
|
| @@ -969,7 +980,7 @@ SECTION .text
|
| INC_SRC_BY_SRC_STRIDE
|
| lea dstq, [dstq + dst_strideq * 2]
|
| %if %2 == 1 ; avg
|
| - lea secq, [secq + sec_str*2]
|
| + add secq, sec_str ; same distance as the removed sec_str*2 lea, because sec_str was doubled by `shl sec_str, 1`
|
| %endif
|
| %else ; %1 < 16
|
| movu m0, [srcq]
|
| @@ -1013,7 +1024,8 @@ SECTION .text
|
| mova m3, [dstq+dst_strideq*2]
|
| %if %2 == 1 ; avg
|
| pavgw m0, [secq]
|
| - pavgw m4, [secq+sec_str*2]
|
| + add secq, sec_str ; advance by one pre-doubled stride instead of addressing [secq+sec_str*2]
|
| + pavgw m4, [secq] ; secq now points where the removed operand [secq+sec_str*2] did
|
| %endif
|
| SUM_SSE m0, m2, m4, m3, m6, m7
|
| mova m0, m5
|
| @@ -1021,7 +1033,7 @@ SECTION .text
|
| INC_SRC_BY_SRC_2STRIDE
|
| lea dstq, [dstq + dst_strideq * 4]
|
| %if %2 == 1 ; avg
|
| - lea secq, [secq + sec_str*4]
|
| + add secq, sec_str ; secq was already stepped once before the second pavgw; this second pre-doubled step completes the removed sec_str*4 advance
|
| %endif
|
| %endif
|
| dec h
|
|
|