Index: libvpx/source/libvpx/vp8/common/x86/idctllm_sse2.asm |
diff --git a/libvpx/source/libvpx/vp8/common/x86/idctllm_sse2.asm b/libvpx/source/libvpx/vp8/common/x86/idctllm_sse2.asm |
index 34a7e18aea727ec7175f57de50d3832d4a13df2a..edee1578e2607ec0b1311665a68b504f6c620fb1 100644 |
--- a/libvpx/source/libvpx/vp8/common/x86/idctllm_sse2.asm |
+++ b/libvpx/source/libvpx/vp8/common/x86/idctllm_sse2.asm |
@@ -32,6 +32,9 @@ sym(idct_dequant_0_2x_sse2): |
mov rdx, arg(1) ; dequant |
mov rax, arg(0) ; qcoeff |
+ ; Zero xmm7: used to clear the coeffs and as the zero operand for punpcklbw/packuswb |
+ pxor xmm7, xmm7 |
+ |
movd xmm4, [rax] |
movd xmm5, [rdx] |
@@ -40,12 +43,9 @@ sym(idct_dequant_0_2x_sse2): |
pmullw xmm4, xmm5 |
- ; Zero out xmm5, for use unpacking |
- pxor xmm5, xmm5 |
- |
; clear coeffs |
- movd [rax], xmm5 |
- movd [rax+32], xmm5 |
+ movd [rax], xmm7 |
+ movd [rax+32], xmm7 |
;pshufb |
pshuflw xmm4, xmm4, 00000000b |
pshufhw xmm4, xmm4, 00000000b |
@@ -62,10 +62,10 @@ sym(idct_dequant_0_2x_sse2): |
lea rcx, [3*rcx] |
movq xmm3, [rax+rcx] |
- punpcklbw xmm0, xmm5 |
- punpcklbw xmm1, xmm5 |
- punpcklbw xmm2, xmm5 |
- punpcklbw xmm3, xmm5 |
+ punpcklbw xmm0, xmm7 |
+ punpcklbw xmm1, xmm7 |
+ punpcklbw xmm2, xmm7 |
+ punpcklbw xmm3, xmm7 |
mov rax, arg(3) ; dst |
movsxd rdx, dword ptr arg(4) ; dst_stride |
@@ -77,10 +77,10 @@ sym(idct_dequant_0_2x_sse2): |
paddw xmm3, xmm4 |
; pack up before storing |
- packuswb xmm0, xmm5 |
- packuswb xmm1, xmm5 |
- packuswb xmm2, xmm5 |
- packuswb xmm3, xmm5 |
+ packuswb xmm0, xmm7 |
+ packuswb xmm1, xmm7 |
+ packuswb xmm2, xmm7 |
+ packuswb xmm3, xmm7 |
; store blocks back out |
movq [rax], xmm0 |
@@ -102,7 +102,6 @@ sym(idct_dequant_full_2x_sse2): |
push rbp |
mov rbp, rsp |
SHADOW_ARGS_TO_STACK 7 |
- SAVE_XMM 7 |
GET_GOT rbx |
push rsi |
push rdi |
@@ -348,7 +347,6 @@ sym(idct_dequant_full_2x_sse2): |
pop rdi |
pop rsi |
RESTORE_GOT |
- RESTORE_XMM |
UNSHADOW_ARGS |
pop rbp |
ret |
@@ -379,8 +377,8 @@ sym(idct_dequant_dc_0_2x_sse2): |
mov rdi, arg(3) ; dst |
mov rdx, arg(5) ; dc |
- ; Zero out xmm5, for use unpacking |
- pxor xmm5, xmm5 |
+ ; Zero xmm7: used as the zero operand for punpcklbw/packuswb below |
+ pxor xmm7, xmm7 |
; load up 2 dc words here == 2*16 = doubleword |
movd xmm4, [rdx] |
@@ -400,10 +398,10 @@ sym(idct_dequant_dc_0_2x_sse2): |
psraw xmm4, 3 |
; Predict buffer needs to be expanded from bytes to words |
- punpcklbw xmm0, xmm5 |
- punpcklbw xmm1, xmm5 |
- punpcklbw xmm2, xmm5 |
- punpcklbw xmm3, xmm5 |
+ punpcklbw xmm0, xmm7 |
+ punpcklbw xmm1, xmm7 |
+ punpcklbw xmm2, xmm7 |
+ punpcklbw xmm3, xmm7 |
; Add to predict buffer |
paddw xmm0, xmm4 |
@@ -412,10 +410,10 @@ sym(idct_dequant_dc_0_2x_sse2): |
paddw xmm3, xmm4 |
; pack up before storing |
- packuswb xmm0, xmm5 |
- packuswb xmm1, xmm5 |
- packuswb xmm2, xmm5 |
- packuswb xmm3, xmm5 |
+ packuswb xmm0, xmm7 |
+ packuswb xmm1, xmm7 |
+ packuswb xmm2, xmm7 |
+ packuswb xmm3, xmm7 |
; Load destination stride before writing out, |
; doesn't need to persist |
@@ -443,7 +441,6 @@ sym(idct_dequant_dc_full_2x_sse2): |
push rbp |
mov rbp, rsp |
SHADOW_ARGS_TO_STACK 7 |
- SAVE_XMM 7 |
GET_GOT rbx |
push rsi |
push rdi |
@@ -695,7 +692,6 @@ sym(idct_dequant_dc_full_2x_sse2): |
pop rdi |
pop rsi |
RESTORE_GOT |
- RESTORE_XMM |
UNSHADOW_ARGS |
pop rbp |
ret |