Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(131)

Unified Diff: libvpx/source/libvpx/vp8/encoder/x86/variance_impl_sse2.asm

Issue 7624054: Revert r97185 "Update libvpx snapshot to v0.9.7-p1 (Cayuga)." (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party
Patch Set: Created 9 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: libvpx/source/libvpx/vp8/encoder/x86/variance_impl_sse2.asm
diff --git a/libvpx/source/libvpx/vp8/encoder/x86/variance_impl_sse2.asm b/libvpx/source/libvpx/vp8/encoder/x86/variance_impl_sse2.asm
index b7a6b3286c17f4f7b1401fa624168b71a617ca86..c2c30deb27a74973f67eb334b89d63e8a8f4c940 100644
--- a/libvpx/source/libvpx/vp8/encoder/x86/variance_impl_sse2.asm
+++ b/libvpx/source/libvpx/vp8/encoder/x86/variance_impl_sse2.asm
@@ -85,7 +85,6 @@ sym(vp8_get16x16var_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
- SAVE_XMM 7
push rbx
push rsi
push rdi
@@ -207,12 +206,125 @@ var16loop:
pop rdi
pop rsi
pop rbx
- RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
+;unsigned int vp8_get16x16pred_error_sse2
+;(
+; unsigned char *src_ptr,
+; int src_stride,
+; unsigned char *ref_ptr,
+; int ref_stride
+;)
+;
+; Walks 16 rows of 16 bytes from src_ptr and ref_ptr (unaligned loads),
+; accumulating
+;   Sum = sum of (src - ref)          over all 256 byte pairs
+;   SSE = sum of (src - ref)^2        over all 256 byte pairs
+; and leaves  SSE - ((Sum * Sum) >> 8)  in rax.
+;
+; Register roles inside the loop:
+;   rsi / rdi   current src / ref row
+;   rax / rdx   src / ref stride (sign-extended from int)
+;   rcx         rows remaining
+;   xmm0        zero, used for byte->word and dword widening
+;   xmm7        8 x 16-bit running sums of differences
+;   xmm6        4 x 32-bit running sums of squared differences
+;
+; xmm6 and xmm7 are written here, so they are bracketed with
+; SAVE_XMM / RESTORE_XMM in the same positions the other routines in this
+; file use (presumably preserving them where the target ABI treats them as
+; callee-saved, e.g. Win64 -- confirm against the macro definition).
+global sym(vp8_get16x16pred_error_sse2)
+sym(vp8_get16x16pred_error_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 4
+ SAVE_XMM
+ GET_GOT rbx
+ push rsi
+ push rdi
+ sub rsp, 16 ; scratch: [rsp] = Sum, [rsp+4] = SSE
+ ; end prolog
+
+ mov rsi, arg(0) ;[src_ptr]
+ mov rdi, arg(2) ;[ref_ptr]
+
+ movsxd rax, DWORD PTR arg(1) ;[src_stride]
+ movsxd rdx, DWORD PTR arg(3) ;[ref_stride]
+
+ pxor xmm0, xmm0 ; clear xmm0 for unpack
+ pxor xmm7, xmm7 ; clear xmm7 for accumulating diffs
+
+ pxor xmm6, xmm6 ; clear xmm6 for accumulating sse
+ mov rcx, 16 ; row counter
+
+var16peloop:
+ movdqu xmm1, XMMWORD PTR [rsi] ; 16 source bytes
+ movdqu xmm2, XMMWORD PTR [rdi] ; 16 reference bytes
+
+ movdqa xmm3, xmm1
+ movdqa xmm4, xmm2
+
+ punpcklbw xmm1, xmm0 ; src bytes 0..7  -> words
+ punpckhbw xmm3, xmm0 ; src bytes 8..15 -> words
+
+ punpcklbw xmm2, xmm0 ; ref bytes 0..7  -> words
+ punpckhbw xmm4, xmm0 ; ref bytes 8..15 -> words
+
+ psubw xmm1, xmm2 ; signed diffs, each in [-255, 255]
+ psubw xmm3, xmm4
+
+ ; Each 16-bit lane of xmm7 receives 2 diffs per row * 16 rows = 32 diffs,
+ ; so its magnitude stays <= 32 * 255 = 8160 and cannot overflow.
+ paddw xmm7, xmm1
+ pmaddwd xmm1, xmm1 ; square and pair-add -> 4 dwords
+
+ paddw xmm7, xmm3
+ pmaddwd xmm3, xmm3
+
+ paddd xmm6, xmm1
+ paddd xmm6, xmm3
+
+ add rsi, rax ; next source row
+ add rdi, rdx ; next reference row
+
+ sub rcx, 1
+ jnz var16peloop
+
+
+ ; ---- horizontal reduction ----
+ movdqa xmm1, xmm6 ; xmm1 = SSE partials (4 dwords)
+ pxor xmm6, xmm6
+
+ ; Sign-extend the 8 word sums to dwords: place each word in the upper
+ ; half of a dword, then arithmetic-shift right by 16.
+ pxor xmm5, xmm5
+ punpcklwd xmm6, xmm7
+
+ punpckhwd xmm5, xmm7
+ psrad xmm5, 16
+
+ psrad xmm6, 16
+ paddd xmm6, xmm5 ; xmm6 = Sum partials (4 dwords)
+
+ ; Interleave with zero and add low/high halves; afterwards dwords 0 and 2
+ ; hold the two remaining partials for each quantity.
+ movdqa xmm2, xmm1
+ punpckldq xmm1, xmm0
+
+ punpckhdq xmm2, xmm0
+ movdqa xmm7, xmm6
+
+ paddd xmm1, xmm2
+ punpckldq xmm6, xmm0
+
+ punpckhdq xmm7, xmm0
+ paddd xmm6, xmm7
+
+ movdqa xmm2, xmm1
+ movdqa xmm7, xmm6
+
+ psrldq xmm1, 8 ; bring dword 2 down to dword 0
+ psrldq xmm6, 8
+
+ paddd xmm7, xmm6 ; low dword = Sum
+ paddd xmm1, xmm2 ; low dword = SSE
+
+ movd DWORD PTR [rsp], xmm7 ;Sum
+ movd DWORD PTR [rsp+4], xmm1 ;SSE
+
+ ; return (SSE-((Sum*Sum)>>8));
+ movsxd rdx, dword ptr [rsp]
+ imul rdx, rdx
+ sar rdx, 8
+ movsxd rax, dword ptr [rsp + 4]
+ sub rax, rdx
+
+ ; begin epilog
+ add rsp, 16
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
;unsigned int vp8_get8x8var_sse2
@@ -229,7 +341,6 @@ sym(vp8_get8x8var_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
- SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -395,7 +506,6 @@ sym(vp8_get8x8var_sse2):
pop rdi
pop rsi
RESTORE_GOT
- RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@@ -418,7 +528,7 @@ sym(vp8_filter_block2d_bil_var_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 9
- SAVE_XMM 7
+ SAVE_XMM
GET_GOT rbx
push rsi
push rdi
@@ -695,7 +805,6 @@ sym(vp8_half_horiz_vert_variance8x_h_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 7
- SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -797,7 +906,6 @@ vp8_half_horiz_vert_variance8x_h_1:
pop rdi
pop rsi
RESTORE_GOT
- RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@@ -817,7 +925,7 @@ sym(vp8_half_horiz_vert_variance16x_h_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 7
- SAVE_XMM 7
+ SAVE_XMM
GET_GOT rbx
push rsi
push rdi
@@ -933,7 +1041,6 @@ sym(vp8_half_vert_variance8x_h_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 7
- SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -1020,7 +1127,6 @@ vp8_half_vert_variance8x_h_1:
pop rdi
pop rsi
RESTORE_GOT
- RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@@ -1040,7 +1146,7 @@ sym(vp8_half_vert_variance16x_h_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 7
- SAVE_XMM 7
+ SAVE_XMM
GET_GOT rbx
push rsi
push rdi
@@ -1148,7 +1254,6 @@ sym(vp8_half_horiz_variance8x_h_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 7
- SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -1233,7 +1338,6 @@ vp8_half_horiz_variance8x_h_1:
pop rdi
pop rsi
RESTORE_GOT
- RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@@ -1253,7 +1357,7 @@ sym(vp8_half_horiz_variance16x_h_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 7
- SAVE_XMM 7
+ SAVE_XMM
GET_GOT rbx
push rsi
push rdi

Powered by Google App Engine
This is Rietveld 408576698