Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1466)

Unified Diff: source/libvpx/vpx_dsp/x86/variance_impl_mmx.asm

Issue 1162573005: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master
Patch Set: Created 5 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « source/libvpx/vpx_dsp/x86/variance_impl_avx2.c ('k') | source/libvpx/vpx_dsp/x86/variance_mmx.c » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: source/libvpx/vpx_dsp/x86/variance_impl_mmx.asm
diff --git a/source/libvpx/vp8/common/x86/variance_impl_mmx.asm b/source/libvpx/vpx_dsp/x86/variance_impl_mmx.asm
similarity index 52%
copy from source/libvpx/vp8/common/x86/variance_impl_mmx.asm
copy to source/libvpx/vpx_dsp/x86/variance_impl_mmx.asm
index 7d5e6810bf0d35c1ca896a81c3cff6503044e237..a8d7d99dbc0d043ffb36bfe02d42a5aaeb48d2d9 100644
--- a/source/libvpx/vp8/common/x86/variance_impl_mmx.asm
+++ b/source/libvpx/vpx_dsp/x86/variance_impl_mmx.asm
@@ -11,9 +11,9 @@
%include "vpx_ports/x86_abi_support.asm"
-;unsigned int vp8_get_mb_ss_mmx( short *src_ptr )
-global sym(vp8_get_mb_ss_mmx) PRIVATE
-sym(vp8_get_mb_ss_mmx):
+;unsigned int vpx_get_mb_ss_mmx( short *src_ptr )
+global sym(vpx_get_mb_ss_mmx) PRIVATE
+sym(vpx_get_mb_ss_mmx):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 7
@@ -63,7 +63,7 @@ sym(vp8_get_mb_ss_mmx):
ret
-;unsigned int vp8_get8x8var_mmx
+;void vpx_get8x8var_mmx
;(
; unsigned char *src_ptr,
; int source_stride,
@@ -72,8 +72,8 @@ sym(vp8_get_mb_ss_mmx):
; unsigned int *SSE,
; int *Sum
;)
-global sym(vp8_get8x8var_mmx) PRIVATE
-sym(vp8_get8x8var_mmx):
+global sym(vpx_get8x8var_mmx) PRIVATE
+sym(vpx_get8x8var_mmx):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
@@ -310,8 +310,8 @@ sym(vp8_get8x8var_mmx):
-;unsigned int
-;vp8_get4x4var_mmx
+;void
+;vpx_get4x4var_mmx
;(
; unsigned char *src_ptr,
; int source_stride,
@@ -320,8 +320,8 @@ sym(vp8_get8x8var_mmx):
; unsigned int *SSE,
; int *Sum
;)
-global sym(vp8_get4x4var_mmx) PRIVATE
-sym(vp8_get4x4var_mmx):
+global sym(vpx_get4x4var_mmx) PRIVATE
+sym(vpx_get4x4var_mmx):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
@@ -422,430 +422,3 @@ sym(vp8_get4x4var_mmx):
UNSHADOW_ARGS
pop rbp
ret
-
-
-
-;unsigned int
-;vp8_get4x4sse_cs_mmx
-;(
-; unsigned char *src_ptr,
-; int source_stride,
-; unsigned char *ref_ptr,
-; int recon_stride
-;)
-global sym(vp8_get4x4sse_cs_mmx) PRIVATE
-sym(vp8_get4x4sse_cs_mmx):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 4
- push rsi
- push rdi
- push rbx
- ; end prolog
-
-
- pxor mm6, mm6 ; Blank mmx7
- pxor mm7, mm7 ; Blank mmx7
-
- mov rax, arg(0) ;[src_ptr] ; Load base addresses
- mov rbx, arg(2) ;[ref_ptr]
- movsxd rcx, dword ptr arg(1) ;[source_stride]
- movsxd rdx, dword ptr arg(3) ;[recon_stride]
- ; Row 1
- movd mm0, [rax] ; Copy eight bytes to mm0
- movd mm1, [rbx] ; Copy eight bytes to mm1
- punpcklbw mm0, mm6 ; unpack to higher prrcision
- punpcklbw mm1, mm6
- psubsw mm0, mm1 ; A-B (low order) to MM0
- pmaddwd mm0, mm0 ; square and accumulate
- add rbx,rdx ; Inc pointer into ref data
- add rax,rcx ; Inc pointer into the new data
- movd mm1, [rbx] ; Copy eight bytes to mm1
- paddd mm7, mm0 ; accumulate in mm7
-
- ; Row 2
- movd mm0, [rax] ; Copy eight bytes to mm0
- punpcklbw mm0, mm6 ; unpack to higher prrcision
- punpcklbw mm1, mm6
- psubsw mm0, mm1 ; A-B (low order) to MM0
- pmaddwd mm0, mm0 ; square and accumulate
- add rbx,rdx ; Inc pointer into ref data
- add rax,rcx ; Inc pointer into the new data
- movd mm1, [rbx] ; Copy eight bytes to mm1
- paddd mm7, mm0 ; accumulate in mm7
-
- ; Row 3
- movd mm0, [rax] ; Copy eight bytes to mm0
- punpcklbw mm1, mm6
- punpcklbw mm0, mm6 ; unpack to higher prrcision
- psubsw mm0, mm1 ; A-B (low order) to MM0
-
- pmaddwd mm0, mm0 ; square and accumulate
- add rbx,rdx ; Inc pointer into ref data
- add rax,rcx ; Inc pointer into the new data
- movd mm1, [rbx] ; Copy eight bytes to mm1
- paddd mm7, mm0 ; accumulate in mm7
-
- ; Row 4
- movd mm0, [rax] ; Copy eight bytes to mm0
- punpcklbw mm0, mm6 ; unpack to higher prrcision
- punpcklbw mm1, mm6
- psubsw mm0, mm1 ; A-B (low order) to MM0
- pmaddwd mm0, mm0 ; square and accumulate
- paddd mm7, mm0 ; accumulate in mm7
-
- movq mm0, mm7 ;
- psrlq mm7, 32
-
- paddd mm0, mm7
- movq rax, mm0
-
-
- ; begin epilog
- pop rbx
- pop rdi
- pop rsi
- UNSHADOW_ARGS
- pop rbp
- ret
-
-%define mmx_filter_shift 7
-
-;void vp8_filter_block2d_bil4x4_var_mmx
-;(
-; unsigned char *ref_ptr,
-; int ref_pixels_per_line,
-; unsigned char *src_ptr,
-; int src_pixels_per_line,
-; unsigned short *HFilter,
-; unsigned short *VFilter,
-; int *sum,
-; unsigned int *sumsquared
-;)
-global sym(vp8_filter_block2d_bil4x4_var_mmx) PRIVATE
-sym(vp8_filter_block2d_bil4x4_var_mmx):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 8
- GET_GOT rbx
- push rsi
- push rdi
- sub rsp, 16
- ; end prolog
-
-
- pxor mm6, mm6 ;
- pxor mm7, mm7 ;
-
- mov rax, arg(4) ;HFilter ;
- mov rdx, arg(5) ;VFilter ;
-
- mov rsi, arg(0) ;ref_ptr ;
- mov rdi, arg(2) ;src_ptr ;
-
- mov rcx, 4 ;
- pxor mm0, mm0 ;
-
- movd mm1, [rsi] ;
- movd mm3, [rsi+1] ;
-
- punpcklbw mm1, mm0 ;
- pmullw mm1, [rax] ;
-
- punpcklbw mm3, mm0 ;
- pmullw mm3, [rax+8] ;
-
- paddw mm1, mm3 ;
- paddw mm1, [GLOBAL(mmx_bi_rd)] ;
-
- psraw mm1, mmx_filter_shift ;
- movq mm5, mm1
-
-%if ABI_IS_32BIT
- add rsi, dword ptr arg(1) ;ref_pixels_per_line ;
-%else
- movsxd r8, dword ptr arg(1) ;ref_pixels_per_line ;
- add rsi, r8
-%endif
-
-.filter_block2d_bil4x4_var_mmx_loop:
-
- movd mm1, [rsi] ;
- movd mm3, [rsi+1] ;
-
- punpcklbw mm1, mm0 ;
- pmullw mm1, [rax] ;
-
- punpcklbw mm3, mm0 ;
- pmullw mm3, [rax+8] ;
-
- paddw mm1, mm3 ;
- paddw mm1, [GLOBAL(mmx_bi_rd)] ;
-
- psraw mm1, mmx_filter_shift ;
- movq mm3, mm5 ;
-
- movq mm5, mm1 ;
- pmullw mm3, [rdx] ;
-
- pmullw mm1, [rdx+8] ;
- paddw mm1, mm3 ;
-
-
- paddw mm1, [GLOBAL(mmx_bi_rd)] ;
- psraw mm1, mmx_filter_shift ;
-
- movd mm3, [rdi] ;
- punpcklbw mm3, mm0 ;
-
- psubw mm1, mm3 ;
- paddw mm6, mm1 ;
-
- pmaddwd mm1, mm1 ;
- paddd mm7, mm1 ;
-
-%if ABI_IS_32BIT
- add rsi, dword ptr arg(1) ;ref_pixels_per_line ;
- add rdi, dword ptr arg(3) ;src_pixels_per_line ;
-%else
- movsxd r8, dword ptr arg(1) ;ref_pixels_per_line
- movsxd r9, dword ptr arg(3) ;src_pixels_per_line
- add rsi, r8
- add rdi, r9
-%endif
- sub rcx, 1 ;
- jnz .filter_block2d_bil4x4_var_mmx_loop ;
-
-
- pxor mm3, mm3 ;
- pxor mm2, mm2 ;
-
- punpcklwd mm2, mm6 ;
- punpckhwd mm3, mm6 ;
-
- paddd mm2, mm3 ;
- movq mm6, mm2 ;
-
- psrlq mm6, 32 ;
- paddd mm2, mm6 ;
-
- psrad mm2, 16 ;
- movq mm4, mm7 ;
-
- psrlq mm4, 32 ;
- paddd mm4, mm7 ;
-
- mov rdi, arg(6) ;sum
- mov rsi, arg(7) ;sumsquared
-
- movd dword ptr [rdi], mm2 ;
- movd dword ptr [rsi], mm4 ;
-
-
-
- ; begin epilog
- add rsp, 16
- pop rdi
- pop rsi
- RESTORE_GOT
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
-
-
-;void vp8_filter_block2d_bil_var_mmx
-;(
-; unsigned char *ref_ptr,
-; int ref_pixels_per_line,
-; unsigned char *src_ptr,
-; int src_pixels_per_line,
-; unsigned int Height,
-; unsigned short *HFilter,
-; unsigned short *VFilter,
-; int *sum,
-; unsigned int *sumsquared
-;)
-global sym(vp8_filter_block2d_bil_var_mmx) PRIVATE
-sym(vp8_filter_block2d_bil_var_mmx):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 9
- GET_GOT rbx
- push rsi
- push rdi
- sub rsp, 16
- ; end prolog
-
- pxor mm6, mm6 ;
- pxor mm7, mm7 ;
- mov rax, arg(5) ;HFilter ;
-
- mov rdx, arg(6) ;VFilter ;
- mov rsi, arg(0) ;ref_ptr ;
-
- mov rdi, arg(2) ;src_ptr ;
- movsxd rcx, dword ptr arg(4) ;Height ;
-
- pxor mm0, mm0 ;
- movq mm1, [rsi] ;
-
- movq mm3, [rsi+1] ;
- movq mm2, mm1 ;
-
- movq mm4, mm3 ;
- punpcklbw mm1, mm0 ;
-
- punpckhbw mm2, mm0 ;
- pmullw mm1, [rax] ;
-
- pmullw mm2, [rax] ;
- punpcklbw mm3, mm0 ;
-
- punpckhbw mm4, mm0 ;
- pmullw mm3, [rax+8] ;
-
- pmullw mm4, [rax+8] ;
- paddw mm1, mm3 ;
-
- paddw mm2, mm4 ;
- paddw mm1, [GLOBAL(mmx_bi_rd)] ;
-
- psraw mm1, mmx_filter_shift ;
- paddw mm2, [GLOBAL(mmx_bi_rd)] ;
-
- psraw mm2, mmx_filter_shift ;
- movq mm5, mm1
-
- packuswb mm5, mm2 ;
-%if ABI_IS_32BIT
- add rsi, dword ptr arg(1) ;ref_pixels_per_line
-%else
- movsxd r8, dword ptr arg(1) ;ref_pixels_per_line
- add rsi, r8
-%endif
-
-.filter_block2d_bil_var_mmx_loop:
-
- movq mm1, [rsi] ;
- movq mm3, [rsi+1] ;
-
- movq mm2, mm1 ;
- movq mm4, mm3 ;
-
- punpcklbw mm1, mm0 ;
- punpckhbw mm2, mm0 ;
-
- pmullw mm1, [rax] ;
- pmullw mm2, [rax] ;
-
- punpcklbw mm3, mm0 ;
- punpckhbw mm4, mm0 ;
-
- pmullw mm3, [rax+8] ;
- pmullw mm4, [rax+8] ;
-
- paddw mm1, mm3 ;
- paddw mm2, mm4 ;
-
- paddw mm1, [GLOBAL(mmx_bi_rd)] ;
- psraw mm1, mmx_filter_shift ;
-
- paddw mm2, [GLOBAL(mmx_bi_rd)] ;
- psraw mm2, mmx_filter_shift ;
-
- movq mm3, mm5 ;
- movq mm4, mm5 ;
-
- punpcklbw mm3, mm0 ;
- punpckhbw mm4, mm0 ;
-
- movq mm5, mm1 ;
- packuswb mm5, mm2 ;
-
- pmullw mm3, [rdx] ;
- pmullw mm4, [rdx] ;
-
- pmullw mm1, [rdx+8] ;
- pmullw mm2, [rdx+8] ;
-
- paddw mm1, mm3 ;
- paddw mm2, mm4 ;
-
- paddw mm1, [GLOBAL(mmx_bi_rd)] ;
- paddw mm2, [GLOBAL(mmx_bi_rd)] ;
-
- psraw mm1, mmx_filter_shift ;
- psraw mm2, mmx_filter_shift ;
-
- movq mm3, [rdi] ;
- movq mm4, mm3 ;
-
- punpcklbw mm3, mm0 ;
- punpckhbw mm4, mm0 ;
-
- psubw mm1, mm3 ;
- psubw mm2, mm4 ;
-
- paddw mm6, mm1 ;
- pmaddwd mm1, mm1 ;
-
- paddw mm6, mm2 ;
- pmaddwd mm2, mm2 ;
-
- paddd mm7, mm1 ;
- paddd mm7, mm2 ;
-
-%if ABI_IS_32BIT
- add rsi, dword ptr arg(1) ;ref_pixels_per_line ;
- add rdi, dword ptr arg(3) ;src_pixels_per_line ;
-%else
- movsxd r8, dword ptr arg(1) ;ref_pixels_per_line ;
- movsxd r9, dword ptr arg(3) ;src_pixels_per_line ;
- add rsi, r8
- add rdi, r9
-%endif
- sub rcx, 1 ;
- jnz .filter_block2d_bil_var_mmx_loop ;
-
-
- pxor mm3, mm3 ;
- pxor mm2, mm2 ;
-
- punpcklwd mm2, mm6 ;
- punpckhwd mm3, mm6 ;
-
- paddd mm2, mm3 ;
- movq mm6, mm2 ;
-
- psrlq mm6, 32 ;
- paddd mm2, mm6 ;
-
- psrad mm2, 16 ;
- movq mm4, mm7 ;
-
- psrlq mm4, 32 ;
- paddd mm4, mm7 ;
-
- mov rdi, arg(7) ;sum
- mov rsi, arg(8) ;sumsquared
-
- movd dword ptr [rdi], mm2 ;
- movd dword ptr [rsi], mm4 ;
-
- ; begin epilog
- add rsp, 16
- pop rdi
- pop rsi
- RESTORE_GOT
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
-SECTION_RODATA
-;short mmx_bi_rd[4] = { 64, 64, 64, 64};
-align 16
-mmx_bi_rd:
- times 4 dw 64
« no previous file with comments | « source/libvpx/vpx_dsp/x86/variance_impl_avx2.c ('k') | source/libvpx/vpx_dsp/x86/variance_mmx.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698